| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Lingua::SA; |
|
2
|
|
|
|
|
|
|
|
|
3
|
10
|
|
|
10
|
|
258620
|
use 5.008; |
|
|
10
|
|
|
|
|
42
|
|
|
|
10
|
|
|
|
|
445
|
|
|
4
|
10
|
|
|
10
|
|
64
|
use strict; |
|
|
10
|
|
|
|
|
27
|
|
|
|
10
|
|
|
|
|
374
|
|
|
5
|
10
|
|
|
10
|
|
70
|
use warnings; |
|
|
10
|
|
|
|
|
22
|
|
|
|
10
|
|
|
|
|
391
|
|
|
6
|
10
|
|
|
10
|
|
9396
|
use English qw{-no_match_vars}; |
|
|
10
|
|
|
|
|
45913
|
|
|
|
10
|
|
|
|
|
66
|
|
|
7
|
10
|
|
|
10
|
|
6147
|
use Carp; |
|
|
10
|
|
|
|
|
25
|
|
|
|
10
|
|
|
|
|
23965
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
require Exporter; |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# Items to export into callers namespace by default. Note: do not export |
|
14
|
|
|
|
|
|
|
# names by default without a very good reason. Use EXPORT_OK instead. |
|
15
|
|
|
|
|
|
|
# Do not simply export all your public functions/methods/constants. |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# This allows declaration use Lingua::SA ':all'; |
|
18
|
|
|
|
|
|
|
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK |
|
19
|
|
|
|
|
|
|
# will save memory. |
|
20
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( 'all' => [ qw( |
|
21
|
|
|
|
|
|
|
transliterate |
|
22
|
|
|
|
|
|
|
vibhakti |
|
23
|
|
|
|
|
|
|
sandhi |
|
24
|
|
|
|
|
|
|
) ] ); |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
our @EXPORT = qw( |
|
29
|
|
|
|
|
|
|
); |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
our $VERSION = '0.08'; |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Preloaded methods go here. |
|
34
|
|
|
|
|
|
|
########################################################### |
|
35
|
|
|
|
|
|
|
sub sandhi{ |
|
36
|
2303
|
|
|
2303
|
0
|
314455
|
my ($in)=@_; |
|
37
|
2303
|
|
|
|
|
5710
|
$in=~s/ \+ ([^\[])/$1/g; # replace + and surround spaces with nothing |
|
38
|
2303
|
|
|
|
|
4738
|
$in=~s/aa/A/g; # replace aa with A |
|
39
|
2303
|
|
|
|
|
3762
|
$in=~s/ii/I/g; # replace ii with I |
|
40
|
2303
|
|
|
|
|
3933
|
$in=~s/uu/U/g; # replace uu with U |
|
41
|
2303
|
|
|
|
|
3559
|
$in=~s/Ru/R/g; # replace Ru with R |
|
42
|
2303
|
|
|
|
|
6764
|
return $in; |
|
43
|
|
|
|
|
|
|
} |
|
44
|
|
|
|
|
|
|
########################################################### |
|
45
|
|
|
|
|
|
|
sub vibhakti { |
|
46
|
|
|
|
|
|
|
####### This is currently only for svaraanta (halant will be 8000+) |
|
47
|
|
|
|
|
|
|
# USAGE: my $response=vibhakti({naam=>$noun, vibhakti=>$vibhakti, |
|
48
|
|
|
|
|
|
|
# linga=>$linga, vachana=>$vachana}); |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# 2008-06-05 Fixed 3351 and 3361 from inaaH to inaH (v 0.06) |
|
51
|
|
|
|
|
|
|
|
|
52
|
467
|
|
|
467
|
0
|
9891
|
my ($arg_ref) = @_; |
|
53
|
|
|
|
|
|
|
|
|
54
|
467
|
100
|
|
|
|
2506
|
confess "Argument naam not passed to vibhakti()" if !defined $arg_ref->{naam}; |
|
55
|
466
|
100
|
|
|
|
2033
|
confess "Argument linga not passed to vibhakti()" if !defined $arg_ref->{linga}; |
|
56
|
465
|
100
|
|
|
|
1385
|
confess "Argument vibhakti not passed to vibhakti()" if !defined $arg_ref->{vibhakti}; |
|
57
|
464
|
100
|
|
|
|
1130
|
confess "Argument vachana not passed to vibhakti()" if !defined $arg_ref->{vachana}; |
|
58
|
|
|
|
|
|
|
|
|
59
|
463
|
|
|
|
|
1358
|
my ( $noun, $vibhakti, $linga, $vachana ) = |
|
60
|
|
|
|
|
|
|
( $arg_ref -> {naam}, $arg_ref -> {vibhakti}, $arg_ref -> {linga}, |
|
61
|
|
|
|
|
|
|
$arg_ref -> {vachana}); |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# The last character of noun is chopped to be aakaar |
|
64
|
|
|
|
|
|
|
# (what happens when halant nouns are included?) |
|
65
|
463
|
|
|
|
|
876
|
$noun = sandhi($noun); |
|
66
|
463
|
|
|
|
|
1062
|
my $aakaar = chop($noun); |
|
67
|
|
|
|
|
|
|
|
|
68
|
463
|
|
|
|
|
15328
|
$vibhakti = sandhi($vibhakti); |
|
69
|
463
|
|
|
|
|
854
|
$linga = sandhi($linga); |
|
70
|
|
|
|
|
|
|
|
|
71
|
463
|
|
|
|
|
3298
|
my %aakaar = qw(0 0 a 1 A 2 i 3 I 4 u 5 U 6 R 7); |
|
72
|
463
|
|
|
|
|
2402
|
my %linga = qw(puM 1 strI 2 napuMsaka 3 1 1 2 2 3 3); |
|
73
|
463
|
|
|
|
|
2094
|
my %vachana = qw(ekavachana 1 dvivachana 2 bahuvachana 3 1 1 2 2 3 3); |
|
74
|
463
|
|
|
|
|
5508
|
my %vibhakti = qw#prathamA 1 dvitIyA 2 tRtIyA 3 chaturthI 4 paJchamI 5 |
|
75
|
|
|
|
|
|
|
ShaShThI 6 saptamI 7 sambodhana 8 |
|
76
|
|
|
|
|
|
|
1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8#; |
|
77
|
|
|
|
|
|
|
|
|
78
|
463
|
100
|
|
|
|
1339
|
confess "Unsupported noun supplied to vibhakti(): $noun$aakaar ending in $aakaar" |
|
79
|
|
|
|
|
|
|
if !defined $aakaar{$aakaar}; |
|
80
|
462
|
100
|
|
|
|
1225
|
confess "Invalid linga $linga supplied to vibhakti()" |
|
81
|
|
|
|
|
|
|
if !defined $linga{$linga}; |
|
82
|
461
|
100
|
|
|
|
1255
|
confess "Invalid vibhakti $vibhakti supplied to vibhakti()" |
|
83
|
|
|
|
|
|
|
if !defined $vibhakti{$vibhakti}; |
|
84
|
460
|
100
|
|
|
|
1200
|
confess "Invalid vachana $vachana supplied to vibhakti()" |
|
85
|
|
|
|
|
|
|
if !defined $vachana{$vachana}; |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# coef for swarAnt nouns range from 1111 to 7373 |
|
88
|
|
|
|
|
|
|
# with 7 sets of 72 coefs posible (not all taken) |
|
89
|
459
|
|
|
|
|
2130
|
my $coef = |
|
90
|
|
|
|
|
|
|
$aakaar{$aakaar} * 1000 + |
|
91
|
|
|
|
|
|
|
$linga{$linga} * 100 + |
|
92
|
|
|
|
|
|
|
$vibhakti{$vibhakti} * 10 + |
|
93
|
|
|
|
|
|
|
$vachana{$vachana}; |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
#### 1000 through 7000 aakaar. 1=a 2=A 3=i 4=I 5=u 6=U 7=Ru |
|
96
|
|
|
|
|
|
|
##### 100 puM 200 strI 300 na |
|
97
|
|
|
|
|
|
|
###### 10 through 80 8 vibhakti (8th being sambodhan) |
|
98
|
|
|
|
|
|
|
####### 1 through 3 eka, dwi, bahuvachan |
|
99
|
|
|
|
|
|
|
## possibilities for nouns are in the following series: |
|
100
|
|
|
|
|
|
|
# Masculine:1100, 3100, 5100, 7100 # examples of 2100? |
|
101
|
|
|
|
|
|
|
# Feminine: 2200, 3200, 4200, 5200, 6200, 7200 |
|
102
|
|
|
|
|
|
|
# Neutar: 1300, 3300, 5300, 7300 |
|
103
|
|
|
|
|
|
|
# 2100, 4100 exist, but I do not know the examples. |
|
104
|
|
|
|
|
|
|
# not sure of 6100, 6300 |
|
105
|
|
|
|
|
|
|
# 1200, 2300, 4300 do not exist |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# These are the noun suffixes |
|
108
|
459
|
|
|
|
|
71861
|
my %ending = qw( |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
1111 aH 1112 au 1113 AH |
|
111
|
|
|
|
|
|
|
1121 am 1122 au 1123 An |
|
112
|
|
|
|
|
|
|
1131 ena 1132 Abhyaam 1133 aiH |
|
113
|
|
|
|
|
|
|
1141 Aya 1142 Abhyaam 1143 ebhyaH |
|
114
|
|
|
|
|
|
|
1151 At 1152 Abhyaam 1153 ebhyaH |
|
115
|
|
|
|
|
|
|
1161 asya 1162 ayoH 1163 Anaam |
|
116
|
|
|
|
|
|
|
1171 e 1172 ayoH 1173 eSu |
|
117
|
|
|
|
|
|
|
1181 a 1182 au 1183 AH |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
2111 AH 2112 au 2113 AH |
|
120
|
|
|
|
|
|
|
2121 Am 2122 au 2123 aH |
|
121
|
|
|
|
|
|
|
2131 A 2132 Abhyaam 2133 AbhiH |
|
122
|
|
|
|
|
|
|
2141 e 2142 Abhyaam 2143 AbhyaH |
|
123
|
|
|
|
|
|
|
2151 aH 2152 Abhyaam 2153 AbhyaH |
|
124
|
|
|
|
|
|
|
2161 aH 2162 oH 2163 Am |
|
125
|
|
|
|
|
|
|
2171 i 2172 oH 2173 Asu |
|
126
|
|
|
|
|
|
|
2181 Am 2182 au 2183 AH |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
3111 iH 3112 I 3113 ayaH |
|
129
|
|
|
|
|
|
|
3121 im 3122 I 3123 In |
|
130
|
|
|
|
|
|
|
3131 inaa 3132 ibhyaam 3133 ibhiH |
|
131
|
|
|
|
|
|
|
3141 aye 3142 ibhyaam 3143 ibhyaH |
|
132
|
|
|
|
|
|
|
3151 eH 3152 ibhyaam 3153 ibhyaH |
|
133
|
|
|
|
|
|
|
3161 eH 3162 yoH 3163 Inaam |
|
134
|
|
|
|
|
|
|
3171 au 3172 yoH 3173 iSu |
|
135
|
|
|
|
|
|
|
3181 e 3182 I 3183 ayaH |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
5111 uH 5112 U 5113 avaH |
|
138
|
|
|
|
|
|
|
5121 um 5122 U 5123 Un |
|
139
|
|
|
|
|
|
|
5131 unaa 5132 ubhyaam 5133 ubhiH |
|
140
|
|
|
|
|
|
|
5141 ave 5142 ubhyaam 5143 ubhyaH |
|
141
|
|
|
|
|
|
|
5151 oH 5152 ubhyaam 5153 ubhyaH |
|
142
|
|
|
|
|
|
|
5161 oH 5162 voH 5163 Unaam |
|
143
|
|
|
|
|
|
|
5171 au 5172 voH 5173 uSu |
|
144
|
|
|
|
|
|
|
5181 o 5182 U 5183 avaH |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
7111 A 7112 Arau 7113 AraH |
|
147
|
|
|
|
|
|
|
7121 Aram 7122 Arau 7123 RRn |
|
148
|
|
|
|
|
|
|
7131 raa 7132 Rbhyaam 7133 RbhiH |
|
149
|
|
|
|
|
|
|
7141 re 7142 Rbhyaam 7143 RbhyaH |
|
150
|
|
|
|
|
|
|
7151 uH 7152 Rbhyaam 7153 RbhyaH |
|
151
|
|
|
|
|
|
|
7161 uH 7162 roH 7163 RRNaam |
|
152
|
|
|
|
|
|
|
7171 ari 7172 roH 7173 RSu |
|
153
|
|
|
|
|
|
|
7181 aH|ar 7182 Arau 7183 AraH |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
2211 A 2212 e 2213 AH |
|
156
|
|
|
|
|
|
|
2221 Am 2222 e 2223 AH |
|
157
|
|
|
|
|
|
|
2231 ayaa 2232 Abhyaam 2233 AbhiH |
|
158
|
|
|
|
|
|
|
2241 Ayai 2242 Abhyaam 2243 AbhyaaH |
|
159
|
|
|
|
|
|
|
2251 AyaaH 2252 Abhyaam 2253 AbhyaaH |
|
160
|
|
|
|
|
|
|
2261 AyaaH 2262 ayoH 2263 Anaam |
|
161
|
|
|
|
|
|
|
2271 Ayaam 2272 ayoH 2273 Asu |
|
162
|
|
|
|
|
|
|
2281 e 2282 e 2283 AH |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
3211 iH 3212 I 3213 ayaH |
|
165
|
|
|
|
|
|
|
3221 im 3222 I 3223 IH |
|
166
|
|
|
|
|
|
|
3231 yaa 3232 ibhyaam 3233 ibhiH |
|
167
|
|
|
|
|
|
|
3241 yai|aye 3242 ibhyaam 3243 ibhyaH |
|
168
|
|
|
|
|
|
|
3251 yaaH|eH 3252 ibhyaam 3253 ibhyaH |
|
169
|
|
|
|
|
|
|
3261 yaaH|eH 3262 yoH 3263 Inaam |
|
170
|
|
|
|
|
|
|
3271 yaam|au 3272 yoH 3273 iSu |
|
171
|
|
|
|
|
|
|
3281 e 3282 I 3283 ayaH |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
4211 I 4212 yau 4213 yaH |
|
174
|
|
|
|
|
|
|
4221 Im 4222 yau 4223 IH |
|
175
|
|
|
|
|
|
|
4231 yaa 4232 Ibhyaam 4233 IbhiH |
|
176
|
|
|
|
|
|
|
4241 yai 4242 Ibhyaam 4243 IbhyaH |
|
177
|
|
|
|
|
|
|
4251 yaaH 4252 Ibhyaam 4253 IbhyaH |
|
178
|
|
|
|
|
|
|
4261 yaaH 4262 yoH 4263 Inaam |
|
179
|
|
|
|
|
|
|
4271 yaam 4272 yoH 4273 ISu |
|
180
|
|
|
|
|
|
|
4281 i 4282 yau 4283 yaH |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
5211 uH 5212 U 5213 avaH |
|
183
|
|
|
|
|
|
|
5221 um 5222 U 5223 UH |
|
184
|
|
|
|
|
|
|
5231 vaa 5232 ubhyaam 5233 ubhiH |
|
185
|
|
|
|
|
|
|
5241 ave|vai 5242 ubhyaam 5243 ubhyaH |
|
186
|
|
|
|
|
|
|
5251 oH|vaaH 5252 ubhyaam 5253 ubhyaH |
|
187
|
|
|
|
|
|
|
5261 oH|vaaH 5262 voH 5263 Unaam |
|
188
|
|
|
|
|
|
|
5271 au|vaam 5272 voH 5273 uSu |
|
189
|
|
|
|
|
|
|
5281 o 5282 U 5283 avaH |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
6211 UH 6212 vau 6213 vaH |
|
192
|
|
|
|
|
|
|
6221 Um 6222 vau 6223 UH |
|
193
|
|
|
|
|
|
|
6231 vaa 6232 Ubhyaam 6233 UbhiH |
|
194
|
|
|
|
|
|
|
6241 vai 6242 Ubhyaam 6243 UbhyaH |
|
195
|
|
|
|
|
|
|
6251 vaaH 6252 Ubhyaam 6253 UbhyaH |
|
196
|
|
|
|
|
|
|
6261 vaaH 6262 voH 6263 Unaam |
|
197
|
|
|
|
|
|
|
6271 vaam 6272 voH 6273 USu |
|
198
|
|
|
|
|
|
|
6281 u 6282 vau 6283 vaH |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
7211 A 7212 arau 7213 araH |
|
201
|
|
|
|
|
|
|
7221 aram 7222 arau 7223 RRH |
|
202
|
|
|
|
|
|
|
7231 raa 7232 Rbhyaam 7233 RbhiH |
|
203
|
|
|
|
|
|
|
7241 re 7242 Rbhyaam 7243 RbhyaH |
|
204
|
|
|
|
|
|
|
7251 uH 7252 Rbhyaam 7253 RbhyaH |
|
205
|
|
|
|
|
|
|
7261 uH 7262 roH 7263 RRNaam |
|
206
|
|
|
|
|
|
|
7271 ari 7272 roH 7273 RSu |
|
207
|
|
|
|
|
|
|
7281 aH|ar 7282 arau 7283 araH |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
1311 am 1312 e 1313 Ani |
|
210
|
|
|
|
|
|
|
1321 am 1322 e 1323 Ani |
|
211
|
|
|
|
|
|
|
1331 ena 1332 Abhyaam 1333 aiH |
|
212
|
|
|
|
|
|
|
1341 Aya 1342 Abhyaam 1343 ebhyaH |
|
213
|
|
|
|
|
|
|
1351 At 1352 Abhyaam 1353 ebhyaH |
|
214
|
|
|
|
|
|
|
1361 asya 1362 ayoH 1363 Anaam |
|
215
|
|
|
|
|
|
|
1371 e 1372 ayoH 1373 eSu |
|
216
|
|
|
|
|
|
|
1381 a 1382 e 1383 Ani |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
3311 i 3312 inI 3313 Ini |
|
219
|
|
|
|
|
|
|
3321 i 3322 inI 3323 Ini |
|
220
|
|
|
|
|
|
|
3331 inaa 3332 ibhyaam 3333 ibhiH |
|
221
|
|
|
|
|
|
|
3341 ine 3342 ibhyaam 3343 ibhyaH |
|
222
|
|
|
|
|
|
|
3351 inaH 3352 ibhyaam 3353 ibhyaH |
|
223
|
|
|
|
|
|
|
3361 inaH 3362 inoH 3363 Inaam |
|
224
|
|
|
|
|
|
|
3371 ini 3372 inoH 3373 iSu |
|
225
|
|
|
|
|
|
|
3381 i|e 3382 inI 3383 Ini |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
5311 u 5312 unI 5313 Uni |
|
228
|
|
|
|
|
|
|
5321 u 5322 unI 5323 Uni |
|
229
|
|
|
|
|
|
|
5331 unaa 5332 ubhyaam 5333 ubhiH |
|
230
|
|
|
|
|
|
|
5341 une 5342 ubhyaam 5343 ubhyaH |
|
231
|
|
|
|
|
|
|
5351 unaH 5352 ubhyaam 5353 ubhyaH |
|
232
|
|
|
|
|
|
|
5361 unaH 5362 unoH 5363 Unaam |
|
233
|
|
|
|
|
|
|
5371 uni 5372 unoH 5373 uSu |
|
234
|
|
|
|
|
|
|
5381 o|u 5382 unI 5383 Uni |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
7311 R 7312 RNI 7313 RRNi |
|
237
|
|
|
|
|
|
|
7321 R 7322 RNI 7323 RRNi |
|
238
|
|
|
|
|
|
|
7331 raa|RNA 7332 Rbhyaam 7333 RbhiH |
|
239
|
|
|
|
|
|
|
7341 re|RNe 7342 Rbhyaam 7343 RbhyaH |
|
240
|
|
|
|
|
|
|
7351 uH|RNaH 7352 Rbhyaam 7353 RbhyaH |
|
241
|
|
|
|
|
|
|
7361 uH|RNaH 7362 roH|RNoH 7363 RRNaam |
|
242
|
|
|
|
|
|
|
7371 ari|RNi 7372 roH|RNoH 7373 RSu |
|
243
|
|
|
|
|
|
|
7381 aH|R 7382 RNI 7383 RRNi |
|
244
|
|
|
|
|
|
|
); |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# Is 3263 above dirgha as stated? ## Yes, it is |
|
247
|
|
|
|
|
|
|
|
|
248
|
459
|
100
|
|
|
|
1681
|
confess "$linga nouns ending in $aakaar not supported" |
|
249
|
|
|
|
|
|
|
if !defined $ending{$coef}; |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
### This part can cater to irregular nouns |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
# ambA, akkA, allA have a-kaaraant sambodhana |
|
254
|
458
|
100
|
100
|
|
|
3808
|
if($noun eq "amb" or $noun eq "akk" or $noun eq "all"){ |
|
|
|
|
100
|
|
|
|
|
|
255
|
3
|
|
|
|
|
6
|
$ending{2281} = 'a'; |
|
256
|
|
|
|
|
|
|
} |
|
257
|
|
|
|
|
|
|
|
|
258
|
458
|
|
|
|
|
1128
|
my $endcoef = $ending{$coef}; |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# Natva results in converting n to N when an r, R, RR, or S are encountered in |
|
261
|
|
|
|
|
|
|
# the noun, and the only letters between there and end are what are in Natva |
|
262
|
|
|
|
|
|
|
# here (h y v k kh g gh ~N p ph b bh m and a pratyay (aa~N - not implemented) |
|
263
|
|
|
|
|
|
|
# Additionally, n can not be halant |
|
264
|
|
|
|
|
|
|
|
|
265
|
458
|
|
|
|
|
767
|
my $Natva = "h|y|v|k(h)?|g(h)?|G|p(h)?|b(h)?|m"; |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
# vowel is as defined in split_word |
|
268
|
458
|
|
|
|
|
4901
|
my $vowel = "(A|H|I|M|R(R|u)?|U|a(a|i|u)?|i(i)?|e|lR|o(M)?|u(u)?|\\:|\\|(\\|)?)"; |
|
269
|
|
|
|
|
|
|
|
|
270
|
458
|
|
|
|
|
516
|
my $inflected; |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
### This part can be expanded to include exceptions/options |
|
273
|
|
|
|
|
|
|
|
|
274
|
458
|
100
|
|
|
|
4144
|
if ($noun =~ m/[rRS][$Natva|$vowel]*$/ ) { |
|
275
|
120
|
|
|
|
|
392
|
$endcoef =~ s/n([a-zA-Z])/N$1/; |
|
276
|
|
|
|
|
|
|
} |
|
277
|
458
|
100
|
|
|
|
1938
|
if ( $endcoef =~ m/\|/ ) { |
|
278
|
36
|
|
|
|
|
334
|
my @foo = split( /\|/, $endcoef ); |
|
279
|
36
|
|
|
|
|
88
|
$inflected = "$noun + $foo[0]"; |
|
280
|
36
|
|
|
|
|
525
|
for my $counter ( 1 .. $#foo ) { |
|
281
|
36
|
|
|
|
|
420
|
$inflected.= " | $noun + $foo[$counter]"; |
|
282
|
|
|
|
|
|
|
} |
|
283
|
|
|
|
|
|
|
} |
|
284
|
|
|
|
|
|
|
else { |
|
285
|
422
|
|
|
|
|
1149
|
$inflected = "$noun + $endcoef"; |
|
286
|
|
|
|
|
|
|
} |
|
287
|
|
|
|
|
|
|
## if sambodhan, prepend he |
|
288
|
458
|
100
|
|
|
|
1312
|
if ( $coef % 100 > 80 ) { |
|
289
|
47
|
100
|
|
|
|
185
|
if ( $endcoef =~ m/\|/ ) { |
|
290
|
5
|
|
|
|
|
11
|
$inflected = "he \[ $inflected ]"; |
|
291
|
|
|
|
|
|
|
} |
|
292
|
|
|
|
|
|
|
else { |
|
293
|
42
|
|
|
|
|
79
|
$inflected = "he $inflected"; |
|
294
|
|
|
|
|
|
|
} |
|
295
|
|
|
|
|
|
|
} |
|
296
|
458
|
|
|
|
|
34780
|
return $inflected; |
|
297
|
|
|
|
|
|
|
} ## end sub vibhakti |
|
298
|
|
|
|
|
|
|
############################### |
|
299
|
|
|
|
|
|
|
sub transliterate { |
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
# Takes a string as input. Separate it into words. |
|
302
|
|
|
|
|
|
|
# Splits each word into syllables, and for each syllable appends its |
|
303
|
|
|
|
|
|
|
# unicode to an array that is finally flattened and returned |
|
304
|
|
|
|
|
|
|
|
|
305
|
23
|
|
|
23
|
0
|
15347
|
my ($english) = @_; |
|
306
|
23
|
|
|
|
|
40
|
my @transliterated; |
|
307
|
23
|
|
|
|
|
75
|
my @x = split( /\s+/, $english ); # splt input string in to words |
|
308
|
23
|
|
|
|
|
47
|
for my $x (@x) { # get unicoded syllables for each word |
|
309
|
23
|
|
|
|
|
56
|
push( @transliterated, map( match_code($_), split_word($x) ), " " ); |
|
310
|
|
|
|
|
|
|
} |
|
311
|
23
|
|
|
|
|
112
|
return join( "", @transliterated ); # flatten the array before returning |
|
312
|
|
|
|
|
|
|
} |
|
313
|
|
|
|
|
|
|
############################### |
|
314
|
|
|
|
|
|
|
sub match_code { |
|
315
|
36
|
|
|
36
|
0
|
57
|
my ($syllable_mcc) = @_; |
|
316
|
36
|
|
|
|
|
1229
|
my %letter_codes = ( |
|
317
|
|
|
|
|
|
|
"~a", "अ", "~aa", "आ", "~A", "आ", |
|
318
|
|
|
|
|
|
|
"~i", "इ", "~ii", "ई", "~uu", "ऊ", |
|
319
|
|
|
|
|
|
|
"ii", "ी", "~I", "ई", "~u", "उ", |
|
320
|
|
|
|
|
|
|
"~U", "ऊ", "~R", "ऋ", "~Ru", "ऋ", |
|
321
|
|
|
|
|
|
|
"~lR", "ऌ", "~RR", "ॠ", "~e", "ए", |
|
322
|
|
|
|
|
|
|
"~ai", "ऐ", "~o", "ओ", "~au", "औ", |
|
323
|
|
|
|
|
|
|
"a", "", "aa", "ा", "A", "ा", |
|
324
|
|
|
|
|
|
|
"i", "ि", "I", "ी", "u", "ु", |
|
325
|
|
|
|
|
|
|
"uu", "ू", "R", "ृ", "lR", "ॢ", |
|
326
|
|
|
|
|
|
|
"e", "े", "ai", "ै", |
|
327
|
|
|
|
|
|
|
"U", "ू", "R", "ृ", "Ru", "ृ", |
|
328
|
|
|
|
|
|
|
"RR", "ॄ", "o", "ो", "au", "ौ", |
|
329
|
|
|
|
|
|
|
"k", "क", "kh", "ख", "g", "ग", |
|
330
|
|
|
|
|
|
|
"gh", "घ", "G", "ङ", "c", "च", |
|
331
|
|
|
|
|
|
|
"ch", "च", "C", "छ", "Ch", "छ", |
|
332
|
|
|
|
|
|
|
"j", "ज", "jh", "झ", "J", "ञ", |
|
333
|
|
|
|
|
|
|
"T", "ट", "Th", "ठ", "D", "ड", |
|
334
|
|
|
|
|
|
|
"Dh", "ढ", "N", "ण", "t", "त", |
|
335
|
|
|
|
|
|
|
"th", "थ", "d", "द", "dh", "ध", |
|
336
|
|
|
|
|
|
|
"n", "न", "p", "प", "ph", "फ", |
|
337
|
|
|
|
|
|
|
"b", "ब", "bh", "भ", "m", "म", |
|
338
|
|
|
|
|
|
|
"y", "य", "r", "र", "l", "ल", |
|
339
|
|
|
|
|
|
|
"L", "ळ", |
|
340
|
|
|
|
|
|
|
"v", "व", "z", "श", "sh", "श", |
|
341
|
|
|
|
|
|
|
"S", "ष", "Sh", "ष", "s", "स", |
|
342
|
|
|
|
|
|
|
"h", "ह", "H", "ः", ":", "ः", |
|
343
|
|
|
|
|
|
|
"M", "ं", "|", "।", "||", "॥", |
|
344
|
|
|
|
|
|
|
"oM", "ॐ", "~H", "ः", "~:", "ः", |
|
345
|
|
|
|
|
|
|
"~M", "ं", "~|", "।", "~||", "॥", |
|
346
|
|
|
|
|
|
|
"\$", "ऽ", "^", "॑", "_", "॒", |
|
347
|
|
|
|
|
|
|
"`", "॓", "'", "॔", "\@", "॰", |
|
348
|
|
|
|
|
|
|
"~oM", "ॐ", "*", "्", "CB", "ँ", |
|
349
|
|
|
|
|
|
|
); |
|
350
|
|
|
|
|
|
|
# RR 2400 lRR 2401 _lR 2402 _lRR 2403 chandra-bindu 2305 |
|
351
|
36
|
50
|
|
|
|
96
|
if ( defined $letter_codes{$syllable_mcc} ) { |
|
352
|
36
|
|
|
|
|
585
|
return $letter_codes{$syllable_mcc}; |
|
353
|
|
|
|
|
|
|
} |
|
354
|
|
|
|
|
|
|
else { |
|
355
|
0
|
|
|
|
|
0
|
return $syllable_mcc; |
|
356
|
|
|
|
|
|
|
} |
|
357
|
|
|
|
|
|
|
} ## end sub match_code |
|
358
|
|
|
|
|
|
|
######################################## |
|
359
|
|
|
|
|
|
|
sub split_word { |
|
360
|
23
|
|
|
23
|
0
|
34
|
my ($word) = @_; |
|
361
|
|
|
|
|
|
|
# vowels is copied as is in vibhakti |
|
362
|
23
|
|
|
|
|
37
|
my $vowels = "(A|H|I|M|R(R|u)?|U|a(a|i|u)?|i(i)?|e|lR|o(M)?|u(u)?|\\:|\\|(\\|)?)"; |
|
363
|
23
|
|
|
|
|
32
|
my $consonants = |
|
364
|
|
|
|
|
|
|
"(C(h|B)?|D(h)?|G|J|N|S(h)?|T(h)?|b(h)?|c(h)?|d(h)?|g(h)?|h|j(h)?|k(h)?|l|m|n|p(h)?|r|s(h)?|t(h)?|v|y|z|L)"; |
|
365
|
23
|
|
|
|
|
27
|
my @syllables; |
|
366
|
23
|
|
|
|
|
32
|
my $vowel_start_p = 1; |
|
367
|
23
|
|
|
|
|
28
|
my $matched; |
|
368
|
|
|
|
|
|
|
my $index; |
|
369
|
23
|
|
|
|
|
56
|
while ($word) { # begin out |
|
370
|
34
|
100
|
|
|
|
343
|
unless ( $word =~ m/$vowels/ ) { $index = length($word); } |
|
|
2
|
|
|
|
|
4
|
|
|
371
|
32
|
|
|
|
|
138
|
else { $index = length($`); } |
|
372
|
34
|
100
|
|
|
|
73
|
if ( $index == 0 ) { # begin 3A |
|
373
|
21
|
|
|
|
|
41
|
$matched = $1; |
|
374
|
21
|
100
|
|
|
|
37
|
if ($vowel_start_p) { # begin 0A |
|
375
|
10
|
|
|
|
|
26
|
push( @syllables, "~$matched" ); |
|
376
|
|
|
|
|
|
|
} # end 0A |
|
377
|
|
|
|
|
|
|
else { # begin 0B |
|
378
|
11
|
|
|
|
|
20
|
push( @syllables, $matched ); |
|
379
|
|
|
|
|
|
|
} # end 0B |
|
380
|
21
|
|
|
|
|
25
|
$vowel_start_p = 1; |
|
381
|
21
|
|
|
|
|
313
|
$word = substr( $word, length($matched) ); |
|
382
|
|
|
|
|
|
|
} # end 3A |
|
383
|
|
|
|
|
|
|
else { # begin 3B |
|
384
|
13
|
50
|
|
|
|
170
|
unless ( $word =~ m/$consonants/ ) { $index = length($word); } |
|
|
0
|
|
|
|
|
0
|
|
|
385
|
13
|
|
|
|
|
24
|
else { $index = length($`); } |
|
386
|
13
|
50
|
|
|
|
33
|
if ( $index == 0 ) { # begin 2A |
|
387
|
13
|
|
|
|
|
28
|
$matched = $1; |
|
388
|
13
|
|
|
|
|
23
|
push( @syllables, $matched ); |
|
389
|
13
|
|
|
|
|
18
|
$vowel_start_p = 0; |
|
390
|
13
|
|
|
|
|
27
|
$word = substr( $word, length($matched) ); |
|
391
|
13
|
100
|
|
|
|
118
|
unless ( $word =~ m/$vowels/ ) { $index = length($word); } |
|
|
2
|
|
|
|
|
4
|
|
|
392
|
11
|
|
|
|
|
23
|
else { $index = length($`); } |
|
393
|
13
|
100
|
66
|
|
|
109
|
if ( $index or length($word) == 0 ) { # begin 1A |
|
394
|
2
|
|
|
|
|
8
|
push( @syllables, "*" ); |
|
395
|
|
|
|
|
|
|
} # end 1A |
|
396
|
|
|
|
|
|
|
else { # begin 1B |
|
397
|
|
|
|
|
|
|
; |
|
398
|
|
|
|
|
|
|
} # end 1B |
|
399
|
|
|
|
|
|
|
} # end 2A |
|
400
|
|
|
|
|
|
|
else { # begin 2B |
|
401
|
0
|
|
|
|
|
0
|
push( @syllables, substr( $word, 0, 1 ) ); |
|
402
|
0
|
|
|
|
|
0
|
$word = substr( $word, 1 ); |
|
403
|
|
|
|
|
|
|
} # end 2B |
|
404
|
|
|
|
|
|
|
} # end 3B |
|
405
|
|
|
|
|
|
|
} # end out |
|
406
|
23
|
|
|
|
|
103
|
return @syllables; |
|
407
|
|
|
|
|
|
|
} ## end sub split_word |
|
408
|
|
|
|
|
|
|
########################### |
|
409
|
|
|
|
|
|
|
1; |
|
410
|
|
|
|
|
|
|
__END__ |