line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::Stem::Patch::EO; |
2
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
90987
|
use v5.8.1; |
|
4
|
|
|
|
|
12
|
|
|
4
|
|
|
|
|
157
|
|
4
|
4
|
|
|
4
|
|
19
|
use utf8; |
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
28
|
|
5
|
4
|
|
|
4
|
|
87
|
use strict; |
|
4
|
|
|
|
|
5
|
|
|
4
|
|
|
|
|
132
|
|
6
|
4
|
|
|
4
|
|
15
|
use warnings; |
|
4
|
|
|
|
|
5
|
|
|
4
|
|
|
|
|
146
|
|
7
|
4
|
|
|
4
|
|
1989
|
use parent 'Exporter'; |
|
4
|
|
|
|
|
1175
|
|
|
4
|
|
|
|
|
19
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.06'; |
10
|
|
|
|
|
|
|
our @EXPORT_OK = qw( stem stem_eo stem_aggressive stem_eo_aggressive ); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
*stem_eo = \&stem; |
13
|
|
|
|
|
|
|
*stem_eo_aggressive = \&stem_aggressive; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
my %protect = ( |
16
|
|
|
|
|
|
|
correlative => { map { $_ => 1 } map { |
17
|
|
|
|
|
|
|
my $start = $_; |
18
|
|
|
|
|
|
|
map { $start . $_ } qw( a al am e el es o om u ) |
19
|
|
|
|
|
|
|
} qw( ki ti i ĉi neni ) }, |
20
|
|
|
|
|
|
|
root => { map { $_ => 1 } qw( |
21
|
|
|
|
|
|
|
ĉar ĉi ĉu kaj ke la minus plus se |
22
|
|
|
|
|
|
|
ĉe da de el ekster en ĝis je kun na po pri pro sen tra |
23
|
|
|
|
|
|
|
ajn do ja jen ju ne pli tamen tre tro |
24
|
|
|
|
|
|
|
ci ĝi ili li mi ni oni ri si ŝi ŝli vi |
25
|
|
|
|
|
|
|
unu du tri kvin |
26
|
|
|
|
|
|
|
ĵus nun plu tuj |
27
|
|
|
|
|
|
|
amen bis boj fi ha he ho hu hura nu ve |
28
|
|
|
|
|
|
|
) }, |
29
|
|
|
|
|
|
|
simple => { map { my $root = $_; map { $root . $_ => 1 } qw( a e i o ) } qw( |
30
|
|
|
|
|
|
|
abrikot absint arogant artrit azot balustrad bant bat biskvit blat boat |
31
|
|
|
|
|
|
|
bot briliant cit ĉokolad dat degrad delikat diamant diskont dorlot dot |
32
|
|
|
|
|
|
|
ekscit elefant ermit etat evit flat font frat front frot gad gant genot |
33
|
|
|
|
|
|
|
glad glat glit grad granat granit grat grenad grot hepat hipokrit hont |
34
|
|
|
|
|
|
|
horizont imit incit iniciat intermit invit kalikot kamlot kant kapot |
35
|
|
|
|
|
|
|
karot kat klimat komitat kompat konfit konsonant konstant konstat |
36
|
|
|
|
|
|
|
kontant kot krad kravat kvant kvit lad lekant leŭtenant limonad lit lot |
37
|
|
|
|
|
|
|
markot marmot mat medit merit milit miozot monat mont muskat not oblat |
38
|
|
|
|
|
|
|
palat parad parazit pat perlamot pilot pint pirit plad plant plat plot |
39
|
|
|
|
|
|
|
pont pot predikat privat profit rabat rabot rad rakont rat renkont rilat |
40
|
|
|
|
|
|
|
rot sabat salat sat ŝat skarlat soldat spat spirit spit sprit stat ŝtat |
41
|
|
|
|
|
|
|
strat subit sublimat svat ŝvit terebint tint trikot trot universitat |
42
|
|
|
|
|
|
|
vant vat vizit volont zenit |
43
|
|
|
|
|
|
|
almilit bofrat ciferplat esperant malŝat manplat |
44
|
|
|
|
|
|
|
) }, |
45
|
|
|
|
|
|
|
); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub stem { |
48
|
4
|
|
|
4
|
0
|
29
|
my $word = lc shift; |
|
4
|
|
|
676
|
|
6
|
|
|
4
|
|
|
|
|
50
|
|
|
676
|
|
|
|
|
98414
|
|
49
|
|
|
|
|
|
|
|
50
|
676
|
|
|
|
|
78493
|
for ($word) { |
51
|
|
|
|
|
|
|
# standalone roots |
52
|
676
|
100
|
|
|
|
1994
|
last if $protect{root}{$word}; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# l’ l' → la |
55
|
546
|
100
|
|
|
|
1987
|
last if s{ (?<= ^ l ) [’'] $}{a}x; |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# un’ un' unuj → unu |
58
|
542
|
100
|
|
|
|
1159
|
last if s{ (?<= ^ un ) [’'] $}{u}x; |
59
|
538
|
100
|
|
|
|
1105
|
last if s{ (?<= ^ unu ) j $}{}x; |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# -’ -' → -o |
62
|
536
|
|
|
|
|
964
|
s{ [’'] $}{o}x; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# ’st- 'st- → est- |
65
|
536
|
|
|
|
|
666
|
s{^ [’'] (?= st (?: [aiou] s | [iu] ) $ ) }{e}x; |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# nouns, adjectives, -u correlatives: |
68
|
|
|
|
|
|
|
# -oj -on -ojn → o |
69
|
|
|
|
|
|
|
# -aj -an -ajn → a |
70
|
|
|
|
|
|
|
# -uj -un -ujn → u |
71
|
536
|
|
|
|
|
1253
|
s{ (?<= [aou] ) (?: [jn] | jn ) $}{}x; |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# correlatives: -en → -e |
74
|
536
|
|
|
|
|
698
|
s{^ ( (?: [ĉkt] | nen )? ie ) n $}{$1}x; |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# correlative roots |
77
|
536
|
100
|
|
|
|
1332
|
last if $protect{correlative}{$word}; |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# accusative pronouns: -in → -i |
80
|
346
|
100
|
|
|
|
653
|
last if s{ (?<= i ) n $}{}x; |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# accusative adverbs: -en → -o |
83
|
320
|
|
|
|
|
297
|
s{ en $}{o}x; |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# verbs: -is -as -os -us -u → -i |
86
|
320
|
|
|
|
|
632
|
s{ (?: [aiou] s | u ) $}{i}x; |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# lexical aspect: ek- el- |
89
|
320
|
|
|
|
|
304
|
s{^ ek (?! scit ) }{}x; |
90
|
320
|
|
|
|
|
272
|
s{^ el (?! efant ) }{}x; |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# simple words: root plus single suffix |
93
|
320
|
100
|
|
|
|
628
|
last if $protect{simple}{$word}; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# imperfective verbs & action nouns: -adi -ado → -i |
96
|
312
|
100
|
|
|
|
551
|
last if s{ ad [io] $}{i}x; |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# compound verbs: |
99
|
|
|
|
|
|
|
# -inti -anti -onti -iti -ati -oti → -i |
100
|
|
|
|
|
|
|
# -inte -ante -onte -ite -ate -ote → -i |
101
|
|
|
|
|
|
|
# -inta -anta -onta -ita -ata -ota → -i |
102
|
304
|
100
|
|
|
|
861
|
last if s{ (?: [aio] n? t ) [aei] $}{i}x; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# participle nouns: |
105
|
|
|
|
|
|
|
# -into -anto -onto → -anto |
106
|
|
|
|
|
|
|
# -ito -ato -oto → -ato |
107
|
172
|
100
|
|
|
|
492
|
last if s{ [aio] ( n? ) to $}{a$1to}x; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
676
|
|
|
|
|
1807
|
return $word; |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
sub stem_aggressive { |
114
|
332
|
|
|
332
|
0
|
627
|
my $word = stem(shift); |
115
|
332
|
|
|
|
|
339
|
my $copy = $word; |
116
|
|
|
|
|
|
|
|
117
|
332
|
|
|
|
|
373
|
for ($word) { |
118
|
|
|
|
|
|
|
# protected words |
119
|
332
|
100
|
100
|
|
|
1371
|
last if $protect{root}{$word} |
120
|
|
|
|
|
|
|
|| $protect{correlative}{$word}; |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# remove final suffix |
123
|
155
|
|
|
|
|
295
|
s{ [aeio] $}{}x; |
124
|
|
|
|
|
|
|
|
125
|
155
|
100
|
|
|
|
266
|
last if $protect{simple}{$copy}; |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
# remove suffix for participle nouns: |
128
|
|
|
|
|
|
|
# -int- -ant- -ont- -it- -at- -ot- |
129
|
151
|
|
|
|
|
248
|
s{ [aio] n? t $}{}x; |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
|
132
|
332
|
|
|
|
|
1173
|
return $word; |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
1; |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
__END__ |