line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Hyphen::TR; |
2
|
2
|
|
|
2
|
|
20973
|
use 5.008001; |
|
2
|
|
|
|
|
6
|
|
3
|
2
|
|
|
2
|
|
9
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
42
|
|
4
|
2
|
|
|
2
|
|
19
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
66
|
|
5
|
2
|
|
|
2
|
|
871
|
use utf8; # source contains Turkish letters |
|
2
|
|
|
|
|
16
|
|
|
2
|
|
|
|
|
16
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our $VERSION = "0.01"; |
8
|
|
|
|
|
|
|
|
9
|
2
|
|
|
2
|
|
94
|
use base 'Text::Hyphen'; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
1635
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=encoding utf-8 |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Text::Hyphen::TR - determine positions for hyphens inside Turkish words |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 SYNOPSIS |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
This module is an implementation of the Knuth-Liang hyphenation algorithm |
20
|
|
|
|
|
|
|
for Turkish text using patterns from the hyph-utf8 TeX package. |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
use Text::Hyphen::TR; |
23
|
|
|
|
|
|
|
my $hyphenator = Text::Hyphen::TR->new; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
print $hyphenator->hyphenate($turkish_word_in_Unicode, '-'); |
26
|
|
|
|
|
|
|
# prints hyphenated with dashes |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 EXPORT |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
See L<Text::Hyphen> for the interface documentation. |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
This module only provides Turkish patterns. |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=cut |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
sub _PATTERNS { |
37
|
1
|
|
|
1
|
|
123
|
return [qw( |
38
|
|
|
|
|
|
|
2a1 2â1 2e1 2ı1 2i1 2î1 2o1 2ö1 2u1 2ü1 2û1 1b1 1c1 1ç1 1d1 1f1 1g1 |
39
|
|
|
|
|
|
|
1ğ1 1h1 1j1 1k1 1l1 1m1 1n1 1p1 1r1 1s1 1ş1 1t1 1v1 1y1 1z1 2e2cek. |
40
|
|
|
|
|
|
|
2bb 2bc 2bç 2bd 2bf 2bg 2bğ 2bh 2bj 2bk 2bl 2bm 2bn 2bp 2br 2bs 2bş |
41
|
|
|
|
|
|
|
2bt 2bv 2by 2bz 2cb 2cc 2cç 2cd 2cf 2cg 2cğ 2ch 2cj 2ck 2cl 2cm 2cn |
42
|
|
|
|
|
|
|
2cp 2cr 2cs 2cş 2ct 2cv 2cy 2cz 2çb 2çc 2çç 2çd 2çf 2çg 2çğ 2çh 2çj |
43
|
|
|
|
|
|
|
2çk 2çl 2çm 2çn 2çp 2çr 2çs 2çş 2çt 2çv 2çy 2çz 2db 2dc 2dç 2dd 2df |
44
|
|
|
|
|
|
|
2dg 2dğ 2dh 2dj 2dk 2dl 2dm 2dn 2dp 2dr 2ds 2dş 2dt 2dv 2dy 2dz 2fb |
45
|
|
|
|
|
|
|
2fc 2fç 2fd 2ff 2fg 2fğ 2fh 2fj 2fk 2fl 2fm 2fn 2fp 2fr 2fs 2fş 2ft |
46
|
|
|
|
|
|
|
2fv 2fy 2fz 2gb 2gc 2gç 2gd 2gf 2gg 2gğ 2gh 2gj 2gk 2gl 2gm 2gn 2gp |
47
|
|
|
|
|
|
|
2gr 2gs 2gş 2gt 2gv 2gy 2gz 2ğb 2ğc 2ğç 2ğd 2ğf 2ğg 2ğğ 2ğh 2ğj 2ğk |
48
|
|
|
|
|
|
|
2ğl 2ğm 2ğn 2ğp 2ğr 2ğs 2ğş 2ğt 2ğv 2ğy 2ğz 2hb 2hc 2hç 2hd 2hf 2hg |
49
|
|
|
|
|
|
|
2hğ 2hh 2hj 2hk 2hl 2hm 2hn 2hp 2hr 2hs 2hş 2ht 2hv 2hy 2hz 2jb 2jc |
50
|
|
|
|
|
|
|
2jç 2jd 2jf 2jg 2jğ 2jh 2jj 2jk 2jl 2jm 2jn 2jp 2jr 2js 2jş 2jt 2jv |
51
|
|
|
|
|
|
|
2jy 2jz 2kb 2kc 2kç 2kd 2kf 2kg 2kğ 2kh 2kj 2kk 2kl 2km 2kn 2kp 2kr |
52
|
|
|
|
|
|
|
2ks 2kş 2kt 2kv 2ky 2kz 2lb 2lc 2lç 2ld 2lf 2lg 2lğ 2lh 2lj 2lk 2ll |
53
|
|
|
|
|
|
|
2lm 2ln 2lp 2lr 2ls 2lş 2lt 2lv 2ly 2lz 2mb 2mc 2mç 2md 2mf 2mg 2mğ |
54
|
|
|
|
|
|
|
2mh 2mj 2mk 2ml 2mm 2mn 2mp 2mr 2ms 2mş 2mt 2mv 2my 2mz 2nb 2nc 2nç |
55
|
|
|
|
|
|
|
2nd 2nf 2ng 2nğ 2nh 2nj 2nk 2nl 2nm 2nn 2np 2nr 2ns 2nş 2nt 2nv 2ny |
56
|
|
|
|
|
|
|
2nz 2pb 2pc 2pç 2pd 2pf 2pg 2pğ 2ph 2pj 2pk 2pl 2pm 2pn 2pp 2pr 2ps |
57
|
|
|
|
|
|
|
2pş 2pt 2pv 2py 2pz 2rb 2rc 2rç 2rd 2rf 2rg 2rğ 2rh 2rj 2rk 2rl 2rm |
58
|
|
|
|
|
|
|
2rn 2rp 2rr 2rs 2rş 2rt 2rv 2ry 2rz 2sb 2sc 2sç 2sd 2sf 2sg 2sğ 2sh |
59
|
|
|
|
|
|
|
2sj 2sk 2sl 2sm 2sn 2sp 2sr 2ss 2sş 2st 2sv 2sy 2sz 2şb 2şc 2şç 2şd |
60
|
|
|
|
|
|
|
2şf 2şg 2şğ 2şh 2şj 2şk 2şl 2şm 2şn 2şp 2şr 2şs 2şş 2şt 2şv 2şy 2şz |
61
|
|
|
|
|
|
|
2tb 2tc 2tç 2td 2tf 2tg 2tğ 2th 2tj 2tk 2tl 2tm 2tn 2tp 2tr 2ts 2tş |
62
|
|
|
|
|
|
|
2tt 2tv 2ty 2tz 2vb 2vc 2vç 2vd 2vf 2vg 2vğ 2vh 2vj 2vk 2vl 2vm 2vn |
63
|
|
|
|
|
|
|
2vp 2vr 2vs 2vş 2vt 2vv 2vy 2vz 2yb 2yc 2yç 2yd 2yf 2yg 2yğ 2yh 2yj |
64
|
|
|
|
|
|
|
2yk 2yl 2ym 2yn 2yp 2yr 2ys 2yş 2yt 2yv 2yy 2yz 2zb 2zc 2zç 2zd 2zf |
65
|
|
|
|
|
|
|
2zg 2zğ 2zh 2zj 2zk 2zl 2zm 2zn 2zp 2zr 2zs 2zş 2zt 2zv 2zy 2zz a3a2 |
66
|
|
|
|
|
|
|
a3â2 a3e2 a3ı2 a3i2 a3î2 a3o2 a3ö2 a3u2 a3ü2 a3û2 â3a2 â3â2 â3e2 â3ı2 |
67
|
|
|
|
|
|
|
â3i2 â3î2 â3o2 â3ö2 â3u2 â3ü2 â3û2 e3a2 e3â2 e3e2 e3ı2 e3i2 e3î2 e3o2 |
68
|
|
|
|
|
|
|
e3ö2 e3u2 e3ü2 e3û2 ı3a2 ı3â2 ı3e2 ı3ı2 ı3i2 ı3î2 ı3o2 ı3ö2 ı3u2 ı3ü2 |
69
|
|
|
|
|
|
|
ı3û2 i3a2 i3â2 i3e2 i3ı2 i3i2 i3î2 i3o2 i3ö2 i3u2 i3ü2 i3û2 î3a2 î3â2 |
70
|
|
|
|
|
|
|
î3e2 î3ı2 î3i2 î3î2 î3o2 î3ö2 î3u2 î3ü2 î3û2 o3a2 o3â2 o3e2 o3ı2 o3i2 |
71
|
|
|
|
|
|
|
o3î2 o3o2 o3ö2 o3u2 o3ü2 o3û2 ö3a2 ö3â2 ö3e2 ö3ı2 ö3i2 ö3î2 ö3o2 ö3ö2 |
72
|
|
|
|
|
|
|
ö3u2 ö3ü2 ö3û2 u3a2 u3â2 u3e2 u3ı2 u3i2 u3î2 u3o2 u3ö2 u3u2 u3ü2 u3û2 |
73
|
|
|
|
|
|
|
ü3a2 ü3â2 ü3e2 ü3ı2 ü3i2 ü3î2 ü3o2 ü3ö2 ü3u2 ü3ü2 ü3û2 û3a2 û3â2 û3e2 |
74
|
|
|
|
|
|
|
û3ı2 û3i2 û3î2 û3o2 û3ö2 û3u2 û3ü2 û3û2 tu4r4k m1t4rak |
75
|
|
|
|
|
|
|
2a2cak. |
76
|
|
|
|
|
|
|
)]; |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Donald Knuth and Frank Liang for the algorithm. |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
Alexander Lebedev for all his valuable work on Russian ispell |
84
|
|
|
|
|
|
|
dictionaries and Russian hyphenation patterns. See his archive |
85
|
|
|
|
|
|
|
at L or his hyphenation page |
86
|
|
|
|
|
|
|
at L. |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Patterns used in this module are generated by |
89
|
|
|
|
|
|
|
generate_patterns_tr.rb script which is available at |
90
|
|
|
|
|
|
|
L. |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
The only change is one extra pattern that prevents the "-acak" future tense suffix from being split as "a-cak" at the very end of the word. |
93
|
|
|
|
|
|
|
The original pattern list contains the same pattern for the "-ecek" suffix, and those two are actually variants |
94
|
|
|
|
|
|
|
of the same suffix, so it makes no sense to have patterns for one and not the other. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
Algorithm for pattern generation developed by P. A. MacKay for the Ottoman Texts Project in 1987. |
97
|
|
|
|
|
|
|
Rules adapted for modern Turkish by H. Turgut Uyar. |
98
|
|
|
|
|
|
|
Initiative to improve Turkish patterns by S. Ekin Kocabas. |
99
|
|
|
|
|
|
|
Pattern generation script written by Mojca Miklavec in June 2008. |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=head1 SEE ALSO |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
L is a completely different implementation. It has at least one bug acknowledged by its author |
104
|
|
|
|
|
|
|
which this module does not have. Try hyphenating "antrparantez". |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
L, L, |
107
|
|
|
|
|
|
|
L. |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head1 BUGS AND SUPPORT |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
This code is hosted on GitHub; please see L. |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
Please report any bugs or feature requests to GitHub issues. |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head1 LICENSE |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Copyright (C) Alex Kapranoff. |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
120
|
|
|
|
|
|
|
the terms of the GNU General Public License version 3. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
The patterns that are used inside this module may also be obtained from CTAN under the LPPL license. |
123
|
|
|
|
|
|
|
See L and specifically |
124
|
|
|
|
|
|
|
L. |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=head1 AUTHOR |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
Alex Kapranoff E<lt>alex@kapranoff.ruE<gt> |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=cut |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
1; |