line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::TR::ASCII; |
2
|
|
|
|
|
|
|
$Lingua::TR::ASCII::VERSION = '0.16'; |
3
|
1
|
|
|
1
|
|
67037
|
use strict; |
|
1
|
|
|
|
|
12
|
|
|
1
|
|
|
|
|
29
|
|
4
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
24
|
|
5
|
1
|
|
|
1
|
|
15
|
use utf8; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
7
|
|
6
|
1
|
|
|
1
|
|
28
|
use base qw( Exporter ); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
162
|
|
7
|
1
|
|
|
1
|
|
1252
|
use Lingua::TR::ASCII::Data; |
|
1
|
|
|
|
|
142
|
|
|
1
|
|
|
|
|
297
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our @EXPORT = qw( ascii_to_turkish turkish_to_ascii ); |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
sub ascii_to_turkish { |
12
|
12
|
|
|
12
|
1
|
24014
|
my($str) = @_; |
13
|
12
|
100
|
|
|
|
47
|
return $str if ! $str; |
14
|
9
|
|
|
|
|
39
|
return __PACKAGE__->_new( $str )->_deasciify; |
15
|
|
|
|
|
|
|
} |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
sub turkish_to_ascii { |
18
|
9
|
|
|
9
|
1
|
81
|
my($str, $encoding) = @_; |
19
|
9
|
|
|
|
|
670
|
require Text::Unidecode; |
20
|
1
|
|
|
1
|
|
11
|
use utf8; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
6
|
|
21
|
9
|
|
|
|
|
1937
|
return Text::Unidecode::unidecode( $str ); |
22
|
|
|
|
|
|
|
} |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
sub _new { |
25
|
9
|
|
|
9
|
|
23
|
my($class, $input) = @_; |
26
|
9
|
|
|
|
|
42
|
my $self = { |
27
|
|
|
|
|
|
|
input => $input, |
28
|
|
|
|
|
|
|
length => length $input, |
29
|
|
|
|
|
|
|
turkish => $input, |
30
|
|
|
|
|
|
|
}; |
31
|
9
|
|
|
|
|
19
|
bless $self, $class; |
32
|
9
|
|
|
|
|
28
|
return $self; |
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Convert a string with ASCII-only letters into one with Turkish letters. |
36
|
|
|
|
|
|
|
sub _deasciify { |
37
|
9
|
|
|
9
|
|
22
|
my($self) = @_; |
38
|
9
|
|
|
|
|
27
|
my $s = \$self->{turkish}; |
39
|
9
|
|
|
|
|
16
|
my @chars = split m{}xms, ${$s}; |
|
9
|
|
|
|
|
191
|
|
40
|
|
|
|
|
|
|
|
41
|
9
|
|
|
|
|
29
|
for my $i ( 0 .. $#chars ) { |
42
|
1598
|
|
|
|
|
2505
|
my $c = $chars[$i]; |
43
|
1598
|
100
|
|
|
|
2624
|
next if ! $self->_needs_correction( $c, $i ); |
44
|
163
|
|
33
|
|
|
248
|
substr ${$s}, $i, 1, $TOGGLE_ACCENT->{ $c } || $c; |
|
163
|
|
|
|
|
628
|
|
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
9
|
|
|
|
|
13
|
return ${$s}; |
|
9
|
|
|
|
|
165
|
|
48
|
|
|
|
|
|
|
} |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# Determine if char at cursor needs correction. |
51
|
|
|
|
|
|
|
sub _needs_correction { |
52
|
1598
|
|
|
1598
|
|
2595
|
my($self, $ch, $point) = @_; |
53
|
1598
|
|
66
|
|
|
4003
|
my $tr = $ASCIIFY->{ $ch } || $ch; |
54
|
1598
|
|
|
|
|
2452
|
my $pl = $PATTERN->{ lc $tr }; |
55
|
1598
|
100
|
100
|
|
|
2879
|
my $m = $pl ? $self->_matches( $pl, $point || 0 ) : 0; |
56
|
|
|
|
|
|
|
|
57
|
1598
|
50
|
|
|
|
4805
|
return $tr eq 'I' ? ( $ch eq $tr ? ! $m : $m ) |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
58
|
|
|
|
|
|
|
: ( $ch eq $tr ? $m : ! $m ); |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# Check if the pattern is in the pattern table. |
62
|
|
|
|
|
|
|
sub _matches { |
63
|
422
|
|
|
422
|
|
661
|
my($self, $dlist, $point) = @_; |
64
|
422
|
|
100
|
|
|
923
|
my $str = $self->_get_context( $point || 0 ); |
65
|
422
|
|
|
|
|
651
|
my $rank = 2 * keys %{ $dlist }; |
|
422
|
|
|
|
|
734
|
|
66
|
422
|
|
|
|
|
638
|
my $len = length $str; |
67
|
422
|
|
|
|
|
596
|
my($start, $end); |
68
|
|
|
|
|
|
|
|
69
|
422
|
|
|
|
|
865
|
while ( $start++ <= CONTEXT_SIZE ) { |
70
|
4642
|
|
|
|
|
5538
|
$end = CONTEXT_SIZE; |
71
|
4642
|
|
|
|
|
7797
|
while ( ++$end <= $len ) { |
72
|
25399
|
|
|
|
|
36194
|
my $s = substr $str, $start, $end - $start; |
73
|
25399
|
|
100
|
|
|
60776
|
my $r = $dlist->{ $s } || next; |
74
|
815
|
100
|
|
|
|
1857
|
$rank = $r if abs $r < abs $rank; |
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
|
78
|
422
|
|
|
|
|
920
|
return $rank > 0; |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
sub _get_context { |
82
|
422
|
|
|
422
|
|
661
|
my($self, $point, $size) = @_; |
83
|
422
|
|
50
|
|
|
1286
|
$size ||= CONTEXT_SIZE; |
84
|
422
|
|
|
|
|
609
|
my($s, $i, $space, $index); |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
my $morph = sub { |
87
|
844
|
|
|
844
|
|
1326
|
my($next, $lookup) = @_; |
88
|
844
|
|
|
|
|
1055
|
$index = $point; |
89
|
844
|
|
|
|
|
1021
|
$space = 0; |
90
|
844
|
|
|
|
|
1243
|
while ( $next->() ) { |
91
|
6035
|
|
|
|
|
12348
|
my $char = substr $self->{turkish}, $index, 1; |
92
|
6035
|
|
|
|
|
9374
|
my $x = $lookup->{ $char }; |
93
|
6035
|
100
|
|
|
|
9511
|
if ( $x ) { |
94
|
4926
|
|
|
|
|
7179
|
substr $s, abs $i, 1, $x; |
95
|
4926
|
|
|
|
|
5970
|
$space = 0; |
96
|
4926
|
|
|
|
|
5793
|
$i++; |
97
|
4926
|
|
|
|
|
7875
|
next; |
98
|
|
|
|
|
|
|
} |
99
|
1109
|
100
|
|
|
|
1735
|
next if $space; |
100
|
974
|
|
|
|
|
1258
|
$space = 1; |
101
|
974
|
|
|
|
|
1611
|
$i++; |
102
|
|
|
|
|
|
|
} |
103
|
422
|
|
|
|
|
1545
|
}; |
104
|
|
|
|
|
|
|
|
105
|
422
|
|
|
|
|
924
|
$s = q{ } x ( 1 + ( 2 * $size ) ); |
106
|
422
|
|
|
|
|
582
|
$i = 1 + $size; |
107
|
422
|
|
|
|
|
795
|
substr $s, $size, 1, 'X'; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
$morph->( |
110
|
2309
|
100
|
100
|
2309
|
|
9030
|
sub { $i < length $s && ! $space && ++$index < $self->{length} }, |
111
|
422
|
|
|
|
|
1360
|
$DOWNCASE_ASCIIFY |
112
|
|
|
|
|
|
|
); |
113
|
|
|
|
|
|
|
|
114
|
422
|
|
|
|
|
1190
|
$s = substr $s, 0, $i; |
115
|
422
|
|
|
|
|
603
|
$i = 0 - --$size; |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
$morph->( |
118
|
4570
|
100
|
|
4570
|
|
13112
|
sub { $i <= 0 && --$index >= 0 }, |
119
|
422
|
|
|
|
|
1269
|
$UPCASE_ACCENTS |
120
|
|
|
|
|
|
|
); |
121
|
|
|
|
|
|
|
|
122
|
422
|
|
|
|
|
1733
|
return $s; |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
1; |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
__END__ |