line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# -*- Mode: Perl -*- |
2
|
|
|
|
|
|
|
# Regel.pm -- |
3
|
|
|
|
|
|
|
# Author : Ulrich Pfeifer |
4
|
|
|
|
|
|
|
# Created On : Thu Feb 1 09:10:48 1996 |
5
|
|
|
|
|
|
|
# Last Modified By: Ulrich Pfeifer |
6
|
|
|
|
|
|
|
# Last Modified On: Sun Apr 3 12:11:51 2005 |
7
|
|
|
|
|
|
|
# Language : Perl |
8
|
|
|
|
|
|
|
# Update Count : 73 |
9
|
|
|
|
|
|
|
# Status : Unknown, Use with caution! |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
package Text::German::Regel; |
12
|
2
|
|
|
2
|
|
10
|
use Text::German::Util; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
2008
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Debug verbosity consulted by reduce1(): any true value prints every rule
# that fires; a value greater than 1 additionally prints every rule tried.
$debug = 0;

# Rule table.  $REGEL[$n] is a reference to the list of rule records that
# belong to rule set number $n; it is filled from the __DATA__ section below.
@REGEL = ();                    # -w (presumably silences a "used only once" warning)

# Load the rule table from the __DATA__ section of this file.
#
# Every data line is a colon-separated record whose first field is the rule
# set number; the remaining fields become one rule record (an array ref):
#
#   [0] successor rule set ("Folgeregel")
#   [1] second data field, stored verbatim
#   [2] third data field, stored verbatim
#   [3] new word class, converted with bit_to_int()
#   [4] word class used for matching, converted with bit_to_int()
#   [5] match pattern
#   [6] flag: prefix "ge" may be dropped
#   [7] number of characters to remove
#   [8] string to append
#
# Lines whose first field is empty or false (e.g. blank lines) are skipped.
{
    local ($_);                 # do not clobber the caller's $_

    # Read the data section line by line.  The handle must be named here:
    # a bare "while ()" is an endless loop in Perl and never reads anything.
    while (<DATA>) {
        chomp;
        my ($regel, $folge, $f1, $f2, $new_wc, $klasse, @rest)
            = split(/:/, $_);
        next unless $regel;
        push(@{$REGEL[$regel]},
             [$folge,
              $f1,
              $f2,
              bit_to_int($new_wc),
              bit_to_int($klasse),
              @rest]);
    }
    close DATA;
}
33
|
|
|
|
|
|
|
# reduce($v, $s, $e)
#
# Apply the reduction rules to a word that has been split into three string
# parts, passed as $v, $s and $e (reduce1() strips a leading "ge" from $v and
# rewrites the end of $s; $e is used to look up the rule set).
#
# Returns the list ($v, $s, $e) produced by reduce1() on success.
# Returns undef (a one-element list in list context) when $s and $e together
# are shorter than three characters, or when no rule matched and $e has been
# used up.
sub reduce {
    my ($v, $s, $e) = @_;

    return undef if length($s . $e) < 3;

    # Pad $s to at least three characters by moving characters over from the
    # front of $e.  The length check above guarantees that this terminates.
    while (length($s) < 3) {
        $s .= substr($e, 0, 1);
        $e  = substr($e, 1);
    }

    while (1) {
        my @tmp = reduce1($v, $s, $e);

        # reduce1() signals failure with a single undef and success with a
        # three-element list, so "more than one element" means success.
        return @tmp if @tmp > 1;

        # Nothing matched and no characters are left to shift: give up and
        # hand back reduce1()'s failure value.
        return @tmp if !$e;

        # Retry with the split point moved one character to the right.
        $s .= substr($e, 0, 1);
        $e  = substr($e, 1);
    }
}
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# reduce1($v, $s, $e)
#
# Run one pass of rule application on a word split into the parts $v, $s, $e.
#
# The starting rule set is looked up with Text::German::Endung::regel($e);
# while that returns undef, one character at a time is moved from the front
# of $e to the end of $s and the lookup is repeated.  The rules of the
# selected set in @REGEL are then tried in order.  A rule fires when its
# pattern (field 5) matches the end of $s; it then removes and/or appends
# characters at the end of $s, optionally strips a leading "ge" from $v, and
# names the rule set to continue with.  Rule set '000' ends the chain.
#
# Returns ($v, $s, $e) if at least one rule fired.  Otherwise returns undef,
# which is a ONE-element list in list context; reduce() relies on that, so
# do not turn it into a bare "return;".
#
# NOTE(review): Text::German::Endung is not loaded in the visible part of
# this file; presumably the caller loads it - confirm.
sub reduce1 {
    my ($v, $s, $e) = @_;
    my $fc;                     # current word class
    my $fr;                     # current rule set number
    my $did_match;              # counts the rules that fired

    while (1) {
        $fr = Text::German::Endung::regel($e);    # || '001'; # ???
        last if defined $fr;
        last unless $e;
        $s .= substr($e, 0, 1);
        $e  = substr($e, 1);
    }
    return undef unless $fr;
    $fc = Text::German::Endung::wort_klasse($e);

  ruleset:
    while (defined $REGEL[$fr]) {
        # Lexical loop variable: the original iterated over an undeclared
        # package global.
        for my $r (@{$REGEL[$fr]}) {
            # NOTE(review): this is a bitwise OR, so a rule is skipped only
            # when both values are zero.  A bitwise AND would express
            # "word classes overlap"; left unchanged because altering it
            # changes the results - confirm the intent.
            next unless $r->[4] | $fc;    # allowed wordclasses

            # Expand the placeholders in the rule pattern: the first '+'
            # becomes a consonant class, the first '%' a vowel class
            # (\344 \366 \374 are the Latin-1 umlauts).
            my $match = $r->[5];
            $match =~ s/\+/[bcdfghjklmnpqrstvwxyz]/;
            $match =~ s/\%/[aeiou\344\366\374]/;

            print "\tREGEL: $fr:", (join ':', @{$r}),"\t($s,$match)\n"
                if $debug > 1;

            if ($s =~ /$match$/) {
                $did_match++;
                print "\tREGEL: $fr:", (join ':', @{$r}),"\t$s => "
                    if $debug;

                # Drop $r->[7] characters from the end of $s, then append
                # the replacement string $r->[8], if any.
                $s = (substr($s, 0, length($s) - $r->[7])) if $r->[7];
                $s .= $r->[8] if $r->[8];
                print "$s\n" if $debug;

                if ($r->[6]) {  # may the prefix 'ge' be dropped?
                    $v =~ s/^ge//;
                }

                $fr = $r->[0];  # successor rule set
                $fc = $r->[3];  # ???

                if ($fr ne '000') {
                    next ruleset;   # continue with the successor rule set
                } else {
                    last;           # '000' ends the rule chain
                }
            }
        }
        # Reached when the rule list was exhausted without a match or when
        # a terminal rule fired: stop processing rule sets.
        last;
    }

    if ($did_match) {
        return ($v, $s, $e);
    } else {
        return undef;
    }
}
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
1; |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# regel |
116
|
|
|
|
|
|
|
# 0 Folgeregel |
117
|
|
|
|
|
|
|
# 1 # zeichen entfernen |
118
|
|
|
|
|
|
|
# 2 |
119
|
|
|
|
|
|
|
# 3 new wc |
120
|
|
|
|
|
|
|
# 4 KLASSE fuer match |
121
|
|
|
|
|
|
|
# 5 MATCH |
122
|
|
|
|
|
|
|
# 6 vorsilbe ge |
123
|
|
|
|
|
|
|
# 7 #chars to remove |
124
|
|
|
|
|
|
|
# 8 string to append |
125
|
|
|
|
|
|
|
# 017:000:2:te:01001:01001:önn:1:3:ann |
126
|
|
|
|
|
|
|
__DATA__ |