line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::Jspell::DictManager; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
7683
|
use 5.006; |
|
2
|
|
|
|
|
8
|
|
4
|
2
|
|
|
2
|
|
11
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
83
|
|
5
|
2
|
|
|
2
|
|
14
|
use warnings; |
|
2
|
|
|
|
|
26
|
|
|
2
|
|
|
|
|
113
|
|
6
|
2
|
|
|
2
|
|
1443
|
use Data::Dumper; |
|
2
|
|
|
|
|
14797
|
|
|
2
|
|
|
|
|
199
|
|
7
|
2
|
|
|
2
|
|
1120
|
use File::Copy; |
|
2
|
|
|
|
|
5406
|
|
|
2
|
|
|
|
|
165
|
|
8
|
2
|
|
|
2
|
|
19
|
use YAML 'LoadFile'; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
136
|
|
9
|
2
|
|
|
2
|
|
15
|
use File::Spec::Functions; |
|
2
|
|
|
|
|
8
|
|
|
2
|
|
|
|
|
245
|
|
10
|
2
|
|
|
2
|
|
15
|
use Lingua::Jspell; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
7646
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
require Exporter; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( 'all' => [ qw() ] ); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
our @EXPORT = qw( &toword &install_dic ); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
our $VERSION = '1.96'; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# Preloaded methods go here. |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub install_dic{ |
28
|
0
|
|
|
0
|
1
|
|
my %opt =(yaml => undef, name=>undef); ## irr => "name.irr" |
29
|
0
|
0
|
|
|
|
|
if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
30
|
0
|
|
|
|
|
|
my ($aff,@dic)=@_; |
31
|
0
|
|
|
|
|
|
my $cpaff=1; |
32
|
0
|
0
|
|
|
|
|
if($aff =~ /^from:(.*)/){ |
33
|
0
|
|
|
|
|
|
$aff = catfile($Lingua::Jspell::JSPELLLIB,"$1.aff") ; $cpaff=0;} |
|
0
|
|
|
|
|
|
|
34
|
0
|
|
|
|
|
|
my $ya; |
35
|
0
|
0
|
|
|
|
|
open(F,">__$$.dic") or die("Error 1: $!\n"); |
36
|
0
|
0
|
|
|
|
|
for (@dic){open(G, $_) or die("Error 2($_): $!\n"); |
|
0
|
|
|
|
|
|
|
37
|
0
|
|
|
|
|
|
print F ; |
38
|
0
|
|
|
|
|
|
close G; |
39
|
|
|
|
|
|
|
} |
40
|
0
|
|
|
|
|
|
close F; |
41
|
0
|
0
|
|
|
|
|
$ya = LoadFile($opt{yaml}) if $opt{yaml}; |
42
|
0
|
|
0
|
|
|
|
my $name = $opt{name} || $ya->{META}{IDS}[0] || $dic[0]; |
43
|
|
|
|
|
|
|
|
44
|
0
|
0
|
|
|
|
|
if($opt{hash}){ copy($opt{hash}, "__$$.hash"); } |
|
0
|
|
|
|
|
|
|
45
|
0
|
|
|
|
|
|
else { system ("jbuild __$$.dic $aff __$$.hash"); } |
46
|
|
|
|
|
|
|
|
47
|
0
|
0
|
|
|
|
|
if($opt{irr}){ |
48
|
0
|
0
|
|
|
|
|
copy($opt{irr},catfile($Lingua::Jspell::JSPELLLIB,$opt{irr})) |
49
|
|
|
|
|
|
|
or warn ("Error 3: $!"); |
50
|
|
|
|
|
|
|
} |
51
|
0
|
0
|
|
|
|
|
copy("__$$.hash",catfile($Lingua::Jspell::JSPELLLIB,"$name.hash")) |
52
|
|
|
|
|
|
|
or warn ("Error 4: $!"); |
53
|
0
|
0
|
|
|
|
|
if($cpaff){ |
54
|
0
|
0
|
|
|
|
|
copy($aff, catfile($Lingua::Jspell::JSPELLLIB,"$name.aff")) |
55
|
|
|
|
|
|
|
or warn ("Error 5: $!"); |
56
|
|
|
|
|
|
|
} |
57
|
0
|
0
|
|
|
|
|
if ($opt{yaml}){ |
58
|
0
|
0
|
|
|
|
|
copy($opt{yaml}, catfile($Lingua::Jspell::JSPELLLIB,"$name.yaml")) |
59
|
|
|
|
|
|
|
or warn ("Error 6: $!"); |
60
|
0
|
|
|
|
|
|
for(@{$ya->{META}{IDS}}){ |
|
0
|
|
|
|
|
|
|
61
|
0
|
0
|
|
|
|
|
copy("__$$.hash",catfile($Lingua::Jspell::JSPELLLIB,"$_.hash")) |
62
|
|
|
|
|
|
|
or warn ("Error 7: $!"); |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
} |
65
|
0
|
|
|
|
|
|
unlink("__$$.dic","__$$.hash","__$$.dic.cnt","__$$.dic.stat"); |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
sub init{ |
69
|
0
|
|
|
0
|
1
|
|
my $file = shift; |
70
|
0
|
|
|
|
|
|
my $self = { filename => $file }; |
71
|
0
|
0
|
|
|
|
|
open F, $file or die "Cannot open file '$file': $!\n"; |
72
|
0
|
|
|
|
|
|
while() { |
73
|
0
|
0
|
|
|
|
|
$self->{ shortcut}{$1} = $2 if (m!^#([^/]+)/([^/]+)/!); |
74
|
0
|
0
|
|
|
|
|
$self->{revshortcut}{$2} = $1 if (m!^#([^/]+)/([^/]+)/!); |
75
|
|
|
|
|
|
|
} |
76
|
0
|
|
|
|
|
|
close F; |
77
|
0
|
0
|
|
|
|
|
copy($file,"$file.old") or die("$! cant create $file.old\n"); |
78
|
0
|
|
|
|
|
|
return bless($self); |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
|
81
|
0
|
|
|
0
|
1
|
|
sub toword{ _data2line(@_) } |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
sub modeach_word{ |
84
|
0
|
|
|
0
|
1
|
|
my %opt =(rawfea => 0); |
85
|
0
|
|
|
|
|
|
my $dic = shift; |
86
|
0
|
0
|
|
|
|
|
if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
87
|
0
|
|
|
|
|
|
my $func = shift; |
88
|
0
|
0
|
|
|
|
|
open DIC, $dic->{filename} or die("cannot open file"); |
89
|
0
|
0
|
|
|
|
|
open NDIC, ">$dic->{filename}.new" or die("cannot create file $!"); |
90
|
0
|
|
|
|
|
|
while() { |
91
|
0
|
0
|
0
|
|
|
|
if (m!^#! or m!^\s*$!){ print NDIC $_ ; next } |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
|
93
|
0
|
|
|
|
|
|
chomp; |
94
|
0
|
|
|
|
|
|
my ($word,$class,$flags,@r) = split '/', $_; |
95
|
0
|
0
|
|
|
|
|
my @flags = ($flags)?split(//, $flags):(); |
96
|
0
|
0
|
|
|
|
|
if(not $opt{rawfea}){ |
97
|
0
|
|
|
|
|
|
my @atts; |
98
|
0
|
0
|
|
|
|
|
if ($class =~ /^\$/){ @atts = (special => $class)} |
|
0
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
else { |
100
|
0
|
0
|
|
|
|
|
$class =~ s/#([A-Za-z][A-Za-z0-9]*)/$dic->{shortcut}{$1} || ""/ge if $class; |
|
0
|
0
|
|
|
|
|
|
101
|
0
|
0
|
|
|
|
|
@atts = ($class)?split(/[,=]/, $class):(); |
102
|
|
|
|
|
|
|
} |
103
|
0
|
|
|
|
|
|
my %atts; |
104
|
0
|
0
|
|
|
|
|
if (@atts % 2) { |
105
|
0
|
|
|
|
|
|
%atts = (); |
106
|
|
|
|
|
|
|
} else { |
107
|
0
|
|
|
|
|
|
%atts = @atts; |
108
|
|
|
|
|
|
|
} |
109
|
0
|
|
0
|
|
|
|
print NDIC $func->($word,\%atts,\@flags,@r) || $_; |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
else { |
112
|
0
|
|
0
|
|
|
|
print NDIC $func->($word,$class,\@flags,@r) || $_; |
113
|
|
|
|
|
|
|
} |
114
|
0
|
|
|
|
|
|
print NDIC "\n"; |
115
|
|
|
|
|
|
|
} |
116
|
0
|
|
|
|
|
|
close DIC; |
117
|
0
|
|
|
|
|
|
close NDIC; |
118
|
0
|
|
|
|
|
|
copy("$dic->{filename}.new",$dic->{filename}); |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub foreach_word { |
123
|
0
|
|
|
0
|
1
|
|
my %opt =(type => "struct"); |
124
|
0
|
|
|
|
|
|
my $dic = shift; |
125
|
0
|
0
|
|
|
|
|
if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
126
|
0
|
|
|
|
|
|
my $func = shift; |
127
|
0
|
0
|
|
|
|
|
open DIC, $dic->{filename} or die("cannot open file"); |
128
|
0
|
|
|
|
|
|
while() { |
129
|
0
|
0
|
|
|
|
|
next if m!^#!; |
130
|
0
|
0
|
|
|
|
|
next if m!^\s*$!; |
131
|
0
|
|
|
|
|
|
chomp; |
132
|
0
|
|
|
|
|
|
my ($word,$class,$flags,@r) = split '/', $_; |
133
|
0
|
0
|
|
|
|
|
if($opt{type} eq "struct"){ |
|
|
0
|
|
|
|
|
|
134
|
0
|
0
|
|
|
|
|
$class =~ s/#([A-Za-z][A-Za-z0-9]*)/$dic->{shortcut}{$1} || ""/ge if $class; |
|
0
|
0
|
|
|
|
|
|
135
|
0
|
0
|
|
|
|
|
my @flags = ($flags)?split(//, $flags):(); |
136
|
0
|
0
|
|
|
|
|
my @atts = ($class)?split(/[,=]/, $class):(); |
137
|
0
|
|
|
|
|
|
my %atts; |
138
|
0
|
0
|
|
|
|
|
if (@atts % 2) { |
139
|
0
|
|
|
|
|
|
%atts = (); |
140
|
|
|
|
|
|
|
} else { |
141
|
0
|
|
|
|
|
|
%atts = @atts; |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
|
144
|
0
|
|
|
|
|
|
$func->($word,\%atts,\@flags,@r); } |
145
|
0
|
|
|
|
|
|
elsif( $opt{type} eq "raw"){ $func->($_); } |
146
|
|
|
|
|
|
|
} |
147
|
0
|
|
|
|
|
|
close DIC; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
sub for_this_cat_I_want_only_these_flags { |
151
|
0
|
|
|
0
|
1
|
|
my $dic = shift; |
152
|
0
|
|
|
|
|
|
my $cat = shift; |
153
|
0
|
|
|
|
|
|
$cat =~ s/#//g; |
154
|
0
|
|
|
|
|
|
my $flags = shift; |
155
|
0
|
|
|
|
|
|
my %flags; |
156
|
0
|
|
|
|
|
|
@flags{split //,$flags}=1; |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
foreach_word($dic, sub { |
159
|
0
|
|
|
0
|
|
|
my ($w,$a,$f) = @_; |
160
|
0
|
|
|
|
|
|
my %fs = %flags; |
161
|
0
|
|
|
|
|
|
my $ct = $cat; |
162
|
|
|
|
|
|
|
|
163
|
0
|
|
0
|
|
|
|
my $this_cat = $a->{CAT} || "unknown"; |
164
|
0
|
0
|
|
|
|
|
if ($this_cat eq $ct) { |
165
|
0
|
|
|
|
|
|
my $fl; |
166
|
0
|
|
|
|
|
|
for $fl(@$f) { |
167
|
0
|
0
|
|
|
|
|
unless (exists($fs{$fl})) { |
168
|
0
|
|
|
|
|
|
print "$w from category '$cat' uses flag '$fl'\n"; |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
} |
172
|
0
|
|
|
|
|
|
}); |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
sub for_this_cat_I_dont_want_these_flags { |
176
|
0
|
|
|
0
|
1
|
|
my $dic = shift; |
177
|
0
|
|
|
|
|
|
my $cat = shift; |
178
|
0
|
|
|
|
|
|
$cat =~ s/#//g; |
179
|
0
|
|
|
|
|
|
my $flags = shift; |
180
|
0
|
|
|
|
|
|
my %flags; |
181
|
0
|
|
|
|
|
|
@flags{split //,$flags}=1; |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
foreach_word($dic, sub { |
184
|
0
|
|
|
0
|
|
|
my ($w,$a,$f) = @_; |
185
|
0
|
|
|
|
|
|
my %fs = %flags; |
186
|
0
|
|
|
|
|
|
my $ct = $cat; |
187
|
|
|
|
|
|
|
|
188
|
0
|
|
0
|
|
|
|
my $this_cat = $a->{CAT} || "unknown"; |
189
|
0
|
0
|
|
|
|
|
if ($this_cat eq $ct) { |
190
|
0
|
|
|
|
|
|
my $fl; |
191
|
0
|
|
|
|
|
|
for $fl(@$f) { |
192
|
0
|
0
|
|
|
|
|
if (exists($fs{$fl})) { |
193
|
0
|
|
|
|
|
|
print "$w from category '$cat' uses flag '$fl'\n"; |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
} |
197
|
0
|
|
|
|
|
|
}); |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
sub not_categorized { |
202
|
0
|
|
|
0
|
1
|
|
my $dic = shift; |
203
|
|
|
|
|
|
|
|
204
|
0
|
0
|
|
|
|
|
open DIC, $dic->{filename} or die("Cannot open file"); |
205
|
0
|
|
|
|
|
|
while() { |
206
|
0
|
|
|
|
|
|
chomp; |
207
|
0
|
0
|
|
|
|
|
next if /^#/; |
208
|
0
|
0
|
|
|
|
|
next if /^\s*$/; |
209
|
|
|
|
|
|
|
|
210
|
0
|
|
|
|
|
|
m{^([^/]+)/}; |
211
|
0
|
|
|
|
|
|
my $word = $1; |
212
|
0
|
|
|
|
|
|
my $cat = $'; |
213
|
0
|
0
|
|
|
|
|
next unless ($cat =~ m!^/!); |
214
|
0
|
|
|
|
|
|
print "word '$word' doesn't have a categorie\n"; |
215
|
|
|
|
|
|
|
} |
216
|
0
|
|
|
|
|
|
close DIC; |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
sub extra_words { |
220
|
0
|
|
|
0
|
1
|
|
my $dic = shift; |
221
|
|
|
|
|
|
|
|
222
|
0
|
|
|
|
|
|
my %from; |
223
|
0
|
|
|
|
|
|
my ($r,$word,$fea,$fea1,$t); |
224
|
0
|
|
|
|
|
|
my $jdic = Lingua::Jspell->new("port"); |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
|
227
|
0
|
0
|
|
|
|
|
open DIC, $dic->{filename} or die("Cannot open file"); |
228
|
0
|
|
|
|
|
|
while() { |
229
|
0
|
|
|
|
|
|
chomp; |
230
|
0
|
0
|
|
|
|
|
next if /^#/; |
231
|
0
|
0
|
|
|
|
|
next if /^\s*$/; |
232
|
|
|
|
|
|
|
|
233
|
0
|
|
|
|
|
|
m{^([^/]+)/}; |
234
|
0
|
|
|
|
|
|
$word = $1; |
235
|
0
|
|
|
|
|
|
my @rads = $jdic->rad($word); |
236
|
0
|
0
|
|
|
|
|
if (@rads > 1) { |
237
|
0
|
0
|
|
|
|
|
print STDERR "." if rand > 0.99; |
238
|
0
|
|
|
|
|
|
for $r (@rads) { |
239
|
0
|
0
|
|
|
|
|
next if ($r eq $word); |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# for the fea from $word, get the rad==$r |
242
|
0
|
|
|
|
|
|
for $fea ($jdic->fea($word)) { |
243
|
0
|
0
|
|
|
|
|
if ($fea->{rad} eq $word) { |
244
|
0
|
|
|
|
|
|
for $fea1 (fea($r)) { |
245
|
0
|
0
|
|
|
|
|
if (_same_cat($fea1->{CAT},$fea->{CAT})) { |
246
|
0
|
|
|
|
|
|
$from{$r} = {word=>$word, orig=>$fea1, dest=>$fea}; |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
# $from{$r} = {word=>$word}; |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
} |
256
|
0
|
|
|
|
|
|
close DIC; |
257
|
|
|
|
|
|
|
|
258
|
0
|
|
|
|
|
|
for (keys %from) { |
259
|
0
|
0
|
|
|
|
|
if ($from{$from{$_}{word}}{word}) { |
260
|
0
|
|
|
|
|
|
print "# warning: multiple dependence\n"; |
261
|
0
|
|
|
|
|
|
print "# \t$_ => $from{$_}{word} => $from{$from{$_}{word}}{word}\n"; |
262
|
0
|
|
|
|
|
|
delete($from{$_}); |
263
|
|
|
|
|
|
|
} else { |
264
|
0
|
|
|
|
|
|
print "# from $_ you can get $from{$_}{word}\n"; |
265
|
0
|
|
|
|
|
|
print "delete_word('$from{$_}{word}')\n"; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
sub _same_cat { |
271
|
0
|
|
|
0
|
|
|
my ($a,$b) = @_; |
272
|
0
|
0
|
0
|
|
|
|
if (defined($a) && defined($b)) { |
273
|
0
|
|
|
|
|
|
return ($a eq $b); |
274
|
|
|
|
|
|
|
} else { |
275
|
0
|
|
|
|
|
|
return 0; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
# Each element should be a reference to an associative array like this: |
280
|
|
|
|
|
|
|
# |
281
|
|
|
|
|
|
|
# { word => 'word', flags => 'zbr', CAT => 'np', G=>'f' } |
282
|
|
|
|
|
|
|
sub add_word { |
283
|
0
|
|
|
0
|
1
|
|
my $dict = shift; |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
$dict->_add_full_line(map { |
286
|
0
|
|
|
|
|
|
my $word = $_->{word}; |
|
0
|
|
|
|
|
|
|
287
|
0
|
|
|
|
|
|
my $flags = $_->{flags}; |
288
|
0
|
|
0
|
|
|
|
my $comment = $_->{comment} || ""; |
289
|
0
|
|
|
|
|
|
delete($_->{word}); |
290
|
0
|
|
|
|
|
|
delete($_->{flags}); |
291
|
0
|
|
|
|
|
|
delete($_->{comment}); |
292
|
0
|
|
|
|
|
|
my %hash = %$_; |
293
|
0
|
|
|
|
|
|
my $info = join(",",map {"$_=$hash{$_}"} keys %hash); |
|
0
|
|
|
|
|
|
|
294
|
0
|
|
|
|
|
|
"$word/$info/$flags/$comment" |
295
|
|
|
|
|
|
|
} @_); |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
sub _add_full_line { |
299
|
0
|
|
|
0
|
|
|
my $dict = shift; |
300
|
0
|
|
|
|
|
|
my %saw =(); |
301
|
0
|
|
|
|
|
|
@saw{@_} = (); |
302
|
0
|
|
|
|
|
|
my @v; |
303
|
|
|
|
|
|
|
|
304
|
0
|
0
|
|
|
|
|
open DIC, $dict->{filename} or die("cannot open dictionary file"); |
305
|
0
|
0
|
|
|
|
|
open NDIC, ">$dict->{filename}.new" or die("cannot open new dictionary file"); |
306
|
0
|
|
|
|
|
|
while () { |
307
|
0
|
0
|
0
|
|
|
|
push @v,$_ and next if (/^#/); |
308
|
0
|
|
|
|
|
|
chomp; |
309
|
0
|
|
|
|
|
|
$saw{$_} = 1; |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
|
312
|
0
|
|
|
|
|
|
print NDIC join "", @v; |
313
|
0
|
|
|
|
|
|
print NDIC "\n\n"; |
314
|
0
|
0
|
|
|
|
|
print NDIC map {/./ ? ("$_\n"):()} sort keys %saw; |
|
0
|
|
|
|
|
|
|
315
|
0
|
|
|
|
|
|
close DIC; |
316
|
0
|
|
|
|
|
|
close NDIC; |
317
|
0
|
|
|
|
|
|
copy("$dict->{filename}.new",$dict->{filename}); |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
sub delete_word { |
321
|
0
|
|
|
0
|
1
|
|
my $dict = shift; |
322
|
0
|
|
|
|
|
|
my $pal=shift; |
323
|
0
|
|
|
|
|
|
my $t; |
324
|
|
|
|
|
|
|
|
325
|
0
|
0
|
|
|
|
|
open DIC, $dict->{filename} or die("cannot open dictionary file"); |
326
|
0
|
0
|
|
|
|
|
open NDIC, ">$dict->{filename}.new" or die("cannot open new dictionary file"); |
327
|
|
|
|
|
|
|
|
328
|
0
|
|
|
|
|
|
while () { |
329
|
0
|
0
|
|
|
|
|
$t = $1 if /^(.+?)\//; |
330
|
0
|
0
|
|
|
|
|
print NDIC unless ($t=~/^$pal$/); |
331
|
|
|
|
|
|
|
} |
332
|
0
|
|
|
|
|
|
close DIC; |
333
|
0
|
|
|
|
|
|
close NDIC; |
334
|
0
|
|
|
|
|
|
copy("$dict->{filename}.new",$dict->{filename}); |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
sub add_flag { |
338
|
0
|
|
|
0
|
1
|
|
my $dic = shift; |
339
|
0
|
|
|
|
|
|
my $flag = shift; |
340
|
0
|
|
|
|
|
|
my %words; |
341
|
0
|
|
|
|
|
|
@words{@_} = 1; |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
$dic -> foreach_word( sub { |
344
|
0
|
|
|
0
|
|
|
my ($w,$a,$f) = @_; |
345
|
0
|
|
|
|
|
|
my %fs; |
346
|
0
|
|
|
|
|
|
@fs{@$f}=1; |
347
|
0
|
0
|
|
|
|
|
if ($words{$w}) { |
348
|
0
|
|
|
|
|
|
@fs{split //, $flag}=1;; |
349
|
0
|
|
|
|
|
|
print _data2line($w,$a,join("",keys %fs)); |
350
|
|
|
|
|
|
|
} |
351
|
0
|
|
|
|
|
|
print _data2line($w,$a,$f); |
352
|
|
|
|
|
|
|
|
353
|
0
|
|
|
|
|
|
}); |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
} |
356
|
|
|
|
|
|
|
#$pal=shift; |
357
|
|
|
|
|
|
|
#($ac,$flag)=(shift=~/([\+\-])(.)/); |
358
|
|
|
|
|
|
|
# |
359
|
|
|
|
|
|
|
#while (<>) { |
360
|
|
|
|
|
|
|
#print $_ and next if ($_=~/^#/ || $_ eq "\n"); |
361
|
|
|
|
|
|
|
#$_=~s#\n#/\n# unless ($_=~/.*\/.*\/.*\//); |
362
|
|
|
|
|
|
|
#($a,$b,$c,$d)=($_=~/^(.+?)\/(.*?)\/(.*?)\/(.*)/); |
363
|
|
|
|
|
|
|
#$c=~s#$flag##g if ($a=~/^$pal$/); |
364
|
|
|
|
|
|
|
#$c.=$flag if ($a=~/^$pal$/ && $ac eq "+"); |
365
|
|
|
|
|
|
|
#print "$a/$b/$c/$d\n"; |
366
|
|
|
|
|
|
|
#} |
367
|
|
|
|
|
|
|
|
368
|
0
|
|
|
0
|
|
|
sub _data2line { my ($word,$atts,$flags,@r) = @_; |
369
|
0
|
0
|
|
|
|
|
if(ref $atts){ |
370
|
0
|
|
|
|
|
|
return "$word/". join(",",map { "$_=$atts->{$_}" } keys %$atts). |
371
|
0
|
|
|
|
|
|
"/". join("",grep {/./} @$flags). |
|
0
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
"/". join("/",@r); |
373
|
|
|
|
|
|
|
} else { |
374
|
|
|
|
|
|
|
return "$word/". $atts . |
375
|
0
|
|
|
|
|
|
"/". join("",grep {/./} @$flags). |
|
0
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
"/". join("/",@r); |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=head1 NAME |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
Lingua::Jspell::DictManager - a perl module for processing jspell dictionaries |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
=head1 SYNOPSIS |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
use Lingua::Jspell::DictManager; |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
$dict = init("dictionary file"); |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
$dict->foreach_word( \&func ); |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
$dict->for_this_cat_I_want_only_these_flags('nc', 'fp'); |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
$dict->add_flag("p","linha","carro",...); |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
$dict->add_word({word=>'word',flags=>'zbr',CAT=>'np',G=>'f'},...) |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
remflag("f.dic","p","linha","carro",...); |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=head1 DESCRIPTION |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=head2 C |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
This function returns a new dictionary object to be used in future |
406
|
|
|
|
|
|
|
methods. It requires a string with the dictionary file name. |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
=head2 C |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
install_dic({name=>"teste"} ,"t.aff", "t.dic") |
411
|
|
|
|
|
|
|
install_dic({name=>"t"} ,"from:port", "t1.dic", "t2.dic") |
412
|
|
|
|
|
|
|
install_dic({yaml=>"t.yaml"} ,"from:port", "t1.dic", "t2.dic") |
413
|
|
|
|
|
|
|
install_dic({yaml=>"t.yaml",irr=>"f.irr"} ,"from:port", "t1.dic") |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
C is used to reuse the affix table from language C (the |
416
|
|
|
|
|
|
|
file lang.aff is imported from the jspell library directory. (see jspell-dic |
417
|
|
|
|
|
|
|
-dir) |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
name -- name of the dictionary |
420
|
|
|
|
|
|
|
yaml -- yaml file with metadata |
421
|
|
|
|
|
|
|
irr -- file with irregular terms |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
=head2 C |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
This method processes all words from the dictionary using the function |
426
|
|
|
|
|
|
|
passed as argument. This function is called with three arguments: the |
427
|
|
|
|
|
|
|
word, a reference to an associative array with the category |
428
|
|
|
|
|
|
|
information and a reference to a list of rules identifiers. |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=head2 C |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
This method processes all words from the dictionary using the function |
433
|
|
|
|
|
|
|
passed as argument. This function is called with three arguments: the |
434
|
|
|
|
|
|
|
word, a reference to an associative array with the category |
435
|
|
|
|
|
|
|
information and a reference to a list of rules identifiers. |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
If the option C<< rawfea =>1 >> is selected, modeach_word receives a string |
438
|
|
|
|
|
|
|
instead of a hash reference. |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
modeach_word({rawfea=>1}, sub { my($w,$cat,$flags,@com)=@_; ... }) |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
Use the function C to rebuild a new value; |
443
|
|
|
|
|
|
|
if "" is return, the previous value is kept. |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=head2 C |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
This method receives a gramatical category and a string with flags. It |
448
|
|
|
|
|
|
|
will print warning messages for each entry with that category and with |
449
|
|
|
|
|
|
|
a flag not described in the flags string. |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=head2 C |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
Works like the previous method, but will print warnings if any |
454
|
|
|
|
|
|
|
category uses one of the specificed flags. |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
=head2 C |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
This method returns a report for the entries without a category |
459
|
|
|
|
|
|
|
definition. |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=head2 C |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
This method tries to find redundant entries on the dictionary, |
464
|
|
|
|
|
|
|
producing an ouput file to be executed and delete the redundancy. |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=head2 C |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
Add (one or more) word to the dictionary |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
$dict->add_word({word=>'word',flags=>'zbr',CAT=>'np',G=>'f'},...) |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
=head2 C |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
Deletes the word passed as argument. |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=head2 C |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
Adds the flags in the first argument to all words passed. |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=head2 C |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
to format Word, features, flags and commants to jspell-dict format. |
483
|
|
|
|
|
|
|
This functions is tically used em C. |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=head1 AUTHOR |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
Alberto Simoes, Ealbie@alfarrabio.di.uminho.ptE |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
J.Joao Almeida, Ejj@di.uminho.ptE |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
=head1 SEE ALSO |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
Lingua::Jspell(3), jspell(1) |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
Copyright 2007-2009 Projecto Natura |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
This program is free software; licensed under GPL. |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=cut |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
1; |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
__END__ |