line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WordLists::WordList;
|
2
|
4
|
|
|
4
|
|
24722
|
use strict;
|
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
151
|
|
3
|
4
|
|
|
4
|
|
22
|
use warnings;
|
|
4
|
|
|
|
|
10
|
|
|
4
|
|
|
|
|
120
|
|
4
|
4
|
|
|
4
|
|
978
|
use utf8;
|
|
4
|
|
|
|
|
16
|
|
|
4
|
|
|
|
|
29
|
|
5
|
4
|
|
|
4
|
|
1476
|
use WordLists::Sense;
|
|
4
|
|
|
|
|
12
|
|
|
4
|
|
|
|
|
143
|
|
6
|
4
|
|
|
4
|
|
32
|
use WordLists::Common qw (@sDefaultAttList @sDefiningAttlist @sParsingParameters);
|
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
717
|
|
7
|
4
|
|
|
4
|
|
24
|
use WordLists::Base;
|
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
6911
|
|
8
|
|
|
|
|
|
|
our $VERSION = $WordLists::Base::VERSION;
|
9
|
|
|
|
|
|
|
our $AUTOLOAD;
|
10
|
|
|
|
|
|
|
our $DEFAULT_ENCODING = 'ascii';
|
11
|
|
|
|
|
|
|
our $NO_SENSE_IDENTITY = 'ascii';
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
sub new
|
15
|
|
|
|
|
|
|
{
|
16
|
3
|
|
|
3
|
1
|
21
|
my ($class, $args) = @_;
|
17
|
3
|
|
|
|
|
18
|
my $self = {
|
18
|
|
|
|
|
|
|
default_attlist => \@sDefaultAttList,
|
19
|
|
|
|
|
|
|
defining_attlist => \@sDefiningAttlist,
|
20
|
|
|
|
|
|
|
};
|
21
|
|
|
|
|
|
|
|
22
|
3
|
|
|
|
|
15
|
bless ($self, $class);
|
23
|
3
|
100
|
|
|
|
15
|
if ( ref $args eq 'HASH')
|
24
|
|
|
|
|
|
|
{
|
25
|
1
|
50
|
|
|
|
7
|
if (defined $args->{'parser'})
|
26
|
|
|
|
|
|
|
{
|
27
|
1
|
|
|
|
|
6
|
$self->parser($args->{'parser'});
|
28
|
|
|
|
|
|
|
}
|
29
|
1
|
50
|
|
|
|
5
|
if (defined $args->{'serialiser'})
|
30
|
|
|
|
|
|
|
{
|
31
|
0
|
|
|
|
|
0
|
$self->serialiser($args->{'serialiser'});
|
32
|
|
|
|
|
|
|
}
|
33
|
1
|
50
|
|
|
|
3
|
if (defined $args->{'name'})
|
34
|
|
|
|
|
|
|
{
|
35
|
0
|
|
|
|
|
0
|
$self->{'name'} = $args->{'name'};
|
36
|
|
|
|
|
|
|
}
|
37
|
1
|
50
|
|
|
|
4
|
if (defined $args->{'attlist'})
|
38
|
|
|
|
|
|
|
{
|
39
|
0
|
|
|
|
|
0
|
$self->{'attlist'} = $args->{'attlist'}; # todo: validate this
|
40
|
|
|
|
|
|
|
}
|
41
|
1
|
50
|
|
|
|
6
|
if (defined $args->{'from_string'})
|
|
|
50
|
|
|
|
|
|
42
|
|
|
|
|
|
|
{
|
43
|
0
|
|
|
|
|
0
|
$self->read_string($args->{'from_string'});
|
44
|
|
|
|
|
|
|
}
|
45
|
|
|
|
|
|
|
elsif (defined $args->{'from_file'})
|
46
|
|
|
|
|
|
|
{
|
47
|
1
|
|
|
|
|
6
|
$self->read_file($args->{'from_file'}, $args->{'encoding'});
|
48
|
|
|
|
|
|
|
}
|
49
|
|
|
|
|
|
|
}
|
50
|
3
|
|
|
|
|
16
|
return $self;
|
51
|
|
|
|
|
|
|
}
|
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
sub read_file # warning: this doesn't squash BOMs
|
54
|
|
|
|
|
|
|
{
|
55
|
1
|
|
|
1
|
0
|
2
|
my ($self, $fn, $enc) = @_;
|
56
|
1
|
|
|
|
|
2
|
my $args = {};
|
57
|
1
|
50
|
|
|
|
5
|
if (defined $self->{'attlist'})
|
58
|
|
|
|
|
|
|
{
|
59
|
0
|
|
|
|
|
0
|
$args->{'attlist'} = $self->{'attlist'};
|
60
|
|
|
|
|
|
|
}
|
61
|
1
|
|
|
|
|
2
|
foreach (@{$self->parser->parse_file($fn, $enc,$args)})
|
|
1
|
|
|
|
|
7
|
|
62
|
|
|
|
|
|
|
{
|
63
|
13
|
|
|
|
|
36
|
$self->read_hash($_);
|
64
|
|
|
|
|
|
|
}
|
65
|
|
|
|
|
|
|
}
|
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
sub read_hash
|
68
|
|
|
|
|
|
|
{
|
69
|
14
|
|
|
14
|
0
|
21
|
my ($self, $hash, $args) = @_;
|
70
|
14
|
|
|
|
|
62
|
my $sense = WordLists::Sense->new($hash);
|
71
|
14
|
50
|
|
|
|
38
|
if (ref $sense eq 'WordLists::Sense')
|
72
|
|
|
|
|
|
|
{
|
73
|
14
|
|
|
|
|
36
|
$self->add_sense($sense);
|
74
|
|
|
|
|
|
|
}
|
75
|
|
|
|
|
|
|
}
|
76
|
|
|
|
|
|
|
sub read_array
|
77
|
|
|
|
|
|
|
{
|
78
|
1
|
|
|
1
|
0
|
3
|
my ($self, $array, $args) = @_;
|
79
|
1
|
|
|
|
|
1
|
foreach my $hash (@{$array})
|
|
1
|
|
|
|
|
3
|
|
80
|
|
|
|
|
|
|
{
|
81
|
1
|
50
|
|
|
|
4
|
if (ref $hash eq ref {})
|
82
|
|
|
|
|
|
|
{
|
83
|
1
|
|
|
|
|
5
|
$self->read_hash($hash) ;
|
84
|
|
|
|
|
|
|
}
|
85
|
|
|
|
|
|
|
else
|
86
|
|
|
|
|
|
|
{
|
87
|
0
|
|
|
|
|
0
|
warn 'Expecting $wl->read_array([{}])'
|
88
|
|
|
|
|
|
|
}
|
89
|
|
|
|
|
|
|
}
|
90
|
1
|
|
|
|
|
4
|
return $self;
|
91
|
|
|
|
|
|
|
}
|
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
sub read_string
|
94
|
|
|
|
|
|
|
{
|
95
|
1
|
|
|
1
|
0
|
1491
|
my ($self, $s, $args) = @_;
|
96
|
|
|
|
|
|
|
{
|
97
|
1
|
|
|
|
|
2
|
my $parsed = $self->parser->parse_string($s);
|
|
1
|
|
|
|
|
5
|
|
98
|
1
|
50
|
|
|
|
5
|
$parsed = [$parsed] if (ref ($parsed) eq ref {});
|
99
|
1
|
|
|
|
|
5
|
return $self->read_array($parsed, $args);
|
100
|
|
|
|
|
|
|
}
|
101
|
|
|
|
|
|
|
}
|
102
|
|
|
|
|
|
|
sub to_string
|
103
|
|
|
|
|
|
|
{
|
104
|
1
|
|
|
1
|
0
|
3063
|
my ($self, $args) = @_;
|
105
|
1
|
|
|
|
|
4
|
foreach(@sParsingParameters)
|
106
|
|
|
|
|
|
|
{
|
107
|
5
|
50
|
|
|
|
28
|
$args->{$_} = $self->{$_} unless defined $args->{$_};
|
108
|
|
|
|
|
|
|
}
|
109
|
1
|
|
|
|
|
5
|
my $senses = [map {$_->to_hash($args)} $self->get_all_senses];
|
|
1
|
|
|
|
|
7
|
|
110
|
1
|
|
|
|
|
5
|
return $self->serialiser->to_string($senses, $args);
|
111
|
|
|
|
|
|
|
}
|
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
sub compare_senses
|
114
|
|
|
|
|
|
|
{
|
115
|
0
|
|
|
0
|
0
|
0
|
my ($self, $sense_a, $sense_b, $args) = @_;
|
116
|
0
|
|
|
0
|
|
0
|
my $cmp = [
|
117
|
|
|
|
|
|
|
{'name' => 'hw', 'c' => sub{lc $_[0] cmp lc $_[1] }}
|
118
|
0
|
|
|
|
|
0
|
];
|
119
|
0
|
0
|
0
|
|
|
0
|
if (defined($args->{'sense_compare'}) and ref ($args->{'sense_compare'}) eq ref [])
|
120
|
|
|
|
|
|
|
{
|
121
|
0
|
|
|
|
|
0
|
$cmp = $args->{'sense_compare'};
|
122
|
|
|
|
|
|
|
}
|
123
|
0
|
|
|
|
|
0
|
foreach (@{$cmp})
|
|
0
|
|
|
|
|
0
|
|
124
|
|
|
|
|
|
|
{
|
125
|
0
|
|
|
|
|
0
|
my $result = 0;
|
126
|
0
|
0
|
|
|
|
0
|
if (defined $_->{'name'})
|
127
|
|
|
|
|
|
|
{
|
128
|
0
|
|
|
|
|
0
|
$result = &{$_->{'c'}}(
|
|
0
|
|
|
|
|
0
|
|
129
|
|
|
|
|
|
|
$sense_a->get($_->{'name'}),
|
130
|
|
|
|
|
|
|
$sense_b->get($_->{'name'})
|
131
|
|
|
|
|
|
|
);
|
132
|
|
|
|
|
|
|
}
|
133
|
|
|
|
|
|
|
else
|
134
|
|
|
|
|
|
|
{
|
135
|
0
|
|
|
|
|
0
|
$result = &{$_->{'c'}}(
|
|
0
|
|
|
|
|
0
|
|
136
|
|
|
|
|
|
|
$sense_a,
|
137
|
|
|
|
|
|
|
$sense_b
|
138
|
|
|
|
|
|
|
);
|
139
|
|
|
|
|
|
|
}
|
140
|
0
|
0
|
|
|
|
0
|
return $result unless $result == 0;
|
141
|
|
|
|
|
|
|
}
|
142
|
0
|
|
|
|
|
0
|
return 0;
|
143
|
|
|
|
|
|
|
}
|
144
|
|
|
|
|
|
|
sub sort
|
145
|
|
|
|
|
|
|
{
|
146
|
0
|
|
|
0
|
0
|
0
|
my ($self, $args) = @_;
|
147
|
0
|
|
|
|
|
0
|
$self->{'senses'} = [sort {$self->compare_senses($a, $b, $args)} @{$self->{'senses'}}];
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
148
|
0
|
|
|
|
|
0
|
return 1;
|
149
|
|
|
|
|
|
|
}
|
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
sub get_senses_for
|
152
|
|
|
|
|
|
|
{
|
153
|
24
|
|
|
24
|
1
|
53
|
my ($self, $sHW, $sPos) = @_;
|
154
|
24
|
|
|
|
|
28
|
my @senses;
|
155
|
24
|
100
|
33
|
|
|
176
|
if (defined $sPos and ($sPos or $self->{'significant_empty_pos'}))
|
|
|
50
|
66
|
|
|
|
|
156
|
|
|
|
|
|
|
{
|
157
|
22
|
100
|
|
|
|
102
|
@senses = @{$self->{'index'}{$sHW}{$sPos}} if defined $self->{'index'}{$sHW}{$sPos};
|
|
14
|
|
|
|
|
44
|
|
158
|
|
|
|
|
|
|
}
|
159
|
|
|
|
|
|
|
elsif (defined $sHW)
|
160
|
|
|
|
|
|
|
{
|
161
|
2
|
|
|
|
|
4
|
foreach my $sPos (keys %{$self->{'index'}{$sHW}})
|
|
2
|
|
|
|
|
11
|
|
162
|
|
|
|
|
|
|
{
|
163
|
2
|
|
|
|
|
3
|
push @senses, @{$self->{'index'}{$sHW}{$sPos}};
|
|
2
|
|
|
|
|
9
|
|
164
|
|
|
|
|
|
|
}
|
165
|
|
|
|
|
|
|
}
|
166
|
24
|
|
|
|
|
110
|
return @senses;
|
167
|
|
|
|
|
|
|
}
|
168
|
|
|
|
|
|
|
sub get_all_senses
|
169
|
|
|
|
|
|
|
{
|
170
|
3
|
|
|
3
|
1
|
10
|
my ($self) = @_;
|
171
|
3
|
50
|
|
|
|
11
|
return () unless defined $self->{'senses'};
|
172
|
3
|
|
|
|
|
36
|
return @{$self->{'senses'}};
|
|
3
|
|
|
|
|
18
|
|
173
|
|
|
|
|
|
|
}
|
174
|
|
|
|
|
|
|
sub get_current_attlist
|
175
|
|
|
|
|
|
|
{
|
176
|
0
|
|
|
0
|
0
|
0
|
my ($self) = @_;
|
177
|
0
|
0
|
|
|
|
0
|
if ($self->{'attlist'})
|
178
|
|
|
|
|
|
|
{
|
179
|
0
|
|
|
|
|
0
|
return @{$self->{'attlist'}};
|
|
0
|
|
|
|
|
0
|
|
180
|
|
|
|
|
|
|
}
|
181
|
0
|
|
|
|
|
0
|
return @{$self->{'default_attlist'}};
|
|
0
|
|
|
|
|
0
|
|
182
|
|
|
|
|
|
|
}
|
183
|
|
|
|
|
|
|
sub get_default_attlist
|
184
|
|
|
|
|
|
|
{
|
185
|
0
|
|
|
0
|
0
|
0
|
my ($self) = @_;
|
186
|
0
|
|
|
|
|
0
|
return @{$self->{'default_attlist'}}
|
|
0
|
|
|
|
|
0
|
|
187
|
|
|
|
|
|
|
}
|
188
|
|
|
|
|
|
|
sub add_sense
|
189
|
|
|
|
|
|
|
{
|
190
|
22
|
|
|
22
|
0
|
32
|
my ($self, $sense) = @_;
|
191
|
22
|
|
|
|
|
52
|
my $success = $self->_add_sense_to_list($sense);
|
192
|
22
|
|
|
|
|
61
|
$self->_index_sense($sense);
|
193
|
22
|
|
|
|
|
99
|
return $success;
|
194
|
|
|
|
|
|
|
}
|
195
|
|
|
|
|
|
|
sub _add_sense_to_list
|
196
|
|
|
|
|
|
|
{
|
197
|
22
|
|
|
22
|
|
27
|
my ($self, $sense) = @_;
|
198
|
22
|
|
|
|
|
43
|
return push (@{$self->{'senses'}}, $sense);
|
|
22
|
|
|
|
|
67
|
|
199
|
|
|
|
|
|
|
}
|
200
|
|
|
|
|
|
|
sub _index_sense
|
201
|
|
|
|
|
|
|
{
|
202
|
22
|
|
|
22
|
|
32
|
my ($self, $sense) = @_;
|
203
|
22
|
|
|
|
|
116
|
my $sHW = $sense->get_hw;
|
204
|
22
|
|
|
|
|
115
|
my $sPos = $sense->get_pos;
|
205
|
22
|
|
50
|
|
|
85
|
$sHW ||='';
|
206
|
22
|
|
50
|
|
|
48
|
$sPos ||='';
|
207
|
22
|
|
|
|
|
28
|
return push (@{$self->{'index'}{$sHW}{$sPos}}, $sense);
|
|
22
|
|
|
|
|
117
|
|
208
|
|
|
|
|
|
|
}
|
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
sub _rebuild_index
|
211
|
|
|
|
|
|
|
{
|
212
|
0
|
|
|
0
|
|
0
|
my ($self) = @_;
|
213
|
0
|
|
|
|
|
0
|
my $index = $self->{'index'};
|
214
|
0
|
|
|
|
|
0
|
$index = ();
|
215
|
0
|
|
|
|
|
0
|
foreach my $sense (@{$self->{'senses'}})
|
|
0
|
|
|
|
|
0
|
|
216
|
|
|
|
|
|
|
{
|
217
|
0
|
|
|
|
|
0
|
push (@{$index->{$sense->get_hw}{$sense->get_pos}}, $sense);
|
|
0
|
|
|
|
|
0
|
|
218
|
|
|
|
|
|
|
}
|
219
|
|
|
|
|
|
|
}
|
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
sub parser
|
224
|
|
|
|
|
|
|
{
|
225
|
3
|
|
|
3
|
1
|
6
|
my ($self, $parser) = @_;
|
226
|
3
|
100
|
|
|
|
13
|
if (defined $parser)
|
227
|
|
|
|
|
|
|
{
|
228
|
1
|
|
|
|
|
7
|
$self->{'#parser'} = $parser;
|
229
|
|
|
|
|
|
|
}
|
230
|
3
|
100
|
|
|
|
14
|
if (defined $self->{'#parser'})
|
231
|
|
|
|
|
|
|
{
|
232
|
2
|
|
|
|
|
8
|
return $self->{'#parser'}
|
233
|
|
|
|
|
|
|
}
|
234
|
|
|
|
|
|
|
else
|
235
|
|
|
|
|
|
|
{
|
236
|
4
|
|
|
4
|
|
31
|
use WordLists::Parse::Simple;
|
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
434
|
|
237
|
1
|
|
|
|
|
11
|
$self->{'#parser'} = WordLists::Parse::Simple->new();
|
238
|
|
|
|
|
|
|
}
|
239
|
|
|
|
|
|
|
}
|
240
|
|
|
|
|
|
|
sub serialiser
|
241
|
|
|
|
|
|
|
{
|
242
|
1
|
|
|
1
|
1
|
2
|
my ($self, $serialiser) = @_;
|
243
|
1
|
50
|
|
|
|
5
|
if (defined $serialiser)
|
244
|
|
|
|
|
|
|
{
|
245
|
0
|
|
|
|
|
0
|
$self->{'#serialiser'} = $serialiser;
|
246
|
|
|
|
|
|
|
}
|
247
|
1
|
50
|
|
|
|
4
|
if (defined $self->{'#serialiser'})
|
248
|
|
|
|
|
|
|
{
|
249
|
0
|
|
|
|
|
0
|
return $self->{'#serialiser'}
|
250
|
|
|
|
|
|
|
}
|
251
|
|
|
|
|
|
|
else
|
252
|
|
|
|
|
|
|
{
|
253
|
4
|
|
|
4
|
|
23
|
use WordLists::Serialise::Simple;
|
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
312
|
|
254
|
1
|
|
|
|
|
10
|
return $self->{'#serialiser'} = WordLists::Serialise::Simple->new();
|
255
|
|
|
|
|
|
|
}
|
256
|
|
|
|
|
|
|
}
|
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
1;
|
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=pod
|
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=head1 NAME
|
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
WordLists::WordList
|
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
my $wl = WordLists::WordList->new({from_file=>'unit1.txt'});
|
269
|
|
|
|
|
|
|
my @senses = $wl->get_senses_for('book', 'verb');
|
270
|
|
|
|
|
|
|
$wl->add_sense($new_sense);
|
271
|
|
|
|
|
|
|
print OUT $wl->to_string;
|
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
WordLists::WordList is a base class for a group of L objects.
|
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
=head3 new
|
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
The constructor creates an empty wordlist, and will populate the wordlist if you pass it parameters such as C and C (in which case, you can also specify C). These parameters should be passed in a hash ref (as per the example in the synopsis). You can populate the wordlist later, of course.
|
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=head3 parser
|
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
This is an accessor for the parser, and returns the parser and/or sets the parser if given one. The parser defaults to L, and the parser is created the first time it is requested (not when the L object is created, unless C or C is used).
|
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=head3 serialiser
|
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
This is an accessor for the serialiser, and returns the serialiser and/or sets the serialiser if given one. The serialiser defaults to L, and the serialiser is created the first time it is requested (not when the L object is created).
|
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
=head3 get_senses_for
|
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
This returns senses which match the parameters specified (hw, pos).
|
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
=head3 get_all_senses
|
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
This returns all senses; by default, it will return them in the order in which they were entered, but senses can be reordered using the C method.
|
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
=head1 TODO
|
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=head1 BUGS
|
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
Please use the Github issues tracker.
|
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=head1 LICENSE
|
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
Copyright 2011-2012 © Cambridge University Press. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
|
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=cut
|