line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WordLists::Common;
|
2
|
8
|
|
|
8
|
|
5349
|
use strict;
|
|
8
|
|
|
|
|
17
|
|
|
8
|
|
|
|
|
291
|
|
3
|
8
|
|
|
8
|
|
41
|
use warnings;
|
|
8
|
|
|
|
|
18
|
|
|
8
|
|
|
|
|
269
|
|
4
|
8
|
|
|
8
|
|
20332
|
use Unicode::Normalize; #provides NFD
|
|
8
|
|
|
|
|
28149
|
|
|
8
|
|
|
|
|
712
|
|
5
|
8
|
|
|
8
|
|
2236
|
use utf8;
|
|
8
|
|
|
|
|
35
|
|
|
8
|
|
|
|
|
61
|
|
6
|
8
|
|
|
8
|
|
2403
|
use WordLists::Base;
|
|
8
|
|
|
|
|
53
|
|
|
8
|
|
|
|
|
15278
|
|
7
|
|
|
|
|
|
|
# The module version is taken from WordLists::Base.
our $VERSION = $WordLists::Base::VERSION;

# NOTE(review): $AUTOLOAD is declared here but no AUTOLOAD sub is visible
# in this file - confirm whether it is still needed.
our $AUTOLOAD;

# Exporter set-up: nothing is exported by default, everything below is
# available on request.
require Exporter;
our @ISA       = ('Exporter');
our @EXPORT    = ();
our @EXPORT_OK = (
    qw(pretty_doubles pretty_singles pretty_endash norm_spacing),
    qw(custom_norm generic_norm_hw generic_norm_pos generic_minimal_pos),
    qw(uniques),
    qw(@sPosWords @sDefaultAttList @sDefiningAttlist @sParsingParameters),
    qw(reverse_punct),
);

# Exportable lists of attribute and parameter names.
our @sDefaultAttList    = ('hw', 'pos', 'def', 'eg');
our @sDefiningAttlist   = ('hw', 'pos');
our @sParsingParameters = (
    'is_header', 'field_sep', 'attlist', 'default_attlist', 'header_marker',
);

# Things which look like parts of speech: single-word labels first,
# followed by the multi-word ones.
our @sPosWords = (
    qw(n noun v verb),
    qw(adj adjective adv advb adverb),
    qw(conj conjunction excl exclamation expression),
    qw(pref prefix suffix),
    qw(det determiner quant quantifier postmodifier predeterminer),
    qw(abbreviation pv mv auxiliary aux),
    qw(prep preposition number ordinal cardinal),
    'ordinal number',
    'cardinal number',
    'plural noun',
    'compound noun',
    'phrasal verb',
    'modal verb',
    'auxiliary verb',
);
|
76
|
|
|
|
|
|
|
# pretty_doubles($text)
# Returns a copy of $text in which every double quote (straight or already
# curly) has been replaced by a typographic opening or closing double quote,
# chosen from its position and its neighbouring characters.
sub pretty_doubles($)
{
    my ($sText) = @_;
    for ($sText)
    {
        # Start from a clean slate: flatten all curly quotes to straight ones.
        s/[“"”]/"/g;
        # A quote at the very end closes; one at the very start opens.
        s/"$/”/g;
        s/^"/“/g;
        # Whitespace after a quote means it closes; whitespace before, it opens.
        s/"([\s\t\r\n])/”$1/g;
        s/([\s\t\r\n])"/$1“/g;
        # Directly inside brackets: opening after "(", closing before ")".
        s/([\(\{\[])"/$1“/g;
        s/"([\)\}\]])/”$1/g;
        # After a word character or sentence punctuation the quote closes.
        s/([\w\.\?\!])"/$1”/g;
        # Whatever is left is treated as an opening quote.
        s/"/“/g;
    }
    return $sText;
}
|
90
|
|
|
|
|
|
|
# reverse_punct($s)
# Maps an opening punctuation mark to its closing partner, e.g. "(" to ")"
# or the inverted question mark to "?".
# If $s is not a single known opener but consists of some text wrapped in
# <>, [], {} or (), a "/" is inserted after the opening character, so that
# "<b>" becomes "</b>".
# Any other value is returned unchanged.
sub reverse_punct ($)
{
    my ($sPunct) = @_;
    my %sCloserOf = (
        '(' => ')',
        '[' => ']',
        '{' => '}',
        '<' => '>',
        '‘' => '’',
        '“' => '”',
        '‹' => '›',
        '«' => '»',
        '¡' => '!',
        '¿' => '?',
    );

    # Simple case: the argument is itself a known opening mark.
    return $sCloserOf{$sPunct} if defined $sCloserOf{$sPunct};

    foreach my $sOpen ('<', '[', '{', '(')
    {
        my $sClose  = $sCloserOf{$sOpen};
        # opener, one or more non-closer characters (captured), closer
        my $sToFind = quotemeta($sOpen) . "([^" . quotemeta($sClose) . "]+)" . quotemeta($sClose);
        next unless $sPunct =~ m/^$sToFind$/;

        (my $sResult = $sPunct) =~ s/^$sToFind/$sOpen\/$1$sClose/g;
        return $sResult;
    }
    return $sPunct;
}
|
121
|
|
|
|
|
|
|
# pretty_singles($text)
# Returns a copy of $text in which every single quote / apostrophe (straight
# or already curly) has been replaced by a typographic single quote.
#   - closing quote: at the end of the string, before whitespace, before a
#     closing bracket, or after a word character, ".", "?" or "!";
#   - opening quote: at the start of the string, after whitespace, after an
#     opening bracket, and for anything not covered by the rules above.
sub pretty_singles($)
{
    my $s = shift;
    # Flatten existing curly quotes so each one is re-decided from context.
    $s =~ s/[‘'’]/'/g;
    $s =~ s/'$/’/g;
    $s =~ s/^'/‘/g;
    $s =~ s/'(\s)/’$1/g;
    $s =~ s/(\s)'/$1‘/g;
    $s =~ s/([\(\{\[])'/$1‘/g;
    $s =~ s/'([\)\}\]])/’$1/g;
    # An apostrophe inside or after a word ("That's", "lost!'") closes.
    $s =~ s/([\w\.\?\!])'/$1’/g;
    # Fallback: remaining quotes open. This must be the SINGLE left quote;
    # the double left quote belongs to pretty_doubles.
    $s =~ s/'/‘/g;
    return $s;
}
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
# pretty_endash($text)
# Returns a copy of $text in which a hyphen standing on its own is replaced
# by an en-dash. A hyphen qualifies when it has whitespace on both sides,
# whitespace before it at the end of the string, or whitespace after it at
# the start of the string.
# Hyphens inside words ("well-known") are left alone.
sub pretty_endash($)
{
    my $s = shift;
    # Lookarounds test the neighbouring whitespace without consuming it, so
    # consecutive spaced hyphens which share a space ("a - - b") are all
    # converted.
    $s =~ s/(?<=\s)-(?=\s)/–/g;
    $s =~ s/(?<=\s)-$/–/;
    $s =~ s/^-(?=\s)/–/;
    return $s;
}
|
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# norm_spacing($text)
# Returns a copy of $text with leading and trailing whitespace removed and
# every internal run of whitespace condensed to a single space.
sub norm_spacing($)
{
    my $s = shift;
    $s =~ s/^\s+//;
    $s =~ s/\s+$//;
    # /g is essential: without it only the first run would be condensed.
    $s =~ s/\s+/ /g;
    return $s;
}
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# custom_norm($text, \%options)
# Returns a normalised copy of $text. If the second argument is not a hash
# reference, $text is returned untouched. Recognised options, applied in
# this order:
#   lc          - true: lowercase the string.
#   uc          - true: uppercase the string (applied after, so wins over, lc).
#   trim_space  - true: strip leading/trailing whitespace and condense every
#                 whitespace run to a single space.
#   sb          - true: replace the word 'sb' with 'someone'.
#   sth         - true: replace the word 'sth' with 'something'.
#   brackets    - 'kill': delete bracketed text; 'ignore': delete only the
#                 "(" and ")" characters.
#   squares     - as brackets, but for "[" and "]".
#   accents     - true: strip combining marks after canonical decomposition.
#   alnum_only  - true: delete everything except letters and digits.
sub custom_norm
{
    my ($s, $args) = @_;
    return $s unless ref($args) eq 'HASH';

    $s = lc $s if $args->{'lc'};
    $s = uc $s if $args->{'uc'};

    if ($args->{'trim_space'})
    {
        $s =~ s/^\s+//;
        $s =~ s/\s+$//;
        $s =~ s/\s+/ /g;
    }

    $s =~ s/\bsb\b/someone/g    if $args->{'sb'};
    $s =~ s/\bsth\b/something/g if $args->{'sth'};

    # Default the string-valued options so that omitting them does not
    # trigger "uninitialized value in string eq" warnings.
    my $sBrackets = defined $args->{'brackets'} ? $args->{'brackets'} : '';
    my $sSquares  = defined $args->{'squares'}  ? $args->{'squares'}  : '';

    # The 'kill' patterns are greedy: everything from the first opening
    # bracket to the last closing bracket on a line is removed.
    $s =~ s/\(.*\)//g if $sBrackets eq 'kill';
    $s =~ tr/()//d    if $sBrackets eq 'ignore';
    $s =~ s/\[.*\]//g if $sSquares eq 'kill';
    $s =~ tr/[]//d    if $sSquares eq 'ignore';

    if ($args->{'accents'})
    {
        # Decompose with Unicode::Normalize::NFD, then drop the combining
        # marks: accents go, the underlying characters stay.
        $s = NFD($s);
        $s =~ s/\pM//g;
    }

    # can't and can`t should match. So, unfortunately, does cant
    $s =~ s/[^[:alpha:][:digit:]]//g if $args->{'alnum_only'};

    return $s;
}
|
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
# generic_norm_hw($headword)
# Reduces a headword to a bare comparison key: lowercased, bracketed text
# removed, 'sb'/'sth' expanded to 'someone'/'something', a leading 'the '
# dropped, accents stripped, and finally everything except letters and
# digits deleted.
sub generic_norm_hw($)
{
    my ($sRaw) = @_;
    my $sKey = lc $sRaw;
    for ($sKey)
    {
        s/\(.*\)//g;
        s/\bsb\b/someone/g;
        s/\bsth\b/something/g;
        s/^the //g;
        # Decompose via Unicode::Normalize::NFD, then drop the combining
        # marks so that only the underlying characters remain.
        $_ = NFD($_);
        s/\pM//og;
        # can't and can`t should match. So, unfortunately, does cant
        s/[^[:alpha:][:digit:]]//g;
        s/_//g;
    }
    return $sKey;
}
|
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
sub generic_norm_pos($)
|
201
|
|
|
|
|
|
|
{
|
202
|
13
|
|
|
13
|
1
|
18
|
my $sPos = lc shift;
|
203
|
13
|
|
|
|
|
17
|
$sPos =~ tr/\-\t\r\n \./ /;
|
204
|
|
|
|
|
|
|
|
205
|
13
|
|
|
|
|
24
|
$sPos = norm_spacing ($sPos);
|
206
|
|
|
|
|
|
|
|
207
|
13
|
|
|
|
|
17
|
$sPos =~ s<\b(pl|plural)\b>
|
208
|
|
|
|
|
|
|
;
|
209
|
|
|
|
|
|
|
|
210
|
13
|
|
|
|
|
21
|
$sPos =~ s<\b(comp|compound)\b>
|
211
|
|
|
|
|
|
|
;
|
212
|
|
|
|
|
|
|
|
213
|
13
|
|
|
|
|
30
|
$sPos =~ s<\b(n|noun)\b>
|
214
|
|
|
|
|
|
|
;
|
215
|
|
|
|
|
|
|
|
216
|
13
|
|
|
|
|
24
|
$sPos =~ s<\b(a|adj|adjective)\b>
|
217
|
|
|
|
|
|
|
;
|
218
|
|
|
|
|
|
|
|
219
|
13
|
|
|
|
|
19
|
$sPos =~ s<\b(adv|advb|adverb)\b>
|
220
|
|
|
|
|
|
|
;
|
221
|
|
|
|
|
|
|
|
222
|
13
|
|
|
|
|
22
|
$sPos =~ s<\b(preposition|prep)\b>
|
223
|
|
|
|
|
|
|
;
|
224
|
|
|
|
|
|
|
|
225
|
13
|
|
|
|
|
15
|
$sPos =~ s<\b(quant|quantifier|q)\b>
|
226
|
|
|
|
|
|
|
;
|
227
|
|
|
|
|
|
|
|
228
|
13
|
|
|
|
|
15
|
$sPos =~ s<\b(pre)(det|determiner|d)\b>
|
229
|
|
|
|
|
|
|
<$1determiner>;
|
230
|
|
|
|
|
|
|
|
231
|
13
|
|
|
|
|
13
|
$sPos =~ s<\b(pronoun|pron)\b>
|
232
|
|
|
|
|
|
|
;
|
233
|
|
|
|
|
|
|
|
234
|
13
|
|
|
|
|
50
|
$sPos =~ s<\b(v|verb)\b>
|
235
|
|
|
|
|
|
|
;
|
236
|
|
|
|
|
|
|
|
237
|
13
|
|
|
|
|
23
|
$sPos =~ s<\b(phr|phrase)\b>
|
238
|
|
|
|
|
|
|
;
|
239
|
|
|
|
|
|
|
|
240
|
13
|
|
|
|
|
15
|
$sPos =~ s<\b(exp|expr|expression)\b>
|
241
|
|
|
|
|
|
|
;
|
242
|
|
|
|
|
|
|
|
243
|
13
|
|
|
|
|
15
|
$sPos =~ s<\b(mod|modal)\b>
|
244
|
|
|
|
|
|
|
;
|
245
|
|
|
|
|
|
|
|
246
|
13
|
|
|
|
|
15
|
$sPos =~ s<\bphrase\s+verb\b>
|
247
|
|
|
|
|
|
|
;
|
248
|
|
|
|
|
|
|
|
249
|
13
|
|
|
|
|
13
|
$sPos =~ s<\bp\s*verb\b>
|
250
|
|
|
|
|
|
|
;
|
251
|
|
|
|
|
|
|
|
252
|
13
|
|
|
|
|
14
|
$sPos =~ s<\b(prefix|pref)\b>
|
253
|
|
|
|
|
|
|
;
|
254
|
|
|
|
|
|
|
|
255
|
13
|
|
|
|
|
15
|
$sPos =~ s<\b(suffix|suff)\b>
|
256
|
|
|
|
|
|
|
;
|
257
|
|
|
|
|
|
|
|
258
|
13
|
|
|
|
|
23
|
$sPos =~ s<\b(short|abbreviated|abbreviation|abbrev|abbr)( form)?\b>
|
259
|
|
|
|
|
|
|
;
|
260
|
|
|
|
|
|
|
|
261
|
13
|
|
|
|
|
21
|
$sPos =~ s<\b(conj|conjunction)\b>
|
262
|
|
|
|
|
|
|
;
|
263
|
|
|
|
|
|
|
|
264
|
13
|
|
|
|
|
14
|
$sPos =~ s<\b(int|interj|inter|interjection)\b>
|
265
|
|
|
|
|
|
|
;
|
266
|
|
|
|
|
|
|
|
267
|
13
|
|
|
|
|
18
|
$sPos =~ s<\b(ex|excl|exclam|exclamation)\b>
|
268
|
|
|
|
|
|
|
;
|
269
|
|
|
|
|
|
|
|
270
|
13
|
|
|
|
|
43
|
return $sPos;
|
271
|
|
|
|
|
|
|
}
|
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
sub generic_minimal_pos($)
|
275
|
|
|
|
|
|
|
{
|
276
|
0
|
|
|
0
|
1
|
|
my $sPos = generic_norm_pos(shift);
|
277
|
|
|
|
|
|
|
|
278
|
0
|
|
|
|
|
|
$sPos =~ s<(adverb)>
|
279
|
|
|
|
|
|
|
;
|
280
|
0
|
|
|
|
|
|
$sPos =~ s<(adjective)>
|
281
|
|
|
|
|
|
|
;
|
282
|
0
|
|
|
|
|
|
$sPos =~ s<(phrasal)>
|
283
|
|
|
|
|
|
|
;
|
284
|
0
|
|
|
|
|
|
$sPos =~ s<(modal)>
|
285
|
|
|
|
|
|
|
<>;
|
286
|
0
|
|
|
|
|
|
$sPos =~ s<(verb)>
|
287
|
|
|
|
|
|
|
;
|
288
|
0
|
|
|
|
|
|
$sPos =~ s<(noun)>
|
289
|
|
|
|
|
|
|
;
|
290
|
0
|
|
|
|
|
|
$sPos =~ s<(adjective)>
|
291
|
|
|
|
|
|
|
;
|
292
|
0
|
|
|
|
|
|
$sPos =~ s<(preposition)>
|
293
|
|
|
|
|
|
|
;
|
294
|
0
|
|
|
|
|
|
$sPos =~ s<(exclamation)>
|
295
|
|
|
|
|
|
|
;
|
296
|
0
|
|
|
|
|
|
$sPos =~tr/ //d;
|
297
|
0
|
|
|
|
|
|
return $sPos;
|
298
|
|
|
|
|
|
|
}
|
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
# uniques(@list)
# Returns the members of @list with duplicates removed, keeping the first
# occurrence of each value and preserving the original order.
sub uniques
{
    my %bAlreadySeen;
    # The post-increment yields the old count, which is false only the first
    # time a value is met.
    return grep { !$bAlreadySeen{$_}++ } @_;
}
|
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
1;
|
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=pod
|
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=head1 NAME
|
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
WordLists::Common
|
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
use WordLists::Common qw(pretty_doubles pretty_singles);
|
318
|
|
|
|
|
|
|
print pretty_doubles (pretty_singles (
|
319
|
|
|
|
|
|
|
qq{"That's right," she said, "I was told to 'get lost!'".}
|
320
|
|
|
|
|
|
|
) );
|
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
This provides common functions and values of relevance to wordlists - such as normalising parts of speech and typographic dashes and quotes. Exportable functions and values include:
|
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
=over
|
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=item *
|
329
|
|
|
|
|
|
|
C<@sPosWords>, a list of things which look like parts of speech (to help parsing things like "head verb", "head up", "head noun")
|
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=item *
|
332
|
|
|
|
|
|
|
A function C<pretty_endash> replacing space + hyphen + space with space + en-dash + space.
|
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
=item *
|
335
|
|
|
|
|
|
|
A function C<pretty_doubles> replacing double quotes with 'smart' double quotes.
|
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=item *
|
338
|
|
|
|
|
|
|
A function C<pretty_singles> replacing apostrophe/single-quote with 'smart' single quotes.
|
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=item *
|
341
|
|
|
|
|
|
|
A function C<norm_spacing> for normalising whitespace (it trims leading and trailing space).
|
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
=item *
|
344
|
|
|
|
|
|
|
A function C<custom_norm> which takes several options:
|
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
=over
|
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
=item *
|
349
|
|
|
|
|
|
|
C<lc> - if true, lowercases the string.
|
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=item *
|
352
|
|
|
|
|
|
|
C<uc> - if true, uppercases the string. Overrides C<lc>.
|
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=item *
|
355
|
|
|
|
|
|
|
C<trim_space> - if true, removes initial and final space, and also condenses repeating white space to a single C<\x20>.
|
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=item *
|
358
|
|
|
|
|
|
|
C<alnum_only> - if true, removes characters other than alphabetic ones or digits.
|
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
=item *
|
361
|
|
|
|
|
|
|
C<brackets> - if this is 'kill', removes the contents of any C<()> brackets; if 'ignore', removes the brackets themselves.
|
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=item *
|
364
|
|
|
|
|
|
|
C<squares> - if this is 'kill', removes the contents of any C<[]> brackets; if 'ignore', removes the brackets themselves.
|
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=item *
|
367
|
|
|
|
|
|
|
C<accents> - if true, removes accents and modifier characters from letters.
|
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=item *
|
370
|
|
|
|
|
|
|
C<sb> - if true, replaces 'sb' with 'someone'.
|
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=item *
|
373
|
|
|
|
|
|
|
C<sth> - if true, replaces 'sth' with 'something'.
|
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
=back
|
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=item *
|
378
|
|
|
|
|
|
|
A function C<generic_norm_hw> which returns a word without accents or characters other than [a-z0-9].
|
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=item *
|
381
|
|
|
|
|
|
|
A function C<generic_norm_pos> for normalising parts of speech so that 'v' and 'verb' match.
|
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=item *
|
384
|
|
|
|
|
|
|
A function C<generic_minimal_pos> which will normalise parts of speech and reduce them to 'minimal' ones.
|
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=item *
|
387
|
|
|
|
|
|
|
A function C<uniques> which will reduce a list to the unique members.
|
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=back
|
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=head1 BUGS
|
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
Please use the Github issues tracker.
|
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
=head1 LICENSE
|
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
Copyright 2011-2012 © Cambridge University Press. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
|
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=cut
|