line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
=head1 NAME |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
Lingua::EN::NameGrammar - grammar tree for Lingua::EN::NameParse |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 SYNOPSIS |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Internal functions called from NameParse.pm module |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 DESCRIPTION |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Grammar tree of personal name syntax for module. |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
The grammar defined here is for use with the Parse::RecDescent module. |
14
|
|
|
|
|
|
|
Note that parsing is done depth first, meaning match the shortest string first. |
15
|
|
|
|
|
|
|
To avoid premature matches, when one rule is a sub set of another longer rule, |
16
|
|
|
|
|
|
|
it must appear after the longer rule. See the Parse::RecDescent documentation |
17
|
|
|
|
|
|
|
for more details. |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 AUTHOR |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
NameParse::Grammar was written by Kim Ryan . |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
Copyright (c) 2016 Kim Ryan. All rights reserved. |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
29
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=cut |
33
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
package Lingua::EN::NameParse::Grammar; |
36
|
2
|
|
|
2
|
|
13
|
use strict; |
|
2
|
|
|
|
|
13
|
|
|
2
|
|
|
|
|
54
|
|
37
|
2
|
|
|
2
|
|
10
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
1270
|
|
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
our $VERSION = '1.37'; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Rules that define valid orderings of a names components |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my $rules_start = q{ full_name : }; |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
my $rules_joint_names = |
47
|
|
|
|
|
|
|
q{ |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# A (?) refers to an optional component, occurring 0 or more times. |
50
|
|
|
|
|
|
|
# Optional items are returned as an array, which for our case will |
51
|
|
|
|
|
|
|
# always consist of one element, when they exist. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
title given_name surname conjunction title given_name surname non_matching(?) |
54
|
|
|
|
|
|
|
{ |
55
|
|
|
|
|
|
|
# block of code to define actions upon successful completion of a |
56
|
|
|
|
|
|
|
# 'production' or rule |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# Two separate people |
59
|
|
|
|
|
|
|
$return = |
60
|
|
|
|
|
|
|
{ |
61
|
|
|
|
|
|
|
# Parse::RecDescent lets you return a single scalar, which we use as |
62
|
|
|
|
|
|
|
# an anonymous hash reference |
63
|
|
|
|
|
|
|
title_1 => $item[1], |
64
|
|
|
|
|
|
|
given_name_1 => $item[2], |
65
|
|
|
|
|
|
|
surname_1 => $item[3], |
66
|
|
|
|
|
|
|
conjunction_1 => $item[4], |
67
|
|
|
|
|
|
|
title_2 => $item[5], |
68
|
|
|
|
|
|
|
given_name_2 => $item[6], |
69
|
|
|
|
|
|
|
surname_2 => $item[7], |
70
|
|
|
|
|
|
|
non_matching => $item[8][0], |
71
|
|
|
|
|
|
|
number => 2, |
72
|
|
|
|
|
|
|
type => 'Mr_John_Smith_&_Ms_Mary_Jones' |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
| |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
title initials surname conjunction title initials surname non_matching(?) |
79
|
|
|
|
|
|
|
{ |
80
|
|
|
|
|
|
|
$return = |
81
|
|
|
|
|
|
|
{ |
82
|
|
|
|
|
|
|
title_1 => $item[1], |
83
|
|
|
|
|
|
|
initials_1 => $item[2], |
84
|
|
|
|
|
|
|
surname_1 => $item[3], |
85
|
|
|
|
|
|
|
conjunction_1 => $item[4], |
86
|
|
|
|
|
|
|
title_2 => $item[5], |
87
|
|
|
|
|
|
|
initials_2 => $item[6], |
88
|
|
|
|
|
|
|
surname_2 => $item[7], |
89
|
|
|
|
|
|
|
non_matching => $item[8][0], |
90
|
|
|
|
|
|
|
number => 2, |
91
|
|
|
|
|
|
|
type => 'Mr_A_Smith_&_Ms_B_Jones' |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
| |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
title initials conjunction title initials surname non_matching(?) |
97
|
|
|
|
|
|
|
{ |
98
|
|
|
|
|
|
|
# Two related people, own initials, shared surname |
99
|
|
|
|
|
|
|
$return = |
100
|
|
|
|
|
|
|
{ |
101
|
|
|
|
|
|
|
title_1 => $item[1], |
102
|
|
|
|
|
|
|
initials_1 => $item[2], |
103
|
|
|
|
|
|
|
conjunction_1 => $item[3], |
104
|
|
|
|
|
|
|
title_2 => $item[4], |
105
|
|
|
|
|
|
|
initials_2 => $item[5], |
106
|
|
|
|
|
|
|
surname_1 => $item[6], |
107
|
|
|
|
|
|
|
non_matching => $item[7][0], |
108
|
|
|
|
|
|
|
number => 2, |
109
|
|
|
|
|
|
|
type => 'Mr_A_&_Ms_B_Smith' |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
| |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
title initials conjunction initials surname non_matching(?) |
115
|
|
|
|
|
|
|
{ |
116
|
|
|
|
|
|
|
# Two related people, shared title, separate initials, |
117
|
|
|
|
|
|
|
# shared surname. Example, father and son, sisters |
118
|
|
|
|
|
|
|
$return = |
119
|
|
|
|
|
|
|
{ |
120
|
|
|
|
|
|
|
title_1 => $item[1], |
121
|
|
|
|
|
|
|
initials_1 => $item[2], |
122
|
|
|
|
|
|
|
conjunction_1 => $item[3], |
123
|
|
|
|
|
|
|
initials_2 => $item[4], |
124
|
|
|
|
|
|
|
surname_1 => $item[5], |
125
|
|
|
|
|
|
|
non_matching => $item[6][0], |
126
|
|
|
|
|
|
|
number => 2, |
127
|
|
|
|
|
|
|
type => 'Mr_A_&_B_Smith' |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
| |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
title conjunction title initials conjunction initials surname non_matching(?) |
134
|
|
|
|
|
|
|
{ |
135
|
|
|
|
|
|
|
# Two related people, own initials, shared surname |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
$return = |
138
|
|
|
|
|
|
|
{ |
139
|
|
|
|
|
|
|
title_1 => $item[1], |
140
|
|
|
|
|
|
|
conjunction_1 => $item[2], |
141
|
|
|
|
|
|
|
title_2 => $item[3], |
142
|
|
|
|
|
|
|
initials_1 => $item[4], |
143
|
|
|
|
|
|
|
conjunction_2 => $item[5], |
144
|
|
|
|
|
|
|
initials_2 => $item[6], |
145
|
|
|
|
|
|
|
surname_1 => $item[7], |
146
|
|
|
|
|
|
|
non_matching => $item[8][0], |
147
|
|
|
|
|
|
|
number => 2, |
148
|
|
|
|
|
|
|
type => 'Mr_&_Ms_A_&_B_Smith' |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
| |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
title conjunction title initials surname non_matching(?) |
155
|
|
|
|
|
|
|
{ |
156
|
|
|
|
|
|
|
# Two related people, shared initials, shared surname |
157
|
|
|
|
|
|
|
$return = |
158
|
|
|
|
|
|
|
{ |
159
|
|
|
|
|
|
|
title_1 => $item[1], |
160
|
|
|
|
|
|
|
conjunction_1 => $item[2], |
161
|
|
|
|
|
|
|
title_2 => $item[3], |
162
|
|
|
|
|
|
|
initials_1 => $item[4], |
163
|
|
|
|
|
|
|
surname_1 => $item[5], |
164
|
|
|
|
|
|
|
non_matching => $item[6][0], |
165
|
|
|
|
|
|
|
number => 2, |
166
|
|
|
|
|
|
|
type => 'Mr_&_Ms_A_Smith' |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
| |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
given_name surname conjunction given_name surname non_matching(?) |
172
|
|
|
|
|
|
|
{ |
173
|
|
|
|
|
|
|
$return = |
174
|
|
|
|
|
|
|
{ |
175
|
|
|
|
|
|
|
given_name_1 => $item[1], |
176
|
|
|
|
|
|
|
surname_1 => $item[2], |
177
|
|
|
|
|
|
|
conjunction_1 => $item[3], |
178
|
|
|
|
|
|
|
given_name_2 => $item[4], |
179
|
|
|
|
|
|
|
surname_2 => $item[5], |
180
|
|
|
|
|
|
|
non_matching => $item[6][0], |
181
|
|
|
|
|
|
|
number => 2, |
182
|
|
|
|
|
|
|
type => 'John_Smith_&_Mary_Jones' |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
| |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
initials surname conjunction initials surname non_matching(?) |
188
|
|
|
|
|
|
|
{ |
189
|
|
|
|
|
|
|
$return = |
190
|
|
|
|
|
|
|
{ |
191
|
|
|
|
|
|
|
initials_1 => $item[1], |
192
|
|
|
|
|
|
|
surname_1 => $item[2], |
193
|
|
|
|
|
|
|
conjunction_1 => $item[3], |
194
|
|
|
|
|
|
|
initials_2 => $item[4], |
195
|
|
|
|
|
|
|
surname_2 => $item[5], |
196
|
|
|
|
|
|
|
non_matching => $item[6][0], |
197
|
|
|
|
|
|
|
number => 2, |
198
|
|
|
|
|
|
|
type => 'A_Smith_&_B_Jones' |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
| |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
given_name conjunction given_name surname non_matching(?) |
204
|
|
|
|
|
|
|
{ |
205
|
|
|
|
|
|
|
$return = |
206
|
|
|
|
|
|
|
{ |
207
|
|
|
|
|
|
|
given_name_1 => $item[1], |
208
|
|
|
|
|
|
|
conjunction_1 => $item[2], |
209
|
|
|
|
|
|
|
given_name_2 => $item[3], |
210
|
|
|
|
|
|
|
surname_2 => $item[4], |
211
|
|
|
|
|
|
|
non_matching => $item[5][0], |
212
|
|
|
|
|
|
|
number => 2, |
213
|
|
|
|
|
|
|
type => 'John_&_Mary_Smith' |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
| |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
}; |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
my $rules_single_names = |
221
|
|
|
|
|
|
|
q{ |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
precursor(?) title given_name_standard middle_name surname suffix(?) non_matching(?) |
224
|
|
|
|
|
|
|
{ |
225
|
|
|
|
|
|
|
$return = |
226
|
|
|
|
|
|
|
{ |
227
|
|
|
|
|
|
|
precursor => $item[1][0], |
228
|
|
|
|
|
|
|
title_1 => $item[2], |
229
|
|
|
|
|
|
|
given_name_1 => $item[3], |
230
|
|
|
|
|
|
|
middle_name => $item[4], |
231
|
|
|
|
|
|
|
surname_1 => $item[5], |
232
|
|
|
|
|
|
|
suffix => $item[6][0], |
233
|
|
|
|
|
|
|
non_matching => $item[7][0], |
234
|
|
|
|
|
|
|
number => 1, |
235
|
|
|
|
|
|
|
type => 'Mr_John_Adam_Smith' |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
| |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
precursor(?) title given_name_standard single_initial surname suffix(?) non_matching(?) |
241
|
|
|
|
|
|
|
{ |
242
|
|
|
|
|
|
|
$return = |
243
|
|
|
|
|
|
|
{ |
244
|
|
|
|
|
|
|
precursor => $item[1][0], |
245
|
|
|
|
|
|
|
title_1 => $item[2], |
246
|
|
|
|
|
|
|
given_name_1 => $item[3], |
247
|
|
|
|
|
|
|
initials_1 => $item[4], |
248
|
|
|
|
|
|
|
surname_1 => $item[5], |
249
|
|
|
|
|
|
|
suffix => $item[6][0], |
250
|
|
|
|
|
|
|
non_matching => $item[7][0], |
251
|
|
|
|
|
|
|
number => 1, |
252
|
|
|
|
|
|
|
type => 'Mr_John_A_Smith' |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
| |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
precursor(?) title given_name surname suffix(?) non_matching(?) |
258
|
|
|
|
|
|
|
{ |
259
|
|
|
|
|
|
|
$return = |
260
|
|
|
|
|
|
|
{ |
261
|
|
|
|
|
|
|
precursor => $item[1][0], |
262
|
|
|
|
|
|
|
title_1 => $item[2], |
263
|
|
|
|
|
|
|
given_name_1 => $item[3], |
264
|
|
|
|
|
|
|
surname_1 => $item[4], |
265
|
|
|
|
|
|
|
suffix => $item[5][0], |
266
|
|
|
|
|
|
|
non_matching => $item[6][0], |
267
|
|
|
|
|
|
|
number => 1, |
268
|
|
|
|
|
|
|
type => 'Mr_John_Smith' |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
| |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
precursor(?) title initials surname suffix(?) non_matching(?) |
274
|
|
|
|
|
|
|
{ |
275
|
|
|
|
|
|
|
$return = |
276
|
|
|
|
|
|
|
{ |
277
|
|
|
|
|
|
|
precursor => $item[1][0], |
278
|
|
|
|
|
|
|
title_1 => $item[2], |
279
|
|
|
|
|
|
|
initials_1 => $item[3], |
280
|
|
|
|
|
|
|
surname_1 => $item[4], |
281
|
|
|
|
|
|
|
suffix => $item[5][0], |
282
|
|
|
|
|
|
|
non_matching => $item[6][0], |
283
|
|
|
|
|
|
|
number => 1, |
284
|
|
|
|
|
|
|
type => 'Mr_A_Smith' |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
| |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
precursor(?) given_name_standard middle_name surname suffix(?) non_matching(?) |
290
|
|
|
|
|
|
|
{ |
291
|
|
|
|
|
|
|
$return = |
292
|
|
|
|
|
|
|
{ |
293
|
|
|
|
|
|
|
precursor => $item[1][0], |
294
|
|
|
|
|
|
|
given_name_1 => $item[2], |
295
|
|
|
|
|
|
|
middle_name => $item[3], |
296
|
|
|
|
|
|
|
surname_1 => $item[4], |
297
|
|
|
|
|
|
|
suffix => $item[5][0], |
298
|
|
|
|
|
|
|
non_matching => $item[6][0], |
299
|
|
|
|
|
|
|
number => 1, |
300
|
|
|
|
|
|
|
type => 'John_Adam_Smith' |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
| |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
precursor(?) given_name_standard single_initial surname suffix(?) non_matching(?) |
306
|
|
|
|
|
|
|
{ |
307
|
|
|
|
|
|
|
$return = |
308
|
|
|
|
|
|
|
{ |
309
|
|
|
|
|
|
|
precursor => $item[1][0], |
310
|
|
|
|
|
|
|
given_name_1 => $item[2], |
311
|
|
|
|
|
|
|
initials_1 => $item[3], |
312
|
|
|
|
|
|
|
surname_1 => $item[4], |
313
|
|
|
|
|
|
|
suffix => $item[5][0], |
314
|
|
|
|
|
|
|
non_matching => $item[6][0], |
315
|
|
|
|
|
|
|
number => 1, |
316
|
|
|
|
|
|
|
type => 'John_A_Smith' |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
| |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
precursor(?) single_initial middle_name surname suffix(?) non_matching(?) |
322
|
|
|
|
|
|
|
{ |
323
|
|
|
|
|
|
|
$return = |
324
|
|
|
|
|
|
|
{ |
325
|
|
|
|
|
|
|
precursor => $item[1][0], |
326
|
|
|
|
|
|
|
initials_1 => $item[2], |
327
|
|
|
|
|
|
|
middle_name => $item[3], |
328
|
|
|
|
|
|
|
surname_1 => $item[4], |
329
|
|
|
|
|
|
|
suffix => $item[5][0], |
330
|
|
|
|
|
|
|
non_matching => $item[6][0], |
331
|
|
|
|
|
|
|
number => 1, |
332
|
|
|
|
|
|
|
type => 'J_Adam_Smith' |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
| |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
precursor(?) given_name surname suffix(?) non_matching(?) |
338
|
|
|
|
|
|
|
{ |
339
|
|
|
|
|
|
|
$return = |
340
|
|
|
|
|
|
|
{ |
341
|
|
|
|
|
|
|
precursor => $item[1][0], |
342
|
|
|
|
|
|
|
given_name_1 => $item[2], |
343
|
|
|
|
|
|
|
surname_1 => $item[3], |
344
|
|
|
|
|
|
|
suffix => $item[4][0], |
345
|
|
|
|
|
|
|
non_matching => $item[5][0], |
346
|
|
|
|
|
|
|
number => 1, |
347
|
|
|
|
|
|
|
type => 'John_Smith' |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
| |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
precursor(?) initials surname suffix(?) non_matching(?) |
353
|
|
|
|
|
|
|
{ |
354
|
|
|
|
|
|
|
$return = |
355
|
|
|
|
|
|
|
{ |
356
|
|
|
|
|
|
|
precursor => $item[1][0], |
357
|
|
|
|
|
|
|
initials_1 => $item[2], |
358
|
|
|
|
|
|
|
surname_1 => $item[3], |
359
|
|
|
|
|
|
|
suffix => $item[4][0], |
360
|
|
|
|
|
|
|
non_matching => $item[5][0], |
361
|
|
|
|
|
|
|
number => 1, |
362
|
|
|
|
|
|
|
type => 'A_Smith' |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
| |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
given_name_standard non_matching(?) |
368
|
|
|
|
|
|
|
{ |
369
|
|
|
|
|
|
|
$return = |
370
|
|
|
|
|
|
|
{ |
371
|
|
|
|
|
|
|
given_name_1 => $item[1], |
372
|
|
|
|
|
|
|
non_matching => $item[2][0], |
373
|
|
|
|
|
|
|
number => 1, |
374
|
|
|
|
|
|
|
type => 'John' |
375
|
|
|
|
|
|
|
} |
376
|
|
|
|
|
|
|
} |
377
|
|
|
|
|
|
|
| |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
non_matching(?) |
380
|
|
|
|
|
|
|
{ |
381
|
|
|
|
|
|
|
$return = |
382
|
|
|
|
|
|
|
{ |
383
|
|
|
|
|
|
|
non_matching => $item[1][0], |
384
|
|
|
|
|
|
|
number => 0, |
385
|
|
|
|
|
|
|
type => 'unknown' |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
}; |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
391
|
|
|
|
|
|
|
# Individual components that a name can be composed from. Components are |
392
|
|
|
|
|
|
|
# expressed as literals or Perl regular expressions. |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
my $titles = |
396
|
|
|
|
|
|
|
q{ |
397
|
|
|
|
|
|
|
title : /(MR|MS|M\/S|MRS|MISS|DR) / |
398
|
|
|
|
|
|
|
}; |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
my $extended_titles = |
401
|
|
|
|
|
|
|
q{ |
402
|
|
|
|
|
|
|
| |
403
|
|
|
|
|
|
|
/( |
404
|
|
|
|
|
|
|
SIR| |
405
|
|
|
|
|
|
|
MESSRS| # Plural or Mr |
406
|
|
|
|
|
|
|
MADAME?| |
407
|
|
|
|
|
|
|
MME| # Madame |
408
|
|
|
|
|
|
|
MISTER| |
409
|
|
|
|
|
|
|
MASTER| |
410
|
|
|
|
|
|
|
MAST| |
411
|
|
|
|
|
|
|
MS?GR| # Monsignor |
412
|
|
|
|
|
|
|
COUNT| |
413
|
|
|
|
|
|
|
COUNTESS| |
414
|
|
|
|
|
|
|
DUKE| |
415
|
|
|
|
|
|
|
DUCHESS| |
416
|
|
|
|
|
|
|
LORD| |
417
|
|
|
|
|
|
|
LADY| |
418
|
|
|
|
|
|
|
MARQUESS| |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
# Medical |
421
|
|
|
|
|
|
|
DOCTOR|SISTER|MATRON| |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
# Legal |
424
|
|
|
|
|
|
|
JUDGE| |
425
|
|
|
|
|
|
|
JUSTICE| |
426
|
|
|
|
|
|
|
MAGISTRATE| |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
# Police |
429
|
|
|
|
|
|
|
DET|INSP|CONST| |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
# Military |
432
|
|
|
|
|
|
|
BRIGDIER|BRIG| |
433
|
|
|
|
|
|
|
CAPTAIN|CAPT| |
434
|
|
|
|
|
|
|
COLONEL|COL| |
435
|
|
|
|
|
|
|
COMMANDER IN CHIEF|COMMANDER| |
436
|
|
|
|
|
|
|
COMMODORE| |
437
|
|
|
|
|
|
|
CDR| # Commander, Commodore |
438
|
|
|
|
|
|
|
FIELD\ MARSHALL| |
439
|
|
|
|
|
|
|
FLIGHT\ OFFICER| FL OFF| |
440
|
|
|
|
|
|
|
FLIGHT\ LIEUTENANT|FLT LT| |
441
|
|
|
|
|
|
|
PILOT\ OFFICER| |
442
|
|
|
|
|
|
|
GENERAL\ OF\ THE\ ARMY|GENERAL|GEN| |
443
|
|
|
|
|
|
|
PTE|PVT|PRIVATE| |
444
|
|
|
|
|
|
|
SGT|SARGENT| |
445
|
|
|
|
|
|
|
AIR\ COMMANDER| |
446
|
|
|
|
|
|
|
AIR\ COMMODORE| |
447
|
|
|
|
|
|
|
AIR\ MARSHALL| |
448
|
|
|
|
|
|
|
LIEUTENANT\ COLONEL|LT\ COL| |
449
|
|
|
|
|
|
|
LT\ GEN| |
450
|
|
|
|
|
|
|
LT\ CDR| |
451
|
|
|
|
|
|
|
LIEUTENANT|LT|LEUT|LIEUT| |
452
|
|
|
|
|
|
|
MAJOR GENERAL|MAJ GEN| |
453
|
|
|
|
|
|
|
MAJOR|MAJ| |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
# Religious |
456
|
|
|
|
|
|
|
RABBI| |
457
|
|
|
|
|
|
|
BISHOP| |
458
|
|
|
|
|
|
|
BROTHER| |
459
|
|
|
|
|
|
|
CHAPLAIN| |
460
|
|
|
|
|
|
|
FATHER| |
461
|
|
|
|
|
|
|
PASTOR| |
462
|
|
|
|
|
|
|
MOTHER\ SUPERIOR|MOTHER| |
463
|
|
|
|
|
|
|
MOST\ REVER[E|A]ND| |
464
|
|
|
|
|
|
|
MT\ REVD|V\ REVD|REVD| |
465
|
|
|
|
|
|
|
MUFTI| |
466
|
|
|
|
|
|
|
REVER[E|A]ND| |
467
|
|
|
|
|
|
|
REVD| |
468
|
|
|
|
|
|
|
REV| |
469
|
|
|
|
|
|
|
SHEIKH?| |
470
|
|
|
|
|
|
|
VERY\ REVER[E|A]ND| |
471
|
|
|
|
|
|
|
VICAR| |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
# Other |
476
|
|
|
|
|
|
|
AMBASSADOR| |
477
|
|
|
|
|
|
|
PROFESSOR| |
478
|
|
|
|
|
|
|
PROF| |
479
|
|
|
|
|
|
|
ALDERMAN|ALD| |
480
|
|
|
|
|
|
|
COUNCILLOR |
481
|
|
|
|
|
|
|
)\ /x |
482
|
|
|
|
|
|
|
}; |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
my $common = |
485
|
|
|
|
|
|
|
q{ |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
precursor : |
488
|
|
|
|
|
|
|
/( |
489
|
|
|
|
|
|
|
ESTATE\ OF\ THE\ LATE| |
490
|
|
|
|
|
|
|
ESTATE\ OF| |
491
|
|
|
|
|
|
|
HIS\ EXCELLENCY| |
492
|
|
|
|
|
|
|
HIS\ HONOU?R| |
493
|
|
|
|
|
|
|
HER\ EXCELLENCY| |
494
|
|
|
|
|
|
|
HER\ HONOU?R| |
495
|
|
|
|
|
|
|
THE\ RIGHT HONOU?RABLE| |
496
|
|
|
|
|
|
|
THE\ HONOU?RABLE| |
497
|
|
|
|
|
|
|
RIGHT\ HONOU?RABLE| |
498
|
|
|
|
|
|
|
THE\ RT\ HON| |
499
|
|
|
|
|
|
|
THE\ HON| |
500
|
|
|
|
|
|
|
RT\ HON |
501
|
|
|
|
|
|
|
)\ /x |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
conjunction : /AND |& / |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
# Used in the John_A_Smith and J_Adam_Smith name types, as well as when intials are set to 1 |
506
|
|
|
|
|
|
|
single_initial: /[A-Z] / |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
# Examples are Jo-Anne, D'Artagnan, O'Shaugnessy La'Keishia, T-Bone |
509
|
|
|
|
|
|
|
split_given_name : /[A-Z]{1,}['|-][A-Z]{2,} / |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
constonant: /[A-DF-HJ-NP-TV-Z]]/ |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
# For use with John_Adam_Smith and John_A_Smith name types |
514
|
|
|
|
|
|
|
given_name_standard: |
515
|
|
|
|
|
|
|
/[A-Z]{3,} / | |
516
|
|
|
|
|
|
|
/[AEIOU]/ constonant / / | |
517
|
|
|
|
|
|
|
constonant /[AEIOUY] / | |
518
|
|
|
|
|
|
|
split_given_name |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
# Patronymic, place name and other surname prefixes |
521
|
|
|
|
|
|
|
prefix: |
522
|
|
|
|
|
|
|
/( |
523
|
|
|
|
|
|
|
[A|E]L| # ARABIC, GREEK, |
524
|
|
|
|
|
|
|
AP| # WELSH |
525
|
|
|
|
|
|
|
BEN| # HEBREW |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
DELLA|DELLE|DALLE| # ITALIAN |
528
|
|
|
|
|
|
|
DELA| |
529
|
|
|
|
|
|
|
DELL?| |
530
|
|
|
|
|
|
|
DE\ LA| |
531
|
|
|
|
|
|
|
DE\ LOS| |
532
|
|
|
|
|
|
|
DE| |
533
|
|
|
|
|
|
|
D[A|I|U]| |
534
|
|
|
|
|
|
|
L[A|E|O]| |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
ST| # ABBREVIATION FOR SAINT |
537
|
|
|
|
|
|
|
SAN| # SPANISH |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
# DUTCH |
540
|
|
|
|
|
|
|
DEN| |
541
|
|
|
|
|
|
|
VON\ DER| |
542
|
|
|
|
|
|
|
VON| |
543
|
|
|
|
|
|
|
VAN\ DE[N|R]| |
544
|
|
|
|
|
|
|
VAN |
545
|
|
|
|
|
|
|
)\ /x |
546
|
|
|
|
|
|
|
| |
547
|
|
|
|
|
|
|
/[D|L|O]'/ # ITALIAN, IRISH OR FRENCH, abbreviation for 'the', 'of' etc |
548
|
|
|
|
|
|
|
| |
549
|
|
|
|
|
|
|
/D[A|E]LL'/ |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
middle_name: |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
# Dont grab surname prefix too early. For example, John Van Dam could be |
554
|
|
|
|
|
|
|
# interpreted as middle name of Van and Surname of Dam. So exclude prefixs |
555
|
|
|
|
|
|
|
# from middle names |
556
|
|
|
|
|
|
|
...!prefix given_name |
557
|
|
|
|
|
|
|
{ |
558
|
|
|
|
|
|
|
$return = $item[2]; |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
# Use look-ahead to avoid ambiguity between surname and suffix. For example, |
563
|
|
|
|
|
|
|
# John Smith Snr, would detect Snr as the surname and Smith as the middle name |
564
|
|
|
|
|
|
|
surname : ...!suffix first_surname second_surname(?) |
565
|
|
|
|
|
|
|
{ |
566
|
|
|
|
|
|
|
if ( $item[2] and $item[3][0] ) |
567
|
|
|
|
|
|
|
{ |
568
|
|
|
|
|
|
|
$return = "$item[2]$item[3][0]"; |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
else |
571
|
|
|
|
|
|
|
{ |
572
|
|
|
|
|
|
|
$return = $item[2]; |
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
first_surname : prefix name |
577
|
|
|
|
|
|
|
{ |
578
|
|
|
|
|
|
|
$return = "$item[1]$item[2]"; |
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
| |
581
|
|
|
|
|
|
|
name |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
second_surname : '-' name |
585
|
|
|
|
|
|
|
{ |
586
|
|
|
|
|
|
|
if ( $item[1] and $item[2] ) |
587
|
|
|
|
|
|
|
{ |
588
|
|
|
|
|
|
|
$return = "$item[1]$item[2]"; |
589
|
|
|
|
|
|
|
} |
590
|
|
|
|
|
|
|
} |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
# Note space will not occur for first part of a hphenated surname |
593
|
|
|
|
|
|
|
# AddressParse::_valid_name will do further check on name context |
594
|
|
|
|
|
|
|
name : /[A-Z]{2,} ?/ |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
suffix: |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
/( |
600
|
|
|
|
|
|
|
ESQUIRE| |
601
|
|
|
|
|
|
|
ESQ | |
602
|
|
|
|
|
|
|
SN?R| # Senior |
603
|
|
|
|
|
|
|
JN?R| # Junior |
604
|
|
|
|
|
|
|
PHD | |
605
|
|
|
|
|
|
|
MD | |
606
|
|
|
|
|
|
|
LLB | |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
XI{1,3}| # 11th, 12th, 13th |
609
|
|
|
|
|
|
|
X | # 10th |
610
|
|
|
|
|
|
|
IV | # 4th |
611
|
|
|
|
|
|
|
VI{1,3} | # 6th, 7th, 8th |
612
|
|
|
|
|
|
|
V | # 5th |
613
|
|
|
|
|
|
|
IX | # 9th |
614
|
|
|
|
|
|
|
I{1,3} # 1st, 2nd, 3rd |
615
|
|
|
|
|
|
|
)\ /x |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
# One or more characters. |
619
|
|
|
|
|
|
|
non_matching: /.*/ |
620
|
|
|
|
|
|
|
}; |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
# Define given name combinations, specifying the minimum number of letters. |
623
|
|
|
|
|
|
|
# The correct pair of rules is determined by the 'initials' key in the hash |
624
|
|
|
|
|
|
|
# passed to the 'new' method. |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
my $given_name_min_2 = q{ given_name :/[A-Z]{2,} / | split_given_name }; |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
# Joe, Jo-Anne ... |
630
|
|
|
|
|
|
|
my $given_name_min_3 = |
631
|
|
|
|
|
|
|
q{ |
632
|
|
|
|
|
|
|
given_name: /[A-Z]{3,} / | split_given_name |
633
|
|
|
|
|
|
|
}; |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
# John ... |
637
|
|
|
|
|
|
|
my $given_name_min_4 = |
638
|
|
|
|
|
|
|
q{ |
639
|
|
|
|
|
|
|
given_name: /[A-Z]{4,} / | split_given_name |
640
|
|
|
|
|
|
|
}; |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
# Define initials combinations specifying the minimum and maximum letters. |
644
|
|
|
|
|
|
|
# Order from most complex to simplest, to avoid premature matching. |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
# 'A' |
647
|
|
|
|
|
|
|
my $initials_1 = q{ initials : single_initial }; |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
#'AB' 'A B' |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
my $initials_2 = |
652
|
|
|
|
|
|
|
q{ |
653
|
|
|
|
|
|
|
initials: /([A-Z] ){1,2}/ | /([A-Z]){1,2} / |
654
|
|
|
|
|
|
|
}; |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
# 'ABC' or 'A B C' |
657
|
|
|
|
|
|
|
my $initials_3 = |
658
|
|
|
|
|
|
|
q{ |
659
|
|
|
|
|
|
|
initials: /([A-Z] ){1,3}/ | /([A-Z]){1,3} / |
660
|
|
|
|
|
|
|
}; |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
664
|
|
|
|
|
|
|
# Assemble correct combination for grammar tree. |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
sub _create |
667
|
|
|
|
|
|
|
{ |
668
|
2
|
|
|
2
|
|
4
|
my $name = shift; |
669
|
|
|
|
|
|
|
|
670
|
2
|
|
|
|
|
6
|
my $grammar = $rules_start; |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
|
673
|
2
|
50
|
|
|
|
8
|
if ( $name->{joint_names} ) |
674
|
|
|
|
|
|
|
{ |
675
|
2
|
|
|
|
|
13
|
$grammar .= $rules_joint_names; |
676
|
|
|
|
|
|
|
} |
677
|
2
|
|
|
|
|
39
|
$grammar .= $rules_single_names; |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
|
680
|
2
|
|
|
|
|
18
|
$grammar .= $common; |
681
|
|
|
|
|
|
|
|
682
|
2
|
|
|
|
|
18
|
$grammar .= $titles; |
683
|
|
|
|
|
|
|
|
684
|
2
|
100
|
|
|
|
12
|
if ( $name->{extended_titles} ) |
685
|
|
|
|
|
|
|
{ |
686
|
1
|
|
|
|
|
4
|
$grammar .= $extended_titles; |
687
|
|
|
|
|
|
|
} |
688
|
|
|
|
|
|
|
|
689
|
2
|
50
|
|
|
|
9
|
$name->{initials} > 3 and $name->{initials} = 3; |
690
|
2
|
50
|
|
|
|
9
|
$name->{initials} < 1 and $name->{initials} = 1; |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
# Define limit of when a string is treated as an initial, or |
693
|
|
|
|
|
|
|
# a given name. For example, if initials are set to 2, MR TO SMITH |
694
|
|
|
|
|
|
|
# will have initials of T & O and no given name, but MR TOM SMITH will |
695
|
|
|
|
|
|
|
# have no initials, and a given name of Tom. |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
|
699
|
2
|
50
|
|
|
|
13
|
if ( $name->{initials} == 1 ) |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
700
|
|
|
|
|
|
|
{ |
701
|
0
|
|
|
|
|
0
|
$grammar .= $given_name_min_2 . $initials_1; |
702
|
|
|
|
|
|
|
} |
703
|
|
|
|
|
|
|
elsif ( $name->{initials} == 2 ) |
704
|
|
|
|
|
|
|
{ |
705
|
2
|
|
|
|
|
8
|
$grammar .= $initials_2 . $given_name_min_3; |
706
|
|
|
|
|
|
|
} |
707
|
|
|
|
|
|
|
elsif ( $name->{initials} == 3 ) |
708
|
|
|
|
|
|
|
{ |
709
|
0
|
|
|
|
|
0
|
$grammar .= $given_name_min_4 . $initials_3; |
710
|
|
|
|
|
|
|
} |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
|
713
|
2
|
|
|
|
|
38
|
return($grammar); |
714
|
|
|
|
|
|
|
} |
715
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
716
|
|
|
|
|
|
|
1; |