line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::EN::NameCase; |
2
|
|
|
|
|
|
|
|
3
|
8
|
|
|
8
|
|
79238
|
use strict; |
|
8
|
|
|
|
|
14
|
|
|
8
|
|
|
|
|
269
|
|
4
|
8
|
|
|
8
|
|
3566
|
use locale; |
|
8
|
|
|
|
|
1515
|
|
|
8
|
|
|
|
|
33
|
|
5
|
|
|
|
|
|
|
|
6
|
8
|
|
|
8
|
|
228
|
use vars qw( $VERSION @ISA @EXPORT @EXPORT_OK $HEBREW $SPANISH $ROMAN $POSTNOMINAL ); |
|
8
|
|
|
|
|
14
|
|
|
8
|
|
|
|
|
735
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
$VERSION = '1.18'; |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
11
|
|
|
|
|
|
|
# Modules |
12
|
|
|
|
|
|
|
|
13
|
8
|
|
|
8
|
|
41
|
use Carp; |
|
8
|
|
|
|
|
10
|
|
|
8
|
|
|
|
|
506
|
|
14
|
8
|
|
|
8
|
|
33
|
use Exporter(); |
|
8
|
|
|
|
|
9
|
|
|
8
|
|
|
|
|
7809
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
@ISA = qw( Exporter ); |
17
|
|
|
|
|
|
|
@EXPORT = qw( nc ); |
18
|
|
|
|
|
|
|
@EXPORT_OK = qw( NameCase nc ); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
21
|
|
|
|
|
|
|
# Variables |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
$HEBREW = 1; |
24
|
|
|
|
|
|
|
$SPANISH = 0; |
25
|
|
|
|
|
|
|
$ROMAN = 1; |
26
|
|
|
|
|
|
|
$POSTNOMINAL = 1; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
my @POST_NOMINAL_INITIALS = qw( |
29
|
|
|
|
|
|
|
VC GC KG LG KT LT KP GCB OM GCSI GCMG GCIE GCVO GBE CH KCB DCB KCSI KCMG |
30
|
|
|
|
|
|
|
DCMG KCIE KCVO DCVO KBE DBE CB CSI CMG CIE CVO CBE DSO LVO OBE ISO MVO MBE |
31
|
|
|
|
|
|
|
IOM CGC RRC DSC MC DFC AFC ARRC OBI DCM CGM GM IDSM DSM MM DFM AFM SGM IOM |
32
|
|
|
|
|
|
|
CPM QGM RVM BEM QPM QFSM QAM CPM MSM ERD VD TD UD ED RD VRD AE |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
PC ADC QHP QHS QHDS QHNS QHC SCJ J LJ QS SL QC KC JP DL MP MSP MSYP AM AM |
35
|
|
|
|
|
|
|
MLA MEP DBEnv DConstMgt DREst EdD DPhil PhD DLitt DSocSci MD EngD DD LLD |
36
|
|
|
|
|
|
|
DProf MA MArch MAnth MSc MMORSE MMath MMathStat MPharm MPhil MSc MSci MSt |
37
|
|
|
|
|
|
|
MRes MEng MChem MBiochem MSocSc MMus LLM BCL MPhys MComp MAcc MFin MBA MPA |
38
|
|
|
|
|
|
|
MEd MEP MEnt MCGI MGeol MLitt MEarthSc MClinRes BA BSc LLB BEng MBChB FdA |
39
|
|
|
|
|
|
|
FdSc FdEng PgDip PgD PgCert PgC PgCLTHE AUH AKC AUS HNC HNCert HND HNDip |
40
|
|
|
|
|
|
|
DipHE Dip OND CertHE ACSM MCSM DIC AICSM ARSM ARCS LLB LLM BCL MJur DPhil |
41
|
|
|
|
|
|
|
PhD LLD DipLP FCILEx GCILEx ACILEx CQSW DipSW BSW MSW FCILT CMILT MILT CPL |
42
|
|
|
|
|
|
|
CTP CML PLS CTL DLP PLog EJLog ESLog EMLog JrLog Log SrLog BArch MArch ARB |
43
|
|
|
|
|
|
|
RIBA RIAS RIAI RSAW MB BM BS BCh BChir MRCS FRCS MS MCh. MRCP FRCP MRCPCH |
44
|
|
|
|
|
|
|
FRCPCH MRCPath MFPM FFPM BDS MRCPsych FRCPsych MRCOG FRCOG MCEM FCEM FRCA |
45
|
|
|
|
|
|
|
FFPMRCA MRCGP FRCGP BSc MScChiro MChiro MSc DC LFHOM MFHOM FFHOM FADO FBDO |
46
|
|
|
|
|
|
|
FCOptom MCOptom MOst DPT MCSP FCSP. SROT MSCR FSCR. CPhT RN VN RVN BVSc |
47
|
|
|
|
|
|
|
BVetMed VetMB BVM&S MRCVS FRCVS FAWM PGCAP PGCHE PGCE PGDE BEd NPQH QTS |
48
|
|
|
|
|
|
|
CSci CSciTeach RSci RSciTech CEng IEng EngTech ICTTech DEM MM CMarEng |
49
|
|
|
|
|
|
|
CMarSci CMarTech IMarEng MarEngTech RGN SRN RMN RSCN SEN EN RNMH RN RM RN1 |
50
|
|
|
|
|
|
|
RNA RN2 RN3 RNMH RN4 RN5 RNLD RN6 RN8 RNC RN7 RN9 RHV RSN ROH RFHN SPAN |
51
|
|
|
|
|
|
|
SPMH SPCN SPLD SPHP SCHM SCLD SPCC SPDN V100 V200 V300 LPE MSc |
52
|
|
|
|
|
|
|
); |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
55
|
|
|
|
|
|
|
# Functions |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
sub NameCase { |
58
|
12
|
100
|
100
|
12
|
1
|
8871
|
croak "Usage: \$SCALAR|\@ARRAY = NameCase [\\]\$SCALAR|\@ARRAY" |
|
|
|
66
|
|
|
|
|
59
|
|
|
|
|
|
|
if ref $_[0] and ( ref $_[0] ne 'ARRAY' and ref $_[0] ne 'SCALAR' ); |
60
|
|
|
|
|
|
|
|
61
|
11
|
|
|
|
|
16
|
local( $_ ); |
62
|
|
|
|
|
|
|
|
63
|
11
|
100
|
100
|
|
|
87
|
if( wantarray and ( scalar @_ > 1 or ref $_[0] eq 'ARRAY' ) ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# We have received an array or array reference in a list context |
65
|
|
|
|
|
|
|
# so we will return an array. |
66
|
3
|
100
|
|
|
|
5
|
map { nc( $_ ) } @{ ref( $_[0] ) ? $_[0] : \@_ }; |
|
94
|
|
|
|
|
160
|
|
|
3
|
|
|
|
|
11
|
|
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
} elsif( ref $_[0] eq 'ARRAY' ) { |
69
|
|
|
|
|
|
|
# We have received an array reference in a scalar or void context |
70
|
|
|
|
|
|
|
# so we will work on the array in-place. |
71
|
1
|
|
|
|
|
2
|
foreach ( @{ $_[0] } ) { |
|
1
|
|
|
|
|
4
|
|
72
|
46
|
|
|
|
|
80
|
$_ = nc( $_ ); |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
} elsif( ref $_[0] eq 'SCALAR' ) { |
76
|
|
|
|
|
|
|
# We don't work on scalar references in-place; we take the value |
77
|
|
|
|
|
|
|
# and return a name-cased copy. |
78
|
3
|
|
|
|
|
4
|
nc( ${ $_[0] } ); |
|
3
|
|
|
|
|
9
|
|
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
} elsif( scalar @_ == 1 ) { |
81
|
|
|
|
|
|
|
# We've received a scalar: we return a name-cased copy. |
82
|
3
|
|
|
|
|
8
|
nc( $_[0] ); |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
} else { |
85
|
1
|
|
|
|
|
160
|
croak "NameCase only accepts a single scalar, array or array ref"; |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
sub nc { |
90
|
177
|
100
|
100
|
177
|
1
|
9352
|
croak "Usage: nc [[\\]\$SCALAR]" |
|
|
|
66
|
|
|
|
|
91
|
|
|
|
|
|
|
if scalar @_ > 1 or ( ref $_[0] and ref $_[0] ne 'SCALAR' ); |
92
|
|
|
|
|
|
|
|
93
|
174
|
100
|
|
|
|
408
|
local( $_ ) = @_ if @_; |
94
|
174
|
100
|
|
|
|
263
|
$_ = ${$_} if ref( $_ ) ; # Replace reference with value. |
|
2
|
|
|
|
|
4
|
|
95
|
|
|
|
|
|
|
|
96
|
174
|
100
|
|
|
|
256
|
return $_ unless($_); |
97
|
|
|
|
|
|
|
|
98
|
173
|
|
|
|
|
239
|
$_ = lc ; # Lowercase the lot. |
99
|
173
|
|
|
|
|
1016
|
s{ \b (\w) }{\u$1}gx; # Uppercase first letter of every word. |
100
|
173
|
|
|
|
|
205
|
s{ (\'\w) \b }{\L$1}gx; # Lowercase 's. |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# Name case Mcs and Macs - taken straight from NameParse.pm incl. comments. |
103
|
|
|
|
|
|
|
# Exclude names with 1-2 letters after prefix like Mack, Macky, Mace |
104
|
|
|
|
|
|
|
# Exclude names ending in a,c,i,o, or j are typically Polish or Italian |
105
|
|
|
|
|
|
|
|
106
|
173
|
100
|
100
|
|
|
708
|
if ( /\bMac[A-Za-z]{2,}[^aciozj]\b/ or /\bMc/ ) { |
107
|
48
|
|
|
|
|
242
|
s/\b(Ma?c)([A-Za-z]+)/$1\u$2/g; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# Now correct for "Mac" exceptions |
110
|
48
|
|
|
|
|
75
|
s/\bMacEvicius/Macevicius/g; # Lithuanian |
111
|
48
|
|
|
|
|
46
|
s/\bMacHado/Machado/g; # Portuguese |
112
|
48
|
|
|
|
|
56
|
s/\bMacHar/Machar/g; |
113
|
48
|
|
|
|
|
60
|
s/\bMacHin/Machin/g; |
114
|
48
|
|
|
|
|
51
|
s/\bMacHlin/Machlin/g; |
115
|
48
|
|
|
|
|
49
|
s/\bMacIas/Macias/g; |
116
|
48
|
|
|
|
|
66
|
s/\bMacIulis/Maciulis/g; |
117
|
48
|
|
|
|
|
58
|
s/\bMacKie/Mackie/g; |
118
|
48
|
|
|
|
|
55
|
s/\bMacKle/Mackle/g; |
119
|
48
|
|
|
|
|
51
|
s/\bMacKlin/Macklin/g; |
120
|
48
|
|
|
|
|
41
|
s/\bMacQuarie/Macquarie/g; |
121
|
48
|
|
|
|
|
70
|
s/\bMacOmber/Macomber/g; |
122
|
48
|
|
|
|
|
46
|
s/\bMacIn/Macin/g; |
123
|
48
|
|
|
|
|
30
|
s/\bMacKintosh/Mackintosh/g; |
124
|
48
|
|
|
|
|
46
|
s/\bMacKen/Macken/g; |
125
|
48
|
|
|
|
|
44
|
s/\bMacHen/Machen/g; |
126
|
48
|
|
|
|
|
37
|
s/\bMacisaac/MacIsaac/g; |
127
|
48
|
|
|
|
|
35
|
s/\bMacHiel/Machiel/g; |
128
|
48
|
|
|
|
|
44
|
s/\bMacIol/Maciol/g; |
129
|
48
|
|
|
|
|
44
|
s/\bMacKell/Mackell/g; |
130
|
48
|
|
|
|
|
42
|
s/\bMacKlem/Macklem/g; |
131
|
48
|
|
|
|
|
44
|
s/\bMacKrell/Mackrell/g; |
132
|
48
|
|
|
|
|
54
|
s/\bMacLin/Maclin/g; |
133
|
48
|
|
|
|
|
39
|
s/\bMacKey/Mackey/g; |
134
|
48
|
|
|
|
|
44
|
s/\bMacKley/Mackley/g; |
135
|
48
|
|
|
|
|
37
|
s/\bMacHell/Machell/g; |
136
|
48
|
|
|
|
|
49
|
s/\bMacHon/Machon/g; |
137
|
|
|
|
|
|
|
} |
138
|
173
|
|
|
|
|
165
|
s/Macmurdo/MacMurdo/g; |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# Fixes for "son (daughter) of" etc. in various languages. |
141
|
173
|
|
|
|
|
154
|
s{ \b Al(?=\s+\w) }{al}gx; # al Arabic or forename Al. |
142
|
173
|
|
|
|
|
163
|
s{ \b Ap \b }{ap}gx; # ap Welsh. |
143
|
|
|
|
|
|
|
# search for: followed by ben |
144
|
|
|
|
|
|
|
# without first (?<=\S\s), first name of 'ben jones' remains lowercase |
145
|
173
|
100
|
|
|
|
315
|
s{ (?<=\S\s)\bBen(?=\s+\w) }{ben}gx if $HEBREW; # ben Hebrew or forename Ben. |
146
|
173
|
100
|
|
|
|
250
|
s{ (?<=\S\s)\bBat(?=\s+\w) }{bat}gx if $HEBREW; # bat Hebrew or forename Bat. |
147
|
173
|
|
|
|
|
199
|
s{ \b Dell([ae])\b }{dell$1}gx; # della and delle Italian. |
148
|
173
|
|
|
|
|
203
|
s{ \b D([aeiu]) \b }{d$1}gx; # da, de, di Italian; du French. |
149
|
173
|
|
|
|
|
152
|
s{ \b De([lr]) \b }{de$1}gx; # del Italian; der Dutch/Flemish. |
150
|
173
|
100
|
|
|
|
267
|
s{ \b El \b }{el}gx unless $SPANISH; # el Greek or El Spanish. |
151
|
173
|
100
|
|
|
|
249
|
s{ \b La \b }{la}gx unless $SPANISH; # la French or La Spanish. |
152
|
173
|
|
|
|
|
180
|
s{ \b L([eo]) \b }{l$1}gx; # lo Italian; le French. |
153
|
173
|
|
|
|
|
167
|
s{ \b Van(?=\s+\w) }{van}gx; # van German or forename Van. |
154
|
173
|
|
|
|
|
154
|
s{ \b Von \b }{von}gx; # von Dutch/Flemish |
155
|
|
|
|
|
|
|
|
156
|
173
|
100
|
|
|
|
234
|
if($ROMAN) { |
157
|
|
|
|
|
|
|
# Fixes for roman numeral names, e.g. Henry VIII, up to 89, LXXXIX |
158
|
168
|
|
|
|
|
1632
|
s{ \b ( (?: [Xx]{1,3} | [Xx][Ll] | [Ll][Xx]{0,3} )? |
159
|
|
|
|
|
|
|
(?: [Ii]{1,3} | [Ii][VvXx] | [Vv][Ii]{0,3} )? ) \b }{\U$1}gx; |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
173
|
100
|
|
|
|
262
|
if($POSTNOMINAL) { |
163
|
|
|
|
|
|
|
# post-nominal initials |
164
|
170
|
|
|
|
|
203
|
for my $pni (@POST_NOMINAL_INITIALS) { |
165
|
56100
|
|
|
|
|
271454
|
s{ \b ($pni) $}{$pni}ix; |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
173
|
|
|
|
|
491
|
$_; |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
1; |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
__END__ |