line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::EN::NameCase; |
2
|
|
|
|
|
|
|
|
3
|
8
|
|
|
8
|
|
88457
|
use warnings; |
|
8
|
|
|
|
|
16
|
|
|
8
|
|
|
|
|
436
|
|
4
|
8
|
|
|
8
|
|
68
|
use strict; |
|
8
|
|
|
|
|
11
|
|
|
8
|
|
|
|
|
255
|
|
5
|
8
|
|
|
8
|
|
3878
|
use locale; |
|
8
|
|
|
|
|
1716
|
|
|
8
|
|
|
|
|
44
|
|
6
|
|
|
|
|
|
|
|
7
|
8
|
|
|
8
|
|
304
|
use vars qw( $VERSION @ISA @EXPORT @EXPORT_OK $HEBREW $SPANISH $ROMAN $POSTNOMINAL ); |
|
8
|
|
|
|
|
11
|
|
|
8
|
|
|
|
|
970
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
$VERSION = '1.19'; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
12
|
|
|
|
|
|
|
# Modules |
13
|
|
|
|
|
|
|
|
14
|
8
|
|
|
8
|
|
41
|
use Carp; |
|
8
|
|
|
|
|
10
|
|
|
8
|
|
|
|
|
595
|
|
15
|
8
|
|
|
8
|
|
39
|
use Exporter(); |
|
8
|
|
|
|
|
12
|
|
|
8
|
|
|
|
|
9717
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
@ISA = qw( Exporter ); |
18
|
|
|
|
|
|
|
@EXPORT = qw( nc ); |
19
|
|
|
|
|
|
|
@EXPORT_OK = qw( NameCase nc ); |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
22
|
|
|
|
|
|
|
# Variables |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
$HEBREW = 1; |
25
|
|
|
|
|
|
|
$SPANISH = 0; |
26
|
|
|
|
|
|
|
$ROMAN = 1; |
27
|
|
|
|
|
|
|
$POSTNOMINAL = 1; |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
my @POST_NOMINAL_INITIALS = qw( |
30
|
|
|
|
|
|
|
VC GC KG LG KT LT KP GCB OM GCSI GCMG GCIE GCVO GBE CH KCB DCB KCSI KCMG |
31
|
|
|
|
|
|
|
DCMG KCIE KCVO DCVO KBE DBE CB CSI CMG CIE CVO CBE DSO LVO OBE ISO MVO MBE |
32
|
|
|
|
|
|
|
IOM CGC RRC DSC MC DFC AFC ARRC OBI DCM CGM GM IDSM DSM MM DFM AFM SGM IOM |
33
|
|
|
|
|
|
|
CPM QGM RVM BEM QPM QFSM QAM CPM MSM ERD VD TD UD ED RD VRD AE |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
PC ADC QHP QHS QHDS QHNS QHC SCJ J LJ QS SL QC KC JP DL MP MSP MSYP AM AM |
36
|
|
|
|
|
|
|
MLA MEP DBEnv DConstMgt DREst EdD DPhil PhD DLitt DSocSci MD EngD DD LLD |
37
|
|
|
|
|
|
|
DProf MA MArch MAnth MSc MMORSE MMath MMathStat MPharm MPhil MSc MSci MSt |
38
|
|
|
|
|
|
|
MRes MEng MChem MBiochem MSocSc MMus LLM BCL MPhys MComp MAcc MFin MBA MPA |
39
|
|
|
|
|
|
|
MEd MEP MEnt MCGI MGeol MLitt MEarthSc MClinRes BA BSc LLB BEng MBChB FdA |
40
|
|
|
|
|
|
|
FdSc FdEng PgDip PgD PgCert PgC PgCLTHE AUH AKC AUS HNC HNCert HND HNDip |
41
|
|
|
|
|
|
|
DipHE Dip OND CertHE ACSM MCSM DIC AICSM ARSM ARCS LLB LLM BCL MJur DPhil |
42
|
|
|
|
|
|
|
PhD LLD DipLP FCILEx GCILEx ACILEx CQSW DipSW BSW MSW FCILT CMILT MILT CPL |
43
|
|
|
|
|
|
|
CTP CML PLS CTL DLP PLog EJLog ESLog EMLog JrLog Log SrLog BArch MArch ARB |
44
|
|
|
|
|
|
|
RIBA RIAS RIAI RSAW MB BM BS BCh BChir MRCS FRCS MS MCh. MRCP FRCP MRCPCH |
45
|
|
|
|
|
|
|
FRCPCH MRCPath MFPM FFPM BDS MRCPsych FRCPsych MRCOG FRCOG MCEM FCEM FRCA |
46
|
|
|
|
|
|
|
FFPMRCA MRCGP FRCGP BSc MScChiro MChiro MSc DC LFHOM MFHOM FFHOM FADO FBDO |
47
|
|
|
|
|
|
|
FCOptom MCOptom MOst DPT MCSP FCSP. SROT MSCR FSCR. CPhT RN VN RVN BVSc |
48
|
|
|
|
|
|
|
BVetMed VetMB BVM&S MRCVS FRCVS FAWM PGCAP PGCHE PGCE PGDE BEd NPQH QTS |
49
|
|
|
|
|
|
|
CSci CSciTeach RSci RSciTech CEng IEng EngTech ICTTech DEM MM CMarEng |
50
|
|
|
|
|
|
|
CMarSci CMarTech IMarEng MarEngTech RGN SRN RMN RSCN SEN EN RNMH RN RM RN1 |
51
|
|
|
|
|
|
|
RNA RN2 RN3 RNMH RN4 RN5 RNLD RN6 RN8 RNC RN7 RN9 RHV RSN ROH RFHN SPAN |
52
|
|
|
|
|
|
|
SPMH SPCN SPLD SPHP SCHM SCLD SPCC SPDN V100 V200 V300 LPE MSc |
53
|
|
|
|
|
|
|
); |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
56
|
|
|
|
|
|
|
# Functions |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
sub NameCase { |
59
|
12
|
100
|
100
|
12
|
1
|
6840
|
croak "Usage: \$SCALAR|\@ARRAY = NameCase [\\]\$SCALAR|\@ARRAY" |
|
|
|
66
|
|
|
|
|
60
|
|
|
|
|
|
|
if ref $_[0] and ( ref $_[0] ne 'ARRAY' and ref $_[0] ne 'SCALAR' ); |
61
|
|
|
|
|
|
|
|
62
|
11
|
|
|
|
|
15
|
local( $_ ); |
63
|
|
|
|
|
|
|
|
64
|
11
|
100
|
100
|
|
|
78
|
if( wantarray and ( scalar @_ > 1 or ref $_[0] eq 'ARRAY' ) ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# We have received an array or array reference in a list context |
66
|
|
|
|
|
|
|
# so we will return an array. |
67
|
3
|
100
|
|
|
|
3
|
map { nc( $_ ) } @{ ref( $_[0] ) ? $_[0] : \@_ }; |
|
94
|
|
|
|
|
155
|
|
|
3
|
|
|
|
|
12
|
|
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
} elsif( ref $_[0] eq 'ARRAY' ) { |
70
|
|
|
|
|
|
|
# We have received an array reference in a scalar or void context |
71
|
|
|
|
|
|
|
# so we will work on the array in-place. |
72
|
1
|
|
|
|
|
1
|
foreach ( @{ $_[0] } ) { |
|
1
|
|
|
|
|
4
|
|
73
|
46
|
|
|
|
|
81
|
$_ = nc( $_ ); |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
} elsif( ref $_[0] eq 'SCALAR' ) { |
77
|
|
|
|
|
|
|
# We don't work on scalar references in-place; we take the value |
78
|
|
|
|
|
|
|
# and return a name-cased copy. |
79
|
3
|
|
|
|
|
5
|
nc( ${ $_[0] } ); |
|
3
|
|
|
|
|
7
|
|
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
} elsif( scalar @_ == 1 ) { |
82
|
|
|
|
|
|
|
# We've received a scalar: we return a name-cased copy. |
83
|
3
|
|
|
|
|
6
|
nc( $_[0] ); |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
} else { |
86
|
1
|
|
|
|
|
219
|
croak "NameCase only accepts a single scalar, array or array ref"; |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
sub nc { |
91
|
177
|
100
|
100
|
177
|
1
|
8154
|
croak "Usage: nc [[\\]\$SCALAR]" |
|
|
|
66
|
|
|
|
|
92
|
|
|
|
|
|
|
if scalar @_ > 1 or ( ref $_[0] and ref $_[0] ne 'SCALAR' ); |
93
|
|
|
|
|
|
|
|
94
|
174
|
100
|
|
|
|
414
|
local( $_ ) = @_ if @_; |
95
|
174
|
100
|
|
|
|
271
|
$_ = ${$_} if ref( $_ ) ; # Replace reference with value. |
|
2
|
|
|
|
|
5
|
|
96
|
|
|
|
|
|
|
|
97
|
174
|
100
|
|
|
|
259
|
return $_ unless($_); |
98
|
|
|
|
|
|
|
|
99
|
173
|
|
|
|
|
314
|
$_ = lc ; # Lowercase the lot. |
100
|
173
|
|
|
|
|
1108
|
s{ \b (\w) }{\u$1}gx; # Uppercase first letter of every word. |
101
|
173
|
|
|
|
|
225
|
s{ (\'\w) \b }{\L$1}gx; # Lowercase 's. |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# Name case Mcs and Macs - taken straight from NameParse.pm incl. comments. |
104
|
|
|
|
|
|
|
# Exclude names with 1-2 letters after prefix like Mack, Macky, Mace |
105
|
|
|
|
|
|
|
# Exclude names ending in a,c,i,o, or j are typically Polish or Italian |
106
|
|
|
|
|
|
|
|
107
|
173
|
100
|
100
|
|
|
773
|
if ( /\bMac[A-Za-z]{2,}[^aciozj]\b/ or /\bMc/ ) { |
108
|
48
|
|
|
|
|
239
|
s/\b(Ma?c)([A-Za-z]+)/$1\u$2/g; |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# Now correct for "Mac" exceptions |
111
|
48
|
|
|
|
|
64
|
s/\bMacEvicius/Macevicius/g; # Lithuanian |
112
|
48
|
|
|
|
|
51
|
s/\bMacHado/Machado/g; # Portuguese |
113
|
48
|
|
|
|
|
65
|
s/\bMacHar/Machar/g; |
114
|
48
|
|
|
|
|
75
|
s/\bMacHin/Machin/g; |
115
|
48
|
|
|
|
|
57
|
s/\bMacHlin/Machlin/g; |
116
|
48
|
|
|
|
|
49
|
s/\bMacIas/Macias/g; |
117
|
48
|
|
|
|
|
55
|
s/\bMacIulis/Maciulis/g; |
118
|
48
|
|
|
|
|
52
|
s/\bMacKie/Mackie/g; |
119
|
48
|
|
|
|
|
47
|
s/\bMacKle/Mackle/g; |
120
|
48
|
|
|
|
|
42
|
s/\bMacKlin/Macklin/g; |
121
|
48
|
|
|
|
|
50
|
s/\bMacQuarie/Macquarie/g; |
122
|
48
|
|
|
|
|
39
|
s/\bMacOmber/Macomber/g; |
123
|
48
|
|
|
|
|
36
|
s/\bMacIn/Macin/g; |
124
|
48
|
|
|
|
|
42
|
s/\bMacKintosh/Mackintosh/g; |
125
|
48
|
|
|
|
|
41
|
s/\bMacKen/Macken/g; |
126
|
48
|
|
|
|
|
43
|
s/\bMacHen/Machen/g; |
127
|
48
|
|
|
|
|
37
|
s/\bMacisaac/MacIsaac/g; |
128
|
48
|
|
|
|
|
38
|
s/\bMacHiel/Machiel/g; |
129
|
48
|
|
|
|
|
40
|
s/\bMacIol/Maciol/g; |
130
|
48
|
|
|
|
|
43
|
s/\bMacKell/Mackell/g; |
131
|
48
|
|
|
|
|
41
|
s/\bMacKlem/Macklem/g; |
132
|
48
|
|
|
|
|
44
|
s/\bMacKrell/Mackrell/g; |
133
|
48
|
|
|
|
|
57
|
s/\bMacLin/Maclin/g; |
134
|
48
|
|
|
|
|
46
|
s/\bMacKey/Mackey/g; |
135
|
48
|
|
|
|
|
34
|
s/\bMacKley/Mackley/g; |
136
|
48
|
|
|
|
|
41
|
s/\bMacHell/Machell/g; |
137
|
48
|
|
|
|
|
52
|
s/\bMacHon/Machon/g; |
138
|
|
|
|
|
|
|
} |
139
|
173
|
|
|
|
|
168
|
s/Macmurdo/MacMurdo/g; |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# Fixes for "son (daughter) of" etc. in various languages. |
142
|
173
|
|
|
|
|
166
|
s{ \b Al(?=\s+\w) }{al}gx; # al Arabic or forename Al. |
143
|
173
|
|
|
|
|
142
|
s{ \b Ap \b }{ap}gx; # ap Welsh. |
144
|
|
|
|
|
|
|
# search for: followed by ben |
145
|
|
|
|
|
|
|
# without first (?<=\S\s), first name of 'ben jones' remains lowercase |
146
|
173
|
100
|
|
|
|
302
|
s{ (?<=\S\s)\bBen(?=\s+\w) }{ben}gx if $HEBREW; # ben Hebrew or forename Ben. |
147
|
173
|
100
|
|
|
|
278
|
s{ (?<=\S\s)\bBat(?=\s+\w) }{bat}gx if $HEBREW; # bat Hebrew or forename Bat. |
148
|
173
|
|
|
|
|
142
|
s{ \b Dell([ae])\b }{dell$1}gx; # della and delle Italian. |
149
|
173
|
|
|
|
|
193
|
s{ \b D([aeiu]) \b }{d$1}gx; # da, de, di Italian; du French. |
150
|
173
|
|
|
|
|
155
|
s{ \b De([lr]) \b }{de$1}gx; # del Italian; der Dutch/Flemish. |
151
|
173
|
100
|
|
|
|
278
|
s{ \b El \b }{el}gx unless $SPANISH; # el Greek or El Spanish. |
152
|
173
|
100
|
|
|
|
259
|
s{ \b La \b }{la}gx unless $SPANISH; # la French or La Spanish. |
153
|
173
|
|
|
|
|
187
|
s{ \b L([eo]) \b }{l$1}gx; # lo Italian; le French. |
154
|
173
|
|
|
|
|
160
|
s{ \b Van(?=\s+\w) }{van}gx; # van German or forename Van. |
155
|
173
|
|
|
|
|
155
|
s{ \b Von \b }{von}gx; # von Dutch/Flemish |
156
|
|
|
|
|
|
|
|
157
|
173
|
100
|
|
|
|
229
|
if($ROMAN) { |
158
|
|
|
|
|
|
|
# Fixes for roman numeral names, e.g. Henry VIII, up to 89, LXXXIX |
159
|
168
|
|
|
|
|
1553
|
s{ \b ( (?: [Xx]{1,3} | [Xx][Ll] | [Ll][Xx]{0,3} )? |
160
|
|
|
|
|
|
|
(?: [Ii]{1,3} | [Ii][VvXx] | [Vv][Ii]{0,3} )? ) \b }{\U$1}gx; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
173
|
100
|
|
|
|
246
|
if($POSTNOMINAL) { |
164
|
|
|
|
|
|
|
# post-nominal initials |
165
|
170
|
|
|
|
|
204
|
for my $pni (@POST_NOMINAL_INITIALS) { |
166
|
56100
|
|
|
|
|
265023
|
s{ \b ($pni) $}{$pni}ix; |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
173
|
|
|
|
|
558
|
$_; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
1; |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
__END__ |