File Coverage

blib/lib/Lingua/EN/NameCase.pm
Criterion Covered Total %
statement 87 87 100.0
branch 34 34 100.0
condition 18 21 85.7
subroutine 8 8 100.0
pod 2 2 100.0
total 149 152 98.0


line stmt bran cond sub pod time code
1             package Lingua::EN::NameCase;
2              
3 8     8   88457 use warnings;
  8         16  
  8         436  
4 8     8   68 use strict;
  8         11  
  8         255  
5 8     8   3878 use locale;
  8         1716  
  8         44  
6              
7 8     8   304 use vars qw( $VERSION @ISA @EXPORT @EXPORT_OK $HEBREW $SPANISH $ROMAN $POSTNOMINAL );
  8         11  
  8         970  
8              
9             $VERSION = '1.19';
10              
11             #--------------------------------------------------------------------------
12             # Modules
13              
14 8     8   41 use Carp;
  8         10  
  8         595  
15 8     8   39 use Exporter();
  8         12  
  8         9717  
16              
17             @ISA = qw( Exporter );
18             @EXPORT = qw( nc );
19             @EXPORT_OK = qw( NameCase nc );
20              
21             #--------------------------------------------------------------------------
22             # Variables
23              
24             $HEBREW = 1;
25             $SPANISH = 0;
26             $ROMAN = 1;
27             $POSTNOMINAL = 1;
28              
29             my @POST_NOMINAL_INITIALS = qw(
30             VC GC KG LG KT LT KP GCB OM GCSI GCMG GCIE GCVO GBE CH KCB DCB KCSI KCMG
31             DCMG KCIE KCVO DCVO KBE DBE CB CSI CMG CIE CVO CBE DSO LVO OBE ISO MVO MBE
32             IOM CGC RRC DSC MC DFC AFC ARRC OBI DCM CGM GM IDSM DSM MM DFM AFM SGM IOM
33             CPM QGM RVM BEM QPM QFSM QAM CPM MSM ERD VD TD UD ED RD VRD AE
34            
35             PC ADC QHP QHS QHDS QHNS QHC SCJ J LJ QS SL QC KC JP DL MP MSP MSYP AM AM
36             MLA MEP DBEnv DConstMgt DREst EdD DPhil PhD DLitt DSocSci MD EngD DD LLD
37             DProf MA MArch MAnth MSc MMORSE MMath MMathStat MPharm MPhil MSc MSci MSt
38             MRes MEng MChem MBiochem MSocSc MMus LLM BCL MPhys MComp MAcc MFin MBA MPA
39             MEd MEP MEnt MCGI MGeol MLitt MEarthSc MClinRes BA BSc LLB BEng MBChB FdA
40             FdSc FdEng PgDip PgD PgCert PgC PgCLTHE AUH AKC AUS HNC HNCert HND HNDip
41             DipHE Dip OND CertHE ACSM MCSM DIC AICSM ARSM ARCS LLB LLM BCL MJur DPhil
42             PhD LLD DipLP FCILEx GCILEx ACILEx CQSW DipSW BSW MSW FCILT CMILT MILT CPL
43             CTP CML PLS CTL DLP PLog EJLog ESLog EMLog JrLog Log SrLog BArch MArch ARB
44             RIBA RIAS RIAI RSAW MB BM BS BCh BChir MRCS FRCS MS MCh. MRCP FRCP MRCPCH
45             FRCPCH MRCPath MFPM FFPM BDS MRCPsych FRCPsych MRCOG FRCOG MCEM FCEM FRCA
46             FFPMRCA MRCGP FRCGP BSc MScChiro MChiro MSc DC LFHOM MFHOM FFHOM FADO FBDO
47             FCOptom MCOptom MOst DPT MCSP FCSP. SROT MSCR FSCR. CPhT RN VN RVN BVSc
48             BVetMed VetMB BVM&S MRCVS FRCVS FAWM PGCAP PGCHE PGCE PGDE BEd NPQH QTS
49             CSci CSciTeach RSci RSciTech CEng IEng EngTech ICTTech DEM MM CMarEng
50             CMarSci CMarTech IMarEng MarEngTech RGN SRN RMN RSCN SEN EN RNMH RN RM RN1
51             RNA RN2 RN3 RNMH RN4 RN5 RNLD RN6 RN8 RNC RN7 RN9 RHV RSN ROH RFHN SPAN
52             SPMH SPCN SPLD SPHP SCHM SCLD SPCC SPDN V100 V200 V300 LPE MSc
53             );
54              
55             #--------------------------------------------------------------------------
56             # Functions
57              
58             sub NameCase {
59 12 100 100 12 1 6840 croak "Usage: \$SCALAR|\@ARRAY = NameCase [\\]\$SCALAR|\@ARRAY"
      66        
60             if ref $_[0] and ( ref $_[0] ne 'ARRAY' and ref $_[0] ne 'SCALAR' );
61              
62 11         15 local( $_ );
63              
64 11 100 100     78 if( wantarray and ( scalar @_ > 1 or ref $_[0] eq 'ARRAY' ) ) {
    100 66        
    100          
    100          
65             # We have received an array or array reference in a list context
66             # so we will return an array.
67 3 100       3 map { nc( $_ ) } @{ ref( $_[0] ) ? $_[0] : \@_ };
  94         155  
  3         12  
68              
69             } elsif( ref $_[0] eq 'ARRAY' ) {
70             # We have received an array reference in a scalar or void context
71             # so we will work on the array in-place.
72 1         1 foreach ( @{ $_[0] } ) {
  1         4  
73 46         81 $_ = nc( $_ );
74             }
75              
76             } elsif( ref $_[0] eq 'SCALAR' ) {
77             # We don't work on scalar references in-place; we take the value
78             # and return a name-cased copy.
79 3         5 nc( ${ $_[0] } );
  3         7  
80              
81             } elsif( scalar @_ == 1 ) {
82             # We've received a scalar: we return a name-cased copy.
83 3         6 nc( $_[0] );
84              
85             } else {
86 1         219 croak "NameCase only accepts a single scalar, array or array ref";
87             }
88             }
89              
90             sub nc {
91 177 100 100 177 1 8154 croak "Usage: nc [[\\]\$SCALAR]"
      66        
92             if scalar @_ > 1 or ( ref $_[0] and ref $_[0] ne 'SCALAR' );
93              
94 174 100       414 local( $_ ) = @_ if @_;
95 174 100       271 $_ = ${$_} if ref( $_ ) ; # Replace reference with value.
  2         5  
96              
97 174 100       259 return $_ unless($_);
98              
99 173         314 $_ = lc ; # Lowercase the lot.
100 173         1108 s{ \b (\w) }{\u$1}gx; # Uppercase first letter of every word.
101 173         225 s{ (\'\w) \b }{\L$1}gx; # Lowercase 's.
102              
103             # Name case Mcs and Macs - taken straight from NameParse.pm incl. comments.
104             # Exclude names with 1-2 letters after prefix like Mack, Macky, Mace
105             # Exclude names ending in a,c,i,o, or j are typically Polish or Italian
106              
107 173 100 100     773 if ( /\bMac[A-Za-z]{2,}[^aciozj]\b/ or /\bMc/ ) {
108 48         239 s/\b(Ma?c)([A-Za-z]+)/$1\u$2/g;
109              
110             # Now correct for "Mac" exceptions
111 48         64 s/\bMacEvicius/Macevicius/g; # Lithuanian
112 48         51 s/\bMacHado/Machado/g; # Portuguese
113 48         65 s/\bMacHar/Machar/g;
114 48         75 s/\bMacHin/Machin/g;
115 48         57 s/\bMacHlin/Machlin/g;
116 48         49 s/\bMacIas/Macias/g;
117 48         55 s/\bMacIulis/Maciulis/g;
118 48         52 s/\bMacKie/Mackie/g;
119 48         47 s/\bMacKle/Mackle/g;
120 48         42 s/\bMacKlin/Macklin/g;
121 48         50 s/\bMacQuarie/Macquarie/g;
122 48         39 s/\bMacOmber/Macomber/g;
123 48         36 s/\bMacIn/Macin/g;
124 48         42 s/\bMacKintosh/Mackintosh/g;
125 48         41 s/\bMacKen/Macken/g;
126 48         43 s/\bMacHen/Machen/g;
127 48         37 s/\bMacisaac/MacIsaac/g;
128 48         38 s/\bMacHiel/Machiel/g;
129 48         40 s/\bMacIol/Maciol/g;
130 48         43 s/\bMacKell/Mackell/g;
131 48         41 s/\bMacKlem/Macklem/g;
132 48         44 s/\bMacKrell/Mackrell/g;
133 48         57 s/\bMacLin/Maclin/g;
134 48         46 s/\bMacKey/Mackey/g;
135 48         34 s/\bMacKley/Mackley/g;
136 48         41 s/\bMacHell/Machell/g;
137 48         52 s/\bMacHon/Machon/g;
138             }
139 173         168 s/Macmurdo/MacMurdo/g;
140              
141             # Fixes for "son (daughter) of" etc. in various languages.
142 173         166 s{ \b Al(?=\s+\w) }{al}gx; # al Arabic or forename Al.
143 173         142 s{ \b Ap \b }{ap}gx; # ap Welsh.
144             # search for: followed by ben
145             # without first (?<=\S\s), first name of 'ben jones' remains lowercase
146 173 100       302 s{ (?<=\S\s)\bBen(?=\s+\w) }{ben}gx if $HEBREW; # ben Hebrew or forename Ben.
147 173 100       278 s{ (?<=\S\s)\bBat(?=\s+\w) }{bat}gx if $HEBREW; # bat Hebrew or forename Bat.
148 173         142 s{ \b Dell([ae])\b }{dell$1}gx; # della and delle Italian.
149 173         193 s{ \b D([aeiu]) \b }{d$1}gx; # da, de, di Italian; du French.
150 173         155 s{ \b De([lr]) \b }{de$1}gx; # del Italian; der Dutch/Flemish.
151 173 100       278 s{ \b El \b }{el}gx unless $SPANISH; # el Greek or El Spanish.
152 173 100       259 s{ \b La \b }{la}gx unless $SPANISH; # la French or La Spanish.
153 173         187 s{ \b L([eo]) \b }{l$1}gx; # lo Italian; le French.
154 173         160 s{ \b Van(?=\s+\w) }{van}gx; # van German or forename Van.
155 173         155 s{ \b Von \b }{von}gx; # von Dutch/Flemish
156              
157 173 100       229 if($ROMAN) {
158             # Fixes for roman numeral names, e.g. Henry VIII, up to 89, LXXXIX
159 168         1553 s{ \b ( (?: [Xx]{1,3} | [Xx][Ll] | [Ll][Xx]{0,3} )?
160             (?: [Ii]{1,3} | [Ii][VvXx] | [Vv][Ii]{0,3} )? ) \b }{\U$1}gx;
161             }
162              
163 173 100       246 if($POSTNOMINAL) {
164             # post-nominal initials
165 170         204 for my $pni (@POST_NOMINAL_INITIALS) {
166 56100         265023 s{ \b ($pni) $}{$pni}ix;
167             }
168             }
169              
170 173         558 $_;
171             }
172              
173             1;
174              
175             __END__