File Coverage

blib/lib/Lingua/EN/NameCase.pm
Criterion Covered Total %
statement 84 84 100.0
branch 34 34 100.0
condition 18 21 85.7
subroutine 7 7 100.0
pod 2 2 100.0
total 145 148 97.9


line stmt bran cond sub pod time code
1             package Lingua::EN::NameCase;
2              
3 8     8   79238 use strict;
  8         14  
  8         269  
4 8     8   3566 use locale;
  8         1515  
  8         33  
5              
6 8     8   228 use vars qw( $VERSION @ISA @EXPORT @EXPORT_OK $HEBREW $SPANISH $ROMAN $POSTNOMINAL );
  8         14  
  8         735  
7              
8             $VERSION = '1.18';
9              
10             #--------------------------------------------------------------------------
11             # Modules
12              
13 8     8   41 use Carp;
  8         10  
  8         506  
14 8     8   33 use Exporter();
  8         9  
  8         7809  
15              
16             @ISA = qw( Exporter );
17             @EXPORT = qw( nc );
18             @EXPORT_OK = qw( NameCase nc );
19              
20             #--------------------------------------------------------------------------
21             # Variables
22              
23             $HEBREW = 1;
24             $SPANISH = 0;
25             $ROMAN = 1;
26             $POSTNOMINAL = 1;
27              
28             my @POST_NOMINAL_INITIALS = qw(
29             VC GC KG LG KT LT KP GCB OM GCSI GCMG GCIE GCVO GBE CH KCB DCB KCSI KCMG
30             DCMG KCIE KCVO DCVO KBE DBE CB CSI CMG CIE CVO CBE DSO LVO OBE ISO MVO MBE
31             IOM CGC RRC DSC MC DFC AFC ARRC OBI DCM CGM GM IDSM DSM MM DFM AFM SGM IOM
32             CPM QGM RVM BEM QPM QFSM QAM CPM MSM ERD VD TD UD ED RD VRD AE
33            
34             PC ADC QHP QHS QHDS QHNS QHC SCJ J LJ QS SL QC KC JP DL MP MSP MSYP AM AM
35             MLA MEP DBEnv DConstMgt DREst EdD DPhil PhD DLitt DSocSci MD EngD DD LLD
36             DProf MA MArch MAnth MSc MMORSE MMath MMathStat MPharm MPhil MSc MSci MSt
37             MRes MEng MChem MBiochem MSocSc MMus LLM BCL MPhys MComp MAcc MFin MBA MPA
38             MEd MEP MEnt MCGI MGeol MLitt MEarthSc MClinRes BA BSc LLB BEng MBChB FdA
39             FdSc FdEng PgDip PgD PgCert PgC PgCLTHE AUH AKC AUS HNC HNCert HND HNDip
40             DipHE Dip OND CertHE ACSM MCSM DIC AICSM ARSM ARCS LLB LLM BCL MJur DPhil
41             PhD LLD DipLP FCILEx GCILEx ACILEx CQSW DipSW BSW MSW FCILT CMILT MILT CPL
42             CTP CML PLS CTL DLP PLog EJLog ESLog EMLog JrLog Log SrLog BArch MArch ARB
43             RIBA RIAS RIAI RSAW MB BM BS BCh BChir MRCS FRCS MS MCh. MRCP FRCP MRCPCH
44             FRCPCH MRCPath MFPM FFPM BDS MRCPsych FRCPsych MRCOG FRCOG MCEM FCEM FRCA
45             FFPMRCA MRCGP FRCGP BSc MScChiro MChiro MSc DC LFHOM MFHOM FFHOM FADO FBDO
46             FCOptom MCOptom MOst DPT MCSP FCSP. SROT MSCR FSCR. CPhT RN VN RVN BVSc
47             BVetMed VetMB BVM&S MRCVS FRCVS FAWM PGCAP PGCHE PGCE PGDE BEd NPQH QTS
48             CSci CSciTeach RSci RSciTech CEng IEng EngTech ICTTech DEM MM CMarEng
49             CMarSci CMarTech IMarEng MarEngTech RGN SRN RMN RSCN SEN EN RNMH RN RM RN1
50             RNA RN2 RN3 RNMH RN4 RN5 RNLD RN6 RN8 RNC RN7 RN9 RHV RSN ROH RFHN SPAN
51             SPMH SPCN SPLD SPHP SCHM SCLD SPCC SPDN V100 V200 V300 LPE MSc
52             );
53              
54             #--------------------------------------------------------------------------
55             # Functions
56              
57             sub NameCase {
58 12 100 100 12 1 8871 croak "Usage: \$SCALAR|\@ARRAY = NameCase [\\]\$SCALAR|\@ARRAY"
      66        
59             if ref $_[0] and ( ref $_[0] ne 'ARRAY' and ref $_[0] ne 'SCALAR' );
60              
61 11         16 local( $_ );
62              
63 11 100 100     87 if( wantarray and ( scalar @_ > 1 or ref $_[0] eq 'ARRAY' ) ) {
    100 66        
    100          
    100          
64             # We have received an array or array reference in a list context
65             # so we will return an array.
66 3 100       5 map { nc( $_ ) } @{ ref( $_[0] ) ? $_[0] : \@_ };
  94         160  
  3         11  
67              
68             } elsif( ref $_[0] eq 'ARRAY' ) {
69             # We have received an array reference in a scalar or void context
70             # so we will work on the array in-place.
71 1         2 foreach ( @{ $_[0] } ) {
  1         4  
72 46         80 $_ = nc( $_ );
73             }
74              
75             } elsif( ref $_[0] eq 'SCALAR' ) {
76             # We don't work on scalar references in-place; we take the value
77             # and return a name-cased copy.
78 3         4 nc( ${ $_[0] } );
  3         9  
79              
80             } elsif( scalar @_ == 1 ) {
81             # We've received a scalar: we return a name-cased copy.
82 3         8 nc( $_[0] );
83              
84             } else {
85 1         160 croak "NameCase only accepts a single scalar, array or array ref";
86             }
87             }
88              
89             sub nc {
90 177 100 100 177 1 9352 croak "Usage: nc [[\\]\$SCALAR]"
      66        
91             if scalar @_ > 1 or ( ref $_[0] and ref $_[0] ne 'SCALAR' );
92              
93 174 100       408 local( $_ ) = @_ if @_;
94 174 100       263 $_ = ${$_} if ref( $_ ) ; # Replace reference with value.
  2         4  
95              
96 174 100       256 return $_ unless($_);
97              
98 173         239 $_ = lc ; # Lowercase the lot.
99 173         1016 s{ \b (\w) }{\u$1}gx; # Uppercase first letter of every word.
100 173         205 s{ (\'\w) \b }{\L$1}gx; # Lowercase 's.
101              
102             # Name case Mcs and Macs - taken straight from NameParse.pm incl. comments.
103             # Exclude names with 1-2 letters after prefix like Mack, Macky, Mace
104             # Exclude names ending in a,c,i,o, or j are typically Polish or Italian
105              
106 173 100 100     708 if ( /\bMac[A-Za-z]{2,}[^aciozj]\b/ or /\bMc/ ) {
107 48         242 s/\b(Ma?c)([A-Za-z]+)/$1\u$2/g;
108              
109             # Now correct for "Mac" exceptions
110 48         75 s/\bMacEvicius/Macevicius/g; # Lithuanian
111 48         46 s/\bMacHado/Machado/g; # Portuguese
112 48         56 s/\bMacHar/Machar/g;
113 48         60 s/\bMacHin/Machin/g;
114 48         51 s/\bMacHlin/Machlin/g;
115 48         49 s/\bMacIas/Macias/g;
116 48         66 s/\bMacIulis/Maciulis/g;
117 48         58 s/\bMacKie/Mackie/g;
118 48         55 s/\bMacKle/Mackle/g;
119 48         51 s/\bMacKlin/Macklin/g;
120 48         41 s/\bMacQuarie/Macquarie/g;
121 48         70 s/\bMacOmber/Macomber/g;
122 48         46 s/\bMacIn/Macin/g;
123 48         30 s/\bMacKintosh/Mackintosh/g;
124 48         46 s/\bMacKen/Macken/g;
125 48         44 s/\bMacHen/Machen/g;
126 48         37 s/\bMacisaac/MacIsaac/g;
127 48         35 s/\bMacHiel/Machiel/g;
128 48         44 s/\bMacIol/Maciol/g;
129 48         44 s/\bMacKell/Mackell/g;
130 48         42 s/\bMacKlem/Macklem/g;
131 48         44 s/\bMacKrell/Mackrell/g;
132 48         54 s/\bMacLin/Maclin/g;
133 48         39 s/\bMacKey/Mackey/g;
134 48         44 s/\bMacKley/Mackley/g;
135 48         37 s/\bMacHell/Machell/g;
136 48         49 s/\bMacHon/Machon/g;
137             }
138 173         165 s/Macmurdo/MacMurdo/g;
139              
140             # Fixes for "son (daughter) of" etc. in various languages.
141 173         154 s{ \b Al(?=\s+\w) }{al}gx; # al Arabic or forename Al.
142 173         163 s{ \b Ap \b }{ap}gx; # ap Welsh.
143             # search for: followed by ben
144             # without first (?<=\S\s), first name of 'ben jones' remains lowercase
145 173 100       315 s{ (?<=\S\s)\bBen(?=\s+\w) }{ben}gx if $HEBREW; # ben Hebrew or forename Ben.
146 173 100       250 s{ (?<=\S\s)\bBat(?=\s+\w) }{bat}gx if $HEBREW; # bat Hebrew or forename Bat.
147 173         199 s{ \b Dell([ae])\b }{dell$1}gx; # della and delle Italian.
148 173         203 s{ \b D([aeiu]) \b }{d$1}gx; # da, de, di Italian; du French.
149 173         152 s{ \b De([lr]) \b }{de$1}gx; # del Italian; der Dutch/Flemish.
150 173 100       267 s{ \b El \b }{el}gx unless $SPANISH; # el Greek or El Spanish.
151 173 100       249 s{ \b La \b }{la}gx unless $SPANISH; # la French or La Spanish.
152 173         180 s{ \b L([eo]) \b }{l$1}gx; # lo Italian; le French.
153 173         167 s{ \b Van(?=\s+\w) }{van}gx; # van German or forename Van.
154 173         154 s{ \b Von \b }{von}gx; # von Dutch/Flemish
155              
156 173 100       234 if($ROMAN) {
157             # Fixes for roman numeral names, e.g. Henry VIII, up to 89, LXXXIX
158 168         1632 s{ \b ( (?: [Xx]{1,3} | [Xx][Ll] | [Ll][Xx]{0,3} )?
159             (?: [Ii]{1,3} | [Ii][VvXx] | [Vv][Ii]{0,3} )? ) \b }{\U$1}gx;
160             }
161              
162 173 100       262 if($POSTNOMINAL) {
163             # post-nominal initials
164 170         203 for my $pni (@POST_NOMINAL_INITIALS) {
165 56100         271454 s{ \b ($pni) $}{$pni}ix;
166             }
167             }
168              
169 173         491 $_;
170             }
171              
172             1;
173              
174             __END__