line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Capitalize; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Text::Capitalize - capitalize strings ("to WORK AS titles" becomes "To Work as Titles") |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use Text::Capitalize; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
print capitalize( "...and justice for all" ), "\n"; |
12
|
|
|
|
|
|
|
...And Justice For All |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
print capitalize_title( "...and justice for all" ), "\n"; |
15
|
|
|
|
|
|
|
...And Justice for All |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
print capitalize_title( "agent of SFPUG", PRESERVE_ALLCAPS=>1 ), "\n"; |
18
|
|
|
|
|
|
|
Agent of SFPUG |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
print capitalize_title( "the ring: symbol or cliche?", |
21
|
|
|
|
|
|
|
PRESERVE_WHITESPACE=>1 ), "\n"; |
22
|
|
|
|
|
|
|
The Ring: Symbol or Cliche? |
23
|
|
|
|
|
|
|
(Note, double-space after colon is still there.) |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# To work on international characters, may need to set locale |
26
|
|
|
|
|
|
|
use Env qw( LANG ); |
27
|
|
|
|
|
|
|
$LANG = "en_US"; |
28
|
|
|
|
|
|
|
print capitalize_title( "über maus" ), "\n"; |
29
|
|
|
|
|
|
|
Über Maus |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
use Text::Capitalize qw( scramble_case ); |
32
|
|
|
|
|
|
|
print scramble_case( 'It depends on what you mean by "mean"' ); |
33
|
|
|
|
|
|
|
It dEpenDS On wHAT YOu mEan by "meAn". |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=head1 ABSTRACT |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
Text::Capitalize is for capitalizing strings in a manner |
38
|
|
|
|
|
|
|
suitable for use in titles. |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 DESCRIPTION |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Text::Capitalize provides some routines for B<title-like> |
43
|
|
|
|
|
|
|
formatting of strings. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
The simple B<capitalize> function just makes the initial character |
46
|
|
|
|
|
|
|
of each word uppercase, and forces the rest to lowercase. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
The B<capitalize_title> function applies English title case rules |
49
|
|
|
|
|
|
|
(discussed below) where only the "important" words are supposed |
50
|
|
|
|
|
|
|
to be capitalized. There are also some customization features |
51
|
|
|
|
|
|
|
provided to allow the user to choose variant rules. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
Comparing B<capitalize> and B<capitalize_title>: |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
Input: "lost watches of splitsville" |
56
|
|
|
|
|
|
|
capitalize: "Lost Watches Of Splitsville" |
57
|
|
|
|
|
|
|
capitalize_title: "Lost Watches of Splitsville" |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
Some examples of formatting with B<capitalize_title>: |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Input: "KiLLiNG TiMe" |
62
|
|
|
|
|
|
|
capitalize_title: "Killing Time" |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
Input: "we have come to wound the autumnal city" |
65
|
|
|
|
|
|
|
capitalize_title: "We Have Come to Wound the Autumnal City" |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Input: "ask for whom they ask for" |
68
|
|
|
|
|
|
|
capitalize_title: "Ask for Whom They Ask For" |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
Text::Capitalize also provides some functions for special effects |
71
|
|
|
|
|
|
|
such as B<scramble_case>, which typically would be used for this sort |
72
|
|
|
|
|
|
|
of transformation: |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
Input: "get whacky" |
75
|
|
|
|
|
|
|
scramble_case: "gET wHaCkY" (or something similar) |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head1 EXPORTS |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=head2 default exports |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=over |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=cut |
85
|
|
|
|
|
|
|
|
86
|
9
|
|
|
9
|
|
728574
|
use 5.006; |
|
9
|
|
|
|
|
37
|
|
|
9
|
|
|
|
|
383
|
|
87
|
9
|
|
|
9
|
|
54
|
use strict; |
|
9
|
|
|
|
|
20
|
|
|
9
|
|
|
|
|
311
|
|
88
|
9
|
|
|
9
|
|
57
|
use warnings; |
|
9
|
|
|
|
|
19
|
|
|
9
|
|
|
|
|
272
|
|
89
|
9
|
|
|
9
|
|
1140
|
use utf8; |
|
9
|
|
|
|
|
27
|
|
|
9
|
|
|
|
|
71
|
|
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# use locale; |
92
|
9
|
|
|
9
|
|
229
|
use Carp; |
|
9
|
|
|
|
|
16
|
|
|
9
|
|
|
|
|
723
|
|
93
|
9
|
|
|
9
|
|
49
|
use Exporter; |
|
9
|
|
|
|
|
24
|
|
|
9
|
|
|
|
|
378
|
|
94
|
9
|
|
|
9
|
|
44
|
use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION); |
|
9
|
|
|
|
|
17
|
|
|
9
|
|
|
|
|
886
|
|
95
|
|
|
|
|
|
|
|
96
|
9
|
|
|
9
|
|
45
|
use vars qw($DEBUG); |
|
9
|
|
|
|
|
16
|
|
|
9
|
|
|
|
|
7528
|
|
97
|
|
|
|
|
|
|
# Debugging flag; off by default.
$DEBUG = 0;

# Exporter configuration.  capitalize and capitalize_title are exported
# by default; everything in @EXPORT_OK must be requested by name.
# NOTE(review): capitalize_title_original is not defined in the part of
# the file reviewed here -- confirm that it exists elsewhere.
@ISA       = qw(Exporter);
@EXPORT    = qw(capitalize capitalize_title);
@EXPORT_OK = qw(
    @exceptions
    %defaults_capitalize_title
    scramble_case
    random_case
    zippify_case
    capitalize_title_original
);

$VERSION = '1.3';

# The "exceptions": the minor words that don't usually get capitalized
# in titles (used by capitalize_title).  Rows are articles, conjunctions,
# short prepositions, and two non-English particles.  "for" and "but"
# appear in two rows; the repetition is kept so the exported list is
# unchanged.
use vars qw(@exceptions);
@exceptions = qw(
    a an the
    and or nor for but so yet
    to of by at for but in with has
    de von
);

# Default arguments for the capitalize_title function.  All PRESERVE_*
# features start switched off, and NOT_CAPITALIZED holds a reference to
# @exceptions itself (not a copy).
use vars qw(%defaults_capitalize_title);
%defaults_capitalize_title = (
    PRESERVE_WHITESPACE => 0,
    PRESERVE_ALLCAPS    => 0,
    PRESERVE_ANYCAPS    => 0,
    NOT_CAPITALIZED     => \@exceptions,
);
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# Defining patterns to match "words" and "sentences" (used by capitalize_title) |
131
|
|
|
|
|
|
|
|
132
|
9
|
|
|
9
|
|
46
|
use vars qw($word_rule $sentence_rule); |
|
9
|
|
|
|
|
13
|
|
|
9
|
|
|
|
|
484
|
|
133
|
9
|
|
|
9
|
|
49
|
use vars qw($anything $ellipsis $dot $qmark $emdash $terminator $ws); |
|
9
|
|
|
|
|
20
|
|
|
9
|
|
|
|
|
171037
|
|
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# Pattern that picks one "word record" off a sentence: the word plus the
# punctuation and whitespace that surround it.  Every group is optional,
# so the pattern can also match the empty string.
$word_rule = qr{
    ([^\w\s]*)   # $1 - leading punctuation
                 #      (e.g. ellipsis, leading apostrophe)
    ([\w']*)     # $2 - the word itself (includes non-leading apostrophes)
    ([^\w\s]*)   # $3 - trailing punctuation
                 #      (e.g. comma, ellipsis, period)
    (\s*)        # $4 - trailing whitespace
                 #      (usually " ", though at EOL prob "")
}x;

# Building blocks for $sentence_rule.
$anything   = qr{.*?};       # shortest possible run of characters
$ellipsis   = qr{\Q...};     # a literal "..."
$dot        = qr{\Q.};       # a literal "."
$qmark      = qr{\Q?};       # a literal "?"
$emdash     = qr{\Q--};      # a literal "--"
$terminator = qr{$ellipsis|$dot|$qmark|!|:|$emdash|$};
$ws         = qr{\s*};       # optional whitespace

# One sentence: the shortest text that reaches a terminator (or the end
# of the string), plus any whitespace that follows it.
$sentence_rule = qr{
    (
        $anything     # anything up to...
        $terminator   # any sentence terminator (*or* the EOS)
        $ws           # trailing whitespace, if any
    )                 # all captured to $1
}ox;
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=item capitalize |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Makes the initial character of each word uppercase, and forces the |
164
|
|
|
|
|
|
|
rest to lowercase. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
The original routine by Stanislaw Y. Pusep. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=cut |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# capitalize( $string )
#
# Returns a copy of $string in which each chunk of text lying between two
# word boundaries is given an uppercase first character and a lowercase
# remainder.  A chunk that already equals its own uppercased form (an
# all-caps word, digits, whitespace, punctuation) is passed through as is.
sub capitalize {
    my $text = shift;

    $text =~ s{\b(.*?)\b}{
        my $chunk = $1;
        $chunk eq uc($chunk) ? $chunk : ucfirst( lc($chunk) );
    }ge;

    return $text;
}
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=item capitalize_title |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
Applies English title case rules (See L<BACKGROUND>) where only the |
179
|
|
|
|
|
|
|
"important" words are supposed to be capitalized. |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
The one required argument is the string to be capitalized. |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
Some customization options may be passed in as pairs of names and |
184
|
|
|
|
|
|
|
values following the required argument. |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
The following customizations are allowed: |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
Boolean: |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
PRESERVE_WHITESPACE |
191
|
|
|
|
|
|
|
PRESERVE_ALLCAPS |
192
|
|
|
|
|
|
|
PRESERVE_ANYCAPS |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
Array reference: |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
NOT_CAPITALIZED |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
See L<Customizing the Exceptions to Capitalization>. |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=cut |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# capitalize_title( $string, %options )
#
# Returns $string recased with English title rules: the first and last
# word of every sentence are capitalized, words on the NOT_CAPITALIZED
# list are lowercased, and every other word is capitalized.
#
# Options are name/value pairs laid over %defaults_capitalize_title:
#   PRESERVE_WHITESPACE - true: keep whitespace exactly as given
#                         (otherwise runs collapse to one space and the
#                         result is trimmed at both ends)
#   PRESERVE_ALLCAPS    - true: leave all-uppercase words alone
#   PRESERVE_ANYCAPS    - true: leave any word containing an uppercase
#                         letter alone
#   NOT_CAPITALIZED     - array ref of the minor words
# An option name that is not a key of the defaults hash draws a warning
# through carp.
sub capitalize_title {
    my $string = shift;

    # Caller-supplied pairs override the package defaults.
    my %opt = ( %defaults_capitalize_title, @_ );

    # Report option names we do not know (most likely misspellings).
    for my $name ( keys %opt ) {
        carp "Bad option $name\n"
            unless exists $defaults_capitalize_title{$name};
    }

    my ( $keep_ws, $keep_allcaps, $keep_anycaps ) =
        @opt{qw( PRESERVE_WHITESPACE PRESERVE_ALLCAPS PRESERVE_ANYCAPS )};

    # A single anchored, case-insensitive alternation of the minor words.
    my $minor_words = join '|', @{ $opt{NOT_CAPITALIZED} };
    my $minor_rule  = qr{^(?:$minor_words)$}i;

    my $title = "";

    # A title may hold several sentences; each one has its own first and
    # last word.
    while ( $string =~ /$sentence_rule/g ) {
        my $sentence = $1;

        # Split the sentence into records of
        #   [ leading punctuation, word, trailing punctuation, whitespace ]
        # so that cases such as "...and", "'em", "and..." keep their
        # surroundings.
        my @tokens;
        while ( $sentence =~ /$word_rule/g ) {

            # The pattern also yields an empty match at the end of the
            # sentence; those are not loaded.
            next unless ( ($2 ne '') or $1 or $3 or ($4 ne '') );
            push @tokens, [ $1, $2, $3, $4 ];
        }

        my $final   = $#tokens;
        my $rebuilt = "";

        for my $idx ( 0 .. $final ) {
            my ( $lead, $core, $trail, $gap ) = @{ $tokens[$idx] };
            my $at_edge = ( $idx == 0 ) || ( $idx == $final );

            # Collapse whitespace unless it is being preserved.
            if ( !$keep_ws && length($gap) > 0 ) {
                $gap = " ";
            }

            if ( $keep_anycaps && $core =~ m{[[:upper:]]} ) {
                # Word with any capitals (e.g. "iMac"): passed through.
            }
            elsif ( $keep_allcaps && $core =~ m{^[[:upper:]]+$} ) {
                # All-uppercase word: passed through.
            }
            elsif ( $core =~ m{^[dl]'} ) {
                # French names such as "d'Alembert", "l'Hospital": keep the
                # particle lowercase and upcase the letter after it ...
                $core =~ s{ ^(d') (\w) }{ lc($1) . uc($2) }iex;
                $core =~ s{ ^(l') (\w) }{ lc($1) . uc($2) }iex;

                # ... but upcase the first character too when the word
                # opens or closes the sentence.
                $core = ucfirst($core) if $at_edge;
            }
            elsif ($at_edge) {
                # The first word and the last are always capitalized.
                $core = ucfirst( lc($core) );
            }
            elsif ( $core =~ m{$minor_rule} ) {
                # Minor words in mid-sentence stay lowercase.
                $core = lc($core);
            }
            else {
                $core = ucfirst( lc($core) );
            }

            $rebuilt .= $lead . $core . $trail . $gap;
        }

        $title .= $rebuilt;
    }

    # Trim the ends only now, so spaces *between* sentences survive.
    if ( !$keep_ws ) {
        $title =~ s|^\s+||;
        $title =~ s|\s+$||;
    }

    return $title;
}
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=back |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=head2 optional exports |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=over |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=item @exceptions |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
The list of minor words that don't usually get capitalized in |
318
|
|
|
|
|
|
|
titles (used by L<capitalize_title>). Defaults to: |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
a an the |
321
|
|
|
|
|
|
|
and or nor for but so yet |
322
|
|
|
|
|
|
|
to of by at for but in with has |
323
|
|
|
|
|
|
|
de von |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=item %defaults_capitalize_title |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
Defines the default arguments for the capitalize_title function |
328
|
|
|
|
|
|
|
Initially, this is set-up to shut off the features |
329
|
|
|
|
|
|
|
PRESERVE_WHITESPACE, PRESERVE_ALLCAPS and PRESERVE_ANYCAPS; |
330
|
|
|
|
|
|
|
it also has L<@exceptions> as the NOT_CAPITALIZED list. |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
=item scramble_case |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
This routine provides a special effect: sCraMBliNg tHe CaSe |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
The algorithm here uses a modified probability distribution to get |
337
|
|
|
|
|
|
|
a weirder looking effect than simple randomization such as with L<random_case>. |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
For a discussion of the algorithm, see L. |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=cut |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
# Instead of initializing $uppers, $downers to zero, using fudged |
344
|
|
|
|
|
|
|
# initial counts to |
345
|
|
|
|
|
|
|
# (1) provide an initial bias against leading with uppercase, |
346
|
|
|
|
|
|
|
# (2) eliminate need to watch for division by zero on $tweak below. |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# Rather than "int(rand(2))" which generates a 50/50 distribution of 0s and 1s, |
349
|
|
|
|
|
|
|
# we're using "int(rand(1+$tweak))" where $tweak will |
350
|
|
|
|
|
|
|
# provide a restoring force back to the average |
351
|
|
|
|
|
|
|
# So here we want $tweak: |
352
|
|
|
|
|
|
|
# to go to 1 when you approach $uppers = $downers |
353
|
|
|
|
|
|
|
# to be larger than 1 if $downers > $uppers |
354
|
|
|
|
|
|
|
# to be less than 1 if $uppers > $downers |
355
|
|
|
|
|
|
|
# A simple formula that does this: |
356
|
|
|
|
|
|
|
# $uppity = int( rand( 1 + $downers/$uppers) ); |
357
|
|
|
|
|
|
|
# The alternative (proposed by Randal Schwartz) is no real speed improvement: |
358
|
|
|
|
|
|
|
# $uppity = rand( $uppers + $downers ) > $uppers; |
359
|
|
|
|
|
|
|
# (though there are no worries about divide by zero there). |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# Note that this benchmarks faster: |
362
|
|
|
|
|
|
|
# @chars = split //, $string; |
363
|
|
|
|
|
|
|
# Than: |
364
|
|
|
|
|
|
|
# @chars = split /(?<=[[:alpha:]])/, $string; |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
# scramble_case( $string )
#
# Returns $string with the case of each character chosen at random.  The
# odds are weighted by how many characters have gone each way so far:
# the more uppercase picks there have been, the less likely the next one
# is.  The counts start at 2 uppers and 1 downer, which biases the first
# pick toward lowercase and keeps the ratio free of division by zero.
sub scramble_case {
    my $string = shift;

    my $upper_count = 2;
    my $lower_count = 1;
    my $scrambled   = '';

    for my $char ( split //, $string ) {
        if ( int( rand( 1 + $lower_count / $upper_count ) ) ) {
            $scrambled .= uc($char);
            $upper_count++;
        }
        else {
            $scrambled .= lc($char);
            $lower_count++;
        }
    }

    return $scrambled;
}
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=item random_case |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
Randomizes the case of each character with a 50-50 chance |
392
|
|
|
|
|
|
|
of each one becoming upper or lower case. |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
=cut |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
# random_case( $string )
#
# Returns $string with every character independently switched to upper
# or lower case on a simple 50-50 random pick.
sub random_case {
    my $string = shift;

    return join '',
        map { int( rand(2) ) ? uc($_) : lc($_) }
        split //, $string;
}
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
=item zippify_case |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
Function to provide a special effect: "RANDOMLY upcasing WHOLE WORDS at a TIME". |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
This uses a similar algorithm to L<scramble_case>, though it also |
420
|
|
|
|
|
|
|
ignores words on the L<@exceptions> list, just as L<capitalize_title> does. |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
=cut |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
# zippify_case( $string )
#
# Returns $string with WHOLE WORDS randomly switched to upper or lower
# case.  The string is cut at word boundaries, so the pieces between
# words (spaces, punctuation) go through the same random pick.
#
# A piece that is one of the @exceptions words (compared as a whole
# word, ignoring case) is left exactly as given and does not take part
# in the counts.
#
# The pick is weighted against streaks: the counts start at 1 upper and
# 5 downers, and each pick shifts the odds toward the other case.
sub zippify_case {
    my $string = shift;

    # Whole-word lookup of the minor words.  A pattern match of the
    # exception against the word would also skip any word that is merely
    # a substring of an exception ("e", "th", "i", ...).
    my %is_exception = map { ( lc($_) => 1 ) } @exceptions;

    my @words   = split /\b/, $string;
    my $uppers  = 1;
    my $downers = 5;

    WORD: foreach my $word (@words) {
        next WORD if $is_exception{ lc($word) };

        # a modified "random" distribution with fewer "streaks" than normal.
        if ( int( rand( 1 + $downers / $uppers ) ) ) {
            $word = uc($word);
            $uppers++;
        }
        else {
            $word = lc($word);
            $downers++;
        }
    }

    return join '', @words;
}
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
1; |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=back |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=head1 BACKGROUND |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
The capitalize_title function tries to do the right thing by |
462
|
|
|
|
|
|
|
default: adjust an arbitrary chunk of text so that it can be used |
463
|
|
|
|
|
|
|
as a title. But as with many aspects of the human languages, it |
464
|
|
|
|
|
|
|
is extremely difficult to come up with a set of programmatic |
465
|
|
|
|
|
|
|
rules that will cover all cases. |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=head2 Words that don't get capitalized |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
This web page: |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
http://www.continentallocating.com/World.Literature/General2/LiteraryTitles2.htm |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
presents some admirably clear rules for capitalizing titles: |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
ALL words in EVERY title are capitalized except |
476
|
|
|
|
|
|
|
(1) a, an, and the, |
477
|
|
|
|
|
|
|
(2) two and three letter conjunctions (and, or, nor, for, but, so, yet), |
478
|
|
|
|
|
|
|
(3) prepositions. |
479
|
|
|
|
|
|
|
Exceptions: The first and last words are always capitalized even |
480
|
|
|
|
|
|
|
if they are among the above three groups. |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
But consider the case: |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
"It Waits Underneath the Sea" |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
Should the word "underneath" be downcased because it's a preposition? |
487
|
|
|
|
|
|
|
Most English speakers would be surprised to see it that way. |
488
|
|
|
|
|
|
|
Consequently, the default list of exceptions to capitalization in this module |
489
|
|
|
|
|
|
|
only includes the shortest of the common prepositions (to of by at for but in). |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
The default entries on the exception list are: |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
a an the |
494
|
|
|
|
|
|
|
and or nor for but so yet |
495
|
|
|
|
|
|
|
to of by at for but in with has |
496
|
|
|
|
|
|
|
de von |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
The observant may note that the last row is not composed of English |
499
|
|
|
|
|
|
|
words. The honorary "de" has been included in honor of "Honoré de |
500
|
|
|
|
|
|
|
Balzac". And "von" was added for the sake of equal time. |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=head2 Customizing the Exceptions to Capitalization |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
If you have different ideas about the "rules" of English |
506
|
|
|
|
|
|
|
(or perhaps if you're trying to use this code with another |
507
|
|
|
|
|
|
|
language with different rules) you might like to substitute |
508
|
|
|
|
|
|
|
a new exception list of your own: |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
capitalize_title( "Dude, we, like, went to Old Slavy, and uh, they didn't have it", |
511
|
|
|
|
|
|
|
NOT_CAPITALIZED => [ qw( uh duh huh wha like man you know ) ] ); |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
This should return: |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
Dude, We, like, Went To Old Slavy, And uh, They Didn't Have It |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
Less radically, you might like to simply add a word to the list, |
518
|
|
|
|
|
|
|
for example "from": |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
use Text::Capitalize 0.2 qw( capitalize_title @exceptions ); |
521
|
|
|
|
|
|
|
push @exceptions, "from"; |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
print capitalize_title( "fungi from yuggoth", |
524
|
|
|
|
|
|
|
NOT_CAPITALIZED => \@exceptions); |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
This should output: |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
Fungi from Yuggoth |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=head2 All Uppercase Words |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
In order to work with a wide range of input strings, by default |
533
|
|
|
|
|
|
|
capitalize_title presumes that upper-case input needs to be adjusted |
534
|
|
|
|
|
|
|
(e.g. "DOOM APPROACHES!" would become "Doom Approaches!"). But, this |
535
|
|
|
|
|
|
|
doesn't allow for the possibilities such as an acronym in a title |
536
|
|
|
|
|
|
|
(e.g. "RAM Prices Plummet" ideally should not become "Ram Prices |
537
|
|
|
|
|
|
|
Plummet"). If the PRESERVE_ALLCAPS option is set, then it will be |
538
|
|
|
|
|
|
|
presumed that an all-uppercase word is that way for a reason, and |
539
|
|
|
|
|
|
|
will be left alone: |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
print capitalize_title( "ram more RAM down your throat", |
542
|
|
|
|
|
|
|
PRESERVE_ALLCAPS => 1 ); |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
This should output: |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
Ram More RAM Down Your Throat |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=head2 Preserving Any Usage of Uppercase for Mixed-case Words |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
There are some other odd cases that are difficult to handle well, |
551
|
|
|
|
|
|
|
notably mixed-case words such as "iMac", "CHiPs", and so on. For |
552
|
|
|
|
|
|
|
these purposes, a PRESERVE_ANYCAPS option has been provided which |
553
|
|
|
|
|
|
|
presumes that any usage of uppercase is there for a reason, in which |
554
|
|
|
|
|
|
|
case the entire word should be passed through untouched. With |
555
|
|
|
|
|
|
|
PRESERVE_ANYCAPS on, only the case of all lowercase words will ever |
556
|
|
|
|
|
|
|
be adjusted: |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
print capitalize_title( "TLAs i have known and loved", |
559
|
|
|
|
|
|
|
PRESERVE_ANYCAPS => 1 ); |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
This should output: |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
TLAs I Have Known and Loved |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
print capitalize_title( "the next iMac: just another NeXt?", |
566
|
|
|
|
|
|
|
PRESERVE_ANYCAPS => 1); |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
This should output: |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
The Next iMac: Just Another NeXt? |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
=head2 Handling Whitespace |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
By default, the capitalize_title function presumes that you're trying |
576
|
|
|
|
|
|
|
to clean up potential title strings. As an extra feature it collapses |
577
|
|
|
|
|
|
|
multiple spaces and tabs into single spaces. If this feature doesn't |
578
|
|
|
|
|
|
|
seem desirable and you want it to literally restrict itself to |
579
|
|
|
|
|
|
|
adjusting capitalization, you can force that behavior with the |
580
|
|
|
|
|
|
|
PRESERVE_WHITESPACE option: |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
print capitalize_title( "it came from texas: the new new world order?", |
583
|
|
|
|
|
|
|
PRESERVE_WHITESPACE => 1); |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
This should output: |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
It Came From Texas: The New New World Order? |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
(Note: the double-space after the colon is still there.) |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
=head2 Comparison to Text::Autoformat |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
As you might expect, there's more than one way to do this, |
594
|
|
|
|
|
|
|
and these two pieces of code perform very similar functions: |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
use Text::Capitalize 0.2; |
597
|
|
|
|
|
|
|
print capitalize_title( $t ), "\n"; |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
use Text::Autoformat; |
600
|
|
|
|
|
|
|
print autoformat { case => "highlight", right => length( $t ) }, $t; |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
Note: with autoformat, supplying the length of the string as the |
603
|
|
|
|
|
|
|
"right margin" is much faster than plugging in an arbitrarily large |
604
|
|
|
|
|
|
|
number. There doesn't seem to be any other way of turning off |
605
|
|
|
|
|
|
|
line-breaking (e.g. by using the "fill" parameter) though possibly |
606
|
|
|
|
|
|
|
there will be in the future. |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
As of this writing, "capitalize_title" has some advantages: |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
=over |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
=item 1. |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
It works on characters outside the English 7-bit ASCII |
615
|
|
|
|
|
|
|
range; for example, with my locale setting (en_US), the |
616
|
|
|
|
|
|
|
ISO-8859-1 International characters are handled correctly, |
617
|
|
|
|
|
|
|
so that "über maus" becomes "Über Maus". |
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
=item 2. |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
Minor words following leading punctuation become upper case: |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
"...And Justice for All" |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
=item 3. |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
It works with multiple sentence input (e.g. "And sooner. And later." |
628
|
|
|
|
|
|
|
should probably not be "And sooner. and later.") |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
=item 4. |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
The list of minor words is more extensive (e.g. it includes: so, yet, nor), |
633
|
|
|
|
|
|
|
and is also customizable. |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
=item 5. |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
There's a way of preserving acronyms via the PRESERVE_ALLCAPS option |
638
|
|
|
|
|
|
|
and similarly, mixed-case words ("iMac", "NeXt", etc.) with the |
639
|
|
|
|
|
|
|
PRESERVE_ANYCAPS option. |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
=item 6. |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
capitalize_title is roughly ten times faster. |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
=back |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
Another difference is that Text::Autoformat's "highlight" |
648
|
|
|
|
|
|
|
always preserves whitespace, something like capitalize_title |
649
|
|
|
|
|
|
|
does with the PRESERVE_WHITESPACE option set. |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
However, it should be pointed out that Text::Autoformat is under |
652
|
|
|
|
|
|
|
active maintenance by Damian Conway. It also does far more than |
653
|
|
|
|
|
|
|
this module, and you may want to use it for other reasons. |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
=head2 Still more ways to do it |
656
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
Late-breaking news: The second edition of the Perl Cookbook |
658
|
|
|
|
|
|
|
has just come out. It now includes: "Properly Capitalizing |
659
|
|
|
|
|
|
|
a Title or Headline" as recipe 1.14. You should |
660
|
|
|
|
|
|
|
familiarize yourself with this if you want to become a true |
661
|
|
|
|
|
|
|
master of all title capitalization routines. |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
(And I see that recipe 1.13 includes a "randcap" program as |
664
|
|
|
|
|
|
|
an example, which as it happens does something like the |
665
|
|
|
|
|
|
|
random_case function described below...) |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
=head1 SPECIAL EFFECTS |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
Some functions have been provided to make strings look weird |
670
|
|
|
|
|
|
|
by scrambling their capitalization ("lIKe tHiS"): |
671
|
|
|
|
|
|
|
random_case and scramble_case. The function "random_case" |
672
|
|
|
|
|
|
|
does a straightforward randomization of capitalization so |
673
|
|
|
|
|
|
|
that each letter has a 50-50 chance of being upper or lower |
674
|
|
|
|
|
|
|
case. The function "scramble_case" performs a very similar |
675
|
|
|
|
|
|
|
function, but does a slightly better job of producing something |
676
|
|
|
|
|
|
|
"weird-looking". |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
The difficulty is that there are differences between human |
679
|
|
|
|
|
|
|
perception of randomness and actual randomness. Consider |
680
|
|
|
|
|
|
|
the fact that of the sixteen ways that the four-letter word |
681
|
|
|
|
|
|
|
"word" can be capitalized, three of them are rather boring: |
682
|
|
|
|
|
|
|
"word", "Word" and "WORD". To make it less likely that |
683
|
|
|
|
|
|
|
scramble_case will produce dull output when you want "weird" |
684
|
|
|
|
|
|
|
output, a modified probability distribution has been used |
685
|
|
|
|
|
|
|
that records the history of previous outcomes, and tweaks |
686
|
|
|
|
|
|
|
the likelihood of the next decision in the opposite |
687
|
|
|
|
|
|
|
direction, back toward the expected average. In effect, |
688
|
|
|
|
|
|
|
this simulates a world in which the Gambler's Fallacy is |
689
|
|
|
|
|
|
|
correct ("Hm... red has come up a lot, I bet that black is |
690
|
|
|
|
|
|
|
going to come up now."). "Streaks" are much less likely |
691
|
|
|
|
|
|
|
with scramble_case than with random_case. |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
Additionally, with scramble_case the probability that the |
694
|
|
|
|
|
|
|
first character of the input string will become upper-case |
695
|
|
|
|
|
|
|
has been tweaked to less than 50%. (Future versions may |
696
|
|
|
|
|
|
|
apply this tweak on a per-word basis rather than just on a |
697
|
|
|
|
|
|
|
per-string basis). |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
There is also a function that scrambles capitalization on |
700
|
|
|
|
|
|
|
a word-by-word basis called "zippify_case", which should produce output |
701
|
|
|
|
|
|
|
like: "In my PREVIOUS life i was a LATEX-novelty REPAIRMAN!" |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
=head1 EXPORT |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
By default, this version of the module provides the two |
707
|
|
|
|
|
|
|
functions capitalize and capitalize_title. Future versions |
708
|
|
|
|
|
|
|
will have no further additions to the default export list. |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
Optionally, the following functions may also be exported: |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
=over |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
=item scramble_case |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
A function to scramble capitalization in a wEiRD loOOkInG wAy. |
717
|
|
|
|
|
|
|
Supposed to look a little stranger than the simpler random_case |
718
|
|
|
|
|
|
|
output. |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
=item random_case |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
Function to randomize capitalization of each letter in the |
723
|
|
|
|
|
|
|
string. Compare to "scramble_case". |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
=item zippify_case |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
A function like "scramble_case" that acts on a word-by-word basis |
728
|
|
|
|
|
|
|
(Somewhat LIKE this, YOU know?). |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
=back |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
It is also possible to export the following variables: |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
=over |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
=item @exceptions |
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
The list of minor words that capitalize_title uses by default to |
739
|
|
|
|
|
|
|
determine the exceptions to capitalization. |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
=item %defaults_capitalize_title |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
The hash of allowed arguments (with defaults) that the |
744
|
|
|
|
|
|
|
capitalize_title function uses. |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
=back |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
=head1 BUGS |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
1. In capitalize_title, quoted sentence terminators are |
751
|
|
|
|
|
|
|
treated as actual sentence breaks, e.g. in this case: |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
'say "yes but!" and "know what?"' |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
The program sees the ! and effectively treats this as two |
756
|
|
|
|
|
|
|
separate sentences: the word "but" becomes "But" (under the |
757
|
|
|
|
|
|
|
rule that last words must always be uppercase, even if they're |
758
|
|
|
|
|
|
|
on the exception list) and the word "and" becomes "And" (under |
759
|
|
|
|
|
|
|
the first word rule). |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
2. There's no good way to automatically handle names like |
762
|
|
|
|
|
|
|
"McCoy". Consider the difficulty of disambiguating "Macadam |
763
|
|
|
|
|
|
|
Roads" from "MacAdam Rode". If you need to solve problems like |
764
|
|
|
|
|
|
|
this, consider using the case_surname function of Lingua::EN::NameParse. |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
3. In general, Text::Capitalize is a very parochial |
767
|
|
|
|
|
|
|
English oriented module that looks like it belongs in the |
768
|
|
|
|
|
|
|
"Lingua::EN::*" tree. |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
4. There's currently no way of doing a PRESERVE_ANYCAPS |
771
|
|
|
|
|
|
|
that *also* adjusts capitalization of words on the exception |
772
|
|
|
|
|
|
|
list, so that "iMac Or iPod" would become "iMac or iPod". |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
=head1 SEE ALSO |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
L<Text::Autoformat> |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
"The Perl Cookbook", second edition, recipes 1.13 and 1.14 |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
L<Lingua::EN::NameParse> |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
About "scramble_case": |
784
|
|
|
|
|
|
|
L |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
=head1 VERSION |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
Version 0.9 |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
=head1 AUTHORS |
791
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
Joseph M. Brenner |
793
|
|
|
|
|
|
|
E-Mail: doom@kzsu.stanford.edu |
794
|
|
|
|
|
|
|
Homepage: http://obsidianrook.com/map |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
Stanislaw Y. Pusep (who wrote "capitalize") |
797
|
|
|
|
|
|
|
E-Mail: stanis@linuxmail.org |
798
|
|
|
|
|
|
|
ICQ UIN: 11979567 |
799
|
|
|
|
|
|
|
Homepage: http://sysdlabs.hypermart.net/ |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
And many thanks (for feature suggestions and code examples) to: |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
Belden Lyman, Yary Hluchan, Randal Schwartz |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
Copyright 2003 by Joseph Brenner. All rights reserved. |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
810
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
=cut |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
|