File Coverage

blib/lib/Locale/Maketext/Utils/Phrase/Norm/Ellipsis.pm
Criterion Covered Total %
statement 73 73 100.0
branch 28 28 100.0
condition 3 3 100.0
subroutine 3 3 100.0
pod 0 1 0.0
total 107 108 99.0


line stmt bran cond sub pod time code
1             package Locale::Maketext::Utils::Phrase::Norm::Ellipsis;
2              
3 4     4   2742 use strict;
  4         8  
  4         152  
4 4     4   18 use warnings;
  4         6  
  4         4825  
5              
6             sub normalize_maketext_string {
7 87     87 0 276 my ($filter) = @_;
8              
9 87         271 my $string_sr = $filter->get_string_sr();
10              
11             # 1. placeholder for BN w/ empty string args: ',,'
12 87         205 while ( ${$string_sr} =~ m/(\[.*?\])/g ) { # see note about this regex in Consider.pm
  194         1098  
13 107         325 my $bn_match = $1;
14 107 100       390 if ( $bn_match =~ m/[,]{2,}/ ) {
15 2         5 my $bn_match_tmp = $bn_match;
16 2         11 $bn_match_tmp =~ s/([,]{2,})/my $n=CORE::length("$1");"MULTI_COMMA_IN_BN_$n"/ge;
  2         6  
  2         9  
17 2         4 ${$string_sr} =~ s/\Q$bn_match\E/$bn_match_tmp/;
  2         40  
18             }
19             }
20              
21             # 2. look for multi's
22 87 100       162 if ( ${$string_sr} =~ s/(?:[.]{2,}|[,]{2,})/…/g ) {
  87         1699  
23 8         37 $filter->add_warning('multiple period/comma instead of ellipsis character');
24             }
25              
26             # 3. restore placeholder
27 87         162 ${$string_sr} =~ s/MULTI_COMMA_IN_BN_([0-9]+)/"," x "$1"/eg;
  87         244  
  2         10  
28              
29             # TODO: output,latin so this occurrence is more rare:
30             # if ( ${$string_sr} =~ s/([,.]{2,})/\[comment,should “$1” here be an ellipsis?\]/g ) {
31             # $filter->add_warning('multiple concurrent period and comma');
32             # }
33              
34 87 100       196 if ( ${$string_sr} =~ s/^(|\xc2\xa0|\[output\,nbsp\])…/ …/ ) {
  87         415  
35 8         28 $filter->add_warning('initial ellipsis should be preceded by a normal space');
36             }
37              
38             # 1. placeholders for legit ones
39 87         231 my %l;
40 87         146 my $copy = ${$string_sr};
  87         201  
41 87 100       157 if ( ${$string_sr} =~ s/((?:\x20|\xc2\xa0|\[output\,nbsp\])…[\!\?\.\:])$/ELLIPSIS_END/ ) { # final
  87         516  
42 8         38 $l{'ELLIPSIS_END'} = $1;
43             }
44              
45 87 100       176 if ( ${$string_sr} =~ s/^( …(?:\x20|\xc2\xa0|\[output\,nbsp\]))/ELLIPSIS_START/ ) { # initial
  87         307  
46 6         33 $l{'ELLIPSIS_START'} = $1;
47             }
48              
49 87         1812 while ( ${$string_sr} =~ m/(\(|\x20|\xc2\xa0|\[output\,nbsp\])…(\)|\x20|\xc2\xa0|\[output\,nbsp\])/g ) {
  149         821  
50 62         105 ${$string_sr} =~ s/(\(|\x20|\xc2\xa0|\[output\,nbsp\])…(\)|\x20|\xc2\xa0|\[output\,nbsp\])/ELLIPSIS_MEDIAL/;
  62         487  
51 62         115 push @{ $l{'ELLIPSIS_MEDIAL'} }, [ $1, $2 ];
  62         257  
52             }
53              
54             # 2. mark any remaining ones (that are not legit)
55 87 100       172 if ( ${$string_sr} =~ s/\A …(?!\x20|\xc2\xa0|\[output\,nbsp\])/ … / ) {
  87         376  
56 8         34 $filter->add_warning('initial ellipsis should be followed by a normal space or a non-break-space (in bracket notation or character form)');
57             }
58              
59 87 100       157 if ( ${$string_sr} =~ s/…(?:\x20|\xc2\xa0|\[output\,nbsp\]|\s)+\z/…/ ) {
  87         454  
60 8         28 $filter->add_warning('final ellipsis should be followed by a valid punctuation mark or nothing');
61             }
62              
63 87 100 100     180 if ( ${$string_sr} =~ m/…\z/ && ${$string_sr} !~ m/(?:\x20|\xc2\xa0|\[output\,nbsp\])…\z/ ) {
  87         366  
  16         116  
64 8         16 ${$string_sr} =~ s/…$/ …/;
  8         59  
65 8         26 $filter->add_warning('final ellipsis should be preceded by a normal space or a non-break-space (in bracket notation or character form)');
66             }
67              
68 87         190 my $medial_prob = 0;
69 87 100       167 if ( ${$string_sr} =~ s/(.{1})((?:(?<!\x20)…|(?<!\xc2\xa0)…(?<!\[output\,nbsp\])…))(.{2})/$1 $2$3/g ) {
  87         2288  
70 8         19 $medial_prob++;
71             }
72              
73 87 100       196 if ( ${$string_sr} =~ s/(.{2})…(?!\x20|\xc2\xa0|\[output\,nbsp\]|\z)(.{1})/$1… $2/g ) {
  87         379  
74 8         15 $medial_prob++;
75             }
76              
77 87 100       237 if ($medial_prob) {
78 8         126 $filter->add_warning('medial ellipsis should be surrounded on each side by a parenthesis or normal space or a non-break-space (in bracket notation or character form)');
79             }
80              
81             # 3. reconstruct the valid ones
82 87 100       284 ${$string_sr} =~ s/ELLIPSIS_END/$l{'ELLIPSIS_END'}/ if exists $l{'ELLIPSIS_END'};
  8         52  
83 87 100       283 ${$string_sr} =~ s/ELLIPSIS_START/$l{'ELLIPSIS_START'}/ if exists $l{'ELLIPSIS_START'};
  6         40  
84 87 100       271 if ( exists $l{'ELLIPSIS_MEDIAL'} ) {
85 14         30 for my $medial ( @{ $l{'ELLIPSIS_MEDIAL'} } ) {
  14         61  
86 62         101 ${$string_sr} =~ s/ELLIPSIS_MEDIAL/$medial->[0]…$medial->[1]/;
  62         307  
87             }
88             }
89              
90 87         318 return $filter->return_value;
91             }
92              
93             1;
94              
95             __END__
96              
97             =encoding utf-8
98              
99             =head1 Normalization
100              
101             =over 4
102              
103             =item * It must be an ellipsis character (OSX: ⌥;).
104              
105             =item * It must be surrounded by valid whitespace …
106              
107             =item * … except for a trailing ellipsis.
108              
109             =back
110              
111             Valid whitespace is a normal space or a non-break-space (literal (OSX: ⌥space) or via [output,nbsp]).
112              
113             The only exception is that the initial space has to be a normal space (non-break-space there would imply formatting or partial phrase, ick).
114              
115             =head2 Rationale
116              
117             We want to be simple, consistent, and clear.
118              
119             =over 4
120              
121             =item * CLDR has 3 simple location based rules:
122              
123             initial:…{0}
124             medial:{0}…{1}
125             final:{0}…
126              
127             Yet, English provides many more rules based on location in the text, purpose (show an omission, indicate a trailing off for various purposes), context (punctuation before or after?), and author’s whim.
128              
129             Some are exact opposites and yet still valid either way.
130              
131             So let’s keep it simple.
132              
133             =item * We are unlikely to be omitting things from a quote:
134              
135             The server said, “PHP […] is like training wheels without the bike.”.
136              
137             Can be added later if necessary.
138              
139             =item * We are unlikely to be implying a continuing thought:
140              
141             What can you do, you know how he is ….
142              
143             Even if we were, this form is still valid. So let’s keep it consistent.
144              
145             =item * We are not writing literature.
146              
147             So let’s keep it simple.
148              
149             =item * The CLDR version leaves room for ambiguity:
150              
151             I drove the car…
152              
153             Is that the first part of “I drove the car to the store.” or “I drove the carpet home and installed it.”?
154              
155             So let’s keep it clear.
156              
157             =back
158              
159             Tip: If you’re using a single word (e.g. to indicate an action is happening), you might consider putting a non-break-space to the left of the ellipsis:
160              
161             'Loading …' # i.e. Loading(OSX: ⌥-space)…
162              
163             'Loading[output,nbsp]…' # visually explicit
164              
165             =head1 possible violations
166              
167             None
168              
169             =head1 possible warnings
170              
171             =over 4
172              
173             =item multiple period/comma instead of ellipsis character
174              
175             We want an ellipsis character instead of 3 periods (or 2 periods, 4 or 5 periods, or commas (yes, I’ve seen translators do ‘..’, ‘,,,,’, etc., and after inquiring, ‘…’ was the correct syntax)).
176              
177             These will be turned into an ellipsis character.
178              
179             =item initial ellipsis should be preceded by a normal space
180              
181             The string is modified with a corrected version.
182              
183             =item initial ellipsis should be followed by a normal space or a non-break-space (in bracket notation or character form)
184              
185             The string is modified with a corrected version.
186              
187             =item final ellipsis should be preceded by a normal space or a non-break-space (in bracket notation or character form)
188              
189             The string is modified with a corrected version.
190              
191             =item final ellipsis should be followed by a valid punctuation mark or nothing
192              
193             The string is modified with a corrected version.
194              
195             =item medial ellipsis should be surrounded on each side by a parenthesis or normal space or a non-break-space (in bracket notation or character form)
196              
197             The string is modified with a corrected version.
198              
199             =back