File Coverage

blib/lib/Locale/Maketext/Utils/Phrase/Norm/Ellipsis.pm

Criterion	Covered	Total	%
statement	73	73	100.0
branch	28	28	100.0
condition	3	3	100.0
subroutine	3	3	100.0
pod	0	1	0.0
total	107	108	99.0

line	stmt	bran	cond	sub	pod	time	code
1							package Locale::Maketext::Utils::Phrase::Norm::Ellipsis;
2
3	4			4		2742	use strict;
	4					8
	4					152
4	4			4		18	use warnings;
	4					6
	4					4825
5
6							sub normalize_maketext_string {
7	87			87	0	276	my ($filter) = @_;
8
9	87					271	my $string_sr = $filter->get_string_sr();
10
11							# 1. placeholder for BN w/ empty string args: ',,'
12	87					205	while ( ${$string_sr} =~ m/(\[.*?\])/g ) { # see note about this regex in Consider.pm
	194					1098
13	107					325	my $bn_match = $1;
14	107	100				390	if ( $bn_match =~ m/[,]{2,}/ ) {
15	2					5	my $bn_match_tmp = $bn_match;
16	2					11	$bn_match_tmp =~ s/([,]{2,})/my $n=CORE::length("$1");"MULTI_COMMA_IN_BN_$n"/ge;
	2					6
	2					9
17	2					4	${$string_sr} =~ s/\Q$bn_match\E/$bn_match_tmp/;
	2					40
18							}
19							}
20
21							# 2. look for multi's
22	87	100				162	if ( ${$string_sr} =~ s/(?:[.]{2,}\|[,]{2,})/…/g ) {
	87					1699
23	8					37	$filter->add_warning('multiple period/comma instead of ellipsis character');
24							}
25
26							# 3. restore placeholder
27	87					162	${$string_sr} =~ s/MULTI_COMMA_IN_BN_([0-9]+)/"," x "$1"/eg;
	87					244
	2					10
28
29							# TODO: output,latin so this occurrence is more rare:
30							# if ( ${$string_sr} =~ s/([,.]{2,})/\[comment,should “$1” here be an ellipsis?\]/g ) {
31							# $filter->add_warning('multiple concurrent period and comma');
32							# }
33
34	87	100				196	if ( ${$string_sr} =~ s/^(\|\xc2\xa0\|\[output\,nbsp\])…/ …/ ) {
	87					415
35	8					28	$filter->add_warning('initial ellipsis should be preceded by a normal space');
36							}
37
38							# 1. placeholders for legit ones
39	87					231	my %l;
40	87					146	my $copy = ${$string_sr};
	87					201
41	87	100				157	if ( ${$string_sr} =~ s/((?:\x20\|\xc2\xa0\|\[output\,nbsp\])…[\!\?\.\:])$/ELLIPSIS_END/ ) { # final
	87					516
42	8					38	$l{'ELLIPSIS_END'} = $1;
43							}
44
45	87	100				176	if ( ${$string_sr} =~ s/^( …(?:\x20\|\xc2\xa0\|\[output\,nbsp\]))/ELLIPSIS_START/ ) { # initial
	87					307
46	6					33	$l{'ELLIPSIS_START'} = $1;
47							}
48
49	87					1812	while ( ${$string_sr} =~ m/($\|\x20\|\xc2\xa0\|\[output\,nbsp\])…($\|\x20\|\xc2\xa0\|\[output\,nbsp\])/g ) {
	149					821
50	62					105	${$string_sr} =~ s/($\|\x20\|\xc2\xa0\|\[output\,nbsp\])…($\|\x20\|\xc2\xa0\|\[output\,nbsp\])/ELLIPSIS_MEDIAL/;
	62					487
51	62					115	push @{ $l{'ELLIPSIS_MEDIAL'} }, [ $1, $2 ];
	62					257
52							}
53
54							# 2. mark any remaining ones (that are not legit)
55	87	100				172	if ( ${$string_sr} =~ s/\A …(?!\x20\|\xc2\xa0\|\[output\,nbsp\])/ … / ) {
	87					376
56	8					34	$filter->add_warning('initial ellipsis should be followed by a normal space or a non-break-space (in bracket notation or character form)');
57							}
58
59	87	100				157	if ( ${$string_sr} =~ s/…(?:\x20\|\xc2\xa0\|\[output\,nbsp\]\|\s)+\z/…/ ) {
	87					454
60	8					28	$filter->add_warning('final ellipsis should be followed by a valid punctuation mark or nothing');
61							}
62
63	87	100	100			180	if ( ${$string_sr} =~ m/…\z/ && ${$string_sr} !~ m/(?:\x20\|\xc2\xa0\|\[output\,nbsp\])…\z/ ) {
	87					366
	16					116
64	8					16	${$string_sr} =~ s/…$/ …/;
	8					59
65	8					26	$filter->add_warning('final ellipsis should be preceded by a normal space or a non-break-space (in bracket notation or character form)');
66							}
67
68	87					190	my $medial_prob = 0;
69	87	100				167	if ( ${$string_sr} =~ s/(.{1})((?:(?<!\x20)…\|(?<!\xc2\xa0)…(?<!\[output\,nbsp\])…))(.{2})/$1 $2$3/g ) {
	87					2288
70	8					19	$medial_prob++;
71							}
72
73	87	100				196	if ( ${$string_sr} =~ s/(.{2})…(?!\x20\|\xc2\xa0\|\[output\,nbsp\]\|\z)(.{1})/$1… $2/g ) {
	87					379
74	8					15	$medial_prob++;
75							}
76
77	87	100				237	if ($medial_prob) {
78	8					126	$filter->add_warning('medial ellipsis should be surrounded on each side by a parenthesis or normal space or a non-break-space (in bracket notation or character form)');
79							}
80
81							# 3. reconstruct the valid ones
82	87	100				284	${$string_sr} =~ s/ELLIPSIS_END/$l{'ELLIPSIS_END'}/ if exists $l{'ELLIPSIS_END'};
	8					52
83	87	100				283	${$string_sr} =~ s/ELLIPSIS_START/$l{'ELLIPSIS_START'}/ if exists $l{'ELLIPSIS_START'};
	6					40
84	87	100				271	if ( exists $l{'ELLIPSIS_MEDIAL'} ) {
85	14					30	for my $medial ( @{ $l{'ELLIPSIS_MEDIAL'} } ) {
	14					61
86	62					101	${$string_sr} =~ s/ELLIPSIS_MEDIAL/$medial->[0]…$medial->[1]/;
	62					307
87							}
88							}
89
90	87					318	return $filter->return_value;
91							}
92
93							1;
94
95							__END__
96
97							=encoding utf-8
98
99							=head1 Normalization
100
101							=over 4
102
103							=item * It must be an ellipsis character (OSX: ⌥;).
104
105							=item * It must be surrounded by valid whitespace …
106
107							=item * … except for a trailing ellipsis.
108
109							=back
110
111							Valid whitespace is a normal space or a non-break-space (literal (OSX: ⌥space) or via [output,nbsp]).
112
113							The only exception is that the initial space has to be a normal space (non-break-space there would imply formatting or partial phrase, ick).
114
115							=head2 Rationale
116
117							We want to be simple, consistent, and clear.
118
119							=over 4
120
121							=item * CLDR has 3 simple location based rules:
122
123							initial:…{0}
124							medial:{0}…{1}
125							final:{0}…
126
127							Yet, English provides many more rules based on location in the text, purpose (show an omission, indicate a trailing off for various purposes), context (punctuation before or after?), and the author’s whim.
128
129							Some are exact opposites and yet still valid either way.
130
131							So let’s keep it simple.
132
133							=item * We are unlikely to be omitting things from a quote:
134
135							The server said, “PHP […] is like training wheels without the bike.”.
136
137							Can be added later if necessary.
138
139							=item * We are unlikely to be implying a continuing thought:
140
141							What can you do, you know how he is ….
142
143							Even if we were, this form is still valid. So let’s keep it consistent.
144
145							=item * We are not writing literature.
146
147							So let’s keep it simple.
148
149							=item * The CLDR version leaves room for ambiguity:
150
151							I drove the car…
152
153							Is that the first part of “I drove the car to the store.” or “I drove the carpet home and installed it.”?
154
155							So let’s keep it clear.
156
157							=back
158
159							Tip: If the ellipsis follows a single word (e.g. to indicate an action is happening), you might consider putting a non-break-space to the left of the ellipsis:
160
161							'Loading …' # i.e. Loading(OSX: ⌥-space)…
162
163							'Loading[output,nbsp]…' # visually explicit
164
165							=head1 possible violations
166
167							None
168
169							=head1 possible warnings
170
171							=over 4
172
173							=item multiple period/comma instead of ellipsis character
174
175							We want an ellipsis character instead of 3 periods (or 2, 4, or 5 periods, or commas: yes, I’ve seen translators write ‘..’, ‘,,,,’, etc., and after inquiring it turned out that ‘…’ was the intended syntax).
176
177							These will be turned into an ellipsis character.
178
179							=item initial ellipsis should be preceded by a normal space
180
181							The string is modified with a corrected version.
182
183							=item initial ellipsis should be followed by a normal space or a non-break-space (in bracket notation or character form)
184
185							The string is modified with a corrected version.
186
187							=item final ellipsis should be preceded by a normal space or a non-break-space (in bracket notation or character form)
188
189							The string is modified with a corrected version.
190
191							=item final ellipsis should be followed by a valid punctuation mark or nothing
192
193							The string is modified with a corrected version.
194
195							=item medial ellipsis should be surrounded on each side by a parenthesis or normal space or a non-break-space (in bracket notation or character form)
196
197							The string is modified with a corrected version.
198
199							=back