line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Locale::Maketext::Utils::Phrase::Norm::Ellipsis; |
2
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
1944
|
use strict; |
|
4
|
|
|
|
|
4
|
|
|
4
|
|
|
|
|
89
|
|
4
|
4
|
|
|
4
|
|
11
|
use warnings; |
|
4
|
|
|
|
|
4
|
|
|
4
|
|
|
|
|
2852
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
sub normalize_maketext_string { |
7
|
86
|
|
|
86
|
0
|
71
|
my ($filter) = @_; |
8
|
|
|
|
|
|
|
|
9
|
86
|
|
|
|
|
138
|
my $string_sr = $filter->get_string_sr(); |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# 1. placeholder for BN w/ empty string args: ',,' |
12
|
86
|
|
|
|
|
78
|
while ( ${$string_sr} =~ m/(\[.*?\])/g ) { # see note about this regex in Consider.pm |
|
182
|
|
|
|
|
525
|
|
13
|
96
|
|
|
|
|
131
|
my $bn_match = $1; |
14
|
96
|
50
|
|
|
|
166
|
if ( $bn_match =~ m/[,]{2,}/ ) { |
15
|
0
|
|
|
|
|
0
|
my $bn_match_tmp = $bn_match; |
16
|
0
|
|
|
|
|
0
|
$bn_match_tmp =~ s/([,]{2,})/my $n=CORE::length("$1");"MULTI_COMMA_IN_BN_$n"/ge; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
17
|
0
|
|
|
|
|
0
|
${$string_sr} =~ s/\Q$bn_match\E/$bn_match_tmp/; |
|
0
|
|
|
|
|
0
|
|
18
|
|
|
|
|
|
|
} |
19
|
|
|
|
|
|
|
} |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# 2. look for multi's |
22
|
86
|
100
|
|
|
|
59
|
if ( ${$string_sr} =~ s/(?:[.]{2,}|[,]{2,})/…/g ) { |
|
86
|
|
|
|
|
518
|
|
23
|
8
|
|
|
|
|
19
|
$filter->add_warning('multiple period/comma instead of ellipsis character'); |
24
|
|
|
|
|
|
|
} |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# 3. restore placeholder |
27
|
86
|
|
|
|
|
67
|
${$string_sr} =~ s/MULTI_COMMA_IN_BN_([0-9]+)/"," x "$1"/eg; |
|
86
|
|
|
|
|
103
|
|
|
0
|
|
|
|
|
0
|
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# TODO: output,latin so this occurrence is more rare: |
30
|
|
|
|
|
|
|
# if ( ${$string_sr} =~ s/([,.]{2,})/\[comment,should “$1” here be an ellipsis?\]/g ) { |
31
|
|
|
|
|
|
|
# $filter->add_warning('multiple concurrent period and comma'); |
32
|
|
|
|
|
|
|
# } |
33
|
|
|
|
|
|
|
|
34
|
86
|
100
|
|
|
|
60
|
if ( ${$string_sr} =~ s/^(|\xc2\xa0|\[output\,nbsp\])…/ …/ ) { |
|
86
|
|
|
|
|
194
|
|
35
|
8
|
|
|
|
|
18
|
$filter->add_warning('initial ellipsis should be preceded by a normal space'); |
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# 1. placeholders for legit ones |
39
|
86
|
|
|
|
|
80
|
my %l; |
40
|
86
|
|
|
|
|
55
|
my $copy = ${$string_sr}; |
|
86
|
|
|
|
|
79
|
|
41
|
86
|
100
|
|
|
|
70
|
if ( ${$string_sr} =~ s/((?:\x20|\xc2\xa0|\[output\,nbsp\])…[\!\?\.\:])$/ELLIPSIS_END/ ) { # final |
|
86
|
|
|
|
|
279
|
|
42
|
8
|
|
|
|
|
23
|
$l{'ELLIPSIS_END'} = $1; |
43
|
|
|
|
|
|
|
} |
44
|
|
|
|
|
|
|
|
45
|
86
|
100
|
|
|
|
62
|
if ( ${$string_sr} =~ s/^( …(?:\x20|\xc2\xa0|\[output\,nbsp\]))/ELLIPSIS_START/ ) { # initial |
|
86
|
|
|
|
|
155
|
|
46
|
6
|
|
|
|
|
14
|
$l{'ELLIPSIS_START'} = $1; |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
|
49
|
86
|
|
|
|
|
65
|
while ( ${$string_sr} =~ m/(\(|\x20|\xc2\xa0|\[output\,nbsp\])…(\)|\x20|\xc2\xa0|\[output\,nbsp\])/g ) { |
|
148
|
|
|
|
|
439
|
|
50
|
62
|
|
|
|
|
35
|
${$string_sr} =~ s/(\(|\x20|\xc2\xa0|\[output\,nbsp\])…(\)|\x20|\xc2\xa0|\[output\,nbsp\])/ELLIPSIS_MEDIAL/; |
|
62
|
|
|
|
|
247
|
|
51
|
62
|
|
|
|
|
43
|
push @{ $l{'ELLIPSIS_MEDIAL'} }, [ $1, $2 ]; |
|
62
|
|
|
|
|
136
|
|
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# 2. mark any remaining ones (that are not legit) |
55
|
86
|
100
|
|
|
|
69
|
if ( ${$string_sr} =~ s/\A …(?!\x20|\xc2\xa0|\[output\,nbsp\])/ … / ) { |
|
86
|
|
|
|
|
153
|
|
56
|
8
|
|
|
|
|
19
|
$filter->add_warning('initial ellipsis should be followed by a normal space or a non-break-space (in bracket notation or character form)'); |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
|
59
|
86
|
100
|
|
|
|
54
|
if ( ${$string_sr} =~ s/…(?:\x20|\xc2\xa0|\[output\,nbsp\]|\s)+\z/…/ ) { |
|
86
|
|
|
|
|
194
|
|
60
|
8
|
|
|
|
|
17
|
$filter->add_warning('final ellipsis should be followed by a valid punctuation mark or nothing'); |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
86
|
100
|
100
|
|
|
62
|
if ( ${$string_sr} =~ m/…\z/ && ${$string_sr} !~ m/(?:\x20|\xc2\xa0|\[output\,nbsp\])…\z/ ) { |
|
86
|
|
|
|
|
179
|
|
|
16
|
|
|
|
|
73
|
|
64
|
8
|
|
|
|
|
3
|
${$string_sr} =~ s/…$/ …/; |
|
8
|
|
|
|
|
22
|
|
65
|
8
|
|
|
|
|
14
|
$filter->add_warning('final ellipsis should be preceded by a normal space or a non-break-space (in bracket notation or character form)'); |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
86
|
|
|
|
|
80
|
my $medial_prob = 0; |
69
|
86
|
100
|
|
|
|
65
|
if ( ${$string_sr} =~ s/(.{1})((?:(?
|
|
86
|
|
|
|
|
813
|
|
70
|
8
|
|
|
|
|
9
|
$medial_prob++; |
71
|
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
|
|
73
|
86
|
100
|
|
|
|
69
|
if ( ${$string_sr} =~ s/(.{2})…(?!\x20|\xc2\xa0|\[output\,nbsp\]|\z)(.{1})/$1… $2/g ) { |
|
86
|
|
|
|
|
190
|
|
74
|
8
|
|
|
|
|
7
|
$medial_prob++; |
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
|
77
|
86
|
100
|
|
|
|
115
|
if ($medial_prob) { |
78
|
8
|
|
|
|
|
22
|
$filter->add_warning('medial ellipsis should be surrounded on each side by a parenthesis or normal space or a non-break-space (in bracket notation or character form)'); |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# 3. reconstruct the valid ones |
82
|
86
|
100
|
|
|
|
140
|
${$string_sr} =~ s/ELLIPSIS_END/$l{'ELLIPSIS_END'}/ if exists $l{'ELLIPSIS_END'}; |
|
8
|
|
|
|
|
31
|
|
83
|
86
|
100
|
|
|
|
119
|
${$string_sr} =~ s/ELLIPSIS_START/$l{'ELLIPSIS_START'}/ if exists $l{'ELLIPSIS_START'}; |
|
6
|
|
|
|
|
23
|
|
84
|
86
|
100
|
|
|
|
140
|
if ( exists $l{'ELLIPSIS_MEDIAL'} ) { |
85
|
14
|
|
|
|
|
12
|
for my $medial ( @{ $l{'ELLIPSIS_MEDIAL'} } ) { |
|
14
|
|
|
|
|
20
|
|
86
|
62
|
|
|
|
|
38
|
${$string_sr} =~ s/ELLIPSIS_MEDIAL/$medial->[0]…$medial->[1]/; |
|
62
|
|
|
|
|
181
|
|
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
86
|
|
|
|
|
154
|
return $filter->return_value; |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
1; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
__END__ |