File Coverage

blib/lib/Locale/Maketext/Utils/Phrase/Norm/Consider.pm
Criterion Covered Total %
statement 64 64 100.0
branch 30 30 100.0
condition 40 45 88.8
subroutine 4 4 100.0
pod 0 1 0.0
total 138 144 95.8


line stmt bran cond sub pod time code
1             package Locale::Maketext::Utils::Phrase::Norm::Consider;
2              
3 4     4   2758 use strict;
  4         7  
  4         160  
4 4     4   19 use warnings;
  4         8  
  4         229  
5 4     4   2144 use Locale::Maketext::Utils::Phrase ();
  4         14  
  4         4441  
6              
7             sub normalize_maketext_string {
8 102     102 0 251 my ($filter) = @_;
9              
10 102         318 my $string_sr = $filter->get_string_sr();
11              
12 102         195 my $struct = Locale::Maketext::Utils::Phrase::phrase2struct( ${$string_sr} );
  102         416  
13              
14             # entire phrase is bracket notation
15 102 100       402 if ( Locale::Maketext::Utils::Phrase::struct_is_entirely_bracket_notation($struct) ) {
16 8         17 ${$string_sr} .= "[comment,does this phrase really need to be entirely bracket notation?]";
  8         29  
17 8         35 $filter->add_warning('Entire phrase is bracket notation, is there a better way in this case?');
18             }
19              
20 102         211 my $idx = -1;
21 102         219 my $has_bare = 0;
22 102         216 my $has_hardurl = 0;
23 102         181 my $has_empty_arg = 0;
24 102         181 my $last_idx = @{$struct} - 1;
  102         235  
25 102         365 my $bn_var_rexep = Locale::Maketext::Utils::Phrase::get_bn_var_regexp();
26              
27 102         237 for my $piece ( @{$struct} ) {
  102         297  
28 394         709 $idx++;
29 394 100       982 next if !ref($piece);
30              
31 166 100       522 my $before = $idx == 0 ? '' : $struct->[ $idx - 1 ];
32 166         411 my $bn = $piece->{'orig'};
33 166 100       438 my $after = $idx == $last_idx ? '' : $struct->[ $idx + 1 ];
34              
35 166 100 66     745 if ( $piece->{'type'} eq 'var' || $piece->{'type'} eq 'basic_var' ) {
36              
37             # unless the “bare” bracket notation …
38 98 100 100     2038 unless (
      33        
      66        
      100        
      100        
      100        
      100        
      66        
      100        
      100        
      100        
      100        
      100        
39             ( $idx == $last_idx && $before =~ m/\:(?:\x20|\xc2\xa0)/ && ( !defined $after || $after eq '' ) ) # … is a trailing '…: [_2]'
40             #tidyoff
41             or (
42             ( $before !~ m/(?:\x20|\xc2\xa0)$/ && $after !~ m/^(?:\x20|\xc2\xa0)/ ) # … is surrounded by non-whitespace already
43             &&
44             ( $before !~ m/[a-zA-Z0-9]$/ && $after !~ m/^[a-zA-Z0-9]/ ) # … and that non-whitespace is also non-alphanumeric (TODO target phrases need a lookup)
45             )
46             #tidyon
47             or ( $before =~ m/,(?:\x20|\xc2\xa0)$/ && $after =~ m/^,/ ) # … is in a comma reference
48             or ( $before =~ m/\([^\)]+(?:\x20|\xc2\xa0)$/ && $after =~ m/^\)/ ) # … is at the end of parenthesised text
49             or ( $before =~ m/\($/ && $after =~ m/(?:\x20|\xc2\xa0)[^\)]+\)/ ) # … is at the beginning of parenthesised text
50             or ( $before =~ m/(?:\x20|\xc2\xa0)$/ && $after =~ m/’s(?:\x20|\xc2\xa0|;.|,.|[\!\?\.\:])/ ) # … is an apostrophe-s (curly so its not markup!)
51              
52             ) {
53 56         113 ${$string_sr} =~ s/(\Q$bn\E)/“$1”/;
  56         1072  
54 56         142 $has_bare++;
55             }
56             }
57              
58             # Do not hardcode URL in [output,url]:
59 166 100 100     676 if ( $piece->{'list'}[0] eq 'output' && $piece->{'list'}[1] eq 'url' ) {
60 32 100       411 if ( $piece->{'list'}[2] !~ m/\A$bn_var_rexep\z/ ) {
61 16         34 my $last_idx_bn = @{ $piece->{'list'} } - 1;
  16         45  
62 16         36 my $url = $piece->{'list'}[2];
63 16 100       31 my $args = @{ $piece->{'list'} } > 3 ? ',' . join( ',', @{ $piece->{'list'} }[ 3 .. $last_idx_bn ] ) : '';
  16         87  
  8         37  
64              
65 16         34 ${$string_sr} =~ s/(\Q$bn\E)/\[output,url,why hardcode “$url”$args\]/;
  16         243  
66 16         43 $has_hardurl++;
67             }
68             }
69              
70 166 100       1028 if ( $piece->{'type'} eq 'complex' ) {
71 16         29 my @arg_list = @{ $piece->{'list'} };
  16         55  
72 16         29 my $new_bn = $bn;
73 16         117 my $empty_arg_count = 0;
74 16         68 for my $frag ( @arg_list[ 1 .. $#arg_list ] ) {
75 48 100       131 if ( $frag eq '' ) {
76 16         260 $new_bn =~ s{
77             ($Locale::Maketext::Utils::Phrase::bn_delimit)
78             (
79             $Locale::Maketext::Utils::Phrase::bn_delimit
80             |
81             $Locale::Maketext::Utils::Phrase::closing_bn
82             )
83             }{$1EMPTY STRING$2}x;
84              
85 16         36 $empty_arg_count++;
86             }
87             }
88 16 100       46 if ( $empty_arg_count > 0 ) {
89 12         21 ${$string_sr} =~ s/\Q$bn\E/$new_bn/;
  12         186  
90 12         41 $has_empty_arg++;
91             }
92             }
93             }
94              
95 102 100       365 $filter->add_warning('Hard coded URLs can be a maintenance nightmare, why not pass the URL in so the phrase does not change if the URL does') if $has_hardurl;
96 102 100       415 $filter->add_warning('Bare variable can lead to ambiguous output') if $has_bare;
97 102 100       301 $filter->add_warning('Empty strings as arguments in bracket notation should be avoided') if $has_empty_arg;
98              
99 102         344 return $filter->return_value;
100             }
101              
102             1;
103              
104             __END__
105              
106             =encoding utf-8
107              
108             =head1 Normalization
109              
110             The checks in here are for various best practices to consider while crafting phrases.
111              
112             =head2 Rationale
113              
114             These are warnings only and are meant to help point out things that typically are best done differently but could possibly be legit and thus a human needs to consider and sort it out.
115              
116             =head1 possible violations
117              
118             None
119              
120             =head1 possible warnings
121              
122             =over 4
123              
124             =item Entire phrase is bracket notation, is there a better way in this case?
125              
126             This will append '[comment,does this phrase really need to be entirely bracket notation?]' to the phrase.
127              
128             The idea behind it is that a phrase that is entirely bracket notation is a sure sign that it needs to be done differently.
129              
130             For example:
131              
132             =over 4
133              
134             =item method
135              
136             $lh->maketext('[numf,_1]',$n);
137              
138             There is no need to translate that, it’d be the same in every locale!
139              
140             You would simply do this:
141              
142             $lh->numf($n)
143              
144             =item overly complex
145              
146             $lh->maketext('[boolean,_1,Your foo has been installed.,Your foo has been uninstalled.]',$is_install);
147              
148             Unnecessarily difficult to read/work with and without benefit. You can't use any other bracket notation. You can probably spot other issues too.
149              
150             Depending on the situation you might do either of these:
151              
152             if ($is_install) {
153             $lh->maketext('Your foo has been installed.');
154             }
155             else {
156             $lh->maketext('Your foo has been uninstalled.');
157             }
158              
159             or if you prefer to keep the variant–pair as one unit:
160              
161             $lh->maketext('Your foo has been [boolean,_1,installed,uninstalled].',$is_install);
162              
163             =back
164              
165             =item Hard coded URLs can be a maintenance nightmare, why not pass the URL in so the phrase does not change if the URL does
166              
167             $lh->maketext('You can [output,url,http://support.example.com,visit our support page] for further assistance.');
168              
169             What happens when support.example.com changes to custcare.example.com? You have to change, not only the caller but the lexicons and translations, ick!
170              
171             Then after you do that your boss says, oh wait actually it needs to be customer.example.com …
172              
173             But if you had passed it in as an argument:
174              
175             $lh->maketext('You can [output,url,_1,visit our support page] for further assistance.', $url_db{'support_url'});
176              
177             Now when support.example.com changes to custcare.example.com you update 'support_url' in %url_db–done.
178              
179             He wants it to be customer.example.com, no problem update 'support_url' in %url_db–done.
180              
181             =item Bare variable can lead to ambiguous output
182              
183             $lh->maketext('The checksum was [_1].', $sum);
184              
185             If $sum is empty or undef you get odd spacing (e.g. “was .” instead of “was.”), could lose info, (e.g. “wait, the checksum is what now?”), or change meaning completely (e.g. what if the checksum was the string “BAD”).
186              
187             'The checksum was .'
188             'The checksum was BAD.' # what! my data is corrupt ⁈
189              
190             That applies even if it is decorated some way:
191              
192             'The checksum was <code></code>.'
193             'The checksum was <code>BAD</code>.' # what! my data is corrupt ⁈
194              
195             It promotes evil partial phrases (i.e. that are untranslatable which is sort of the opposite of localizing things no?)
196              
197             $lh->maketext('The checksum was [_1].', $lh->maketext('inserted into the database')); # !!!! DON’T DO THIS !!!!
198              
199             One way to visually distinguish what you intend regardless of the value given is simply to quote it:
200              
201             The checksum was “[_1]”.
202              
203             becomes:
204              
205             The checksum was “”. # It is obvious that the sum is empty
206             The checksum was “ ”. # It is obvious that the sum is all whitespace
207             The checksum was “BAD”. # It is obvious that it is a string made up of B, A, and D and not a statement that the sum has a problem
208             The checksum was “perfectly awesome”. # It looks weird so someone probably will notice and ask you to fix your code
209              
210             In other words:
211              
212             =over 4
213              
214             =item I<Using “ and ” disambiguates the entire string’s intent.> No accidental or malicious meaning changes.
215              
216             =item I<They also provide substance to a variable that may very well be null.>
217              
218             For browsers, any span-level tag which is empty is not expressed in the rendering of the page. Therefore if we wrap variable expressions in span-level DOM, the user stands a very real chance of seeing incompleteness or potentially not noticing errors at all.
219              
220             =item I<Having this dis-ambiguation also assists the translator:>
221              
222             =over 4
223              
224             =item They can use whatever their locale uses without needing bracket notation (e.g. « and »).
225              
226             This allows for flexibility since bracket notation is not nestable (and should not be since it isn’t a templating engine).
227              
228             =item When the translators see <strong> or any other wrapping element in the phrase, they will not immediately know what’s going on.
229              
230             =back
231              
232             =item I<It helps programmers make better choices.>
233              
234             =back
235              
236             I<Perhaps quotes are the wrong thing in a given instance:> Depending on what you’re doing other things might work too:
237              
238             =over 4
239              
240             =item Trailing introductory “:”:
241              
242             An error has occurred: [_2]
243              
244             =item Alternate text:
245              
246             Sorry, [is_defined,_2,“_2” is an invalid,you must specify a valid] value for “[_1]”.
247              
248             =item Parentheses:
249              
250             The domain ([_1]) could not be found.
251              
252             The clown (AKA [_1]) is down.
253              
254             The network ([_1] in IPv6) is up.
255              
256             =item Comma reference:
257              
258             The user, [_1], already exists.
259              
260             =item Etc etc
261              
262             =back
263              
264             =back
265              
266             =head1 Checks only run under extra filter mode:
267              
268             None.