line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package File::Find::utf8; |
2
|
4
|
|
|
4
|
|
254667
|
use strict; |
|
4
|
|
|
|
|
39
|
|
|
4
|
|
|
|
|
99
|
|
3
|
4
|
|
|
4
|
|
18
|
use warnings; |
|
4
|
|
|
|
|
5
|
|
|
4
|
|
|
|
|
84
|
|
4
|
4
|
|
|
4
|
|
55
|
use 5.010; # state |
|
4
|
|
|
|
|
12
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# ABSTRACT: Fully UTF-8 aware File::Find |
7
|
|
|
|
|
|
|
our $VERSION = '0.014'; # VERSION |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
#pod =begin :prelude |
10
|
|
|
|
|
|
|
#pod |
11
|
|
|
|
|
|
|
#pod =for test_synopsis |
12
|
|
|
|
|
|
|
#pod my @directories_to_search; |
13
|
|
|
|
|
|
|
#pod |
14
|
|
|
|
|
|
|
#pod =end :prelude |
15
|
|
|
|
|
|
|
#pod |
16
|
|
|
|
|
|
|
#pod =head1 SYNOPSIS |
17
|
|
|
|
|
|
|
#pod |
18
|
|
|
|
|
|
|
#pod # Use the utf-8 versions of find and finddepth |
19
|
|
|
|
|
|
|
#pod use File::Find::utf8; |
20
|
|
|
|
|
|
|
#pod find(\&wanted, @directories_to_search); |
21
|
|
|
|
|
|
|
#pod |
22
|
|
|
|
|
|
|
#pod # Revert back to non-utf-8 versions |
23
|
|
|
|
|
|
|
#pod no File::Find::utf8; |
24
|
|
|
|
|
|
|
#pod finddepth(\&wanted, @directories_to_search); |
25
|
|
|
|
|
|
|
#pod |
26
|
|
|
|
|
|
|
#pod # Export only the find function |
27
|
|
|
|
|
|
|
#pod use File::Find::utf8 qw(find); |
28
|
|
|
|
|
|
|
#pod find(\&wanted, @directories_to_search); |
29
|
|
|
|
|
|
|
#pod |
30
|
|
|
|
|
|
|
#pod # Export no functions |
31
|
|
|
|
|
|
|
#pod use File::Find::utf8 qw(:none); # NOT "use File::Find::utf8 qw();"! |
32
|
|
|
|
|
|
|
#pod File::Find::find(\&wanted, @directories_to_search); |
33
|
|
|
|
|
|
|
#pod |
34
|
|
|
|
|
|
|
#pod =head1 DESCRIPTION |
35
|
|
|
|
|
|
|
#pod |
36
|
|
|
|
|
|
|
#pod While the original L functions are capable of handling |
37
|
|
|
|
|
|
|
#pod UTF-8 quite well, they expect and return all data as bytes, not as |
38
|
|
|
|
|
|
|
#pod characters. |
39
|
|
|
|
|
|
|
#pod |
40
|
|
|
|
|
|
|
#pod This module replaces the L functions with fully UTF-8 |
41
|
|
|
|
|
|
|
#pod aware versions, both expecting and returning characters. |
42
|
|
|
|
|
|
|
#pod |
43
|
|
|
|
|
|
|
#pod B Replacement of functions is not done on DOS, Windows, and OS/2 |
44
|
|
|
|
|
|
|
#pod as these systems do not have full UTF-8 file system support. |
45
|
|
|
|
|
|
|
#pod |
46
|
|
|
|
|
|
|
#pod =head2 Behaviour |
47
|
|
|
|
|
|
|
#pod |
48
|
|
|
|
|
|
|
#pod The module behaves as a pragma so you can use both C |
49
|
|
|
|
|
|
|
#pod File::Find::utf8> and C to turn utf-8 support on |
50
|
|
|
|
|
|
|
#pod or off. |
51
|
|
|
|
|
|
|
#pod |
52
|
|
|
|
|
|
|
#pod By default, both find() and finddepth() are exported (as with the original |
53
|
|
|
|
|
|
|
#pod L), if you want to prevent this, use C |
54
|
|
|
|
|
|
|
#pod qw(:none)>. (As all the magic happens in the module's import function, |
55
|
|
|
|
|
|
|
#pod you can not simply use C |
56
|
|
|
|
|
|
|
#pod |
57
|
|
|
|
|
|
|
#pod L warning levels are properly propagated. Note though that |
58
|
|
|
|
|
|
|
#pod for propagation of fatal L warnings, Perl 5.12 or higher |
59
|
|
|
|
|
|
|
#pod is required (or the appropriate version of L). |
60
|
|
|
|
|
|
|
#pod |
61
|
|
|
|
|
|
|
#pod =head1 COMPATIBILITY |
62
|
|
|
|
|
|
|
#pod |
63
|
|
|
|
|
|
|
#pod The filesystems of Dos, Windows, and OS/2 do not (fully) support |
64
|
|
|
|
|
|
|
#pod UTF-8. The L function will therefore not be replaced on these |
65
|
|
|
|
|
|
|
#pod systems. |
66
|
|
|
|
|
|
|
#pod |
67
|
|
|
|
|
|
|
#pod =head1 SEE ALSO |
68
|
|
|
|
|
|
|
#pod |
69
|
|
|
|
|
|
|
#pod =for :list |
70
|
|
|
|
|
|
|
#pod * L -- The original module. |
71
|
|
|
|
|
|
|
#pod * L -- Fully utf-8 aware version of the L functions. |
72
|
|
|
|
|
|
|
#pod * L -- Turn on utf-8, all of it. |
73
|
|
|
|
|
|
|
#pod This was also the module I first added the utf-8 aware versions of |
74
|
|
|
|
|
|
|
#pod L and L to before moving them to their own package. |
75
|
|
|
|
|
|
|
#pod |
76
|
|
|
|
|
|
|
#pod =cut |
77
|
|
|
|
|
|
|
|
78
|
4
|
|
|
4
|
|
23
|
use File::Find (); |
|
4
|
|
|
|
|
10
|
|
|
4
|
|
|
|
|
52
|
|
79
|
4
|
|
|
4
|
|
923
|
use Encode (); |
|
4
|
|
|
|
|
17326
|
|
|
4
|
|
|
|
|
569
|
|
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# Holds the pointers to the original version of redefined functions |
82
|
|
|
|
|
|
|
state %_orig_functions; |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# Current (i.e., this) package |
85
|
|
|
|
|
|
|
my $current_package = __PACKAGE__; |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Original package (i.e., the one for which this module is replacing the functions) |
88
|
|
|
|
|
|
|
my $original_package = $current_package; |
89
|
|
|
|
|
|
|
$original_package =~ s/::utf8$//; |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
require Carp; |
92
|
|
|
|
|
|
|
$Carp::Internal{$current_package}++; # To get warnings reported at correct caller level |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
#pod =attr $File::Find::utf8::SPECIALVARS |
95
|
|
|
|
|
|
|
#pod |
96
|
|
|
|
|
|
|
#pod By default C only decodes the I |
97
|
|
|
|
|
|
|
#pod L variables C<$_>, C<$File::Find::name>, |
98
|
|
|
|
|
|
|
#pod C<$File::Find::dir>, and (when C or C is in |
99
|
|
|
|
|
|
|
#pod effect) C<$File::Find::fullname> for use in the C, |
100
|
|
|
|
|
|
|
#pod C, and C functions. |
101
|
|
|
|
|
|
|
#pod |
102
|
|
|
|
|
|
|
#pod If for any reason (e.g., compatibility with find.pl or find2perl) you |
103
|
|
|
|
|
|
|
#pod also need the I variables C<$File::Find::topdir>, |
104
|
|
|
|
|
|
|
#pod C<$File::Find::topdev>, C<$File::Find::topino>, |
105
|
|
|
|
|
|
|
#pod C<$File::Find::topmode>, and C<$File::Find::topnlink> to be decoded, |
106
|
|
|
|
|
|
|
#pod specify C in your |
107
|
|
|
|
|
|
|
#pod code. The extra decoding that needs to happen will impact performance |
108
|
|
|
|
|
|
|
#pod though, so use only when absolutely necessary. |
109
|
|
|
|
|
|
|
#pod |
110
|
|
|
|
|
|
|
#pod =cut |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
our $SPECIALVARS = 0; |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
#pod =attr $File::Find::utf8::UTF8_CHECK |
115
|
|
|
|
|
|
|
#pod |
116
|
|
|
|
|
|
|
#pod By default C marks decoding errors as fatal (default value |
117
|
|
|
|
|
|
|
#pod for this setting is C). If you want, you can change this by |
118
|
|
|
|
|
|
|
#pod setting C. The value C reports |
119
|
|
|
|
|
|
|
#pod the encoding errors as warnings, and C will completely |
120
|
|
|
|
|
|
|
#pod ignore them. Please see L for details. Note: C is |
121
|
|
|
|
|
|
|
#pod I enforced. |
122
|
|
|
|
|
|
|
#pod =cut |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
our $UTF8_CHECK = Encode::FB_CROAK | Encode::LEAVE_SRC; # Die on encoding errors |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# UTF-8 Encoding object |
127
|
|
|
|
|
|
|
my $_UTF8 = Encode::find_encoding('UTF-8'); |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
sub import { |
130
|
|
|
|
|
|
|
# If run on the dos/os2/windows platform, ignore overriding functions silently. |
131
|
|
|
|
|
|
|
# These platforms do not have (proper) utf-8 file system suppport... |
132
|
8
|
50
|
|
8
|
|
401
|
unless ($^O =~ /MSWin32|cygwin|dos|os2/) { |
133
|
4
|
|
|
4
|
|
34
|
no strict qw(refs); ## no critic (TestingAndDebugging::ProhibitNoStrict) |
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
121
|
|
134
|
4
|
|
|
4
|
|
18
|
no warnings qw(redefine); |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
2749
|
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
# Redefine each of the functions to their UTF-8 equivalent |
137
|
8
|
|
|
|
|
22
|
for my $f (@{$original_package . '::EXPORT'}, @{$original_package . '::EXPORT_OK'}) { |
|
8
|
|
|
|
|
30
|
|
|
8
|
|
|
|
|
29
|
|
138
|
|
|
|
|
|
|
# If we already have the _orig_function, we have redefined the function |
139
|
|
|
|
|
|
|
# in an earlier load of this module, so we need not do it again |
140
|
16
|
100
|
|
|
|
47
|
unless ($_orig_functions{$f}) { |
141
|
8
|
|
|
|
|
10
|
$_orig_functions{$f} = \&{$original_package . '::' . $f}; |
|
8
|
|
|
|
|
45
|
|
142
|
8
|
|
|
|
|
13
|
*{$original_package . '::' . $f} = \&{"_utf8_$f"}; |
|
8
|
|
|
|
|
40
|
|
|
8
|
|
|
|
|
17
|
|
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
} |
145
|
8
|
|
|
|
|
36
|
$^H{$current_package} = 1; # Set compiler hint that we should use the utf-8 version |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# Determine symbols to export |
149
|
8
|
|
|
|
|
12
|
shift; # First argument contains the package (that's us) |
150
|
8
|
100
|
|
|
|
22
|
@_ = (':DEFAULT') if !@_; # If nothing provided, use default |
151
|
8
|
|
|
|
|
15
|
@_ = grep { $_ ne ':none' } @_; # Strip :none tag |
|
8
|
|
|
|
|
24
|
|
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# Use exporter to export |
154
|
8
|
|
|
|
|
31
|
require Exporter; |
155
|
8
|
100
|
|
|
|
21
|
if (@_) { |
156
|
7
|
|
|
|
|
15
|
@_ = ($original_package, @_); |
157
|
7
|
|
|
|
|
2062
|
goto &Exporter::import; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
1
|
|
|
|
|
74
|
return; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
sub unimport { ## no critic (Subroutines::ProhibitBuiltinHomonyms) |
164
|
1
|
|
|
1
|
|
10
|
$^H{$current_package} = 0; # Set compiler hint that we should not use the utf-8 version |
165
|
1
|
|
|
|
|
1302
|
return; |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
sub _utf8_find { |
169
|
15
|
|
|
15
|
|
9856
|
my $ref = shift; # This can be the wanted function or a find options hash |
170
|
|
|
|
|
|
|
# Make argument always into the find's options hash |
171
|
15
|
100
|
|
|
|
65
|
my %find_options_hash = ref($ref) eq "HASH" ? %$ref : (wanted => $ref); |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
# Holds the (possibly encoded) arguments |
174
|
15
|
|
|
|
|
32
|
my @args = @_; |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# Get the hint from the caller (one level deeper if called from finddepth) |
177
|
15
|
100
|
50
|
|
|
70
|
my $hints = ((caller 1)[3]//'') ne 'File::Find::utf8::_utf8_finddepth' ? (caller 0)[10] : (caller 1)[10]; |
178
|
15
|
100
|
|
|
|
646
|
if ($hints->{$current_package}) { |
179
|
12
|
100
|
|
|
|
38
|
$UTF8_CHECK |= Encode::LEAVE_SRC if $UTF8_CHECK; # Enforce LEAVE_SRC |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# Save original processors |
182
|
12
|
|
|
|
|
16
|
my %org_proc; |
183
|
12
|
|
|
|
|
23
|
for my $proc ("wanted", "preprocess", "postprocess") { $org_proc{$proc} = $find_options_hash{$proc}; } |
|
36
|
|
|
|
|
60
|
|
184
|
|
|
|
|
|
|
my $follow_option = (exists $find_options_hash{follow} && $find_options_hash{follow}) |
185
|
12
|
|
33
|
|
|
66
|
|| (exists $find_options_hash{follow_fast} && $find_options_hash{follow_fast}); |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# Wrap processors to become utf8-aware |
188
|
12
|
50
|
33
|
|
|
43
|
if (defined $org_proc{wanted} && ref $org_proc{wanted}) { |
189
|
|
|
|
|
|
|
$find_options_hash{wanted} = sub { |
190
|
|
|
|
|
|
|
# Decode the file variables so they become characters |
191
|
15
|
50
|
|
15
|
|
450
|
local $_ = $_UTF8->decode($_, $UTF8_CHECK) if $_; |
192
|
15
|
50
|
|
|
|
106
|
local $File::Find::name = $_UTF8->decode($File::Find::name, $UTF8_CHECK) if $File::Find::name; |
193
|
15
|
50
|
|
|
|
74
|
local $File::Find::dir = $_UTF8->decode($File::Find::dir, $UTF8_CHECK) if $File::Find::dir; |
194
|
15
|
0
|
33
|
|
|
57
|
local $File::Find::fullname = $_UTF8->decode($File::Find::fullname, $UTF8_CHECK) if $follow_option && $File::Find::fullname; |
195
|
|
|
|
|
|
|
# These are only necessary for compatibility reasons (find.pl, find2perl). |
196
|
|
|
|
|
|
|
# If you need them, set $File::Find::utf8::SPECIALVARS |
197
|
15
|
0
|
33
|
|
|
24
|
local $File::Find::topdir = $_UTF8->decode($File::Find::topdir, $UTF8_CHECK) if $SPECIALVARS && $File::Find::topdir; |
198
|
15
|
0
|
33
|
|
|
22
|
local $File::Find::topdev = $_UTF8->decode($File::Find::topdev, $UTF8_CHECK) if $SPECIALVARS && $File::Find::topdev; |
199
|
15
|
0
|
33
|
|
|
30
|
local $File::Find::topino = $_UTF8->decode($File::Find::topino, $UTF8_CHECK) if $SPECIALVARS && $File::Find::topino; |
200
|
15
|
0
|
33
|
|
|
26
|
local $File::Find::topmode = $_UTF8->decode($File::Find::topmode, $UTF8_CHECK) if $SPECIALVARS && $File::Find::topmode; |
201
|
15
|
0
|
33
|
|
|
22
|
local $File::Find::topnlink = $_UTF8->decode($File::Find::topnlink, $UTF8_CHECK) if $SPECIALVARS && $File::Find::topnlink; |
202
|
15
|
0
|
|
|
|
40
|
return $org_proc{wanted}->(map { $_ ? $_UTF8->decode($_, $UTF8_CHECK) : $_ } @_); |
|
0
|
|
|
|
|
0
|
|
203
|
12
|
|
|
|
|
58
|
}; |
204
|
|
|
|
|
|
|
} |
205
|
12
|
|
|
|
|
25
|
for my $proc ("preprocess", "postprocess") { |
206
|
24
|
100
|
66
|
|
|
55
|
if (defined $org_proc{$proc} && ref $org_proc{$proc}) { |
207
|
|
|
|
|
|
|
$find_options_hash{$proc} = sub { |
208
|
|
|
|
|
|
|
# Decode the file variables so they become characters |
209
|
3
|
50
|
|
3
|
|
86
|
local $File::Find::dir = $_UTF8->decode($File::Find::dir, $UTF8_CHECK) if $File::Find::dir; |
210
|
|
|
|
|
|
|
# Decode the arguments and encode the results |
211
|
3
|
0
|
|
|
|
23
|
return map { $_ ? $_UTF8->encode($_, $UTF8_CHECK) : $_ } $org_proc{$proc}->(map { $_ ? $_UTF8->decode($_, $UTF8_CHECK) : $_ } @_); |
|
0
|
50
|
|
|
|
0
|
|
|
72
|
|
|
|
|
310
|
|
212
|
3
|
|
|
|
|
9
|
}; |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
# Encode arguments as utf-8 so that the original File::Find receives bytes |
216
|
12
|
50
|
|
|
|
24
|
@args = map { $_ ? $_UTF8->encode($_, $UTF8_CHECK) : $_ } @_; |
|
12
|
|
|
|
|
104
|
|
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
# Make sure warning level propagates to File::Find |
220
|
|
|
|
|
|
|
# Note: on perl prior to v5.12 warnings_fatal_enabled does not exist |
221
|
|
|
|
|
|
|
# so we can not use it. |
222
|
14
|
100
|
66
|
|
|
607
|
if (!warnings::enabled('File::Find')) { |
|
|
100
|
|
|
|
|
|
223
|
4
|
|
|
4
|
|
26
|
no warnings 'File::Find'; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
265
|
|
224
|
3
|
|
|
|
|
747
|
return $_orig_functions{find}->(\%find_options_hash, @args); |
225
|
|
|
|
|
|
|
} elsif (!exists &warnings::fatal_enabled or !warnings::fatal_enabled('File::Find')) { |
226
|
4
|
|
|
4
|
|
22
|
use warnings 'File::Find'; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
212
|
|
227
|
10
|
|
|
|
|
2153
|
return $_orig_functions{find}->(\%find_options_hash, @args); |
228
|
|
|
|
|
|
|
} else { |
229
|
4
|
|
|
4
|
|
32
|
use warnings FATAL => qw(File::Find); |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
580
|
|
230
|
1
|
|
|
|
|
314
|
return $_orig_functions{find}->(\%find_options_hash, @args); |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
sub _utf8_finddepth { |
235
|
4
|
|
|
4
|
|
4681
|
my $ref = shift; # This can be the wanted function or a find options hash |
236
|
4
|
50
|
|
|
|
24
|
return _utf8_find( { bydepth => 1, ref($ref) eq "HASH" ? %$ref : (wanted => $ref) }, @_); |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
1; |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
__END__ |