line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
=head1 NAME |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
Unicode::CheckUTF8 - checks if scalar is valid UTF-8 |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 SYNOPSIS |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use Unicode::CheckUTF8 qw(is_utf8); |
8
|
|
|
|
|
|
|
my $is_ok = is_utf8($scalar); |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 DESCRIPTION |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
This is an XS wrapper around some Unicode Consortium code to check |
13
|
|
|
|
|
|
|
if a string is valid UTF-8, revised to conform to what expat/Mozilla |
14
|
|
|
|
|
|
|
think is valid UTF-8, especially with regard to low-ASCII characters. |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Note that this module has NOTHING to do with Perl's internal UTF8 flag |
17
|
|
|
|
|
|
|
on scalars. |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
This module is for use when you're getting input from users and want |
20
|
|
|
|
|
|
|
to make sure it's valid UTF-8 before continuing. |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 HISTORY |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
This is some old code, dating back to before Perl 5.8 and before |
25
|
|
|
|
|
|
|
Unicode support in Perl. I wish I didn't have to keep using this |
26
|
|
|
|
|
|
|
code, but I can't find any other code on CPAN for UTF-8 checking |
27
|
|
|
|
|
|
|
that's both sufficiently fast and, more importantly, correct. So now |
28
|
|
|
|
|
|
|
there's yet another way to do it. |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=cut |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
package Unicode::CheckUTF8; |
33
|
|
|
|
|
|
|
|
34
|
3
|
|
|
3
|
|
97262
|
use base 'Exporter'; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
515
|
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
BEGIN { |
37
|
3
|
|
|
3
|
|
7
|
$VERSION = "1.03"; |
38
|
|
|
|
|
|
|
|
39
|
3
|
|
|
|
|
7
|
@EXPORT = qw(); |
40
|
3
|
|
|
|
|
9
|
@EXPORT_OK = qw(isLegalUTF8String is_utf8); |
41
|
|
|
|
|
|
|
|
42
|
3
|
|
|
|
|
24
|
require XSLoader; |
43
|
3
|
|
|
|
|
1882
|
XSLoader::load Unicode::CheckUTF8, $VERSION; |
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
1; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 BUGS |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
Hopefully not, but mail me if so! |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 AUTHOR |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
Brad Fitzpatrick E<lt>brad@danga.comE<gt>, based on Unicode Consortium code. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
Artur Bergman, helping me kill old Inline code using his awesome |
57
|
|
|
|
|
|
|
knowledge of all things Perl and XS. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
|