line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Data::Binary; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
70829
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
71
|
|
4
|
2
|
|
|
2
|
|
9
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
298
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our $VERSION = 0.01; |
7
|
|
|
|
|
|
|
|
8
|
2
|
|
|
2
|
|
10
|
use base qw(Exporter); |
|
2
|
|
|
|
|
7
|
|
|
2
|
|
|
|
|
386
|
|
9
|
|
|
|
|
|
|
|
10
|
2
|
|
|
2
|
|
2429
|
use Encode qw(decode_utf8); |
|
2
|
|
|
|
|
38539
|
|
|
2
|
|
|
|
|
580
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our @EXPORT_OK = qw(is_text is_binary); |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
sub is_text { |
15
|
8
|
|
|
8
|
1
|
31
|
my ($string) = @_; |
16
|
|
|
|
|
|
|
|
17
|
8
|
50
|
|
|
|
22
|
if (length($string) > 512) { |
18
|
8
|
|
|
|
|
14
|
$string = substr($string, 0, 512); |
19
|
|
|
|
|
|
|
} |
20
|
|
|
|
|
|
|
|
21
|
8
|
100
|
|
|
|
37
|
return '' if (index($string, "\c@") != -1); |
22
|
6
|
|
|
|
|
9
|
my $length = length($string); |
23
|
6
|
|
|
|
|
22
|
my $odd = ($string =~ tr/\x01\x02\x03\x04\x05\x06\x07\x09\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f//d); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# Detecting >=128 and non-UTF-8 is interesting. Note that all UTF-8 >=128 has several bytes with |
26
|
|
|
|
|
|
|
# >=128 set, so a quick test is possible by simply checking if any are >=128. However, the count |
27
|
|
|
|
|
|
|
# from that is typically wrong, if this is binary data, it'll not have been decoded. So we do this |
28
|
|
|
|
|
|
|
# in two steps. |
29
|
|
|
|
|
|
|
|
30
|
6
|
|
|
|
|
10
|
my $copy = $string; |
31
|
6
|
100
|
|
|
|
49
|
if (($copy =~ tr[\x80-\xff][]d) > 0) { |
32
|
2
|
|
|
|
|
13
|
my $modified = decode_utf8($string, Encode::FB_DEFAULT); |
33
|
2
|
|
|
2
|
|
2117
|
my $substitions = ($modified =~ tr/\x{fffd}//d); |
|
2
|
|
|
|
|
22
|
|
|
2
|
|
|
|
|
29
|
|
|
2
|
|
|
|
|
218
|
|
34
|
2
|
|
|
|
|
3
|
$odd += $substitions; |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
|
37
|
6
|
100
|
|
|
|
32
|
return '' if ($odd * 3 > $length); |
38
|
|
|
|
|
|
|
|
39
|
2
|
|
|
|
|
13
|
return 1; |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
sub is_binary { |
43
|
4
|
|
|
4
|
1
|
6
|
my ($string) = @_; |
44
|
4
|
|
|
|
|
10
|
return ! is_text($string); |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
1; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head1 NAME |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
Data::Binary - Simple detection of binary versus text in strings |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head1 SYNOPSIS |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
use Data::Binary qw(is_text is_binary); |
56
|
|
|
|
|
|
|
my $text = File::Slurp::read_file("test1.doc"); |
57
|
|
|
|
|
|
|
my $is_text = is_text($text); # equivalent to -T "test1.doc" |
58
|
|
|
|
|
|
|
my $is_binary = is_binary($text); # equivalent to -B "test1.doc" |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head1 DESCRIPTION |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
This simple module provides string equivalents to the -T / -B operators. Since |
63
|
|
|
|
|
|
|
these only work on file names and file handles, this module provides the same |
64
|
|
|
|
|
|
|
functions but on strings. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
Note that the actual implementation is currently different, basically because |
67
|
|
|
|
|
|
|
the -T / -B functions are in C/XS, and this module is written in pure Perl. |
68
|
|
|
|
|
|
|
For now, anyway. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=head1 FUNCTIONS |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=head2 is_text($string) |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
Uses the same kind of heuristics in -T, but applies them to a string. Returns true |
75
|
|
|
|
|
|
|
if the string is basically text. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head2 is_binary($string) |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Uses the same kind of heuristics in -B, but applies them to a string. Returns true |
80
|
|
|
|
|
|
|
if the string is basically binary. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 AUTHOR |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Stuart Watt, stuart@morungos.com |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 COPYRIGHT |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Copyright (c) 2014 Stuart Watt. All rights reserved. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=cut |