line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Demoroniser; |
2
|
|
|
|
|
|
|
|
3
|
6
|
|
|
6
|
|
147855
|
use strict; |
|
6
|
|
|
|
|
18
|
|
|
6
|
|
|
|
|
240
|
|
4
|
6
|
|
|
6
|
|
34
|
use warnings; |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
229
|
|
5
|
|
|
|
|
|
|
|
6
|
6
|
|
|
6
|
|
33
|
use vars qw( $VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS ); |
|
6
|
|
|
|
|
18
|
|
|
6
|
|
|
|
|
552
|
|
7
|
6
|
|
|
6
|
|
5868
|
use Encode::ZapCP1252; |
|
6
|
|
|
|
|
109932
|
|
|
6
|
|
|
|
|
834
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
BEGIN { |
10
|
6
|
|
|
6
|
|
52
|
require Exporter; |
11
|
6
|
|
|
|
|
13
|
$VERSION = '0.06'; |
12
|
6
|
|
|
|
|
92
|
@ISA = qw( Exporter ); |
13
|
6
|
|
|
|
|
19
|
@EXPORT = qw(); |
14
|
6
|
|
|
|
|
22
|
%EXPORT_TAGS = ( |
15
|
|
|
|
|
|
|
'all' => [ qw( demoroniser demoroniser_utf8 ) ] |
16
|
|
|
|
|
|
|
); |
17
|
6
|
|
|
|
|
12
|
@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
|
6
|
|
|
|
|
2056
|
|
18
|
|
|
|
|
|
|
} |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
my %character = ( # ASCII UTF8 |
21
|
|
|
|
|
|
|
"\xE2\x80\x9A" => [ ',', "\x201A" ], # 82 - SINGLE LOW-9 QUOTATION MARK |
22
|
|
|
|
|
|
|
"\xE2\x80\x9E" => [ ',,', "\x201E" ], # 84 - DOUBLE LOW-9 QUOTATION MARK |
23
|
|
|
|
|
|
|
"\xE2\x80\xA6" => [ '...', "\x2026" ], # 85 - HORIZONTAL ELLIPSIS |
24
|
|
|
|
|
|
|
"\xCB\x86" => [ '^', "\x02C6" ], # 88 - MODIFIER LETTER CIRCUMFLEX ACCENT |
25
|
|
|
|
|
|
|
"\xE2\x80\x98" => [ '`', "\x2018" ], # 91 - LEFT SINGLE QUOTATION MARK |
26
|
|
|
|
|
|
|
"\xE2\x80\x99" => [ q{'}, "\x2019" ], # 92 - RIGHT SINGLE QUOTATION MARK |
27
|
|
|
|
|
|
|
"\xE2\x80\x9C" => [ '"', "\x201C" ], # 93 - LEFT DOUBLE QUOTATION MARK |
28
|
|
|
|
|
|
|
"\xE2\x80\x9D" => [ '"', "\x201D" ], # 94 - RIGHT DOUBLE QUOTATION MARK |
29
|
|
|
|
|
|
|
"\xE2\x80\xA2" => [ '*', "\x2022" ], # 95 - BULLET |
30
|
|
|
|
|
|
|
"\xE2\x80\x93" => [ '-', "\x2013" ], # 96 - EN DASH |
31
|
|
|
|
|
|
|
"\xE2\x80\x94" => [ '-', "\x2014" ], # 97 - EM DASH |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
"\xE2\x80\xB9" => [ '<', "\x2039" ], # 8B - SINGLE LEFT-POINTING ANGLE |
34
|
|
|
|
|
|
|
# QUOTATION MARK |
35
|
|
|
|
|
|
|
"\xE2\x80\xBA" => [ '>', "\x203A" ], # 9B - SINGLE RIGHT-POINTING ANGLE |
36
|
|
|
|
|
|
|
# QUOTATION MARK |
37
|
|
|
|
|
|
|
); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my $characters_re = '(' . join( '|', keys %character ) . ')'; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
sub demoroniser { |
42
|
4
|
|
|
4
|
1
|
1044
|
my $str = shift; |
43
|
4
|
100
|
|
|
|
13
|
return unless(defined $str); |
44
|
|
|
|
|
|
|
|
45
|
3
|
|
|
|
|
66
|
$str =~ s/$characters_re/$character{$1}[0]/g; |
46
|
|
|
|
|
|
|
|
47
|
3
|
|
|
|
|
86
|
zap_cp1252($str); |
48
|
|
|
|
|
|
|
|
49
|
3
|
|
|
|
|
72
|
return $str; |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
sub demoroniser_utf8 { |
53
|
4
|
|
|
4
|
1
|
8
|
my $str = shift; |
54
|
4
|
100
|
|
|
|
11
|
return unless(defined $str); |
55
|
|
|
|
|
|
|
|
56
|
3
|
|
|
|
|
50
|
$str =~ s/$characters_re/$character{$1}[1]/g; |
57
|
|
|
|
|
|
|
|
58
|
3
|
|
|
|
|
74
|
fix_cp1252($str); |
59
|
|
|
|
|
|
|
|
60
|
3
|
|
|
|
|
54
|
return $str; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
1; |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
__END__ |