line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package PerlIO::via::PrepareCP1251; |
2
|
|
|
|
|
|
|
$PerlIO::via::PrepareCP1251::VERSION = '0.02'; |
3
|
|
|
|
|
|
|
# $Id$ |
4
|
|
|
|
|
|
|
# ABSTRACT: prepare unicode stream to be encoded as cp1251 |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
49759
|
use 5.010; |
|
1
|
|
|
|
|
2
|
|
7
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
18
|
|
8
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
25
|
|
9
|
1
|
|
|
1
|
|
3
|
use utf8; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
6
|
|
10
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
527
|
use charnames ':full'; |
|
1
|
|
|
|
|
185685
|
|
|
1
|
|
|
|
|
7
|
|
12
|
|
|
|
|
|
|
|
13
|
1
|
|
|
1
|
|
2014
|
use Unicode::Normalize; |
|
1
|
|
|
|
|
209683
|
|
|
1
|
|
|
|
|
840
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
our $INCOMPATIBLE_CHAR_MODE = 'default_char'; |
19
|
|
|
|
|
|
|
our $DEFAULT_CHAR = q{?}; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
our %CMAP = ( |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# Kazakh |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
"\x{04D8}" => "\x{0410}", # SCHWA |
27
|
|
|
|
|
|
|
"\x{04D9}" => "\x{0430}", |
28
|
|
|
|
|
|
|
"\x{0492}" => "\x{0413}", # GHE WITH STROKE |
29
|
|
|
|
|
|
|
"\x{0493}" => "\x{0433}", |
30
|
|
|
|
|
|
|
"\x{04A2}" => "\x{041D}", # EN WITH DESCENDER |
31
|
|
|
|
|
|
|
"\x{04A3}" => "\x{043D}", |
32
|
|
|
|
|
|
|
"\x{04AE}" => "\x{0423}", # STRAIGHT U |
33
|
|
|
|
|
|
|
"\x{04AF}" => "\x{0443}", |
34
|
|
|
|
|
|
|
"\x{04B0}" => "\x{0423}", # STRAIGHT U WITH STROKE |
35
|
|
|
|
|
|
|
"\x{04B1}" => "\x{0443}", |
36
|
|
|
|
|
|
|
"\x{04BA}" => "\x{0425}", # SHHA |
37
|
|
|
|
|
|
|
"\x{04BB}" => "\x{0445}", |
38
|
|
|
|
|
|
|
"\x{049A}" => "\x{041A}", # KA WITH DESCENDER |
39
|
|
|
|
|
|
|
"\x{049B}" => "\x{043A}", |
40
|
|
|
|
|
|
|
"\x{04E8}" => "\x{041E}", # BARRED O |
41
|
|
|
|
|
|
|
"\x{04E9}" => "\x{043E}", |
42
|
|
|
|
|
|
|
); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
_init(); |
46
|
|
|
|
|
|
|
1; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
{ |
50
|
|
|
|
|
|
|
my %codepage; |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
sub _init { |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# load cp1251 to unicode table |
55
|
1
|
|
|
1
|
|
5
|
while ( ) { |
56
|
275
|
100
|
|
|
|
917
|
next unless my ( $code, $ucode ) = m{ 0x ([0-9A-F]{2}) \s+ 0x ([0-9A-F]{4}) }xms; |
57
|
255
|
|
|
|
|
1005
|
$codepage{ chr hex $ucode } = chr hex $code; |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# remove diacritics |
61
|
1
|
|
|
|
|
3
|
for my $codepoint ( 0x000 .. 0x2FFF ) { |
62
|
12288
|
|
|
|
|
11431
|
my $chr = chr $codepoint; |
63
|
12288
|
100
|
|
|
|
18208
|
next if exists $codepage{$chr}; |
64
|
12033
|
100
|
|
|
|
16434
|
next if exists $CMAP{$chr}; |
65
|
|
|
|
|
|
|
|
66
|
12017
|
|
|
|
|
16900
|
my $nfd = substr NFD( $chr ), 0, 1; |
67
|
12017
|
100
|
|
|
|
221966
|
next unless exists $codepage{$nfd}; |
68
|
|
|
|
|
|
|
|
69
|
530
|
|
|
|
|
1318
|
$CMAP{$chr} = $nfd; |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
1
|
|
|
|
|
3
|
return; |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
sub _convert_symbol { |
77
|
10
|
|
|
10
|
|
9
|
my ($char) = @_; |
78
|
|
|
|
|
|
|
|
79
|
10
|
100
|
|
|
|
31
|
return $char if exists $codepage{$char}; |
80
|
8
|
100
|
|
|
|
27
|
return $CMAP{$char} if exists $CMAP{$char}; |
81
|
3
|
100
|
|
|
|
10
|
return q{} if $INCOMPATIBLE_CHAR_MODE eq 'skip'; |
82
|
2
|
100
|
|
|
|
7
|
return $DEFAULT_CHAR if $INCOMPATIBLE_CHAR_MODE eq 'default_char'; |
83
|
1
|
50
|
|
|
|
4
|
return $char if $INCOMPATIBLE_CHAR_MODE eq 'pass'; |
84
|
|
|
|
|
|
|
|
85
|
1
|
50
|
|
|
|
8
|
if ( my $name = charnames::viacode(ord $char) ) { |
86
|
1
|
|
|
|
|
25260
|
return "\\N{$name}"; |
87
|
|
|
|
|
|
|
} |
88
|
0
|
|
|
|
|
0
|
return sprintf '\x{%04x}', ord $char; |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
sub PUSHED { |
95
|
7
|
|
|
7
|
1
|
771190
|
my ($class) = @_; |
96
|
7
|
|
|
|
|
32
|
return bless \*PUSHED, $class; |
97
|
|
|
|
|
|
|
}; |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
sub UTF8 { |
102
|
7
|
|
|
7
|
1
|
53
|
1; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
sub WRITE { |
108
|
7
|
|
|
7
|
|
1813048
|
my ( undef, $line, $handle ) = @_; |
109
|
7
|
|
|
|
|
15
|
utf8::decode $line; |
110
|
|
|
|
|
|
|
my $out = join q{}, |
111
|
7
|
|
|
|
|
18
|
map { _convert_symbol($_) } |
|
10
|
|
|
|
|
18
|
|
112
|
|
|
|
|
|
|
split m//, $line; |
113
|
7
|
50
|
|
|
|
9
|
return ( print {$handle} $out ) ? length($line) : -1; |
|
7
|
|
|
|
|
57
|
|
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=pod |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=encoding UTF-8 |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head1 NAME |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
PerlIO::via::PrepareCP1251 - prepare unicode stream to be encoded as cp1251 |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head1 VERSION |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
version 0.02 |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=head1 SYNOPSIS |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
require PerlIO::via::PrepareCP1251; |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# note the filter order |
133
|
|
|
|
|
|
|
open my $fh, '>:encoding(cp1251):via(PrepareCP1251)', $filename; |
134
|
|
|
|
|
|
|
print {$fh} $unicode_string; |
135
|
|
|
|
|
|
|
close $fh; |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=head2 $INCOMPATIBLE_CHAR_MODE |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
Behaviour for incompatible chars: |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
pass - put original symbol |
142
|
|
|
|
|
|
|
default_char - put $DEFAULT_CHAR |
143
|
|
|
|
|
|
|
skip - don't put anything |
144
|
|
|
|
|
|
|
charname - put \N{charname} |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head1 PerlIO::via interface |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=head2 PUSHED |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
Constructor |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head2 UTF8 |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
Require utf8 stream |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head2 WRITE |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Filter text |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=head1 AUTHOR |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
liosha |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
This software is copyright (c) 2015 by liosha. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
169
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=cut |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
__DATA__ |