| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Encode::ZapCP1252; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 3 |  |  | 3 |  | 74560 | use strict; | 
|  | 3 |  |  |  |  | 5 |  | 
|  | 3 |  |  |  |  | 140 |  | 
| 4 |  |  |  |  |  |  | require Exporter; | 
| 5 | 3 |  |  | 3 |  | 17 | use vars qw($VERSION @ISA @EXPORT); | 
|  | 3 |  |  |  |  | 5 |  | 
|  | 3 |  |  |  |  | 229 |  | 
| 6 | 3 |  |  | 3 |  | 89 | use 5.006_002; | 
|  | 3 |  |  |  |  | 15 |  | 
|  | 3 |  |  |  |  | 265 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | $VERSION = '0.33'; | 
| 9 |  |  |  |  |  |  | @ISA     = qw(Exporter); | 
| 10 |  |  |  |  |  |  | @EXPORT  = qw(zap_cp1252 fix_cp1252); | 
| 11 | 3 |  |  | 3 |  | 18 | use constant PERL588 => $] >= 5.008_008; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 1574 |  | 
| 12 |  |  |  |  |  |  | require Encode if PERL588; | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | our %ascii_for = ( | 
| 15 |  |  |  |  |  |  | # http://en.wikipedia.org/wiki/Windows-1252 | 
| 16 |  |  |  |  |  |  | "\x80" => 'e',    # EURO SIGN | 
| 17 |  |  |  |  |  |  | "\x82" => ',',    # SINGLE LOW-9 QUOTATION MARK | 
| 18 |  |  |  |  |  |  | "\x83" => 'f',    # LATIN SMALL LETTER F WITH HOOK | 
| 19 |  |  |  |  |  |  | "\x84" => ',,',   # DOUBLE LOW-9 QUOTATION MARK | 
| 20 |  |  |  |  |  |  | "\x85" => '...',  # HORIZONTAL ELLIPSIS | 
| 21 |  |  |  |  |  |  | "\x86" => '+',    # DAGGER | 
| 22 |  |  |  |  |  |  | "\x87" => '++',   # DOUBLE DAGGER | 
| 23 |  |  |  |  |  |  | "\x88" => '^',    # MODIFIER LETTER CIRCUMFLEX ACCENT | 
| 24 |  |  |  |  |  |  | "\x89" => '%',    # PER MILLE SIGN | 
| 25 |  |  |  |  |  |  | "\x8a" => 'S',    # LATIN CAPITAL LETTER S WITH CARON | 
| 26 |  |  |  |  |  |  | "\x8b" => '<',    # SINGLE LEFT-POINTING ANGLE QUOTATION MARK | 
| 27 |  |  |  |  |  |  | "\x8c" => 'OE',   # LATIN CAPITAL LIGATURE OE | 
| 28 |  |  |  |  |  |  | "\x8e" => 'Z',    # LATIN CAPITAL LETTER Z WITH CARON | 
| 29 |  |  |  |  |  |  | "\x91" => "'",    # LEFT SINGLE QUOTATION MARK | 
| 30 |  |  |  |  |  |  | "\x92" => "'",    # RIGHT SINGLE QUOTATION MARK | 
| 31 |  |  |  |  |  |  | "\x93" => '"',    # LEFT DOUBLE QUOTATION MARK | 
| 32 |  |  |  |  |  |  | "\x94" => '"',    # RIGHT DOUBLE QUOTATION MARK | 
| 33 |  |  |  |  |  |  | "\x95" => '*',    # BULLET | 
| 34 |  |  |  |  |  |  | "\x96" => '-',    # EN DASH | 
| 35 |  |  |  |  |  |  | "\x97" => '--',   # EM DASH | 
| 36 |  |  |  |  |  |  | "\x98" => '~',    # SMALL TILDE | 
| 37 |  |  |  |  |  |  | "\x99" => '(tm)', # TRADE MARK SIGN | 
| 38 |  |  |  |  |  |  | "\x9a" => 's',    # LATIN SMALL LETTER S WITH CARON | 
| 39 |  |  |  |  |  |  | "\x9b" => '>',    # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK | 
| 40 |  |  |  |  |  |  | "\x9c" => 'oe',   # LATIN SMALL LIGATURE OE | 
| 41 |  |  |  |  |  |  | "\x9e" => 'z',    # LATIN SMALL LETTER Z WITH CARON | 
| 42 |  |  |  |  |  |  | "\x9f" => 'Y',    # LATIN CAPITAL LETTER Y WITH DIAERESIS | 
| 43 |  |  |  |  |  |  | ); | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | our %utf8_for = ( | 
| 46 |  |  |  |  |  |  | # http://en.wikipedia.org/wiki/Windows-1252 | 
| 47 |  |  |  |  |  |  | "\x80" => '€',    # EURO SIGN | 
| 48 |  |  |  |  |  |  | "\x82" => ',',    # SINGLE LOW-9 QUOTATION MARK | 
| 49 |  |  |  |  |  |  | "\x83" => 'ƒ',    # LATIN SMALL LETTER F WITH HOOK | 
| 50 |  |  |  |  |  |  | "\x84" => '„',    # DOUBLE LOW-9 QUOTATION MARK | 
| 51 |  |  |  |  |  |  | "\x85" => '…',    # HORIZONTAL ELLIPSIS | 
| 52 |  |  |  |  |  |  | "\x86" => '†',    # DAGGER | 
| 53 |  |  |  |  |  |  | "\x87" => '‡',    # DOUBLE DAGGER | 
| 54 |  |  |  |  |  |  | "\x88" => 'ˆ',    # MODIFIER LETTER CIRCUMFLEX ACCENT | 
| 55 |  |  |  |  |  |  | "\x89" => '‰',    # PER MILLE SIGN | 
| 56 |  |  |  |  |  |  | "\x8a" => 'Š',    # LATIN CAPITAL LETTER S WITH CARON | 
| 57 |  |  |  |  |  |  | "\x8b" => '‹',    # SINGLE LEFT-POINTING ANGLE QUOTATION MARK | 
| 58 |  |  |  |  |  |  | "\x8c" => 'Œ',    # LATIN CAPITAL LIGATURE OE | 
| 59 |  |  |  |  |  |  | "\x8e" => 'Ž',    # LATIN CAPITAL LETTER Z WITH CARON | 
| 60 |  |  |  |  |  |  | "\x91" => '‘',    # LEFT SINGLE QUOTATION MARK | 
| 61 |  |  |  |  |  |  | "\x92" => '’',    # RIGHT SINGLE QUOTATION MARK | 
| 62 |  |  |  |  |  |  | "\x93" => '“',    # LEFT DOUBLE QUOTATION MARK | 
| 63 |  |  |  |  |  |  | "\x94" => '”',    # RIGHT DOUBLE QUOTATION MARK | 
| 64 |  |  |  |  |  |  | "\x95" => '•',    # BULLET | 
| 65 |  |  |  |  |  |  | "\x96" => '–',    # EN DASH | 
| 66 |  |  |  |  |  |  | "\x97" => '—',    # EM DASH | 
| 67 |  |  |  |  |  |  | "\x98" => '˜',    # SMALL TILDE | 
| 68 |  |  |  |  |  |  | "\x99" => '™',    # TRADE MARK SIGN | 
| 69 |  |  |  |  |  |  | "\x9a" => 'š',    # LATIN SMALL LETTER S WITH CARON | 
| 70 |  |  |  |  |  |  | "\x9b" => '›',    # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK | 
| 71 |  |  |  |  |  |  | "\x9c" => 'œ',    # LATIN SMALL LIGATURE OE | 
| 72 |  |  |  |  |  |  | "\x9e" => 'ž',    # LATIN SMALL LETTER Z WITH CARON | 
| 73 |  |  |  |  |  |  | "\x9f" => 'Ÿ',    # LATIN CAPITAL LETTER Y WITH DIAERESIS | 
| 74 |  |  |  |  |  |  | ); | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | BEGIN { | 
| 77 | 3 | 50 |  | 3 |  | 15 | my $proto = $] >= 5.010000 ? '_' : '$'; | 
| 78 | 3 |  |  | 8 | 0 | 207 | eval "sub zap_cp1252($proto) { unshift \@_, \\%ascii_for; &_tweakit; }"; | 
|  | 8 |  |  |  |  | 2823 |  | 
|  | 8 |  |  |  |  | 25 |  | 
| 79 | 3 |  |  | 9 | 0 | 1371 | eval "sub fix_cp1252($proto) { unshift \@_, \\%utf8_for;  &_tweakit; }"; | 
|  | 9 |  |  |  |  | 2722 |  | 
|  | 9 |  |  |  |  | 29 |  | 
| 80 |  |  |  |  |  |  | } | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | sub _tweakit { | 
| 83 | 17 |  |  | 17 |  | 22 | my $table = shift; | 
| 84 | 17 | 100 |  |  |  | 58 | return unless defined $_[0]; | 
| 85 | 15 | 100 |  |  |  | 38 | local $_[0] = $_[0] if defined wantarray; | 
| 86 | 15 | 100 |  |  |  | 66 | if (PERL588 && Encode::is_utf8($_[0])) { | 
| 87 | 5 |  |  |  |  | 13 | _tweak_decoded($table, $_[0]); | 
| 88 |  |  |  |  |  |  | } else { | 
| 89 | 10 | 50 |  |  |  | 42 | $_[0] =~ s{([\x80-\x9f])}{$table->{$1} || $1}emxsg; | 
|  | 270 |  |  |  |  | 893 |  | 
| 90 |  |  |  |  |  |  | } | 
| 91 | 15 | 100 |  |  |  | 87 | return $_[0] if defined wantarray; | 
| 92 |  |  |  |  |  |  | } | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | sub _tweak_decoded { | 
| 95 | 5 |  |  | 5 |  | 7 | my $table = shift; | 
| 96 | 5 |  |  |  |  | 8 | local $@; | 
| 97 |  |  |  |  |  |  | # First, try to replace in the decoded string. | 
| 98 | 5 |  |  |  |  | 9 | eval { | 
| 99 | 5 |  |  |  |  | 32 | $_[0] =~ s{([\x80-\x9f])}{ | 
| 100 | 81 | 50 |  |  |  | 3451 | $table->{$1} ? Encode::decode('UTF-8', $table->{$1}) : $1 | 
| 101 |  |  |  |  |  |  | }emxsg | 
| 102 |  |  |  |  |  |  | }; | 
| 103 | 5 | 100 |  |  |  | 980 | if (my $err = $@) { | 
| 104 |  |  |  |  |  |  | # If we got a "Malformed UTF-8 character" error, then someone | 
| 105 |  |  |  |  |  |  | # likely turned on the utf8 flag without decoding. So turn it off. | 
| 106 |  |  |  |  |  |  | # and try again. | 
| 107 | 2 | 50 |  |  |  | 9 | die if $err !~ /Malformed/; | 
| 108 | 2 |  |  |  |  | 5 | Encode::_utf8_off($_[0]); | 
| 109 | 2 | 50 |  |  |  | 9 | $_[0] =~ s/([\x80-\x9f])/$table->{$1} || $1/emxsg; | 
|  | 54 |  |  |  |  | 200 |  | 
| 110 | 2 |  |  |  |  | 11 | Encode::_utf8_on($_[0]); | 
| 111 |  |  |  |  |  |  | } | 
| 112 |  |  |  |  |  |  | } | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  | 1; | 
| 115 |  |  |  |  |  |  | __END__ |