File Coverage

blib/lib/LaTeX/Encode.pm
Criterion Covered Total %
statement 91 94 96.8
branch 33 42 78.5
condition 7 13 53.8
subroutine 12 12 100.0
pod 4 4 100.0
total 147 165 89.0


line stmt bran cond sub pod time code
1             #========================================================================
2             #
3             # LaTeX::Encode
4             #
5             # DESCRIPTION
6             # Provides a function to encode text that contains characters
7             # special to LaTeX.
8             #
9             # AUTHOR
10             # Andrew Ford
11             #
12             # COPYRIGHT
13             # Copyright (C) 2007-2012 Andrew Ford. All Rights Reserved.
14             #
15             # This module is free software; you can redistribute it and/or
16             # modify it under the same terms as Perl itself.
17             #
18             # $Id: Encode.pm 32 2012-09-30 20:33:42Z andrew $
19             #========================================================================
20              
21             package LaTeX::Encode;
22              
23 12     12   344367 use strict;
  12         33  
  12         1420  
24 12     12   73 use warnings;
  12         26  
  12         699  
25              
26             require 5.008_001;
27              
28 12     12   15099 use Readonly;
  12         46635  
  12         1243  
29              
30 12     12   110 use base qw(Exporter);
  12         20  
  12         8860  
31              
32             our $VERSION = '0.091.5';
33              
34             our @EXPORT = qw(latex_encode);
35             our @EXPORT_OK = qw(add_latex_encodings remove_latex_encodings reset_latex_encodings);
36             our %EXPORT_TAGS = ( all => [ qw( latex_encode
37             add_latex_encodings
38             remove_latex_encodings
39             reset_latex_encodings ) ] );
40              
41             our @mappings_specified_on_import;
42              
43             Readonly my $IMPORT_TAG_ADD => 'add';
44             Readonly my $IMPORT_TAG_REMOVE => 'remove';
45              
46             my %latex_encoding_base;
47              
48             our $encoded_char_re;
49              
50             our %latex_encoding;
51              
52             our %provided_by;
53              
# Encode text that contains characters special to LaTeX.
#
# Arguments:
#   $text   - the string to encode
#   options - either a hash reference or a flat key/value list:
#       except    - string listing characters that must be left unencoded;
#                   every character is taken literally
#       iquotes   - if true, straight quotes are converted to LaTeX
#                   opening/closing quote sequences
#       packages  - hash reference; for each encoded character that has an
#                   entry in %provided_by, that entry is set as a key (value 1)
#       unmatched - handed to _replace_unencoded_char() for characters that
#                   have no encoding
#
# Returns the encoded string.

sub latex_encode {
    my $text    = shift;
    my $options = ref $_[0] ? shift : { @_ };

    my $exceptions    = $options->{except};
    my $iquotes       = $options->{iquotes};
    my $packages_reqd = $options->{packages};
    my $unmatched     = $options->{unmatched};

    # If a list of exception characters was specified then we replace
    # those characters in the text string with something that is not
    # going to match the encoding regular expression.  The placeholder
    # is a hex 01 byte followed by six hexadecimal digits, which is wide
    # enough for any Unicode code point.  A literal hex 01 byte in the
    # input is protected the same way so that decoding is unambiguous.
    #
    # quotemeta() makes every exception character literal inside the
    # bracketed class, so "]", "^", "-" and "\" cannot alter or break it.

    if ($exceptions) {
        my $protected = quotemeta $exceptions;
        $text =~ s{ ([\x{01}$protected]) }
                  { sprintf("\x{01}%06x", ord($1)) }gxe;
    }

    # Deal with "intelligent quotes".  This can be done separately
    # from the rest of the encoding as the characters ` and ' are not
    # encoded.

    if ($iquotes) {

        # A single or double quote before a word character, preceded
        # by start of line, whitespace or punctuation gets converted
        # to "`" or "``" respectively.

        $text =~ s{ ( ^ | [\s\p{IsPunct}] )( ['"] ) (?= \w ) }
                  { $2 eq '"' ? "$1``" : "$1`" }mgxe;

        # A double quote preceded by a word or punctuation character
        # and followed by whitespace or end of line gets converted to
        # "''".  (Final single quotes are represented by themselves so
        # we don't need to worry about those.)

        $text =~ s{ (?<= [\w\p{IsPunct}] ) " (?= \s | $ ) }
                  { "''" }mgxe;
    }

    # Replace any characters that need encoding, noting the package
    # that provides the replacement if the caller asked for that.

    $text =~ s{ ($encoded_char_re) }
              { $packages_reqd->{$provided_by{$1}} = 1
                    if ref $packages_reqd and exists $provided_by{$1};
                $latex_encoding{$1} }gsxe;

    # Anything left over that is a control character (other than hex 01,
    # tab-adjacent newline/carriage return) or lies in the range
    # U+007F..U+FFFF has no encoding; let the "unmatched" policy decide.

    $text =~ s{ ([\x{00}\x{02}-\x{09}\x{0b}\x{0c}\x{0e}-\x{1f}\x{007f}-\x{ffff}]) }
              { _replace_unencoded_char(ord($1), $unmatched) }gxse;

    # If the caller specified exceptions then we need to decode them

    if ($exceptions) {
        $text =~ s{ \x{01} ([0-9a-f]{6}) }{ chr(hex($1)) }gxe;
    }

    return $text;
}
118              
119              
# Produce the replacement for a character that has no LaTeX encoding.
#
# Arguments:
#   $charcode - numeric code of the character
#   $action   - what to do with it:
#                 code reference - called with $charcode; its result is used
#                 'ignore'       - the character is dropped
#                 other true     - used as a macro name
#                 false/undef    - the macro name 'unmatched' is used
#
# Returns the replacement string; for the macro forms this is
# "\<name>{<code>}" with the code as at least four lower-case hex digits.

sub _replace_unencoded_char {
    my ($charcode, $action) = @_;

    return $action->($charcode)
        if ref $action eq 'CODE';

    my $macro = $action || 'unmatched';

    return ''
        if $macro eq 'ignore';

    return sprintf('\\%s{%04x}', $macro, $charcode);
}
133              
134              
# Add encodings to the encoding table.
#
# Takes a list of character => encoding pairs.  A pair is applied only if
# the character has no encoding yet or its current encoding differs; the
# encoding regular expression is recompiled if anything was applied.
#
# In void context nothing is returned.  Otherwise returns a list of
# character => previous encoding pairs for the entries that were replaced
# (characters that had no encoding before are not included).

sub add_latex_encodings {
    my (%new_encoding) = @_;

    my $want_previous = defined wantarray;
    my %previous;
    my $table_changed;

    for my $char (keys %new_encoding) {
        my $known = exists $latex_encoding{$char};

        # Nothing to do when the table already holds this exact encoding
        next if $known and not $latex_encoding{$char} ne $new_encoding{$char};

        if ($want_previous and $known) {
            $previous{$char} = $latex_encoding{$char};
        }
        $latex_encoding{$char} = $new_encoding{$char};
        $table_changed = 1;
    }

    if ($table_changed) {
        _compile_encoding_regexp();
    }

    return unless $want_previous;
    return %previous;
}
154              
155              
# Remove encodings from the encoding table.
#
# Takes a list of characters.  Each one that currently has an encoding is
# deleted from the table; the encoding regular expression is recompiled if
# anything was deleted.
#
# In void context nothing is returned.  Otherwise returns a list of
# character => removed encoding pairs.

sub remove_latex_encodings {
    my (@keys) = @_;

    # The exists test is made per key as the list is walked, so a key that
    # is listed twice is only recorded (and deleted) the first time round.
    my %removed_encoding = map {
        exists $latex_encoding{$_} ? ( $_ => delete $latex_encoding{$_} ) : ()
    } @keys;

    if (%removed_encoding) {
        _compile_encoding_regexp();
    }

    return unless defined wantarray;
    return %removed_encoding;
}
172              
173              
# Reset the encoding table to the base encodings.
#
# May be called as a class method or as a plain function:
#
#   LaTeX::Encode->reset_latex_encodings($forget_import_specifiers);
#   reset_latex_encodings($forget_import_specifiers);
#
# Unless $forget_import_specifiers is true, the 'add' and 'remove'
# mappings recorded in @mappings_specified_on_import are applied again
# on top of the base table.  The encoding regular expression is always
# recompiled.  Returns nothing.

sub reset_latex_encodings {
    my ($class, $forget_import_specifiers) = @_;

    # A first argument without "::" is not a class name, so the sub was
    # called as a plain function and that argument is really the flag.
    # The defined test keeps a call with no arguments at all from raising
    # an "uninitialized value" warning in the pattern match.
    if (!defined $class or $class !~ /::/) {
        $forget_import_specifiers = $class;
    }

    %latex_encoding = %latex_encoding_base;

    if (!$forget_import_specifiers) {
        foreach my $spec (@mappings_specified_on_import) {
            my ($tag, $mappings) = @{$spec};

            if ($tag eq $IMPORT_TAG_ADD) {
                add_latex_encodings(%{$mappings});
            }
            elsif ($tag eq $IMPORT_TAG_REMOVE) {
                remove_latex_encodings(@{$mappings});
            }
        }
    }

    _compile_encoding_regexp();

    return;
}
200              
201              
# Import function - picks out 'add' and 'remove' tags and adds or removes
# encodings appropriately.
#
#   use LaTeX::Encode add => { ... }, remove => [ ... ], qw(...);
#
# An 'add' tag must be followed by a hash reference of character =>
# encoding pairs and a 'remove' tag by an array reference of characters.
# Each tag and its argument are taken out of the import list, applied to
# the encoding table, and recorded in @mappings_specified_on_import.
# Whatever is left in the list is handed to Exporter via export_to_level().

sub import {
    my ($self, @list) = @_;

    my $i = 0;
    while ($i < @list) {
        if ($list[$i] eq $IMPORT_TAG_ADD) {
            # splice removes the tag and its argument, so $i already
            # indexes the next unprocessed item and is not advanced
            my (undef, $to_add) = splice(@list, $i, 2);
            add_latex_encodings(%$to_add);
            push @mappings_specified_on_import, [ $IMPORT_TAG_ADD => $to_add ];
        }
        elsif ($list[$i] eq $IMPORT_TAG_REMOVE) {
            my (undef, $to_remove) = splice(@list, $i, 2);
            remove_latex_encodings(@$to_remove);
            push @mappings_specified_on_import, [ $IMPORT_TAG_REMOVE => $to_remove ];
        }
        else {
            $i++;
        }
    }

    # Level 1: export into the package that issued the "use"
    $self->export_to_level(1, $self, @list);
    return;
}
227              
228              
229             %latex_encoding_base = (
230              
231             chr(0x0022) => '{\\textacutedbl}', # QUOTATION MARK (")
232             chr(0x0023) => '\\#', # NUMBER SIGN (#)
233             chr(0x0024) => '\\$', # DOLLAR SIGN ($)
234             chr(0x0025) => '\\%', # PERCENT SIGN (%)
235             chr(0x0026) => '\\&', # AMPERSAND (&)
236             chr(0x003c) => '{\\textlangle}', # LESS-THAN SIGN (<)
237             chr(0x003e) => '{\\textrangle}', # GREATER-THAN SIGN (>)
238             chr(0x005c) => '{\\textbackslash}', # REVERSE SOLIDUS (\)
239             chr(0x005e) => '\\^{ }', # CIRCUMFLEX ACCENT (^)
240             chr(0x005f) => '\\_', # LOW LINE (_)
241             chr(0x007b) => '\\{', # LEFT CURLY BRACKET ({)
242             chr(0x007d) => '\\}', # RIGHT CURLY BRACKET (})
243             chr(0x007e) => '{\\texttildelow}', # TILDE (~)
244              
245             # C1 Controls and Latin-1 Supplement
246              
247             chr(0x00a0) => '~', # NO-BREAK SPACE ( )
248             chr(0x00a1) => '{\\textexclamdown}', # INVERTED EXCLAMATION MARK (¡)
249             chr(0x00a2) => '{\\textcent}', # CENT SIGN (¢)
250             chr(0x00a3) => '{\\textsterling}', # POUND SIGN (£)
251             chr(0x00a4) => '{\\textcurrency}', # CURRENCY SIGN (¤)
252             chr(0x00a5) => '{\\textyen}', # YEN SIGN (¥)
253             chr(0x00a6) => '{\\textbrokenbar}', # BROKEN BAR (¦)
254             chr(0x00a7) => '{\\textsection}', # SECTION SIGN (§)
255             chr(0x00a8) => '{\\textasciidieresis}', # DIAERESIS (¨)
256             chr(0x00a9) => '{\\textcopyright}', # COPYRIGHT SIGN (©)
257             chr(0x00aa) => '{\\textordfeminine}', # FEMININE ORDINAL INDICATOR (ª)
258             chr(0x00ab) => '{\\guillemotleft}', # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK («)
259             chr(0x00ac) => '{\\textlnot}', # NOT SIGN (¬)
260             chr(0x00ad) => '\\-', # SOFT HYPHEN (­)
261             chr(0x00ae) => '{\\textregistered}', # REGISTERED SIGN (®)
262             chr(0x00af) => '{\\textasciimacron}', # MACRON (¯)
263             chr(0x00b0) => '{\\textdegree}', # DEGREE SIGN (°)
264             chr(0x00b1) => '{\\textpm}', # PLUS-MINUS SIGN (±)
265             chr(0x00b2) => '{\\texttwosuperior}', # SUPERSCRIPT TWO (²)
266             chr(0x00b3) => '{\\textthreesuperior}', # SUPERSCRIPT THREE (³)
267             chr(0x00b4) => '{\\textasciiacute}', # ACUTE ACCENT (´)
268             chr(0x00b5) => '{\\textmu}', # MICRO SIGN (µ)
269             chr(0x00b6) => '{\\textparagraph}', # PILCROW SIGN (¶)
270             chr(0x00b7) => '{\\textperiodcentered}', # MIDDLE DOT (·)
271             chr(0x00b8) => '{\\c{~}}', # CEDILLA (¸)
272             chr(0x00b9) => '{\\textonesuperior}', # SUPERSCRIPT ONE (¹)
273             chr(0x00ba) => '{\\textordmasculine}', # MASCULINE ORDINAL INDICATOR (º)
274             chr(0x00bb) => '{\\guillemotright}', # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (»)
275             chr(0x00bc) => '{\\textonequarter}', # VULGAR FRACTION ONE QUARTER (¼)
276             chr(0x00bd) => '{\\textonehalf}', # VULGAR FRACTION ONE HALF (½)
277             chr(0x00be) => '{\\textthreequarters}', # VULGAR FRACTION THREE QUARTERS (¾)
278             chr(0x00bf) => '{\\textquestiondown}', # INVERTED QUESTION MARK (¿)
279             chr(0x00c0) => '{\\`A}', # LATIN CAPITAL LETTER A WITH GRAVE (À)
280             chr(0x00c1) => '{\\\'A}', # LATIN CAPITAL LETTER A WITH ACUTE (Á)
281             chr(0x00c2) => '{\\^A}', # LATIN CAPITAL LETTER A WITH CIRCUMFLEX (Â)
282             chr(0x00c3) => '{\\~A}', # LATIN CAPITAL LETTER A WITH TILDE (Ã)
283             chr(0x00c4) => '{\\"A}', # LATIN CAPITAL LETTER A WITH DIAERESIS (Ä)
284             chr(0x00c5) => '{\\AA}', # LATIN CAPITAL LETTER A WITH RING ABOVE (Å)
285             chr(0x00c6) => '{\\AE}', # LATIN CAPITAL LETTER AE (Æ)
286             chr(0x00c7) => '\\c{C}', # LATIN CAPITAL LETTER C WITH CEDILLA (Ç)
287             chr(0x00c8) => '{\\`E}', # LATIN CAPITAL LETTER E WITH GRAVE (È)
288             chr(0x00c9) => '{\\\'E}', # LATIN CAPITAL LETTER E WITH ACUTE (É)
289             chr(0x00ca) => '{\\^E}', # LATIN CAPITAL LETTER E WITH CIRCUMFLEX (Ê)
290             chr(0x00cb) => '{\\"E}', # LATIN CAPITAL LETTER E WITH DIAERESIS (Ë)
291             chr(0x00cc) => '{\\`I}', # LATIN CAPITAL LETTER I WITH GRAVE (Ì)
292             chr(0x00cd) => '{\\\'I}', # LATIN CAPITAL LETTER I WITH ACUTE (Í)
293             chr(0x00ce) => '{\\^I}', # LATIN CAPITAL LETTER I WITH CIRCUMFLEX (Î)
294             chr(0x00cf) => '{\\"I}', # LATIN CAPITAL LETTER I WITH DIAERESIS (Ï)
295             chr(0x00d0) => '{\\DH}', # LATIN CAPITAL LETTER ETH (Ð)
296             chr(0x00d1) => '{\\~N}', # LATIN CAPITAL LETTER N WITH TILDE (Ñ)
297             chr(0x00d2) => '{\\`O}', # LATIN CAPITAL LETTER O WITH GRAVE (Ò)
298             chr(0x00d3) => '{\\\'O}', # LATIN CAPITAL LETTER O WITH ACUTE (Ó)
299             chr(0x00d4) => '{\\^O}', # LATIN CAPITAL LETTER O WITH CIRCUMFLEX (Ô)
300             chr(0x00d5) => '{\\~O}', # LATIN CAPITAL LETTER O WITH TILDE (Õ)
301             chr(0x00d6) => '{\\"O}', # LATIN CAPITAL LETTER O WITH DIAERESIS (Ö)
302             chr(0x00d7) => '{\\texttimes}', # MULTIPLICATION SIGN (×)
303             chr(0x00d8) => '{\\O}', # LATIN CAPITAL LETTER O WITH STROKE (Ø)
304             chr(0x00d9) => '{\\`U}', # LATIN CAPITAL LETTER U WITH GRAVE (Ù)
305             chr(0x00da) => '{\\\'U}', # LATIN CAPITAL LETTER U WITH ACUTE (Ú)
306             chr(0x00db) => '{\\^U}', # LATIN CAPITAL LETTER U WITH CIRCUMFLEX (Û)
307             chr(0x00dc) => '{\\"U}', # LATIN CAPITAL LETTER U WITH DIAERESIS (Ü)
308             chr(0x00dd) => '{\\\'Y}', # LATIN CAPITAL LETTER Y WITH ACUTE (Ý)
309             chr(0x00de) => '{\\TH}', # LATIN CAPITAL LETTER THORN (Þ)
310             chr(0x00df) => '{\\ss}', # LATIN SMALL LETTER SHARP S (ß)
311             chr(0x00e0) => '{\\`a}', # LATIN SMALL LETTER A WITH GRAVE (à)
312             chr(0x00e1) => '{\\\'a}', # LATIN SMALL LETTER A WITH ACUTE (á)
313             chr(0x00e2) => '{\\^a}', # LATIN SMALL LETTER A WITH CIRCUMFLEX (â)
314             chr(0x00e3) => '{\\~a}', # LATIN SMALL LETTER A WITH TILDE (ã)
315             chr(0x00e4) => '{\\"a}', # LATIN SMALL LETTER A WITH DIAERESIS (ä)
316             chr(0x00e5) => '{\\aa}', # LATIN SMALL LETTER A WITH RING ABOVE (å)
317             chr(0x00e6) => '{\\ae}', # LATIN SMALL LETTER AE (æ)
318             chr(0x00e7) => '\\c{c}', # LATIN SMALL LETTER C WITH CEDILLA (ç)
319             chr(0x00e8) => '{\\`e}', # LATIN SMALL LETTER E WITH GRAVE (è)
320             chr(0x00e9) => '{\\\'e}', # LATIN SMALL LETTER E WITH ACUTE (é)
321             chr(0x00ea) => '{\\^e}', # LATIN SMALL LETTER E WITH CIRCUMFLEX (ê)
322             chr(0x00eb) => '{\\"e}', # LATIN SMALL LETTER E WITH DIAERESIS (ë)
323             chr(0x00ec) => '{\\`i}', # LATIN SMALL LETTER I WITH GRAVE (ì)
324             chr(0x00ed) => '{\\\'i}', # LATIN SMALL LETTER I WITH ACUTE (í)
325             chr(0x00ee) => '{\\^i}', # LATIN SMALL LETTER I WITH CIRCUMFLEX (î)
326             chr(0x00ef) => '{\\"i}', # LATIN SMALL LETTER I WITH DIAERESIS (ï)
327             chr(0x00f0) => '{\\dh}', # LATIN SMALL LETTER ETH (ð)
328             chr(0x00f1) => '{\\~n}', # LATIN SMALL LETTER N WITH TILDE (ñ)
329             chr(0x00f2) => '{\\`o}', # LATIN SMALL LETTER O WITH GRAVE (ò)
330             chr(0x00f3) => '{\\\'o}', # LATIN SMALL LETTER O WITH ACUTE (ó)
331             chr(0x00f4) => '{\\^o}', # LATIN SMALL LETTER O WITH CIRCUMFLEX (ô)
332             chr(0x00f5) => '{\\~o}', # LATIN SMALL LETTER O WITH TILDE (õ)
333             chr(0x00f6) => '{\\"o}', # LATIN SMALL LETTER O WITH DIAERESIS (ö)
334             chr(0x00f7) => '{\\textdiv}', # DIVISION SIGN (÷)
335             chr(0x00f8) => '{\\o}', # LATIN SMALL LETTER O WITH STROKE (ø)
336             chr(0x00f9) => '{\\`u}', # LATIN SMALL LETTER U WITH GRAVE (ù)
337             chr(0x00fa) => '{\\\'u}', # LATIN SMALL LETTER U WITH ACUTE (ú)
338             chr(0x00fb) => '{\\^u}', # LATIN SMALL LETTER U WITH CIRCUMFLEX (û)
339             chr(0x00fc) => '{\\"u}', # LATIN SMALL LETTER U WITH DIAERESIS (ü)
340             chr(0x00fd) => '{\\\'y}', # LATIN SMALL LETTER Y WITH ACUTE (ý)
341             chr(0x00fe) => '{\\th}', # LATIN SMALL LETTER THORN (þ)
342             chr(0x00ff) => '{\\"y}', # LATIN SMALL LETTER Y WITH DIAERESIS (ÿ)
343              
344             # Latin Extended-A
345              
346             chr(0x0100) => '\\={A}', # LATIN CAPITAL LETTER A WITH MACRON
347             chr(0x0101) => '\\={a}', # LATIN SMALL LETTER A WITH MACRON
348             chr(0x0102) => '\\u{A}', # LATIN CAPITAL LETTER A WITH BREVE
349             chr(0x0103) => '\\u{a}', # LATIN SMALL LETTER A WITH BREVE
350             chr(0x0104) => '\\k{A}', # LATIN CAPITAL LETTER A WITH OGONEK
351             chr(0x0105) => '\\k{a}', # LATIN SMALL LETTER A WITH OGONEK
352             chr(0x0106) => '\\\'{C}', # LATIN CAPITAL LETTER C WITH ACUTE
353             chr(0x0107) => '\\\'{c}', # LATIN SMALL LETTER C WITH ACUTE
354             chr(0x0108) => '\\^{C}', # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
355             chr(0x0109) => '\\^{c}', # LATIN SMALL LETTER C WITH CIRCUMFLEX
356             chr(0x010a) => '\\.{C}', # LATIN CAPITAL LETTER C WITH DOT ABOVE
357             chr(0x010b) => '\\.{c}', # LATIN SMALL LETTER C WITH DOT ABOVE
358             chr(0x010c) => '\\v{C}', # LATIN CAPITAL LETTER C WITH CARON
359             chr(0x010d) => '\\v{c}', # LATIN SMALL LETTER C WITH CARON
360             chr(0x010e) => '\\v{D}', # LATIN CAPITAL LETTER D WITH CARON
361             chr(0x010f) => '\\v{d}', # LATIN SMALL LETTER D WITH CARON
362             chr(0x0112) => '\\={E}', # LATIN CAPITAL LETTER E WITH MACRON
363             chr(0x0113) => '\\={e}', # LATIN SMALL LETTER E WITH MACRON
364             chr(0x0114) => '\\u{E}', # LATIN CAPITAL LETTER E WITH BREVE
365             chr(0x0115) => '\\u{e}', # LATIN SMALL LETTER E WITH BREVE
366             chr(0x0116) => '\\.{E}', # LATIN CAPITAL LETTER E WITH DOT ABOVE
367             chr(0x0117) => '\\.{e}', # LATIN SMALL LETTER E WITH DOT ABOVE
368             chr(0x0118) => '\\k{E}', # LATIN CAPITAL LETTER E WITH OGONEK
369             chr(0x0119) => '\\k{e}', # LATIN SMALL LETTER E WITH OGONEK
370             chr(0x011a) => '\\v{E}', # LATIN CAPITAL LETTER E WITH CARON
371             chr(0x011b) => '\\v{e}', # LATIN SMALL LETTER E WITH CARON
372             chr(0x011c) => '\\^{G}', # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
373             chr(0x011d) => '\\^{g}', # LATIN SMALL LETTER G WITH CIRCUMFLEX
374             chr(0x011e) => '\\u{G}', # LATIN CAPITAL LETTER G WITH BREVE
375             chr(0x011f) => '\\u{g}', # LATIN SMALL LETTER G WITH BREVE
376             chr(0x0120) => '\\.{G}', # LATIN CAPITAL LETTER G WITH DOT ABOVE
377             chr(0x0121) => '\\.{g}', # LATIN SMALL LETTER G WITH DOT ABOVE
378             chr(0x0122) => '\\c{G}', # LATIN CAPITAL LETTER G WITH CEDILLA
379             chr(0x0123) => '\\c{g}', # LATIN SMALL LETTER G WITH CEDILLA
380             chr(0x0124) => '\\^{H}', # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
381             chr(0x0125) => '\\^{h}', # LATIN SMALL LETTER H WITH CIRCUMFLEX
382             chr(0x0128) => '\\~{I}', # LATIN CAPITAL LETTER I WITH TILDE
383             chr(0x0129) => '\\~{\\i}', # LATIN SMALL LETTER I WITH TILDE
384             chr(0x012a) => '\\={I}', # LATIN CAPITAL LETTER I WITH MACRON
385             chr(0x012b) => '\\={\\i}', # LATIN SMALL LETTER I WITH MACRON
386             chr(0x012c) => '\\u{I}', # LATIN CAPITAL LETTER I WITH BREVE
387             chr(0x012d) => '\\u{\\i}', # LATIN SMALL LETTER I WITH BREVE
388             chr(0x012e) => '\\k{I}', # LATIN CAPITAL LETTER I WITH OGONEK
389             chr(0x012f) => '\\k{i}', # LATIN SMALL LETTER I WITH OGONEK
390             chr(0x0130) => '\\.{I}', # LATIN CAPITAL LETTER I WITH DOT ABOVE
391             chr(0x0131) => '{\\i}', # LATIN SMALL LETTER DOTLESS I
392             chr(0x0134) => '\\^{J}', # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
393             chr(0x0135) => '\\^{\\j}', # LATIN SMALL LETTER J WITH CIRCUMFLEX
394             chr(0x0136) => '\\c{K}', # LATIN CAPITAL LETTER K WITH CEDILLA
395             chr(0x0137) => '\\c{k}', # LATIN SMALL LETTER K WITH CEDILLA
396             chr(0x0139) => '\\\'{L}', # LATIN CAPITAL LETTER L WITH ACUTE
397             chr(0x013a) => '\\\'{l}', # LATIN SMALL LETTER L WITH ACUTE
398             chr(0x013b) => '\\c{L}', # LATIN CAPITAL LETTER L WITH CEDILLA
399             chr(0x013c) => '\\c{l}', # LATIN SMALL LETTER L WITH CEDILLA
400             chr(0x013d) => '\\v{L}', # LATIN CAPITAL LETTER L WITH CARON
401             chr(0x013e) => '\\v{l}', # LATIN SMALL LETTER L WITH CARON
402             chr(0x0143) => '\\\'{N}', # LATIN CAPITAL LETTER N WITH ACUTE
403             chr(0x0144) => '\\\'{n}', # LATIN SMALL LETTER N WITH ACUTE
404             chr(0x0145) => '\\c{N}', # LATIN CAPITAL LETTER N WITH CEDILLA
405             chr(0x0146) => '\\c{n}', # LATIN SMALL LETTER N WITH CEDILLA
406             chr(0x0147) => '\\v{N}', # LATIN CAPITAL LETTER N WITH CARON
407             chr(0x0148) => '\\v{n}', # LATIN SMALL LETTER N WITH CARON
408             chr(0x014c) => '\\={O}', # LATIN CAPITAL LETTER O WITH MACRON
409             chr(0x014d) => '\\={o}', # LATIN SMALL LETTER O WITH MACRON
410             chr(0x014e) => '\\u{O}', # LATIN CAPITAL LETTER O WITH BREVE
411             chr(0x014f) => '\\u{o}', # LATIN SMALL LETTER O WITH BREVE
412             chr(0x0152) => '{\\OE}', # LATIN CAPITAL LIGATURE OE (Œ)
413             chr(0x0153) => '{\\oe}', # LATIN SMALL LIGATURE OE (œ)
414             chr(0x0154) => '\\\'{R}', # LATIN CAPITAL LETTER R WITH ACUTE
415             chr(0x0155) => '\\\'{r}', # LATIN SMALL LETTER R WITH ACUTE
416             chr(0x0156) => '\\c{R}', # LATIN CAPITAL LETTER R WITH CEDILLA
417             chr(0x0157) => '\\c{r}', # LATIN SMALL LETTER R WITH CEDILLA
418             chr(0x0158) => '\\v{R}', # LATIN CAPITAL LETTER R WITH CARON
419             chr(0x0159) => '\\v{r}', # LATIN SMALL LETTER R WITH CARON
420             chr(0x015a) => '\\\'{S}', # LATIN CAPITAL LETTER S WITH ACUTE
421             chr(0x015b) => '\\\'{s}', # LATIN SMALL LETTER S WITH ACUTE
422             chr(0x015c) => '\\^{S}', # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
423             chr(0x015d) => '\\^{s}', # LATIN SMALL LETTER S WITH CIRCUMFLEX
424             chr(0x015e) => '\\c{S}', # LATIN CAPITAL LETTER S WITH CEDILLA
425             chr(0x015f) => '\\c{s}', # LATIN SMALL LETTER S WITH CEDILLA
426             chr(0x0160) => '\\v{S}', # LATIN CAPITAL LETTER S WITH CARON (Š)
427             chr(0x0161) => '\\v{s}', # LATIN SMALL LETTER S WITH CARON (š)
428             chr(0x0162) => '\\c{T}', # LATIN CAPITAL LETTER T WITH CEDILLA
429             chr(0x0163) => '\\c{t}', # LATIN SMALL LETTER T WITH CEDILLA
430             chr(0x0164) => '\\v{T}', # LATIN CAPITAL LETTER T WITH CARON
431             chr(0x0165) => '\\v{t}', # LATIN SMALL LETTER T WITH CARON
432             chr(0x0168) => '\\~{U}', # LATIN CAPITAL LETTER U WITH TILDE
433             chr(0x0169) => '\\~{u}', # LATIN SMALL LETTER U WITH TILDE
434             chr(0x016a) => '\\={U}', # LATIN CAPITAL LETTER U WITH MACRON
435             chr(0x016b) => '\\={u}', # LATIN SMALL LETTER U WITH MACRON
436             chr(0x016c) => '\\u{U}', # LATIN CAPITAL LETTER U WITH BREVE
437             chr(0x016d) => '\\u{u}', # LATIN SMALL LETTER U WITH BREVE
438             chr(0x016e) => '\\r{U}', # LATIN CAPITAL LETTER U WITH RING ABOVE
439             chr(0x016f) => '\\r{u}', # LATIN SMALL LETTER U WITH RING ABOVE
440             chr(0x0172) => '\\k{U}', # LATIN CAPITAL LETTER U WITH OGONEK
441             chr(0x0173) => '\\k{u}', # LATIN SMALL LETTER U WITH OGONEK
442             chr(0x0174) => '\\^{W}', # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
443             chr(0x0175) => '\\^{w}', # LATIN SMALL LETTER W WITH CIRCUMFLEX
444             chr(0x0176) => '\\^{Y}', # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
445             chr(0x0177) => '\\^{y}', # LATIN SMALL LETTER Y WITH CIRCUMFLEX
446             chr(0x0178) => '{\\"Y}', # LATIN CAPITAL LETTER Y WITH DIAERESIS (Ÿ)
447             chr(0x0179) => '\\\'{Z}', # LATIN CAPITAL LETTER Z WITH ACUTE
448             chr(0x017a) => '\\\'{z}', # LATIN SMALL LETTER Z WITH ACUTE
449             chr(0x017b) => '\\.{Z}', # LATIN CAPITAL LETTER Z WITH DOT ABOVE
450             chr(0x017c) => '\\.{z}', # LATIN SMALL LETTER Z WITH DOT ABOVE
451             chr(0x017d) => '\\v{Z}', # LATIN CAPITAL LETTER Z WITH CARON
452             chr(0x017e) => '\\v{z}', # LATIN SMALL LETTER Z WITH CARON
453             chr(0x0192) => '{\\textflorin}', # LATIN SMALL LETTER F WITH HOOK (ƒ)
454             chr(0x01cd) => '\\v{A}', # LATIN CAPITAL LETTER A WITH CARON
455             chr(0x01ce) => '\\v{a}', # LATIN SMALL LETTER A WITH CARON
456             chr(0x01cf) => '\\v{I}', # LATIN CAPITAL LETTER I WITH CARON
457             chr(0x01d0) => '\\v{i}', # LATIN SMALL LETTER I WITH CARON
458             chr(0x01d1) => '\\v{O}', # LATIN CAPITAL LETTER O WITH CARON
459             chr(0x01d2) => '\\v{o}', # LATIN SMALL LETTER O WITH CARON
460             chr(0x01d3) => '\\v{U}', # LATIN CAPITAL LETTER U WITH CARON
461             chr(0x01d4) => '\\v{u}', # LATIN SMALL LETTER U WITH CARON
462             chr(0x01e6) => '\\v{G}', # LATIN CAPITAL LETTER G WITH CARON
463             chr(0x01e7) => '\\v{g}', # LATIN SMALL LETTER G WITH CARON
464             chr(0x01e8) => '\\v{K}', # LATIN CAPITAL LETTER K WITH CARON
465             chr(0x01e9) => '\\v{k}', # LATIN SMALL LETTER K WITH CARON
466             chr(0x01ea) => '\\k{O}', # LATIN CAPITAL LETTER O WITH OGONEK
467             chr(0x01eb) => '\\k{o}', # LATIN SMALL LETTER O WITH OGONEK
468             chr(0x01f0) => '\\v{j}', # LATIN SMALL LETTER J WITH CARON
469             chr(0x01f4) => '\\\'{G}', # LATIN CAPITAL LETTER G WITH ACUTE
470             chr(0x01f5) => '\\\'{g}', # LATIN SMALL LETTER G WITH ACUTE
471             chr(0x01f8) => '\\`{N}', # LATIN CAPITAL LETTER N WITH GRAVE
472             chr(0x01f9) => '\\`{n}', # LATIN SMALL LETTER N WITH GRAVE
473              
474             # Spacing Modifier Letters
475              
476             chr(0x02c6) => '{\\textasciicircum}', # MODIFIER LETTER CIRCUMFLEX ACCENT (ˆ)
477             chr(0x02dc) => '{\\textasciitilde}', # SMALL TILDE (˜)
478              
479             # Greek and Coptic
480              
481             chr(0x0391) => '\\ensuremath{\\mathrm{A}}', # GREEK CAPITAL LETTER ALPHA (Α)
482             chr(0x0392) => '\\ensuremath{\\mathrm{B}}', # GREEK CAPITAL LETTER BETA (Β)
483             chr(0x0393) => '\\ensuremath{\\Gamma}', # GREEK CAPITAL LETTER GAMMA (Γ)
484             chr(0x0394) => '\\ensuremath{\\Delta}', # GREEK CAPITAL LETTER DELTA (Δ)
485             chr(0x0395) => '\\ensuremath{\\mathrm{E}}', # GREEK CAPITAL LETTER EPSILON (Ε)
486             chr(0x0396) => '\\ensuremath{\\mathrm{Z}}', # GREEK CAPITAL LETTER ZETA (Ζ)
487             chr(0x0397) => '\\ensuremath{\\mathrm{H}}', # GREEK CAPITAL LETTER ETA (Η)
488             chr(0x0398) => '\\ensuremath{\\Theta}', # GREEK CAPITAL LETTER THETA (Θ)
489             chr(0x0399) => '\\ensuremath{\\mathrm{I}}', # GREEK CAPITAL LETTER IOTA (Ι)
490             chr(0x039a) => '\\ensuremath{\\mathrm{K}}', # GREEK CAPITAL LETTER KAPPA (Κ)
491             chr(0x039b) => '\\ensuremath{\\Lambda}', # GREEK CAPITAL LETTER LAMDA (Λ)
492             chr(0x039c) => '\\ensuremath{\\mathrm{M}}', # GREEK CAPITAL LETTER MU (Μ)
493             chr(0x039d) => '\\ensuremath{\\mathrm{N}}', # GREEK CAPITAL LETTER NU (Ν)
494             chr(0x039e) => '\\ensuremath{\\Xi}', # GREEK CAPITAL LETTER XI (Ξ)
495             chr(0x039f) => '\\ensuremath{\\mathrm{O}}', # GREEK CAPITAL LETTER OMICRON (Ο)
496             chr(0x03a0) => '\\ensuremath{\\Pi}', # GREEK CAPITAL LETTER PI (Π)
497             chr(0x03a1) => '\\ensuremath{\\mathrm{R}}', # GREEK CAPITAL LETTER RHO (Ρ)
498             chr(0x03a3) => '\\ensuremath{\\Sigma}', # GREEK CAPITAL LETTER SIGMA (Σ)
499             chr(0x03a4) => '\\ensuremath{\\mathrm{T}}', # GREEK CAPITAL LETTER TAU (Τ)
500             chr(0x03a5) => '\\ensuremath{\\Upsilon}', # GREEK CAPITAL LETTER UPSILON (Υ)
501             chr(0x03a6) => '\\ensuremath{\\Phi}', # GREEK CAPITAL LETTER PHI (Φ)
502             chr(0x03a7) => '\\ensuremath{\\mathrm{X}}', # GREEK CAPITAL LETTER CHI (Χ)
503             chr(0x03a8) => '\\ensuremath{\\Psi}', # GREEK CAPITAL LETTER PSI (Ψ)
504             chr(0x03a9) => '\\ensuremath{\\Omega}', # GREEK CAPITAL LETTER OMEGA (Ω)
505             chr(0x03b1) => '\\ensuremath{\\alpha}', # GREEK SMALL LETTER ALPHA (α)
506             chr(0x03b2) => '\\ensuremath{\\beta}', # GREEK SMALL LETTER BETA (β)
507             chr(0x03b3) => '\\ensuremath{\\gamma}', # GREEK SMALL LETTER GAMMA (γ)
508             chr(0x03b4) => '\\ensuremath{\\delta}', # GREEK SMALL LETTER DELTA (δ)
509             chr(0x03b5) => '\\ensuremath{\\epsilon}', # GREEK SMALL LETTER EPSILON (ε)
510             chr(0x03b6) => '\\ensuremath{\\zeta}', # GREEK SMALL LETTER ZETA (ζ)
511             chr(0x03b7) => '\\ensuremath{\\eta}', # GREEK SMALL LETTER ETA (η)
512             chr(0x03b8) => '\\ensuremath{\\theta}', # GREEK SMALL LETTER THETA (θ)
513             chr(0x03b9) => '\\ensuremath{\\iota}', # GREEK SMALL LETTER IOTA (ι)
514             chr(0x03ba) => '\\ensuremath{\\kappa}', # GREEK SMALL LETTER KAPPA (κ)
515             chr(0x03bb) => '\\ensuremath{\\lambda}', # GREEK SMALL LETTER LAMDA (λ)
516             chr(0x03bc) => '\\ensuremath{\\mu}', # GREEK SMALL LETTER MU (μ)
517             chr(0x03bd) => '\\ensuremath{\\nu}', # GREEK SMALL LETTER NU (ν)
518             chr(0x03be) => '\\ensuremath{\\xi}', # GREEK SMALL LETTER XI (ξ)
519             chr(0x03bf) => '\\ensuremath{o}', # GREEK SMALL LETTER OMICRON (ο)
520             chr(0x03c0) => '\\ensuremath{\\pi}', # GREEK SMALL LETTER PI (π)
521             chr(0x03c1) => '\\ensuremath{\\rho}', # GREEK SMALL LETTER RHO (ρ)
522             chr(0x03c3) => '\\ensuremath{\\sigma}', # GREEK SMALL LETTER SIGMA (σ)
523             chr(0x03c4) => '\\ensuremath{\\tau}', # GREEK SMALL LETTER TAU (τ)
524             chr(0x03c5) => '\\ensuremath{\\upsilon}', # GREEK SMALL LETTER UPSILON (υ)
525             chr(0x03c6) => '\\ensuremath{\\phi}', # GREEK SMALL LETTER PHI (φ)
526             chr(0x03c7) => '\\ensuremath{\\chi}', # GREEK SMALL LETTER CHI (χ)
527             chr(0x03c8) => '\\ensuremath{\\psi}', # GREEK SMALL LETTER PSI (ψ)
528             chr(0x03c9) => '\\ensuremath{\\omega}', # GREEK SMALL LETTER OMEGA (ω)
529             chr(0x0e3f) => '{\\textbaht}', # THAI CURRENCY SYMBOL BAHT
530              
531             # Latin Extended Additional
532              
533             chr(0x1e02) => '\\.{B}', # LATIN CAPITAL LETTER B WITH DOT ABOVE
534             chr(0x1e03) => '\\.{b}', # LATIN SMALL LETTER B WITH DOT ABOVE
535             chr(0x1e04) => '\\d{B}', # LATIN CAPITAL LETTER B WITH DOT BELOW
536             chr(0x1e05) => '\\d{b}', # LATIN SMALL LETTER B WITH DOT BELOW
537             chr(0x1e06) => '\\b{B}', # LATIN CAPITAL LETTER B WITH LINE BELOW
538             chr(0x1e07) => '\\b{b}', # LATIN SMALL LETTER B WITH LINE BELOW
539             chr(0x1e0a) => '\\.{D}', # LATIN CAPITAL LETTER D WITH DOT ABOVE
540             chr(0x1e0b) => '\\.{d}', # LATIN SMALL LETTER D WITH DOT ABOVE
541             chr(0x1e0c) => '\\d{D}', # LATIN CAPITAL LETTER D WITH DOT BELOW
542             chr(0x1e0d) => '\\d{d}', # LATIN SMALL LETTER D WITH DOT BELOW
543             chr(0x1e0e) => '\\b{D}', # LATIN CAPITAL LETTER D WITH LINE BELOW
544             chr(0x1e0f) => '\\b{d}', # LATIN SMALL LETTER D WITH LINE BELOW
545             chr(0x1e10) => '\\c{D}', # LATIN CAPITAL LETTER D WITH CEDILLA
546             chr(0x1e11) => '\\c{d}', # LATIN SMALL LETTER D WITH CEDILLA
547             chr(0x1e1e) => '\\.{F}', # LATIN CAPITAL LETTER F WITH DOT ABOVE
548             chr(0x1e1f) => '\\.{f}', # LATIN SMALL LETTER F WITH DOT ABOVE
549             chr(0x1e20) => '\\={G}', # LATIN CAPITAL LETTER G WITH MACRON
550             chr(0x1e21) => '\\={g}', # LATIN SMALL LETTER G WITH MACRON
551             chr(0x1e22) => '\\.{H}', # LATIN CAPITAL LETTER H WITH DOT ABOVE
552             chr(0x1e23) => '\\.{h}', # LATIN SMALL LETTER H WITH DOT ABOVE
553             chr(0x1e24) => '\\d{H}', # LATIN CAPITAL LETTER H WITH DOT BELOW
554             chr(0x1e25) => '\\d{h}', # LATIN SMALL LETTER H WITH DOT BELOW
555             chr(0x1e28) => '\\c{H}', # LATIN CAPITAL LETTER H WITH CEDILLA
556             chr(0x1e29) => '\\c{h}', # LATIN SMALL LETTER H WITH CEDILLA
557             chr(0x1e30) => '\\\'{K}', # LATIN CAPITAL LETTER K WITH ACUTE
558             chr(0x1e31) => '\\\'{k}', # LATIN SMALL LETTER K WITH ACUTE
559             chr(0x1e32) => '\\d{K}', # LATIN CAPITAL LETTER K WITH DOT BELOW
560             chr(0x1e33) => '\\d{k}', # LATIN SMALL LETTER K WITH DOT BELOW
561             chr(0x1e34) => '\\b{K}', # LATIN CAPITAL LETTER K WITH LINE BELOW
562             chr(0x1e35) => '\\b{k}', # LATIN SMALL LETTER K WITH LINE BELOW
563             chr(0x1e36) => '\\d{L}', # LATIN CAPITAL LETTER L WITH DOT BELOW
564             chr(0x1e37) => '\\d{l}', # LATIN SMALL LETTER L WITH DOT BELOW
565             chr(0x1e3a) => '\\b{L}', # LATIN CAPITAL LETTER L WITH LINE BELOW
566             chr(0x1e3b) => '\\b{l}', # LATIN SMALL LETTER L WITH LINE BELOW
567             chr(0x1e3e) => '\\\'{M}', # LATIN CAPITAL LETTER M WITH ACUTE
568             chr(0x1e3f) => '\\\'{m}', # LATIN SMALL LETTER M WITH ACUTE
569             chr(0x1e40) => '\\.{M}', # LATIN CAPITAL LETTER M WITH DOT ABOVE
570             chr(0x1e41) => '\\.{m}', # LATIN SMALL LETTER M WITH DOT ABOVE
571             chr(0x1e42) => '\\d{M}', # LATIN CAPITAL LETTER M WITH DOT BELOW
572             chr(0x1e43) => '\\d{m}', # LATIN SMALL LETTER M WITH DOT BELOW
573             chr(0x1e44) => '\\.{N}', # LATIN CAPITAL LETTER N WITH DOT ABOVE
574             chr(0x1e45) => '\\.{n}', # LATIN SMALL LETTER N WITH DOT ABOVE
575             chr(0x1e46) => '\\d{N}', # LATIN CAPITAL LETTER N WITH DOT BELOW
576             chr(0x1e47) => '\\d{n}', # LATIN SMALL LETTER N WITH DOT BELOW
577             chr(0x1e48) => '\\b{N}', # LATIN CAPITAL LETTER N WITH LINE BELOW
578             chr(0x1e49) => '\\b{n}', # LATIN SMALL LETTER N WITH LINE BELOW
579             chr(0x1e54) => '\\\'{P}', # LATIN CAPITAL LETTER P WITH ACUTE
580             chr(0x1e55) => '\\\'{p}', # LATIN SMALL LETTER P WITH ACUTE
581             chr(0x1e56) => '\\.{P}', # LATIN CAPITAL LETTER P WITH DOT ABOVE
582             chr(0x1e57) => '\\.{p}', # LATIN SMALL LETTER P WITH DOT ABOVE
583             chr(0x1e58) => '\\.{R}', # LATIN CAPITAL LETTER R WITH DOT ABOVE
584             chr(0x1e59) => '\\.{r}', # LATIN SMALL LETTER R WITH DOT ABOVE
585             chr(0x1e5a) => '\\d{R}', # LATIN CAPITAL LETTER R WITH DOT BELOW
586             chr(0x1e5b) => '\\d{r}', # LATIN SMALL LETTER R WITH DOT BELOW
587             chr(0x1e5e) => '\\b{R}', # LATIN CAPITAL LETTER R WITH LINE BELOW
588             chr(0x1e5f) => '\\b{r}', # LATIN SMALL LETTER R WITH LINE BELOW
589             chr(0x1e60) => '\\.{S}', # LATIN CAPITAL LETTER S WITH DOT ABOVE
590             chr(0x1e61) => '\\.{s}', # LATIN SMALL LETTER S WITH DOT ABOVE
591             chr(0x1e62) => '\\d{S}', # LATIN CAPITAL LETTER S WITH DOT BELOW
592             chr(0x1e63) => '\\d{s}', # LATIN SMALL LETTER S WITH DOT BELOW
593             chr(0x1e6a) => '\\.{T}', # LATIN CAPITAL LETTER T WITH DOT ABOVE
594             chr(0x1e6b) => '\\.{t}', # LATIN SMALL LETTER T WITH DOT ABOVE
595             chr(0x1e6c) => '\\d{T}', # LATIN CAPITAL LETTER T WITH DOT BELOW
596             chr(0x1e6d) => '\\d{t}', # LATIN SMALL LETTER T WITH DOT BELOW
597             chr(0x1e6e) => '\\b{T}', # LATIN CAPITAL LETTER T WITH LINE BELOW
598             chr(0x1e6f) => '\\b{t}', # LATIN SMALL LETTER T WITH LINE BELOW
599             chr(0x1e7c) => '\\~{V}', # LATIN CAPITAL LETTER V WITH TILDE
600             chr(0x1e7d) => '\\~{v}', # LATIN SMALL LETTER V WITH TILDE
601             chr(0x1e7e) => '\\d{V}', # LATIN CAPITAL LETTER V WITH DOT BELOW
602             chr(0x1e7f) => '\\d{v}', # LATIN SMALL LETTER V WITH DOT BELOW
603             chr(0x1e80) => '\\`{W}', # LATIN CAPITAL LETTER W WITH GRAVE
604             chr(0x1e81) => '\\`{w}', # LATIN SMALL LETTER W WITH GRAVE
605             chr(0x1e82) => '\\\'{W}', # LATIN CAPITAL LETTER W WITH ACUTE
606             chr(0x1e83) => '\\\'{w}', # LATIN SMALL LETTER W WITH ACUTE
607             chr(0x1e86) => '\\.{W}', # LATIN CAPITAL LETTER W WITH DOT ABOVE
608             chr(0x1e87) => '\\.{w}', # LATIN SMALL LETTER W WITH DOT ABOVE
609             chr(0x1e88) => '\\d{W}', # LATIN CAPITAL LETTER W WITH DOT BELOW
610             chr(0x1e89) => '\\d{w}', # LATIN SMALL LETTER W WITH DOT BELOW
611             chr(0x1e8a) => '\\.{X}', # LATIN CAPITAL LETTER X WITH DOT ABOVE
612             chr(0x1e8b) => '\\.{x}', # LATIN SMALL LETTER X WITH DOT ABOVE
613             chr(0x1e8e) => '\\.{Y}', # LATIN CAPITAL LETTER Y WITH DOT ABOVE
614             chr(0x1e8f) => '\\.{y}', # LATIN SMALL LETTER Y WITH DOT ABOVE
615             chr(0x1e90) => '\\^{Z}', # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
616             chr(0x1e91) => '\\^{z}', # LATIN SMALL LETTER Z WITH CIRCUMFLEX
617             chr(0x1e92) => '\\d{Z}', # LATIN CAPITAL LETTER Z WITH DOT BELOW
618             chr(0x1e93) => '\\d{z}', # LATIN SMALL LETTER Z WITH DOT BELOW
619             chr(0x1e94) => '\\b{Z}', # LATIN CAPITAL LETTER Z WITH LINE BELOW
620             chr(0x1e95) => '\\b{z}', # LATIN SMALL LETTER Z WITH LINE BELOW
621             chr(0x1e96) => '\\b{h}', # LATIN SMALL LETTER H WITH LINE BELOW
622             chr(0x1e98) => '\\r{w}', # LATIN SMALL LETTER W WITH RING ABOVE
623             chr(0x1e99) => '\\r{y}', # LATIN SMALL LETTER Y WITH RING ABOVE
624             chr(0x1ea0) => '\\d{A}', # LATIN CAPITAL LETTER A WITH DOT BELOW
625             chr(0x1ea1) => '\\d{a}', # LATIN SMALL LETTER A WITH DOT BELOW
626             chr(0x1eb8) => '\\d{E}', # LATIN CAPITAL LETTER E WITH DOT BELOW
627             chr(0x1eb9) => '\\d{e}', # LATIN SMALL LETTER E WITH DOT BELOW
628             chr(0x1ebc) => '\\~{E}', # LATIN CAPITAL LETTER E WITH TILDE
629             chr(0x1ebd) => '\\~{e}', # LATIN SMALL LETTER E WITH TILDE
630             chr(0x1eca) => '\\d{I}', # LATIN CAPITAL LETTER I WITH DOT BELOW
631             chr(0x1ecb) => '\\d{i}', # LATIN SMALL LETTER I WITH DOT BELOW
632             chr(0x1ecc) => '\\d{O}', # LATIN CAPITAL LETTER O WITH DOT BELOW
633             chr(0x1ecd) => '\\d{o}', # LATIN SMALL LETTER O WITH DOT BELOW
634             chr(0x1ee4) => '\\d{U}', # LATIN CAPITAL LETTER U WITH DOT BELOW
635             chr(0x1ee5) => '\\d{u}', # LATIN SMALL LETTER U WITH DOT BELOW
636             chr(0x1ef2) => '\\`{Y}', # LATIN CAPITAL LETTER Y WITH GRAVE
637             chr(0x1ef3) => '\\`{y}', # LATIN SMALL LETTER Y WITH GRAVE
638             chr(0x1ef4) => '\\d{Y}', # LATIN CAPITAL LETTER Y WITH DOT BELOW
639             chr(0x1ef5) => '\\d{y}', # LATIN SMALL LETTER Y WITH DOT BELOW
640             chr(0x1ef8) => '\\~{Y}', # LATIN CAPITAL LETTER Y WITH TILDE
641             chr(0x1ef9) => '\\~{y}', # LATIN SMALL LETTER Y WITH TILDE
642              
643             # General Punctuation
644              
645             chr(0x2002) => '\\phantom{N}', # EN SPACE ( )
646             chr(0x2003) => '\\hspace{1em}', # EM SPACE ( )
647             chr(0x2004) => '\\hspace{.333333em}', # THREE-PER-EM SPACE
648             chr(0x2005) => '\\hspace{.25em}', # FOUR-PER-EM SPACE
649             chr(0x2006) => '\\hspace{.166666em}', # SIX-PER-EM SPACE
650             chr(0x2007) => '\\phantom{0}', # FIGURE SPACE
651             chr(0x2008) => '\\phantom{,}', # PUNCTUATION SPACE
652             chr(0x2009) => '\\,', # THIN SPACE ( )
653             chr(0x200a) => '\\ensuremath{\\mkern1mu}', # HAIR SPACE
654             chr(0x200c) => '{}', # ZERO WIDTH NON-JOINER (‌)
655             chr(0x2013) => '--', # EN DASH (–)
656             chr(0x2014) => '---', # EM DASH (—)
657             chr(0x2015) => '\\rule{1em}{1pt}', # HORIZONTAL BAR
658             chr(0x2016) => '{\\textbardbl}', # DOUBLE VERTICAL LINE
659             chr(0x2018) => '{\\textquoteleft}', # LEFT SINGLE QUOTATION MARK (‘)
660             chr(0x2019) => '{\\textquoteright}', # RIGHT SINGLE QUOTATION MARK (’)
661             chr(0x201a) => '{\\quotesinglbase}', # SINGLE LOW-9 QUOTATION MARK (‚)
662             chr(0x201c) => '{\\textquotedblleft}', # LEFT DOUBLE QUOTATION MARK (“)
663             chr(0x201d) => '{\\textquotedblright}', # RIGHT DOUBLE QUOTATION MARK (”)
664             chr(0x201e) => '{\\quotedblbase}', # DOUBLE LOW-9 QUOTATION MARK („)
665             chr(0x2020) => '{\\textdagger}', # DAGGER (†)
666             chr(0x2021) => '{\\textdaggerdbl}', # DOUBLE DAGGER (‡)
667             chr(0x2022) => '{\\textbullet}', # BULLET (•)
668             chr(0x2026) => '{\\textellipsis}', # HORIZONTAL ELLIPSIS (…)
669             chr(0x2030) => '{\\textperthousand}', # PER MILLE SIGN (‰)
670             chr(0x2032) => '{\\textquotesingle}', # PRIME (′)
671             chr(0x2033) => '{\\textquotedbl}', # DOUBLE PRIME (″)
672             chr(0x2039) => '{\\guilsinglleft}', # SINGLE LEFT-POINTING ANGLE QUOTATION MARK (‹)
673             chr(0x203a) => '{\\guilsinglright}', # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK (›)
674             chr(0x203b) => '{\\textreferencemark}', # REFERENCE MARK
675             chr(0x203d) => '{\\textinterrobang}', # INTERROBANG
676             chr(0x203e) => '{\\textasciimacron}', # OVERLINE (‾)
677             chr(0x2044) => '{\\textfractionsolidus}', # FRACTION SLASH (⁄)
678              
679             # Currency Symbols
680              
681             chr(0x20a1) => '{\\textcolonmonetary}', # COLON SIGN
682             chr(0x20a4) => '{\\textlira}', # LIRA SIGN
683             chr(0x20a6) => '{\\textnaira}', # NAIRA SIGN
684             chr(0x20a9) => '{\\textwon}', # WON SIGN
685             chr(0x20ab) => '{\\textdong}', # DONG SIGN
686             chr(0x20ac) => '{\\texteuro}', # EURO SIGN (€)
687              
688             # Letterlike Symbols
689              
690             chr(0x2111) => '\\ensuremath{\\Im}', # BLACK-LETTER CAPITAL I (ℑ); LaTeX \Im typesets the black-letter I
691             chr(0x2116) => '{\\textnumero}', # NUMERO SIGN
692             chr(0x2117) => '{\\textcircledP}', # SOUND RECORDING COPYRIGHT
693             chr(0x2118) => '\\ensuremath{\\wp}', # SCRIPT CAPITAL P (℘)
694             chr(0x211c) => '\\ensuremath{\\Re}', # BLACK-LETTER CAPITAL R (ℜ); LaTeX \Re typesets the black-letter R
695             chr(0x211e) => '{\\textrecipe}', # PRESCRIPTION TAKE
696             chr(0x2120) => '{\\textservicemark}', # SERVICE MARK
697             chr(0x2122) => '{\\texttrademark}', # TRADE MARK SIGN (™)
698             chr(0x2126) => '{\\textohm}', # OHM SIGN
699             chr(0x2127) => '{\\textmho}', # INVERTED OHM SIGN
700             chr(0x212e) => '{\\textestimated}', # ESTIMATED SYMBOL
701             chr(0x2190) => '{\\textleftarrow}', # LEFTWARDS ARROW (←)
702             chr(0x2191) => '{\\textuparrow}', # UPWARDS ARROW (↑)
703             chr(0x2192) => '{\\textrightarrow}', # RIGHTWARDS ARROW (→)
704             chr(0x2193) => '{\\textdownarrow}', # DOWNWARDS ARROW (↓)
705             chr(0x2194) => '\\ensuremath{\\leftrightarrow}', # LEFT RIGHT ARROW (↔)
706             chr(0x21d0) => '\\ensuremath{\\Leftarrow}', # LEFTWARDS DOUBLE ARROW (⇐)
707             chr(0x21d1) => '\\ensuremath{\\Uparrow}', # UPWARDS DOUBLE ARROW (⇑)
708             chr(0x21d2) => '\\ensuremath{\\Rightarrow}', # RIGHTWARDS DOUBLE ARROW (⇒)
709             chr(0x21d3) => '\\ensuremath{\\Downarrow}', # DOWNWARDS DOUBLE ARROW (⇓)
710             chr(0x21d4) => '\\ensuremath{\\Leftrightarrow}', # LEFT RIGHT DOUBLE ARROW (⇔)
711              
712             # Mathematical Operations
713              
714             chr(0x2200) => '\\ensuremath{\\forall}', # FOR ALL (∀)
715             chr(0x2202) => '\\ensuremath{\\partial}', # PARTIAL DIFFERENTIAL (∂)
716             chr(0x2203) => '\\ensuremath{\\exists}', # THERE EXISTS (∃)
717             chr(0x2205) => '\\ensuremath{\\emptyset}', # EMPTY SET (∅)
718             chr(0x2207) => '\\ensuremath{\\nabla}', # NABLA (∇)
719             chr(0x2208) => '\\ensuremath{\\in}', # ELEMENT OF (∈)
720             chr(0x2209) => '\\ensuremath{\\notin}', # NOT AN ELEMENT OF (∉)
721             chr(0x220b) => '\\ensuremath{\\ni}', # CONTAINS AS MEMBER (∋)
722             chr(0x220f) => '\\ensuremath{\\prod}', # N-ARY PRODUCT (∏)
723             chr(0x2211) => '\\ensuremath{\\sum}', # N-ARY SUMMATION (∑)
724             chr(0x2212) => '\\ensuremath{-}', # MINUS SIGN (−)
725             chr(0x2217) => '\\ensuremath{\\ast}', # ASTERISK OPERATOR (∗)
726             chr(0x221a) => '\\ensuremath{\\surd}', # SQUARE ROOT (√)
727             chr(0x221d) => '\\ensuremath{\\propto}', # PROPORTIONAL TO (∝)
728             chr(0x221e) => '\\ensuremath{\\infty}', # INFINITY (∞)
729             chr(0x2220) => '\\ensuremath{\\angle}', # ANGLE (∠)
730             chr(0x2227) => '\\ensuremath{\\wedge}', # LOGICAL AND (∧)
731             chr(0x2228) => '\\ensuremath{\\vee}', # LOGICAL OR (∨)
732             chr(0x2229) => '\\ensuremath{\\cap}', # INTERSECTION (∩)
733             chr(0x222a) => '\\ensuremath{\\cup}', # UNION (∪)
734             chr(0x222b) => '\\ensuremath{\\int}', # INTEGRAL (∫)
735             chr(0x2234) => '\\ensuremath{\\therefore}', # THEREFORE (∴)
736             chr(0x223c) => '\\ensuremath{\\sim}', # TILDE OPERATOR (∼)
737             chr(0x2245) => '\\ensuremath{\\cong}', # APPROXIMATELY EQUAL TO (≅)
738             chr(0x2248) => '\\ensuremath{\\approx}', # ALMOST EQUAL TO (≈); \asymp would typeset U+224D (≍) instead
739             chr(0x2260) => '\\ensuremath{\\neq}', # NOT EQUAL TO (≠)
740             chr(0x2261) => '\\ensuremath{\\equiv}', # IDENTICAL TO (≡)
741             chr(0x2264) => '\\ensuremath{\\leq}', # LESS-THAN OR EQUAL TO (≤)
742             chr(0x2265) => '\\ensuremath{\\geq}', # GREATER-THAN OR EQUAL TO (≥)
743             chr(0x2282) => '\\ensuremath{\\subset}', # SUBSET OF (⊂)
744             chr(0x2283) => '\\ensuremath{\\supset}', # SUPERSET OF (⊃)
745             chr(0x2284) => '\\ensuremath{\\not\\subset}', # NOT A SUBSET OF (⊄)
746             chr(0x2286) => '\\ensuremath{\\subseteq}', # SUBSET OF OR EQUAL TO (⊆)
747             chr(0x2287) => '\\ensuremath{\\supseteq}', # SUPERSET OF OR EQUAL TO (⊇)
748             chr(0x2295) => '\\ensuremath{\\oplus}', # CIRCLED PLUS (⊕)
749             chr(0x2297) => '\\ensuremath{\\otimes}', # CIRCLED TIMES (⊗)
750             chr(0x22a5) => '\\ensuremath{\\perp}', # UP TACK (⊥)
751             chr(0x22c5) => '\\ensuremath{\\cdot}', # DOT OPERATOR (⋅)
752             chr(0x2308) => '\\ensuremath{\\lceil}', # LEFT CEILING (⌈)
753             chr(0x2309) => '\\ensuremath{\\rceil}', # RIGHT CEILING (⌉)
754             chr(0x230a) => '\\ensuremath{\\lfloor}', # LEFT FLOOR (⌊)
755             chr(0x230b) => '\\ensuremath{\\rfloor}', # RIGHT FLOOR (⌋)
756             chr(0x2329) => '\\ensuremath{\\langle}', # LEFT-POINTING ANGLE BRACKET (⟨)
757             chr(0x232a) => '\\ensuremath{\\rangle}', # RIGHT-POINTING ANGLE BRACKET (⟩)
758             chr(0x25ca) => '\\ensuremath{\\lozenge}', # LOZENGE (◊)
759              
760             # Miscellaneous Symbols
761              
762             chr(0x263f) => '{\\Mercury}', # MERCURY
763             chr(0x2640) => '{\\Venus}', # FEMALE SIGN
764             chr(0x2641) => '{\\Earth}', # EARTH
765             chr(0x2642) => '{\\Mars}', # MALE SIGN
766             chr(0x2643) => '{\\Jupiter}', # JUPITER
767             chr(0x2644) => '{\\Saturn}', # SATURN
768             chr(0x2645) => '{\\Uranus}', # URANUS
769             chr(0x2646) => '{\\Neptune}', # NEPTUNE
770             chr(0x2647) => '{\\Pluto}', # PLUTO
771             chr(0x2648) => '{\\Aries}', # ARIES
772             chr(0x2649) => '{\\Taurus}', # TAURUS
773             chr(0x264a) => '{\\Gemini}', # GEMINI
774             chr(0x264b) => '{\\Cancer}', # CANCER
775             chr(0x264c) => '{\\Leo}', # LEO
776             chr(0x264d) => '{\\Virgo}', # VIRGO
777             chr(0x264e) => '{\\Libra}', # LIBRA
778             chr(0x264f) => '{\\Scorpio}', # SCORPIUS
779             chr(0x2650) => '{\\Sagittarius}', # SAGITTARIUS
780             chr(0x2651) => '{\\Capricorn}', # CAPRICORN
781             chr(0x2652) => '{\\Aquarius}', # AQUARIUS
782             chr(0x2653) => '{\\Pisces}', # PISCES
783             chr(0x2660) => '\\ensuremath{\\spadesuit}', # BLACK SPADE SUIT (♠)
784             chr(0x2663) => '\\ensuremath{\\clubsuit}', # BLACK CLUB SUIT (♣)
785             chr(0x2665) => '\\ensuremath{\\heartsuit}', # BLACK HEART SUIT (♥)
786             chr(0x2666) => '\\ensuremath{\\diamondsuit}', # BLACK DIAMOND SUIT (♦)
787             chr(0x266d) => '\\ensuremath{\\flat}', # MUSIC FLAT SIGN
788             chr(0x266e) => '\\ensuremath{\\natural}', # MUSIC NATURAL SIGN
789             chr(0x266f) => '\\ensuremath{\\sharp}', # MUSIC SHARP SIGN
790             chr(0x26ad) => '{\\textmarried}', # MARRIAGE SYMBOL
791             chr(0x26ae) => '{\\textdivorced}', # DIVORCE SYMBOL
792              
793             # Supplemental Punctuation
794              
795             chr(0x2e18) => '{\\textinterrobangdown}', # INVERTED INTERROBANG
796             chr(0x2e3a) => '---{}---', # TWO-EM DASH (⸺)
797             chr(0x2e3b) => '---{}---{}---', # THREE-EM DASH (⸻)
798              
799             );
800              
801             %provided_by = (
802              
803             # Character => name of the LaTeX package recorded for that character's encoding.
804             # NOTE(review): how this table is consulted is not visible in this part of the file.
805             ( map { ( chr($_) => 'textcomp' ) }
806             0x0022, # QUOTATION MARK
807             0x003c, # LESS-THAN SIGN
808             0x003e, # GREATER-THAN SIGN
809             0x005c, # REVERSE SOLIDUS
810             0x007e, # TILDE
811             0x0e3f, # THAI CURRENCY SYMBOL BAHT
812             0x2016, # DOUBLE VERTICAL LINE
813             0x203b, # REFERENCE MARK
814             0x203d, # INTERROBANG
815             0x20a1, # COLON SIGN
816             0x20a4, # LIRA SIGN
817             0x20a6, # NAIRA SIGN
818             0x20a9, # WON SIGN
819             0x20ab, # DONG SIGN
820             0x2116, # NUMERO SIGN
821             0x2117, # SOUND RECORDING COPYRIGHT
822             0x211e, # PRESCRIPTION TAKE
823             0x2120, # SERVICE MARK
824             0x2126, # OHM SIGN
825             0x2127, # INVERTED OHM SIGN
826             0x212e, # ESTIMATED SYMBOL
827             0x26ad, # MARRIAGE SYMBOL
828             0x26ae, # DIVORCE SYMBOL
829             0x2e18, # INVERTED INTERROBANG
830             ),
831              
832             # U+263F .. U+2653 is one contiguous run: MERCURY, FEMALE SIGN, EARTH, MALE SIGN,
833             # JUPITER, SATURN, URANUS, NEPTUNE, PLUTO, ARIES, TAURUS, GEMINI, CANCER, LEO,
834             # VIRGO, LIBRA, SCORPIUS, SAGITTARIUS, CAPRICORN, AQUARIUS, PISCES.
835             ( map { ( chr($_) => 'marvosym' ) } 0x263f .. 0x2653 ),
836              
837             );
850              
851             reset_latex_encodings(1); # runs once at module load, after both tables are assigned; NOTE(review): the sub is defined outside this part of the file, so the meaning of the argument 1 should be confirmed there
852              
853             sub _compile_encoding_regexp { # (Re)build $encoded_char_re as a character class that matches any single key of %latex_encoding; takes no arguments
854 35     35   20096 my $char_class = join q{}, sort keys %latex_encoding; # member order does not change what a character class matches
855 35         2498 $char_class = quotemeta $char_class; # keep '-', '^', ']', '\\' and every other metacharacter literal inside [...]
856 35         8233 $encoded_char_re = length($char_class) ? qr{[$char_class]}x : qr{(?!)}x; # interpolated directly (no string eval); an empty table gets a pattern that can never match instead of the invalid '[]'
857 35         109 return; # nothing is returned: the result is the assignment to $encoded_char_re
858             }
859              
860             _compile_encoding_regexp(); # compile the pattern once at module load
861              
862              
863             1; # a module file must end with a true value so that require/use succeeds
864              
865             __END__