| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# bibliography package for Perl |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# TeX character set. |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Dana Jacobsen (dana@acm.org) |
|
7
|
|
|
|
|
|
|
# 22 January 1995 (last modified on 14 March 1996) |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
# These routines have gone through a major update in November 1995. |
|
10
|
|
|
|
|
|
|
# |
|
11
|
|
|
|
|
|
|
# This is still in beta. |
|
12
|
|
|
|
|
|
|
# There are many characters not implemented, and the underlying charset |
|
13
|
|
|
|
|
|
|
# code is not solid yet. |
|
14
|
|
|
|
|
|
|
# |
|
15
|
|
|
|
|
|
|
# Some ugly convolutions are gone through to make it run at a decent |
|
16
|
|
|
|
|
|
|
# speed. This code is _very_ timing sensitive. On a typical 1043 record |
|
17
|
|
|
|
|
|
|
# run, the first implementation ran at 83 seconds for tocanon, 28 seconds |
|
18
|
|
|
|
|
|
|
# for fromcanon. Two days of work brought this down to 1 second and 2 |
|
19
|
|
|
|
|
|
|
# seconds. |
|
20
|
|
|
|
|
|
|
# Lesson: |
|
21
|
|
|
|
|
|
|
# If you're not careful, you may find the charset code dominating |
|
22
|
|
|
|
|
|
|
# your entire conversion time since it is run for every _field_, but |
|
23
|
|
|
|
|
|
|
# with some careful profiling, it can be very fast. |
|
24
|
|
|
|
|
|
|
# |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
#### |
|
28
|
|
|
|
|
|
|
# |
|
29
|
|
|
|
|
|
|
# ToDo's identified by ptandler, 02-07-18 |
|
30
|
|
|
|
|
|
|
# |
|
31
|
|
|
|
|
|
|
# - Unknown TeX characters in 'ACM SIG{\-}PLAN Notices' --> \- is an optional hyphen |
|
32
|
|
|
|
|
|
|
# - Unknown TeX characters in '\{lopez,borning\}@cs' --> \{ and \} protect braces |
|
33
|
|
|
|
|
|
|
# - braces are not removed ... in bibtex they are often needed to protect the case of words |
|
34
|
|
|
|
|
|
|
# that should not be converted to lowercase in titles. |
|
35
|
|
|
|
|
|
|
# - tex commands like \cite{....} in bibtex entries are treated as unknown tex characters |
|
36
|
|
|
|
|
|
|
# |
|
37
|
|
|
|
|
|
|
#### |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
package bp_cs_tex; |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
###### |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
$bib'charsets{'tex', 'i_name'} = 'tex'; |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
$bib'charsets{'tex', 'tocanon'} = "bp_cs_tex'tocanon"; |
|
46
|
|
|
|
|
|
|
$bib'charsets{'tex', 'fromcanon'} = "bp_cs_tex'fromcanon"; |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
$bib'charsets{'tex', 'toesc'} = "[\$\\\\]"; |
|
49
|
|
|
|
|
|
|
# XXXXX We have so many characters to protect, should we even bother? |
|
50
|
|
|
|
|
|
|
$bib'charsets{'tex', 'fromesc'} = "[\\#\$\%\&{}_\|><\^~\200-\377]|${bib'cs_ext}|${bib'cs_meta}"; |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
###### |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
$cs_init = 0; |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# package variables for anyone to use |
|
57
|
|
|
|
|
|
|
$mine = ''; |
|
58
|
|
|
|
|
|
|
$unicode = ''; |
|
59
|
|
|
|
|
|
|
$can = ''; |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
###### |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
sub init_cs { |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Thorn and eth are really nasty since they don't exist in the standard TeX |
|
66
|
|
|
|
|
|
|
# fonts. This is what I came up with in r2b to fake it. Fortunately they |
|
67
|
|
|
|
|
|
|
# aren't used often. Get the cmoer fonts if you want to do them right. |
|
68
|
|
|
|
|
|
|
# My eth is pretty nice, but the thorn leaves a little to be desired. |
|
69
|
|
|
|
|
|
|
|
|
70
|
2
|
|
|
2
|
|
245
|
%charmap = ( |
|
71
|
|
|
|
|
|
|
'00A1', "!'", |
|
72
|
|
|
|
|
|
|
'00A2', '\leavevmode\hbox{\rm\rlap/c}', |
|
73
|
|
|
|
|
|
|
'00A3', '{\pounds}', |
|
74
|
|
|
|
|
|
|
'00A4', '$\spadesuit$', |
|
75
|
|
|
|
|
|
|
'00A5', '\leavevmode\hbox{\rm\rlap=Y}', |
|
76
|
|
|
|
|
|
|
'00A6', '\leavevmode |
|
77
|
|
|
|
|
|
|
\hbox{\hskip.4ex\hbox{\ooalign{\vrule width.2ex height.5ex depth.4ex\crcr |
|
78
|
|
|
|
|
|
|
\hfil\raise.8ex\hbox{\vrule width.2ex height.9ex depth0ex}\hfil}}}', |
|
79
|
|
|
|
|
|
|
'00A7', '\S ', |
|
80
|
|
|
|
|
|
|
'00A8', '{\"{ }}', |
|
81
|
|
|
|
|
|
|
'00A9', '\leavevmode\hbox{\raise.6em\hbox{\copyright}}', |
|
82
|
|
|
|
|
|
|
'00AA', '${}^{\b{\scriptsize a}}$', |
|
83
|
|
|
|
|
|
|
'00AB', '$\scriptscriptstyle\ll$', |
|
84
|
|
|
|
|
|
|
'00AC', '$\neg$', |
|
85
|
|
|
|
|
|
|
'00AE', '\leavevmode\hbox{\raise.6em\hbox{\ooalign{{\mathhexbox20D}\crcr |
|
86
|
|
|
|
|
|
|
\hfil\raise.07ex\hbox{r}\hfil}}}', |
|
87
|
|
|
|
|
|
|
'00AF', '{\={ }}', |
|
88
|
|
|
|
|
|
|
'00B0', '${}^\circ$', |
|
89
|
|
|
|
|
|
|
'00B1', '$\pm$', |
|
90
|
|
|
|
|
|
|
'00B2', '${}^2$', |
|
91
|
|
|
|
|
|
|
'00B3', '${}^3$', |
|
92
|
|
|
|
|
|
|
'00B4', '{\'{ }}', |
|
93
|
|
|
|
|
|
|
'00B5', '$\mu$', |
|
94
|
|
|
|
|
|
|
'00B6', '\P ', |
|
95
|
|
|
|
|
|
|
'00B7', '$\cdot$', |
|
96
|
|
|
|
|
|
|
'00B8', '{\c{ }}', |
|
97
|
|
|
|
|
|
|
'00B9', '${}^1$', |
|
98
|
|
|
|
|
|
|
'00BA', '${}^{\b{\scriptsize o}}$', |
|
99
|
|
|
|
|
|
|
'00BB', '$\scriptscriptstyle\gg$', |
|
100
|
|
|
|
|
|
|
'00BC', '$1\over4$', |
|
101
|
|
|
|
|
|
|
'00BD', '$1\over2$', |
|
102
|
|
|
|
|
|
|
'00BE', '$3\over4$', |
|
103
|
|
|
|
|
|
|
'00BF', '?`', |
|
104
|
|
|
|
|
|
|
'00C0', '{\`A}', |
|
105
|
|
|
|
|
|
|
'00C1', q-{\'A}-, |
|
106
|
|
|
|
|
|
|
'00C2', '{\^A}', |
|
107
|
|
|
|
|
|
|
'00C3', '{\~A}', |
|
108
|
|
|
|
|
|
|
'00C4', '{\"A}', |
|
109
|
|
|
|
|
|
|
'00C5', '{\AA}', |
|
110
|
|
|
|
|
|
|
'00C6', '{\AE}', |
|
111
|
|
|
|
|
|
|
'00C7', '{\c{C}}', |
|
112
|
|
|
|
|
|
|
'00C8', '{\`E}', |
|
113
|
|
|
|
|
|
|
'00C9', q-{\'E}-, |
|
114
|
|
|
|
|
|
|
'00CA', '{\^E}', |
|
115
|
|
|
|
|
|
|
'00CB', '{\"E}', |
|
116
|
|
|
|
|
|
|
'00CC', '{\`I}', |
|
117
|
|
|
|
|
|
|
'00CD', q-{\'I}-, |
|
118
|
|
|
|
|
|
|
'00CE', '{\^I}', |
|
119
|
|
|
|
|
|
|
'00CF', '{\"I}', |
|
120
|
|
|
|
|
|
|
'00D0', '\leavevmode\hbox{\ooalign{{D}\crcr |
|
121
|
|
|
|
|
|
|
\hskip.2ex\raise.25ex\hbox{-}\hfil}}', |
|
122
|
|
|
|
|
|
|
'00D1', '{\~N}', |
|
123
|
|
|
|
|
|
|
'00D2', '{\`O}', |
|
124
|
|
|
|
|
|
|
'00D3', q-{\'O}-, |
|
125
|
|
|
|
|
|
|
'00D4', '{\^O}', |
|
126
|
|
|
|
|
|
|
'00D5', '{\~O}', |
|
127
|
|
|
|
|
|
|
'00D6', '{\"O}', |
|
128
|
|
|
|
|
|
|
'00D7', '$\times$', |
|
129
|
|
|
|
|
|
|
'00D8', '{\O}', |
|
130
|
|
|
|
|
|
|
'00D9', '{\`U}', |
|
131
|
|
|
|
|
|
|
'00DA', q-{\'U}-, |
|
132
|
|
|
|
|
|
|
'00DB', '{\^U}', |
|
133
|
|
|
|
|
|
|
'00DC', '{\"U}', |
|
134
|
|
|
|
|
|
|
'00DD', q-{\'Y}-, |
|
135
|
|
|
|
|
|
|
'00DE', '\leavevmode\hbox{I\hskip-.6ex\raise.5ex\hbox{$\scriptscriptstyle\supset$}}', |
|
136
|
|
|
|
|
|
|
'00DF', '{\ss}', |
|
137
|
|
|
|
|
|
|
'00E0', '{\`a}', |
|
138
|
|
|
|
|
|
|
'00E1', q-{\'a}-, |
|
139
|
|
|
|
|
|
|
'00E2', '{\^a}', |
|
140
|
|
|
|
|
|
|
'00E3', '{\~a}', |
|
141
|
|
|
|
|
|
|
'00E4', '{\"a}', |
|
142
|
|
|
|
|
|
|
'00E5', '{\aa}', |
|
143
|
|
|
|
|
|
|
'00E6', '{\ae}', |
|
144
|
|
|
|
|
|
|
'00E7', '{\c{c}}', |
|
145
|
|
|
|
|
|
|
'00E8', '{\`e}', |
|
146
|
|
|
|
|
|
|
'00E9', q-{\'e}-, |
|
147
|
|
|
|
|
|
|
'00EA', '{\^e}', |
|
148
|
|
|
|
|
|
|
'00EB', '{\"e}', |
|
149
|
|
|
|
|
|
|
'00EC', '{\`i}', |
|
150
|
|
|
|
|
|
|
'00ED', q-{\'i}-, |
|
151
|
|
|
|
|
|
|
'00EE', '{\^i}', |
|
152
|
|
|
|
|
|
|
'00EF', '{\"i}', |
|
153
|
|
|
|
|
|
|
'00F0', '\leavevmode\hbox{\ooalign{$\partial$\crcr\hskip.8ex\raise.7ex\hbox{-}\hfil}}', |
|
154
|
|
|
|
|
|
|
'00F1', '{\~n}', |
|
155
|
|
|
|
|
|
|
'00F2', '{\`o}', |
|
156
|
|
|
|
|
|
|
'00F3', q-{\'o}-, |
|
157
|
|
|
|
|
|
|
'00F4', '{\^o}', |
|
158
|
|
|
|
|
|
|
'00F5', '{\~o}', |
|
159
|
|
|
|
|
|
|
'00F6', '{\"o}', |
|
160
|
|
|
|
|
|
|
'00F7', '$\div$', |
|
161
|
|
|
|
|
|
|
'00F8', '{\o}', |
|
162
|
|
|
|
|
|
|
'00F9', '{\`u}', |
|
163
|
|
|
|
|
|
|
'00FA', q-{\'u}-, |
|
164
|
|
|
|
|
|
|
'00FB', '{\^u}', |
|
165
|
|
|
|
|
|
|
'00FC', '{\"u}', |
|
166
|
|
|
|
|
|
|
'00FD', q-{\'y}-, |
|
167
|
|
|
|
|
|
|
'00FE', '\leavevmode\hbox{{\lower.3ex\hbox{\large l}}\hskip-.52ex o}', |
|
168
|
|
|
|
|
|
|
'00FF', '{\"y}', |
|
169
|
|
|
|
|
|
|
'0107', q-{\'c}-, |
|
170
|
|
|
|
|
|
|
'010C', '{\vC}', |
|
171
|
|
|
|
|
|
|
'010D', '{\vc}', |
|
172
|
|
|
|
|
|
|
'0159', '{\vr}', |
|
173
|
|
|
|
|
|
|
'015F', '{\c{s}}', |
|
174
|
|
|
|
|
|
|
'0160', '{\vS}', |
|
175
|
|
|
|
|
|
|
'0161', '{\vs}', |
|
176
|
|
|
|
|
|
|
'017A', q-{\'z}-, |
|
177
|
|
|
|
|
|
|
'017E', '{\vz}', |
|
178
|
|
|
|
|
|
|
# XXXXX |
|
179
|
|
|
|
|
|
|
# Should these be surrounded by $ (math mode)? |
|
180
|
|
|
|
|
|
|
# Also, what to do with \mu, which is listed twice? |
|
181
|
|
|
|
|
|
|
'03B1', '\alpha', |
|
182
|
|
|
|
|
|
|
'03B2', '\beta', |
|
183
|
|
|
|
|
|
|
'03B3', '\gamma', |
|
184
|
|
|
|
|
|
|
'03B4', '\delta', |
|
185
|
|
|
|
|
|
|
'03B5', '\epsilon', |
|
186
|
|
|
|
|
|
|
'03B6', '\zeta', |
|
187
|
|
|
|
|
|
|
'03B7', '\eta', |
|
188
|
|
|
|
|
|
|
'03B8', '\theta', |
|
189
|
|
|
|
|
|
|
'03B9', '\iota', |
|
190
|
|
|
|
|
|
|
'03BA', '\kappa', |
|
191
|
|
|
|
|
|
|
'03BB', '\lambda', |
|
192
|
|
|
|
|
|
|
'03BC', '\mu', |
|
193
|
|
|
|
|
|
|
'03BD', '\nu', |
|
194
|
|
|
|
|
|
|
'03BE', '\xi', |
|
195
|
|
|
|
|
|
|
'03C0', '\pi', |
|
196
|
|
|
|
|
|
|
'03C1', '\rho', |
|
197
|
|
|
|
|
|
|
'03C2', '\varsigma', |
|
198
|
|
|
|
|
|
|
'03C3', '\sigma', |
|
199
|
|
|
|
|
|
|
'03C4', '\tau', |
|
200
|
|
|
|
|
|
|
'03C5', '\upsilon', |
|
201
|
|
|
|
|
|
|
'03C6', '\phi', |
|
202
|
|
|
|
|
|
|
'03C7', '\chi', |
|
203
|
|
|
|
|
|
|
'03C8', '\psi', |
|
204
|
|
|
|
|
|
|
'03C9', '\omega', |
|
205
|
|
|
|
|
|
|
'2007', '$\:$', |
|
206
|
|
|
|
|
|
|
'2009', '$\,$', |
|
207
|
|
|
|
|
|
|
'201C', '``', |
|
208
|
|
|
|
|
|
|
'201D', '\'\'', |
|
209
|
|
|
|
|
|
|
); |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
# This mapping is only used in the from section. We'll do these by hand |
|
212
|
|
|
|
|
|
|
# in the to mapping. |
|
213
|
2
|
|
|
|
|
14
|
%charmap2 = ( |
|
214
|
|
|
|
|
|
|
'00A0', '~', |
|
215
|
|
|
|
|
|
|
'00AD', '-', |
|
216
|
|
|
|
|
|
|
'2002', '\ ', |
|
217
|
|
|
|
|
|
|
'2003', '\ \ ', |
|
218
|
|
|
|
|
|
|
'2014', '---', |
|
219
|
|
|
|
|
|
|
'03BF', 'o', |
|
220
|
|
|
|
|
|
|
); |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
# Blah. TeX has such a non-uniform way of handling characters that this is |
|
223
|
|
|
|
|
|
|
# really slow. I'm going to try some optimizations for the tocanon code |
|
224
|
|
|
|
|
|
|
# since that will be heavily used. It makes this stuff less uniform though. |
|
225
|
|
|
|
|
|
|
# Remember that we don't have a full TeX parser, or even a partial one. |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# Build up a search string to do the reverse map. |
|
228
|
2
|
|
|
|
|
5
|
$cmap_to_eval = ''; |
|
229
|
2
|
|
|
|
|
4
|
$cmap_from8_eval = ''; |
|
230
|
2
|
|
|
|
|
5
|
$cmap_to_eval_1 = ''; |
|
231
|
2
|
|
|
|
|
5
|
$cmap_to_eval_2 = ''; |
|
232
|
2
|
|
|
|
|
5
|
%rmap = (); |
|
233
|
2
|
|
|
|
|
6
|
%accent = (); |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# Step 1: Build a reverse map |
|
236
|
2
|
|
|
|
|
26
|
while (($unicode, $mine) = each %charmap) { |
|
237
|
262
|
|
|
|
|
899
|
$rmap{$mine} = $unicode; |
|
238
|
|
|
|
|
|
|
} |
|
239
|
|
|
|
|
|
|
# Step 2: walk through the keys in sorted order |
|
240
|
2
|
|
|
|
|
5
|
local($mineE); |
|
241
|
2
|
|
|
|
|
150
|
foreach $mine (sort keys %rmap) { |
|
242
|
262
|
|
|
|
|
706
|
$can = &bib'unicode_to_canon( $rmap{$mine} ); |
|
243
|
262
|
|
|
|
|
429
|
$mineE = $mine; |
|
244
|
262
|
|
|
|
|
1781
|
$mineE =~ s/(\W)/\\$1/g; |
|
245
|
|
|
|
|
|
|
# The various maps for tocanon |
|
246
|
262
|
100
|
|
|
|
1088
|
if ($mine =~ /^{\\([`'^"~])([\w])}$/) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
247
|
102
|
|
|
|
|
354
|
$accent{$1 . $2} = $can; |
|
248
|
|
|
|
|
|
|
} elsif ($mine =~ /^{\\([vc])(\w)}$/) { |
|
249
|
12
|
|
|
|
|
39
|
$accent{$1 . $2} = $can; |
|
250
|
|
|
|
|
|
|
} elsif ($mine =~ /^{\\([vc]){(\w)}}$/) { |
|
251
|
6
|
|
|
|
|
20
|
$accent{$1 . $2} = $can; |
|
252
|
|
|
|
|
|
|
} elsif ($mine =~ /leavevmode/) { |
|
253
|
18
|
|
|
|
|
56
|
$cmap_to_eval_1 .= "s/$mineE/$can/g;\n"; |
|
254
|
|
|
|
|
|
|
} elsif ($mine =~ /\$/) { |
|
255
|
40
|
|
|
|
|
97
|
$cmap_to_eval_2 .= "s/$mineE/$can/g;\n"; |
|
256
|
|
|
|
|
|
|
} else { |
|
257
|
84
|
|
|
|
|
176
|
$cmap_to_eval .= "s/$mineE/$can/g;\n"; |
|
258
|
|
|
|
|
|
|
} |
|
259
|
262
|
100
|
|
|
|
567
|
if ( length($can) == 1 ) { |
|
260
|
188
|
|
|
|
|
434
|
$cmap_from8_eval .= "s/$can/$mineE/g;\n"; |
|
261
|
|
|
|
|
|
|
} |
|
262
|
|
|
|
|
|
|
} |
|
263
|
2
|
|
|
|
|
19
|
$cmap_from8_eval .= "s/\\240/\\~/g;\ns/\\255/-/g;"; |
|
264
|
|
|
|
|
|
|
# leave rmap |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
#%map_diac = ( |
|
267
|
|
|
|
|
|
|
#'tilde', '\~{}', |
|
268
|
|
|
|
|
|
|
#'circ', '\^{}', |
|
269
|
|
|
|
|
|
|
#'lcub', '$\lbrace$', |
|
270
|
|
|
|
|
|
|
#'rcub', '$\rbrace$', |
|
271
|
|
|
|
|
|
|
#'bsol', '$\backslash$', |
|
272
|
|
|
|
|
|
|
#); |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# Careful. This is from only. |
|
275
|
2
|
|
|
|
|
47
|
%metamap = ( |
|
276
|
|
|
|
|
|
|
'3100', '{', # Begin protection |
|
277
|
|
|
|
|
|
|
'3110', '}', # End protection |
|
278
|
|
|
|
|
|
|
# fonts |
|
279
|
|
|
|
|
|
|
'0101', '{\rm ', |
|
280
|
|
|
|
|
|
|
'0102', '{\it ', |
|
281
|
|
|
|
|
|
|
'0103', '{\bf ', |
|
282
|
|
|
|
|
|
|
'0111', '}', |
|
283
|
|
|
|
|
|
|
'0112', '}', |
|
284
|
|
|
|
|
|
|
'0113', '}', |
|
285
|
|
|
|
|
|
|
'0110', '}', # previous font. We don't need a font stack to handle it. |
|
286
|
|
|
|
|
|
|
'2102', '{\em ', |
|
287
|
|
|
|
|
|
|
'2112', '}', |
|
288
|
|
|
|
|
|
|
); |
|
289
|
|
|
|
|
|
|
|
|
290
|
2
|
|
|
|
|
10
|
$cs_init = 1; |
|
291
|
|
|
|
|
|
|
} |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
###### |
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
sub tocanon { |
|
296
|
84
|
|
|
84
|
|
191
|
local($_, $protect) = @_; |
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# unprotect the TeX characters |
|
299
|
84
|
50
|
|
|
|
188
|
if ($protect) { |
|
300
|
|
|
|
|
|
|
# input is assumed to be in TeX format, before _any_ canon processing. |
|
301
|
|
|
|
|
|
|
# output is TeX format, but with raw magic characters. |
|
302
|
84
|
|
|
|
|
187
|
s/\$>\$/>/g; |
|
303
|
84
|
|
|
|
|
139
|
s/\$<\$/
|
|
304
|
84
|
|
|
|
|
145
|
s/\$\|\$/\|/g; |
|
305
|
84
|
|
|
|
|
114
|
s/\\_/_/g; |
|
306
|
84
|
|
|
|
|
129
|
s/\$\\rbrace\$/}/g; |
|
307
|
84
|
|
|
|
|
107
|
s/\$\\lbrace\$/{/g; |
|
308
|
84
|
|
|
|
|
119
|
s/\\\&/\&/g; |
|
309
|
84
|
|
|
|
|
113
|
s/\\\%/\%/g; |
|
310
|
84
|
|
|
|
|
105
|
s/\\\$/\$/g; |
|
311
|
84
|
|
|
|
|
176
|
s/\\#/#/g; |
|
312
|
|
|
|
|
|
|
} |
|
313
|
|
|
|
|
|
|
|
|
314
|
84
|
100
|
|
|
|
218
|
if (/-/) { |
|
315
|
18
|
|
|
|
|
41
|
s/\$-\$/${bib'cs_ext}2212/go; |
|
316
|
18
|
|
|
|
|
34
|
s/\b---\b/${bib'cs_ext}2014/go; |
|
317
|
18
|
|
|
|
|
36
|
s/\b--\b/${bib'cs_ext}2013/go; |
|
318
|
|
|
|
|
|
|
# leave - |
|
319
|
|
|
|
|
|
|
} |
|
320
|
84
|
100
|
|
|
|
269
|
if (/~/) { |
|
321
|
8
|
|
|
|
|
35
|
1 while s/([^\\])~/$1\240/g; |
|
322
|
|
|
|
|
|
|
} |
|
323
|
84
|
|
|
|
|
122
|
s/\\ \\ /${bib'cs_ext}2003/go; |
|
324
|
84
|
|
|
|
|
123
|
s/\\ /${bib'cs_ext}2002/go; |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
# Can we go now? |
|
327
|
84
|
100
|
|
|
|
395
|
return $_ unless /\\/; |
|
328
|
|
|
|
|
|
|
|
|
329
|
38
|
100
|
|
|
|
97
|
&init_cs unless $cs_init; |
|
330
|
|
|
|
|
|
|
|
|
331
|
38
|
100
|
|
|
|
172
|
if (/\\[`'^"~vc][{ ]?[\w]/) { |
|
332
|
|
|
|
|
|
|
# ISO -- we try {\"{c}}, {\"c}, \"{c}, \"c |
|
333
|
|
|
|
|
|
|
# ^^^^^ |
|
334
|
|
|
|
|
|
|
# preferred |
|
335
|
|
|
|
|
|
|
# |
|
336
|
|
|
|
|
|
|
# XXXXX What do we do about all the other ways they can try? |
|
337
|
|
|
|
|
|
|
# mgnet.bib uses {\" u} a lot. (got this way now) |
|
338
|
|
|
|
|
|
|
|
|
339
|
30
|
|
|
|
|
128
|
while (/{\\([`'^"~vc])( ?)([\w])}/) { |
|
340
|
32
|
|
|
|
|
142
|
$can = $accent{$1 . $3}; |
|
341
|
32
|
|
|
|
|
112
|
$mine = "{\\$1$2$3}"; |
|
342
|
32
|
50
|
|
|
|
72
|
if (!defined $can) { |
|
343
|
0
|
|
|
|
|
0
|
&bib'gotwarn("Can't convert TeX '$mine' in $_ to canon"); |
|
344
|
0
|
|
|
|
|
0
|
$can = ''; |
|
345
|
|
|
|
|
|
|
} |
|
346
|
32
|
|
|
|
|
337
|
$mine =~ s/(\W)/\\$1/g; |
|
347
|
32
|
|
|
|
|
573
|
s/$mine/$can/g; |
|
348
|
|
|
|
|
|
|
} |
|
349
|
30
|
|
|
|
|
96
|
while (/{\\([`'^"~vc]){([\w])}}/) { |
|
350
|
2
|
|
|
|
|
11
|
$can = $accent{$1 . $2}; |
|
351
|
2
|
|
|
|
|
8
|
$mine = "{\\$1\{$2\}}"; |
|
352
|
2
|
50
|
|
|
|
11
|
if (!defined $can) { |
|
353
|
0
|
|
|
|
|
0
|
&bib'gotwarn("Can't convert TeX '$mine' in $_ to canon"); |
|
354
|
0
|
|
|
|
|
0
|
$can = ''; |
|
355
|
|
|
|
|
|
|
} |
|
356
|
2
|
|
|
|
|
18
|
$mine =~ s/(\W)/\\$1/g; |
|
357
|
2
|
|
|
|
|
25
|
s/$mine/$can/g; |
|
358
|
|
|
|
|
|
|
} |
|
359
|
30
|
|
|
|
|
93
|
while (/\\([`'^"~vc]){([\w])}/) { |
|
360
|
0
|
|
|
|
|
0
|
$can = $accent{$1 . $2}; |
|
361
|
0
|
|
|
|
|
0
|
$mine = "\\$1\{$2\}"; |
|
362
|
0
|
0
|
|
|
|
0
|
if (!defined $can) { |
|
363
|
0
|
|
|
|
|
0
|
&bib'gotwarn("Can't convert TeX '$mine' in $_ to canon"); |
|
364
|
0
|
|
|
|
|
0
|
$can = ''; |
|
365
|
|
|
|
|
|
|
} |
|
366
|
0
|
|
|
|
|
0
|
$mine =~ s/(\W)/\\$1/g; |
|
367
|
0
|
|
|
|
|
0
|
s/$mine/$can/g; |
|
368
|
|
|
|
|
|
|
} |
|
369
|
30
|
|
|
|
|
91
|
while (/\\([`'^"~])( ?)([\w])/) { |
|
370
|
0
|
|
|
|
|
0
|
$can = $accent{$1 . $3}; |
|
371
|
0
|
|
|
|
|
0
|
$mine = "\\$1$2$3"; |
|
372
|
0
|
0
|
|
|
|
0
|
if (!defined $can) { |
|
373
|
0
|
|
|
|
|
0
|
&bib'gotwarn("Can't convert TeX '$mine' in $_ to canon"); |
|
374
|
0
|
|
|
|
|
0
|
$can = ''; |
|
375
|
|
|
|
|
|
|
} |
|
376
|
0
|
|
|
|
|
0
|
$mine =~ s/(\W)/\\$1/g; |
|
377
|
0
|
|
|
|
|
0
|
s/$mine/$can/g; |
|
378
|
|
|
|
|
|
|
} |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# This unfortunately matches \cr and \circ. We aren't doing a loop |
|
381
|
|
|
|
|
|
|
# any more, so it's not even necessary anymore. Let the standard |
|
382
|
|
|
|
|
|
|
# routine try to match and give the normal error message on failure. |
|
383
|
|
|
|
|
|
|
#while (s/(\\[`'^"~vc][{ ]?[\w])//) { |
|
384
|
|
|
|
|
|
|
# &bib'gotwarn("Couldn't parse TeX accented character: $1!"); |
|
385
|
|
|
|
|
|
|
#} |
|
386
|
|
|
|
|
|
|
|
|
387
|
30
|
100
|
|
|
|
231
|
return $_ unless /\\/; |
|
388
|
|
|
|
|
|
|
} # end of standard accented characters |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
# XXXXX What about the v, c, and other accents? Do we need another |
|
391
|
|
|
|
|
|
|
# section for those, or can we fit them in above? |
|
392
|
|
|
|
|
|
|
|
|
393
|
12
|
100
|
|
|
|
43
|
if (/leavevmode/) { |
|
394
|
2
|
|
|
|
|
417
|
eval $cmap_to_eval_1; |
|
395
|
|
|
|
|
|
|
} |
|
396
|
12
|
50
|
|
|
|
46
|
if (/\$/) { |
|
397
|
0
|
|
|
|
|
0
|
eval $cmap_to_eval_2; |
|
398
|
|
|
|
|
|
|
} |
|
399
|
12
|
|
|
|
|
6691
|
eval $cmap_to_eval; |
|
400
|
|
|
|
|
|
|
|
|
401
|
12
|
|
|
|
|
44
|
s/\\\^{}/\^/g; |
|
402
|
12
|
|
|
|
|
55
|
s/\\~{\s?}/~/g; |
|
403
|
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
# hopefully we're done by now |
|
405
|
12
|
50
|
|
|
|
108
|
return $_ unless /\\/; |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
# font changes |
|
408
|
|
|
|
|
|
|
# This doesn't work all that well, but most bibliographies are simple |
|
409
|
0
|
|
|
|
|
|
s/\{\\rm ([^{}]*)\}/${bib'cs_meta}0101$1${bib'cs_meta}0110/g; |
|
410
|
0
|
|
|
|
|
|
s/\{\\it ([^{}]*)\}/${bib'cs_meta}0102$1${bib'cs_meta}0110/g; |
|
411
|
0
|
|
|
|
|
|
s/\{\\bf ([^{}]*)\}/${bib'cs_meta}0103$1${bib'cs_meta}0110/g; |
|
412
|
0
|
|
|
|
|
|
s/\{\\em ([^{}]*)\}/${bib'cs_meta}2102$1${bib'cs_meta}2112/g; |
|
413
|
0
|
0
|
|
|
|
|
$_ = &bib'font_check($_) if /${bib'cs_meta}01/o; |
|
414
|
|
|
|
|
|
|
# done with font changing |
|
415
|
|
|
|
|
|
|
|
|
416
|
0
|
0
|
|
|
|
|
return $_ unless /\\/; |
|
417
|
|
|
|
|
|
|
|
|
418
|
0
|
|
|
|
|
|
s/\$\\backslash\$/$bib'cs_temp/g; |
|
419
|
0
|
0
|
|
|
|
|
if (!/\\/) { |
|
420
|
0
|
|
|
|
|
|
s/$bib'cs_temp/\\/go; |
|
421
|
0
|
|
|
|
|
|
return $_; |
|
422
|
|
|
|
|
|
|
} |
|
423
|
0
|
|
|
|
|
|
s/$bib'cs_temp/\\/go; |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
# I give up. |
|
426
|
|
|
|
|
|
|
# XXXXX We really ought to remove the escape and meta characters we have |
|
427
|
|
|
|
|
|
|
# converted when we give them this warning. |
|
428
|
0
|
|
|
|
|
|
&bib'gotwarn("Unknown TeX characters in '$_'"); |
|
429
|
0
|
|
|
|
|
|
$_; |
|
430
|
|
|
|
|
|
|
} |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
###### |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
sub fromcanon { |
|
435
|
0
|
|
|
0
|
|
|
local($_, $protect) = @_; |
|
436
|
0
|
|
|
|
|
|
local($repl); |
|
437
|
|
|
|
|
|
|
# We no longer check for font matching here, as that should be done by a |
|
438
|
|
|
|
|
|
|
# call to bib'font_check in the tocanon code. |
|
439
|
|
|
|
|
|
|
|
|
440
|
0
|
0
|
|
|
|
|
if ($protect) { |
|
441
|
0
|
|
|
|
|
|
s/\\/$bib'cs_temp/go; |
|
442
|
0
|
|
|
|
|
|
s/#/\\#/g; |
|
443
|
0
|
|
|
|
|
|
s/\$/\\\$/g; |
|
444
|
0
|
|
|
|
|
|
s/\%/\\\%/g; |
|
445
|
0
|
|
|
|
|
|
s/\&/\\\&/g; |
|
446
|
0
|
|
|
|
|
|
s/{/\$\\lbrace\$/g; |
|
447
|
0
|
|
|
|
|
|
s/}/\$\\rbrace\$/g; |
|
448
|
0
|
|
|
|
|
|
s/_/\\_/g; |
|
449
|
0
|
|
|
|
|
|
s/\|/\$\|\$/g; |
|
450
|
0
|
|
|
|
|
|
s/>/\$>\$/g; |
|
451
|
0
|
|
|
|
|
|
s/\$<\$/g; |
|
452
|
0
|
|
|
|
|
|
s/\^/\\^{}/g; |
|
453
|
0
|
|
|
|
|
|
s/~/\\~{}/g; |
|
454
|
0
|
|
|
|
|
|
s/$bib'cs_temp/\$\\backslash\$/go; |
|
455
|
|
|
|
|
|
|
} |
|
456
|
|
|
|
|
|
|
|
|
457
|
0
|
|
|
|
|
|
while (/([\200-\237])/) { |
|
458
|
0
|
|
|
|
|
|
$repl = $1; |
|
459
|
0
|
|
|
|
|
|
$unicode = &bib'canon_to_unicode($repl); |
|
460
|
0
|
|
|
|
|
|
&bib'gotwarn("Can't convert ".&bib'unicode_name($unicode)." to TeX"); |
|
461
|
0
|
|
|
|
|
|
s/$repl//g; |
|
462
|
|
|
|
|
|
|
} |
|
463
|
|
|
|
|
|
|
|
|
464
|
0
|
0
|
|
|
|
|
&init_cs unless $cs_init; |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
#if (/[\240-\377]/) { |
|
467
|
|
|
|
|
|
|
# eval $cmap_from8_eval; |
|
468
|
|
|
|
|
|
|
#} |
|
469
|
0
|
|
|
|
|
|
s/\240/~/g; |
|
470
|
0
|
|
|
|
|
|
s/\255/-/g; |
|
471
|
0
|
|
|
|
|
|
while (/([\240-\377])/) { |
|
472
|
0
|
|
|
|
|
|
$repl = $1; |
|
473
|
0
|
|
|
|
|
|
$unicode = &bib'canon_to_unicode($repl); |
|
474
|
0
|
|
|
|
|
|
s/$repl/$charmap{$unicode}/g; |
|
475
|
|
|
|
|
|
|
} |
|
476
|
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
# Maybe we can go now? |
|
478
|
0
|
0
|
|
|
|
|
return $_ unless /$bib'cs_escape/o; |
|
479
|
|
|
|
|
|
|
|
|
480
|
0
|
|
|
|
|
|
while (/${bib'cs_ext}(....)/) { |
|
481
|
0
|
|
|
|
|
|
$unicode = $1; |
|
482
|
0
|
0
|
|
|
|
|
if ($unicode =~ /^00[0-7]/) { |
|
483
|
0
|
|
|
|
|
|
1 while s/${bib'cs_ext}00([0-7].)/pack("C", hex($1))/ge; |
|
|
0
|
|
|
|
|
|
|
|
484
|
0
|
|
|
|
|
|
next; |
|
485
|
|
|
|
|
|
|
} |
|
486
|
0
|
0
|
0
|
|
|
|
defined $charmap{$unicode} && s/${bib'cs_ext}$unicode/$charmap{$unicode}/g |
|
487
|
|
|
|
|
|
|
&& next; |
|
488
|
0
|
0
|
0
|
|
|
|
defined $charmap2{$unicode} && s/${bib'cs_ext}$unicode/$charmap2{$unicode}/g |
|
489
|
|
|
|
|
|
|
&& next; |
|
490
|
|
|
|
|
|
|
|
|
491
|
0
|
|
|
|
|
|
$can = &bib'unicode_approx($unicode); |
|
492
|
0
|
0
|
0
|
|
|
|
defined $can && s/$bib'cs_ext$unicode/$can/g && next; |
|
493
|
|
|
|
|
|
|
|
|
494
|
0
|
|
|
|
|
|
&bib'gotwarn("Can't convert ".&bib'unicode_name($unicode)." to TeX"); |
|
495
|
0
|
|
|
|
|
|
s/${bib'cs_ext}$unicode//g; |
|
496
|
|
|
|
|
|
|
} |
|
497
|
|
|
|
|
|
|
|
|
498
|
0
|
|
|
|
|
|
while (/${bib'cs_meta}(....)/) { |
|
499
|
0
|
|
|
|
|
|
$repl = $1; |
|
500
|
0
|
0
|
0
|
|
|
|
defined $metamap{$repl} && s/${bib'cs_meta}$repl/$metamap{$repl}/g |
|
501
|
|
|
|
|
|
|
&& next; |
|
502
|
|
|
|
|
|
|
|
|
503
|
0
|
|
|
|
|
|
$can = &bib'meta_approx($repl); |
|
504
|
0
|
0
|
0
|
|
|
|
defined $can && s/$bib'cs_meta$repl/$can/g && next; |
|
505
|
|
|
|
|
|
|
|
|
506
|
0
|
|
|
|
|
|
&bib'gotwarn("Can't convert ".&bib'meta_name($repl)." to TeX"); |
|
507
|
0
|
|
|
|
|
|
s/${bib'cs_meta}$repl//g; |
|
508
|
|
|
|
|
|
|
} |
|
509
|
|
|
|
|
|
|
|
|
510
|
0
|
|
|
|
|
|
$_; |
|
511
|
|
|
|
|
|
|
} |
|
512
|
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
###### |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
####################### |
|
517
|
|
|
|
|
|
|
# end of package |
|
518
|
|
|
|
|
|
|
####################### |
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
1; |