line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# bibliography package for Perl |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# TeX character set. |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Dana Jacobsen (dana@acm.org) |
7
|
|
|
|
|
|
|
# 22 January 1995 (last modified on 14 March 1996) |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# These routines have gone through a major update in November 1995. |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# This is still in beta. |
12
|
|
|
|
|
|
|
# There are many characters not implemented, and the underlying charset |
13
|
|
|
|
|
|
|
# code is not solid yet. |
14
|
|
|
|
|
|
|
# |
15
|
|
|
|
|
|
|
# Some ugly convolutions are gone through to make it run at a decent |
16
|
|
|
|
|
|
|
# speed. This code is _very_ timing sensitive. On a typical 1043 record |
17
|
|
|
|
|
|
|
# run, the first implementation ran at 83 seconds for tocanon, 28 seconds |
18
|
|
|
|
|
|
|
# for fromcanon. Two days of work brought this down to 1 second and 2 |
19
|
|
|
|
|
|
|
# seconds. |
20
|
|
|
|
|
|
|
# Lesson: |
21
|
|
|
|
|
|
|
# If you're not careful, you may find the charset code dominating |
22
|
|
|
|
|
|
|
# your entire conversion time since it is run for every _field_, but |
23
|
|
|
|
|
|
|
# with some careful profiling, it can be very fast. |
24
|
|
|
|
|
|
|
# |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
#### |
28
|
|
|
|
|
|
|
# |
29
|
|
|
|
|
|
|
# ToDo's identified by ptandler, 02-07-18 |
30
|
|
|
|
|
|
|
# |
31
|
|
|
|
|
|
|
# - Unknown TeX characters in 'ACM SIG{\-}PLAN Notices' --> \- is an optional hyphen |
32
|
|
|
|
|
|
|
# - Unknown TeX characters in '\{lopez,borning\}@cs' --> \{ and \} protect braces |
33
|
|
|
|
|
|
|
# - braces are not removed ... in bibtex they are often needed to protect the case of words |
34
|
|
|
|
|
|
|
# that should not be converted to lowercase in titles. |
35
|
|
|
|
|
|
|
# - tex commands like \cite{....} in bibtex entries are treated as unknown tex characters |
36
|
|
|
|
|
|
|
# |
37
|
|
|
|
|
|
|
#### |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
package bp_cs_tex; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
###### |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
$bib'charsets{'tex', 'i_name'} = 'tex'; |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
$bib'charsets{'tex', 'tocanon'} = "bp_cs_tex'tocanon"; |
46
|
|
|
|
|
|
|
$bib'charsets{'tex', 'fromcanon'} = "bp_cs_tex'fromcanon"; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
$bib'charsets{'tex', 'toesc'} = "[\$\\\\]"; |
49
|
|
|
|
|
|
|
# XXXXX We have so many characters to protect, should we even bother? |
50
|
|
|
|
|
|
|
$bib'charsets{'tex', 'fromesc'} = "[\\#\$\%\&{}_\|><\^~\200-\377]|${bib'cs_ext}|${bib'cs_meta}"; |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
###### |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
$cs_init = 0; |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# package variables for anyone to use |
57
|
|
|
|
|
|
|
$mine = ''; |
58
|
|
|
|
|
|
|
$unicode = ''; |
59
|
|
|
|
|
|
|
$can = ''; |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
###### |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
sub init_cs { |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Thorn and eth are really nasty since they don't exist in the standard TeX |
66
|
|
|
|
|
|
|
# fonts. This is what I came up with in r2b to fake it. Fortunately they |
67
|
|
|
|
|
|
|
# aren't used often. Get the cmoer fonts if you want to do them right. |
68
|
|
|
|
|
|
|
# My eth is pretty nice, but the thorn leaves a little to be desired. |
69
|
|
|
|
|
|
|
|
70
|
2
|
|
|
2
|
|
245
|
%charmap = ( |
71
|
|
|
|
|
|
|
'00A1', "!'", |
72
|
|
|
|
|
|
|
'00A2', '\leavevmode\hbox{\rm\rlap/c}', |
73
|
|
|
|
|
|
|
'00A3', '{\pounds}', |
74
|
|
|
|
|
|
|
'00A4', '$\spadesuit$', |
75
|
|
|
|
|
|
|
'00A5', '\leavevmode\hbox{\rm\rlap=Y}', |
76
|
|
|
|
|
|
|
'00A6', '\leavevmode |
77
|
|
|
|
|
|
|
\hbox{\hskip.4ex\hbox{\ooalign{\vrule width.2ex height.5ex depth.4ex\crcr |
78
|
|
|
|
|
|
|
\hfil\raise.8ex\hbox{\vrule width.2ex height.9ex depth0ex}\hfil}}}', |
79
|
|
|
|
|
|
|
'00A7', '\S ', |
80
|
|
|
|
|
|
|
'00A8', '{\"{ }}', |
81
|
|
|
|
|
|
|
'00A9', '\leavevmode\hbox{\raise.6em\hbox{\copyright}}', |
82
|
|
|
|
|
|
|
'00AA', '${}^{\b{\scriptsize a}}$', |
83
|
|
|
|
|
|
|
'00AB', '$\scriptscriptstyle\ll$', |
84
|
|
|
|
|
|
|
'00AC', '$\neg$', |
85
|
|
|
|
|
|
|
'00AE', '\leavevmode\hbox{\raise.6em\hbox{\ooalign{{\mathhexbox20D}\crcr |
86
|
|
|
|
|
|
|
\hfil\raise.07ex\hbox{r}\hfil}}}', |
87
|
|
|
|
|
|
|
'00AF', '{\={ }}', |
88
|
|
|
|
|
|
|
'00B0', '${}^\circ$', |
89
|
|
|
|
|
|
|
'00B1', '$\pm$', |
90
|
|
|
|
|
|
|
'00B2', '${}^2$', |
91
|
|
|
|
|
|
|
'00B3', '${}^3$', |
92
|
|
|
|
|
|
|
'00B4', '{\'{ }}', |
93
|
|
|
|
|
|
|
'00B5', '$\mu$', |
94
|
|
|
|
|
|
|
'00B6', '\P ', |
95
|
|
|
|
|
|
|
'00B7', '$\cdot$', |
96
|
|
|
|
|
|
|
'00B8', '{\c{ }}', |
97
|
|
|
|
|
|
|
'00B9', '${}^1$', |
98
|
|
|
|
|
|
|
'00BA', '${}^{\b{\scriptsize o}}$', |
99
|
|
|
|
|
|
|
'00BB', '$\scriptscriptstyle\gg$', |
100
|
|
|
|
|
|
|
'00BC', '$1\over4$', |
101
|
|
|
|
|
|
|
'00BD', '$1\over2$', |
102
|
|
|
|
|
|
|
'00BE', '$3\over4$', |
103
|
|
|
|
|
|
|
'00BF', '?`', |
104
|
|
|
|
|
|
|
'00C0', '{\`A}', |
105
|
|
|
|
|
|
|
'00C1', q-{\'A}-, |
106
|
|
|
|
|
|
|
'00C2', '{\^A}', |
107
|
|
|
|
|
|
|
'00C3', '{\~A}', |
108
|
|
|
|
|
|
|
'00C4', '{\"A}', |
109
|
|
|
|
|
|
|
'00C5', '{\AA}', |
110
|
|
|
|
|
|
|
'00C6', '{\AE}', |
111
|
|
|
|
|
|
|
'00C7', '{\c{C}}', |
112
|
|
|
|
|
|
|
'00C8', '{\`E}', |
113
|
|
|
|
|
|
|
'00C9', q-{\'E}-, |
114
|
|
|
|
|
|
|
'00CA', '{\^E}', |
115
|
|
|
|
|
|
|
'00CB', '{\"E}', |
116
|
|
|
|
|
|
|
'00CC', '{\`I}', |
117
|
|
|
|
|
|
|
'00CD', q-{\'I}-, |
118
|
|
|
|
|
|
|
'00CE', '{\^I}', |
119
|
|
|
|
|
|
|
'00CF', '{\"I}', |
120
|
|
|
|
|
|
|
'00D0', '\leavevmode\hbox{\ooalign{{D}\crcr |
121
|
|
|
|
|
|
|
\hskip.2ex\raise.25ex\hbox{-}\hfil}}', |
122
|
|
|
|
|
|
|
'00D1', '{\~N}', |
123
|
|
|
|
|
|
|
'00D2', '{\`O}', |
124
|
|
|
|
|
|
|
'00D3', q-{\'O}-, |
125
|
|
|
|
|
|
|
'00D4', '{\^O}', |
126
|
|
|
|
|
|
|
'00D5', '{\~O}', |
127
|
|
|
|
|
|
|
'00D6', '{\"O}', |
128
|
|
|
|
|
|
|
'00D7', '$\times$', |
129
|
|
|
|
|
|
|
'00D8', '{\O}', |
130
|
|
|
|
|
|
|
'00D9', '{\`U}', |
131
|
|
|
|
|
|
|
'00DA', q-{\'U}-, |
132
|
|
|
|
|
|
|
'00DB', '{\^U}', |
133
|
|
|
|
|
|
|
'00DC', '{\"U}', |
134
|
|
|
|
|
|
|
'00DD', q-{\'Y}-, |
135
|
|
|
|
|
|
|
'00DE', '\leavevmode\hbox{I\hskip-.6ex\raise.5ex\hbox{$\scriptscriptstyle\supset$}}', |
136
|
|
|
|
|
|
|
'00DF', '{\ss}', |
137
|
|
|
|
|
|
|
'00E0', '{\`a}', |
138
|
|
|
|
|
|
|
'00E1', q-{\'a}-, |
139
|
|
|
|
|
|
|
'00E2', '{\^a}', |
140
|
|
|
|
|
|
|
'00E3', '{\~a}', |
141
|
|
|
|
|
|
|
'00E4', '{\"a}', |
142
|
|
|
|
|
|
|
'00E5', '{\aa}', |
143
|
|
|
|
|
|
|
'00E6', '{\ae}', |
144
|
|
|
|
|
|
|
'00E7', '{\c{c}}', |
145
|
|
|
|
|
|
|
'00E8', '{\`e}', |
146
|
|
|
|
|
|
|
'00E9', q-{\'e}-, |
147
|
|
|
|
|
|
|
'00EA', '{\^e}', |
148
|
|
|
|
|
|
|
'00EB', '{\"e}', |
149
|
|
|
|
|
|
|
'00EC', '{\`i}', |
150
|
|
|
|
|
|
|
'00ED', q-{\'i}-, |
151
|
|
|
|
|
|
|
'00EE', '{\^i}', |
152
|
|
|
|
|
|
|
'00EF', '{\"i}', |
153
|
|
|
|
|
|
|
'00F0', '\leavevmode\hbox{\ooalign{$\partial$\crcr\hskip.8ex\raise.7ex\hbox{-}\hfil}}', |
154
|
|
|
|
|
|
|
'00F1', '{\~n}', |
155
|
|
|
|
|
|
|
'00F2', '{\`o}', |
156
|
|
|
|
|
|
|
'00F3', q-{\'o}-, |
157
|
|
|
|
|
|
|
'00F4', '{\^o}', |
158
|
|
|
|
|
|
|
'00F5', '{\~o}', |
159
|
|
|
|
|
|
|
'00F6', '{\"o}', |
160
|
|
|
|
|
|
|
'00F7', '$\div$', |
161
|
|
|
|
|
|
|
'00F8', '{\o}', |
162
|
|
|
|
|
|
|
'00F9', '{\`u}', |
163
|
|
|
|
|
|
|
'00FA', q-{\'u}-, |
164
|
|
|
|
|
|
|
'00FB', '{\^u}', |
165
|
|
|
|
|
|
|
'00FC', '{\"u}', |
166
|
|
|
|
|
|
|
'00FD', q-{\'y}-, |
167
|
|
|
|
|
|
|
'00FE', '\leavevmode\hbox{{\lower.3ex\hbox{\large l}}\hskip-.52ex o}', |
168
|
|
|
|
|
|
|
'00FF', '{\"y}', |
169
|
|
|
|
|
|
|
'0107', q-{\'c}-, |
170
|
|
|
|
|
|
|
'010C', '{\vC}', |
171
|
|
|
|
|
|
|
'010D', '{\vc}', |
172
|
|
|
|
|
|
|
'0159', '{\vr}', |
173
|
|
|
|
|
|
|
'015F', '{\c{s}}', |
174
|
|
|
|
|
|
|
'0160', '{\vS}', |
175
|
|
|
|
|
|
|
'0161', '{\vs}', |
176
|
|
|
|
|
|
|
'017A', q-{\'z}-, |
177
|
|
|
|
|
|
|
'017E', '{\vz}', |
178
|
|
|
|
|
|
|
# XXXXX |
179
|
|
|
|
|
|
|
# Should these be surrounded by $ (math mode)? |
180
|
|
|
|
|
|
|
# Also, what to do with \mu, which is listed twice? |
181
|
|
|
|
|
|
|
'03B1', '\alpha', |
182
|
|
|
|
|
|
|
'03B2', '\beta', |
183
|
|
|
|
|
|
|
'03B3', '\gamma', |
184
|
|
|
|
|
|
|
'03B4', '\delta', |
185
|
|
|
|
|
|
|
'03B5', '\epsilon', |
186
|
|
|
|
|
|
|
'03B6', '\zeta', |
187
|
|
|
|
|
|
|
'03B7', '\eta', |
188
|
|
|
|
|
|
|
'03B8', '\theta', |
189
|
|
|
|
|
|
|
'03B9', '\iota', |
190
|
|
|
|
|
|
|
'03BA', '\kappa', |
191
|
|
|
|
|
|
|
'03BB', '\lambda', |
192
|
|
|
|
|
|
|
'03BC', '\mu', |
193
|
|
|
|
|
|
|
'03BD', '\nu', |
194
|
|
|
|
|
|
|
'03BE', '\xi', |
195
|
|
|
|
|
|
|
'03C0', '\pi', |
196
|
|
|
|
|
|
|
'03C1', '\rho', |
197
|
|
|
|
|
|
|
'03C2', '\varsigma', |
198
|
|
|
|
|
|
|
'03C3', '\sigma', |
199
|
|
|
|
|
|
|
'03C4', '\tau', |
200
|
|
|
|
|
|
|
'03C5', '\upsilon', |
201
|
|
|
|
|
|
|
'03C6', '\phi', |
202
|
|
|
|
|
|
|
'03C7', '\chi', |
203
|
|
|
|
|
|
|
'03C8', '\psi', |
204
|
|
|
|
|
|
|
'03C9', '\omega', |
205
|
|
|
|
|
|
|
'2007', '$\:$', |
206
|
|
|
|
|
|
|
'2009', '$\,$', |
207
|
|
|
|
|
|
|
'201C', '``', |
208
|
|
|
|
|
|
|
'201D', '\'\'', |
209
|
|
|
|
|
|
|
); |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
# This mapping is only used in the from section. We'll do these by hand |
212
|
|
|
|
|
|
|
# in the to mapping. |
213
|
2
|
|
|
|
|
14
|
%charmap2 = ( |
214
|
|
|
|
|
|
|
'00A0', '~', |
215
|
|
|
|
|
|
|
'00AD', '-', |
216
|
|
|
|
|
|
|
'2002', '\ ', |
217
|
|
|
|
|
|
|
'2003', '\ \ ', |
218
|
|
|
|
|
|
|
'2014', '---', |
219
|
|
|
|
|
|
|
'03BF', 'o', |
220
|
|
|
|
|
|
|
); |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
# Blah. TeX has such a non-uniform way of handling characters that this is |
223
|
|
|
|
|
|
|
# really slow. I'm going to try some optimizations for the tocanon code |
224
|
|
|
|
|
|
|
# since that will be heavily used. It makes this stuff less uniform though. |
225
|
|
|
|
|
|
|
# Remember that we don't have a full TeX parser, or even a partial one. |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# Build up a search string to do the reverse map. |
228
|
2
|
|
|
|
|
5
|
$cmap_to_eval = ''; |
229
|
2
|
|
|
|
|
4
|
$cmap_from8_eval = ''; |
230
|
2
|
|
|
|
|
5
|
$cmap_to_eval_1 = ''; |
231
|
2
|
|
|
|
|
5
|
$cmap_to_eval_2 = ''; |
232
|
2
|
|
|
|
|
5
|
%rmap = (); |
233
|
2
|
|
|
|
|
6
|
%accent = (); |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# Step 1: Build a reverse map |
236
|
2
|
|
|
|
|
26
|
while (($unicode, $mine) = each %charmap) { |
237
|
262
|
|
|
|
|
899
|
$rmap{$mine} = $unicode; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
# Step 2: walk through the keys in sorted order |
240
|
2
|
|
|
|
|
5
|
local($mineE); |
241
|
2
|
|
|
|
|
150
|
foreach $mine (sort keys %rmap) { |
242
|
262
|
|
|
|
|
706
|
$can = &bib'unicode_to_canon( $rmap{$mine} ); |
243
|
262
|
|
|
|
|
429
|
$mineE = $mine; |
244
|
262
|
|
|
|
|
1781
|
$mineE =~ s/(\W)/\\$1/g; |
245
|
|
|
|
|
|
|
# The various maps for tocanon |
246
|
262
|
100
|
|
|
|
1088
|
if ($mine =~ /^{\\([`'^"~])([\w])}$/) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
247
|
102
|
|
|
|
|
354
|
$accent{$1 . $2} = $can; |
248
|
|
|
|
|
|
|
} elsif ($mine =~ /^{\\([vc])(\w)}$/) { |
249
|
12
|
|
|
|
|
39
|
$accent{$1 . $2} = $can; |
250
|
|
|
|
|
|
|
} elsif ($mine =~ /^{\\([vc]){(\w)}}$/) { |
251
|
6
|
|
|
|
|
20
|
$accent{$1 . $2} = $can; |
252
|
|
|
|
|
|
|
} elsif ($mine =~ /leavevmode/) { |
253
|
18
|
|
|
|
|
56
|
$cmap_to_eval_1 .= "s/$mineE/$can/g;\n"; |
254
|
|
|
|
|
|
|
} elsif ($mine =~ /\$/) { |
255
|
40
|
|
|
|
|
97
|
$cmap_to_eval_2 .= "s/$mineE/$can/g;\n"; |
256
|
|
|
|
|
|
|
} else { |
257
|
84
|
|
|
|
|
176
|
$cmap_to_eval .= "s/$mineE/$can/g;\n"; |
258
|
|
|
|
|
|
|
} |
259
|
262
|
100
|
|
|
|
567
|
if ( length($can) == 1 ) { |
260
|
188
|
|
|
|
|
434
|
$cmap_from8_eval .= "s/$can/$mineE/g;\n"; |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
} |
263
|
2
|
|
|
|
|
19
|
$cmap_from8_eval .= "s/\\240/\\~/g;\ns/\\255/-/g;"; |
264
|
|
|
|
|
|
|
# leave rmap |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
#%map_diac = ( |
267
|
|
|
|
|
|
|
#'tilde', '\~{}', |
268
|
|
|
|
|
|
|
#'circ', '\^{}', |
269
|
|
|
|
|
|
|
#'lcub', '$\lbrace$', |
270
|
|
|
|
|
|
|
#'rcub', '$\rbrace$', |
271
|
|
|
|
|
|
|
#'bsol', '$\backslash$', |
272
|
|
|
|
|
|
|
#); |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# Careful. This is from only. |
275
|
2
|
|
|
|
|
47
|
%metamap = ( |
276
|
|
|
|
|
|
|
'3100', '{', # Begin protection |
277
|
|
|
|
|
|
|
'3110', '}', # End protection |
278
|
|
|
|
|
|
|
# fonts |
279
|
|
|
|
|
|
|
'0101', '{\rm ', |
280
|
|
|
|
|
|
|
'0102', '{\it ', |
281
|
|
|
|
|
|
|
'0103', '{\bf ', |
282
|
|
|
|
|
|
|
'0111', '}', |
283
|
|
|
|
|
|
|
'0112', '}', |
284
|
|
|
|
|
|
|
'0113', '}', |
285
|
|
|
|
|
|
|
'0110', '}', # previous font. We don't need a font stack to handle it. |
286
|
|
|
|
|
|
|
'2102', '{\em ', |
287
|
|
|
|
|
|
|
'2112', '}', |
288
|
|
|
|
|
|
|
); |
289
|
|
|
|
|
|
|
|
290
|
2
|
|
|
|
|
10
|
$cs_init = 1; |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
###### |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
sub tocanon { |
296
|
84
|
|
|
84
|
|
191
|
local($_, $protect) = @_; |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# unprotect the TeX characters |
299
|
84
|
50
|
|
|
|
188
|
if ($protect) { |
300
|
|
|
|
|
|
|
# input is assumed to be in TeX format, before _any_ canon processing. |
301
|
|
|
|
|
|
|
# output is TeX format, but with raw magic characters. |
302
|
84
|
|
|
|
|
187
|
s/\$>\$/>/g; |
303
|
84
|
|
|
|
|
139
|
s/\$<\$/
|
304
|
84
|
|
|
|
|
145
|
s/\$\|\$/\|/g; |
305
|
84
|
|
|
|
|
114
|
s/\\_/_/g; |
306
|
84
|
|
|
|
|
129
|
s/\$\\rbrace\$/}/g; |
307
|
84
|
|
|
|
|
107
|
s/\$\\lbrace\$/{/g; |
308
|
84
|
|
|
|
|
119
|
s/\\\&/\&/g; |
309
|
84
|
|
|
|
|
113
|
s/\\\%/\%/g; |
310
|
84
|
|
|
|
|
105
|
s/\\\$/\$/g; |
311
|
84
|
|
|
|
|
176
|
s/\\#/#/g; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
84
|
100
|
|
|
|
218
|
if (/-/) { |
315
|
18
|
|
|
|
|
41
|
s/\$-\$/${bib'cs_ext}2212/go; |
316
|
18
|
|
|
|
|
34
|
s/\b---\b/${bib'cs_ext}2014/go; |
317
|
18
|
|
|
|
|
36
|
s/\b--\b/${bib'cs_ext}2013/go; |
318
|
|
|
|
|
|
|
# leave - |
319
|
|
|
|
|
|
|
} |
320
|
84
|
100
|
|
|
|
269
|
if (/~/) { |
321
|
8
|
|
|
|
|
35
|
1 while s/([^\\])~/$1\240/g; |
322
|
|
|
|
|
|
|
} |
323
|
84
|
|
|
|
|
122
|
s/\\ \\ /${bib'cs_ext}2003/go; |
324
|
84
|
|
|
|
|
123
|
s/\\ /${bib'cs_ext}2002/go; |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
# Can we go now? |
327
|
84
|
100
|
|
|
|
395
|
return $_ unless /\\/; |
328
|
|
|
|
|
|
|
|
329
|
38
|
100
|
|
|
|
97
|
&init_cs unless $cs_init; |
330
|
|
|
|
|
|
|
|
331
|
38
|
100
|
|
|
|
172
|
if (/\\[`'^"~vc][{ ]?[\w]/) { |
332
|
|
|
|
|
|
|
# ISO -- we try {\"{c}}, {\"c}, \"{c}, \"c |
333
|
|
|
|
|
|
|
# ^^^^^ |
334
|
|
|
|
|
|
|
# preferred |
335
|
|
|
|
|
|
|
# |
336
|
|
|
|
|
|
|
# XXXXX What do we do about all the other ways they can try? |
337
|
|
|
|
|
|
|
# mgnet.bib uses {\" u} a lot. (got this way now) |
338
|
|
|
|
|
|
|
|
339
|
30
|
|
|
|
|
128
|
while (/{\\([`'^"~vc])( ?)([\w])}/) { |
340
|
32
|
|
|
|
|
142
|
$can = $accent{$1 . $3}; |
341
|
32
|
|
|
|
|
112
|
$mine = "{\\$1$2$3}"; |
342
|
32
|
50
|
|
|
|
72
|
if (!defined $can) { |
343
|
0
|
|
|
|
|
0
|
&bib'gotwarn("Can't convert TeX '$mine' in $_ to canon"); |
344
|
0
|
|
|
|
|
0
|
$can = ''; |
345
|
|
|
|
|
|
|
} |
346
|
32
|
|
|
|
|
337
|
$mine =~ s/(\W)/\\$1/g; |
347
|
32
|
|
|
|
|
573
|
s/$mine/$can/g; |
348
|
|
|
|
|
|
|
} |
349
|
30
|
|
|
|
|
96
|
while (/{\\([`'^"~vc]){([\w])}}/) { |
350
|
2
|
|
|
|
|
11
|
$can = $accent{$1 . $2}; |
351
|
2
|
|
|
|
|
8
|
$mine = "{\\$1\{$2\}}"; |
352
|
2
|
50
|
|
|
|
11
|
if (!defined $can) { |
353
|
0
|
|
|
|
|
0
|
&bib'gotwarn("Can't convert TeX '$mine' in $_ to canon"); |
354
|
0
|
|
|
|
|
0
|
$can = ''; |
355
|
|
|
|
|
|
|
} |
356
|
2
|
|
|
|
|
18
|
$mine =~ s/(\W)/\\$1/g; |
357
|
2
|
|
|
|
|
25
|
s/$mine/$can/g; |
358
|
|
|
|
|
|
|
} |
359
|
30
|
|
|
|
|
93
|
while (/\\([`'^"~vc]){([\w])}/) { |
360
|
0
|
|
|
|
|
0
|
$can = $accent{$1 . $2}; |
361
|
0
|
|
|
|
|
0
|
$mine = "\\$1\{$2\}"; |
362
|
0
|
0
|
|
|
|
0
|
if (!defined $can) { |
363
|
0
|
|
|
|
|
0
|
&bib'gotwarn("Can't convert TeX '$mine' in $_ to canon"); |
364
|
0
|
|
|
|
|
0
|
$can = ''; |
365
|
|
|
|
|
|
|
} |
366
|
0
|
|
|
|
|
0
|
$mine =~ s/(\W)/\\$1/g; |
367
|
0
|
|
|
|
|
0
|
s/$mine/$can/g; |
368
|
|
|
|
|
|
|
} |
369
|
30
|
|
|
|
|
91
|
while (/\\([`'^"~])( ?)([\w])/) { |
370
|
0
|
|
|
|
|
0
|
$can = $accent{$1 . $3}; |
371
|
0
|
|
|
|
|
0
|
$mine = "\\$1$2$3"; |
372
|
0
|
0
|
|
|
|
0
|
if (!defined $can) { |
373
|
0
|
|
|
|
|
0
|
&bib'gotwarn("Can't convert TeX '$mine' in $_ to canon"); |
374
|
0
|
|
|
|
|
0
|
$can = ''; |
375
|
|
|
|
|
|
|
} |
376
|
0
|
|
|
|
|
0
|
$mine =~ s/(\W)/\\$1/g; |
377
|
0
|
|
|
|
|
0
|
s/$mine/$can/g; |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# This unfortunately matches \cr and \circ. We aren't doing a loop |
381
|
|
|
|
|
|
|
# any more, so it's not even necessary anymore. Let the standard |
382
|
|
|
|
|
|
|
# routine try to match and give the normal error message on failure. |
383
|
|
|
|
|
|
|
#while (s/(\\[`'^"~vc][{ ]?[\w])//) { |
384
|
|
|
|
|
|
|
# &bib'gotwarn("Couldn't parse TeX accented character: $1!"); |
385
|
|
|
|
|
|
|
#} |
386
|
|
|
|
|
|
|
|
387
|
30
|
100
|
|
|
|
231
|
return $_ unless /\\/; |
388
|
|
|
|
|
|
|
} # end of standard accented characters |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
# XXXXX What about the v, c, and other accents? Do we need another |
391
|
|
|
|
|
|
|
# section for those, or can we fit them in above? |
392
|
|
|
|
|
|
|
|
393
|
12
|
100
|
|
|
|
43
|
if (/leavevmode/) { |
394
|
2
|
|
|
|
|
417
|
eval $cmap_to_eval_1; |
395
|
|
|
|
|
|
|
} |
396
|
12
|
50
|
|
|
|
46
|
if (/\$/) { |
397
|
0
|
|
|
|
|
0
|
eval $cmap_to_eval_2; |
398
|
|
|
|
|
|
|
} |
399
|
12
|
|
|
|
|
6691
|
eval $cmap_to_eval; |
400
|
|
|
|
|
|
|
|
401
|
12
|
|
|
|
|
44
|
s/\\\^{}/\^/g; |
402
|
12
|
|
|
|
|
55
|
s/\\~{\s?}/~/g; |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
# hopefully we're done by now |
405
|
12
|
50
|
|
|
|
108
|
return $_ unless /\\/; |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
# font changes |
408
|
|
|
|
|
|
|
# This doesn't work all that well, but most bibliographies are simple |
409
|
0
|
|
|
|
|
|
s/\{\\rm ([^{}]*)\}/${bib'cs_meta}0101$1${bib'cs_meta}0110/g; |
410
|
0
|
|
|
|
|
|
s/\{\\it ([^{}]*)\}/${bib'cs_meta}0102$1${bib'cs_meta}0110/g; |
411
|
0
|
|
|
|
|
|
s/\{\\bf ([^{}]*)\}/${bib'cs_meta}0103$1${bib'cs_meta}0110/g; |
412
|
0
|
|
|
|
|
|
s/\{\\em ([^{}]*)\}/${bib'cs_meta}2102$1${bib'cs_meta}2112/g; |
413
|
0
|
0
|
|
|
|
|
$_ = &bib'font_check($_) if /${bib'cs_meta}01/o; |
414
|
|
|
|
|
|
|
# done with font changing |
415
|
|
|
|
|
|
|
|
416
|
0
|
0
|
|
|
|
|
return $_ unless /\\/; |
417
|
|
|
|
|
|
|
|
418
|
0
|
|
|
|
|
|
s/\$\\backslash\$/$bib'cs_temp/g; |
419
|
0
|
0
|
|
|
|
|
if (!/\\/) { |
420
|
0
|
|
|
|
|
|
s/$bib'cs_temp/\\/go; |
421
|
0
|
|
|
|
|
|
return $_; |
422
|
|
|
|
|
|
|
} |
423
|
0
|
|
|
|
|
|
s/$bib'cs_temp/\\/go; |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
# I give up. |
426
|
|
|
|
|
|
|
# XXXXX We really ought to remove the escape and meta characters we have |
427
|
|
|
|
|
|
|
# converted when we give them this warning. |
428
|
0
|
|
|
|
|
|
&bib'gotwarn("Unknown TeX characters in '$_'"); |
429
|
0
|
|
|
|
|
|
$_; |
430
|
|
|
|
|
|
|
} |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
###### |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
sub fromcanon { |
435
|
0
|
|
|
0
|
|
|
local($_, $protect) = @_; |
436
|
0
|
|
|
|
|
|
local($repl); |
437
|
|
|
|
|
|
|
# We no longer check for font matching here, as that should be done by a |
438
|
|
|
|
|
|
|
# call to bib'font_check in the tocanon code. |
439
|
|
|
|
|
|
|
|
440
|
0
|
0
|
|
|
|
|
if ($protect) { |
441
|
0
|
|
|
|
|
|
s/\\/$bib'cs_temp/go; |
442
|
0
|
|
|
|
|
|
s/#/\\#/g; |
443
|
0
|
|
|
|
|
|
s/\$/\\\$/g; |
444
|
0
|
|
|
|
|
|
s/\%/\\\%/g; |
445
|
0
|
|
|
|
|
|
s/\&/\\\&/g; |
446
|
0
|
|
|
|
|
|
s/{/\$\\lbrace\$/g; |
447
|
0
|
|
|
|
|
|
s/}/\$\\rbrace\$/g; |
448
|
0
|
|
|
|
|
|
s/_/\\_/g; |
449
|
0
|
|
|
|
|
|
s/\|/\$\|\$/g; |
450
|
0
|
|
|
|
|
|
s/>/\$>\$/g; |
451
|
0
|
|
|
|
|
|
s/\$<\$/g; |
452
|
0
|
|
|
|
|
|
s/\^/\\^{}/g; |
453
|
0
|
|
|
|
|
|
s/~/\\~{}/g; |
454
|
0
|
|
|
|
|
|
s/$bib'cs_temp/\$\\backslash\$/go; |
455
|
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
|
|
457
|
0
|
|
|
|
|
|
while (/([\200-\237])/) { |
458
|
0
|
|
|
|
|
|
$repl = $1; |
459
|
0
|
|
|
|
|
|
$unicode = &bib'canon_to_unicode($repl); |
460
|
0
|
|
|
|
|
|
&bib'gotwarn("Can't convert ".&bib'unicode_name($unicode)." to TeX"); |
461
|
0
|
|
|
|
|
|
s/$repl//g; |
462
|
|
|
|
|
|
|
} |
463
|
|
|
|
|
|
|
|
464
|
0
|
0
|
|
|
|
|
&init_cs unless $cs_init; |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
#if (/[\240-\377]/) { |
467
|
|
|
|
|
|
|
# eval $cmap_from8_eval; |
468
|
|
|
|
|
|
|
#} |
469
|
0
|
|
|
|
|
|
s/\240/~/g; |
470
|
0
|
|
|
|
|
|
s/\255/-/g; |
471
|
0
|
|
|
|
|
|
while (/([\240-\377])/) { |
472
|
0
|
|
|
|
|
|
$repl = $1; |
473
|
0
|
|
|
|
|
|
$unicode = &bib'canon_to_unicode($repl); |
474
|
0
|
|
|
|
|
|
s/$repl/$charmap{$unicode}/g; |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
# Maybe we can go now? |
478
|
0
|
0
|
|
|
|
|
return $_ unless /$bib'cs_escape/o; |
479
|
|
|
|
|
|
|
|
480
|
0
|
|
|
|
|
|
while (/${bib'cs_ext}(....)/) { |
481
|
0
|
|
|
|
|
|
$unicode = $1; |
482
|
0
|
0
|
|
|
|
|
if ($unicode =~ /^00[0-7]/) { |
483
|
0
|
|
|
|
|
|
1 while s/${bib'cs_ext}00([0-7].)/pack("C", hex($1))/ge; |
|
0
|
|
|
|
|
|
|
484
|
0
|
|
|
|
|
|
next; |
485
|
|
|
|
|
|
|
} |
486
|
0
|
0
|
0
|
|
|
|
defined $charmap{$unicode} && s/${bib'cs_ext}$unicode/$charmap{$unicode}/g |
487
|
|
|
|
|
|
|
&& next; |
488
|
0
|
0
|
0
|
|
|
|
defined $charmap2{$unicode} && s/${bib'cs_ext}$unicode/$charmap2{$unicode}/g |
489
|
|
|
|
|
|
|
&& next; |
490
|
|
|
|
|
|
|
|
491
|
0
|
|
|
|
|
|
$can = &bib'unicode_approx($unicode); |
492
|
0
|
0
|
0
|
|
|
|
defined $can && s/$bib'cs_ext$unicode/$can/g && next; |
493
|
|
|
|
|
|
|
|
494
|
0
|
|
|
|
|
|
&bib'gotwarn("Can't convert ".&bib'unicode_name($unicode)." to TeX"); |
495
|
0
|
|
|
|
|
|
s/${bib'cs_ext}$unicode//g; |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
|
498
|
0
|
|
|
|
|
|
while (/${bib'cs_meta}(....)/) { |
499
|
0
|
|
|
|
|
|
$repl = $1; |
500
|
0
|
0
|
0
|
|
|
|
defined $metamap{$repl} && s/${bib'cs_meta}$repl/$metamap{$repl}/g |
501
|
|
|
|
|
|
|
&& next; |
502
|
|
|
|
|
|
|
|
503
|
0
|
|
|
|
|
|
$can = &bib'meta_approx($repl); |
504
|
0
|
0
|
0
|
|
|
|
defined $can && s/$bib'cs_meta$repl/$can/g && next; |
505
|
|
|
|
|
|
|
|
506
|
0
|
|
|
|
|
|
&bib'gotwarn("Can't convert ".&bib'meta_name($repl)." to TeX"); |
507
|
0
|
|
|
|
|
|
s/${bib'cs_meta}$repl//g; |
508
|
|
|
|
|
|
|
} |
509
|
|
|
|
|
|
|
|
510
|
0
|
|
|
|
|
|
$_; |
511
|
|
|
|
|
|
|
} |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
###### |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
####################### |
517
|
|
|
|
|
|
|
# end of package |
518
|
|
|
|
|
|
|
####################### |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
1; |