line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
1
|
|
|
1
|
|
72220
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
28
|
|
2
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
41
|
|
3
|
|
|
|
|
|
|
package LaTeX::ToUnicode; |
4
|
|
|
|
|
|
|
BEGIN { |
5
|
1
|
|
|
1
|
|
75
|
$LaTeX::ToUnicode::VERSION = '0.11'; |
6
|
|
|
|
|
|
|
} |
7
|
|
|
|
|
|
|
#ABSTRACT: Convert LaTeX commands to Unicode (simplistically) |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
require Exporter; |
11
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
12
|
|
|
|
|
|
|
our @EXPORT_OK = qw( convert ); |
13
|
|
|
|
|
|
|
|
14
|
1
|
|
|
1
|
|
7
|
use utf8; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
5
|
|
15
|
1
|
|
|
1
|
|
520
|
use LaTeX::ToUnicode::Tables; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
842
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# Convert LaTeX markup in a string to plain Unicode text.
#
# Arguments: the string to convert, followed by optional key/value options.
# The only option consulted here is "german"; when it is true the German
# shorthand pass runs as well.
#
# Returns the converted string.
sub convert {
    my ( $string, %options ) = @_;

    # The passes run in exactly this order; each one takes the current
    # string and hands back the rewritten string.
    my @passes = (
        \&_convert_commands,
        \&_convert_accents,
        ( $options{german} ? ( \&_convert_german ) : () ),
        \&_convert_symbols,
        \&_convert_specials,
        \&_convert_ligatures,
        \&_convert_markups,
    );
    for my $pass (@passes) {
        $string = $pass->( $string );
    }

    # Last step: drop braces whose content is empty or word characters only.
    $string =~ s/{(\w*)}/$1/g;

    return $string;
}
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# Replace the LaTeX commands listed in %LaTeX::ToUnicode::Tables::COMMANDS
# with the corresponding table values.
#
# Two spellings are handled for every command NAME in the table:
#   {\NAME}  - the surrounding braces are consumed with the command;
#   \NAME    - only when followed by whitespace or a word boundary, so that
#              e.g. \NAMEfoo is left alone when NAME ends in a letter.
#
# Takes the string to process and returns the processed string.
sub _convert_commands {
    my $string = shift;

    my $table = \%LaTeX::ToUnicode::Tables::COMMANDS;

    # Sorted keys make the result reproducible regardless of hash ordering.
    foreach my $command ( sort keys %{$table} ) {
        my $replacement = $table->{$command};

        # \Q...\E: a command name is literal text, never a regex fragment
        # (same treatment the German and ligature passes give their keys).
        $string =~ s/\{\\\Q$command\E\}/$replacement/g;
        $string =~ s/\\\Q$command\E(?=\s|\b)/$replacement/g;
    }

    return $string;
}
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Replace accent commands with the characters found in the two-level table
# %LaTeX::ToUnicode::Tables::ACCENTS (accent character => base => result).
#
# Four spellings are recognised, tried in this order:
#   {\"{a}}   {\"a}   \"a   \"{a}
# The base is one or two word characters, optionally preceded by a
# backslash.  Anything without a table entry is left exactly as it was.
#
# Takes the string to process and returns the processed string.
sub _convert_accents {
    my $string = shift;

    my $accents = \%LaTeX::ToUnicode::Tables::ACCENTS;

    # Table lookup that never creates entries: a plain
    # $ACCENTS{$accent}{$base} would autovivify $ACCENTS{$accent} for every
    # backslash sequence scanned and so pollute the shared table.
    my $lookup = sub {
        my ( $accent, $base ) = @_;
        return unless exists $accents->{$accent};
        my $row = $accents->{$accent};
        return unless ref($row) eq 'HASH';
        return $row->{$base};
    };

    # Conversion for the bare spelling (\"a).  The pattern grabs up to two
    # word characters, so in Sch\"on the base arrives as "on" and has no
    # table entry.  When the accent is a non-letter (a TeX control symbol,
    # which always ends after one character) retry with the first letter
    # only and keep the second letter as ordinary text.  Letter accents
    # such as \v are not retried, because \vec is a different command.
    my $convert_bare = sub {
        my ( $whole, $accent, $base ) = @_;
        my $char = $lookup->( $accent, $base );
        if ( !$char && $accent =~ /\W/ && $base =~ /\A(\w)(\w)\z/ ) {
            my ( $first, $rest ) = ( $1, $2 );
            my $short = $lookup->( $accent, $first );
            $char = $short . $rest if $short;
        }
        return $char || $whole;
    };

    $string =~ s/(\{\\(.)\{(\\?\w{1,2})\}\})/$lookup->( $2, $3 ) || $1/eg;  # {\"{a}}
    $string =~ s/(\{\\(.)(\\?\w{1,2})\})/$lookup->( $2, $3 ) || $1/eg;       # {\"a}
    $string =~ s/(\\(.)(\\?\w{1,2}))/$convert_bare->( $1, $2, $3 )/eg;       # \"a
    $string =~ s/(\\(.)\{(\\?\w{1,2})\})/$lookup->( $2, $3 ) || $1/eg;       # \"{a}

    return $string;
}
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Apply the shorthand table %LaTeX::ToUnicode::Tables::GERMAN: every
# occurrence of a table key in the string is replaced, as literal text, by
# the key's value.
#
# Takes the string to process and returns the processed string.
sub _convert_german {
    my $string = shift;

    my $shorthands = \%LaTeX::ToUnicode::Tables::GERMAN;
    for my $shorthand ( keys %{$shorthands} ) {
        my $replacement = $shorthands->{$shorthand};
        $string =~ s/\Q$shorthand\E/$replacement/g;
    }

    return $string;
}
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# Replace the symbol commands listed in %LaTeX::ToUnicode::Tables::SYMBOLS
# with the corresponding table values.
#
# Two spellings are handled for every symbol NAME in the table:
#   {\NAME}  - the surrounding braces are consumed with the command;
#   \NAME    - only when a word boundary follows, so that e.g. \NAMEfoo is
#              left alone when NAME ends in a letter.
#
# Takes the string to process and returns the processed string.
sub _convert_symbols {
    my $string = shift;

    my $table = \%LaTeX::ToUnicode::Tables::SYMBOLS;

    # Sorted keys make the result reproducible regardless of hash ordering.
    foreach my $symbol ( sort keys %{$table} ) {
        my $replacement = $table->{$symbol};

        # \Q...\E: a symbol name is literal text, never a regex fragment;
        # the braces are escaped like in the command pass.
        $string =~ s/\{\\\Q$symbol\E\}/$replacement/g;
        $string =~ s/\\\Q$symbol\E\b/$replacement/g;
    }

    return $string;
}
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# Replace \<special> with <special> for every entry of
# @LaTeX::ToUnicode::Tables::SPECIALS, and \$ with $.
#
# Table entries are matched as literal text.  Undefined or empty entries
# are ignored: an empty alternative would match after every backslash and
# strip them all.
#
# Takes the string to process and returns the processed string.
sub _convert_specials {
    my $string = shift;

    # "$" is always unescaped, whether or not the table lists it.  Handling
    # it in the same single pass keeps an input such as \\$ from being
    # unescaped twice.
    my @literals = grep { defined($_) && length($_) }
        ( @LaTeX::ToUnicode::Tables::SPECIALS, '$' );

    # quotemeta: an unquoted "$" would be an end-of-string anchor (silently
    # deleting a trailing backslash) rather than a dollar sign.
    my $alternatives = join( '|', map { quotemeta($_) } @literals );

    $string =~ s/\\($alternatives)/$1/g;

    return $string;
}
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# Apply the ligature list @LaTeX::ToUnicode::Tables::LIGATURES, a flat list
# of (input, output) pairs.  Every occurrence of an input is replaced, as
# literal text, by its output.
#
# Takes the string to process and returns the processed string.
sub _convert_ligatures {
    my $string = shift;

    # The pairs have to be applied in the order the table gives them, so
    # consume a private copy of the list two items at a time.
    my @pending = @LaTeX::ToUnicode::Tables::LIGATURES;
    while (@pending) {
        my ( $ligature, $unicode ) = splice( @pending, 0, 2 );
        $string =~ s/\Q$ligature\E/$unicode/g;
    }

    return $string;
}
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# |
94
|
|
|
|
|
|
|
# Strip the font/markup commands named in @LaTeX::ToUnicode::Tables::MARKUPS
# from a string, keeping the text they apply to.
#
# Table entries are matched as literal command names.  With no usable
# entries the string is returned unchanged (an empty alternation would
# otherwise make the last pattern strip "{\" from any group).
#
# Takes the string to process and returns the processed string.
sub _convert_markups {
    my $string = shift;

    my @names = map { quotemeta($_) }
        grep { defined($_) && length($_) } @LaTeX::ToUnicode::Tables::MARKUPS;
    return $string unless @names;

    my $markups = join( '|', @names );

    # Remove \textMARKUP{...}, leaving just the {...}
    $string =~ s/\\text(?:$markups)\b\s*//g;

    # Remove \command (and the whitespace after it) in: {... \command ...}
    # The braces themselves are kept by this pass.
    $string =~ s/(\{[^{}]+)\\(?:$markups)\s+([^{}]+\})/$1$2/g;

    # Remove braces and \command in: {\command ...}
    $string =~ s/\{\\(?:$markups)\s+([^{}]*)\}/$1/g;

    # Remove: {\command
    # Although this will leave unmatched } chars behind, there's no
    # alternative without full parsing, since the bib entry will often
    # look like: {\em {The TeX{}book}}.  Also might, in principle, be
    # at the end of a line.
    $string =~ s/\{\\(?:$markups)\b\s*//g;

    # Ultimately we remove all braces in ltx2crossrefxml SanitizeText fns,
    # so the unmatched braces don't matter ... that code should be moved here.

    return $string;
}
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
1; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
__END__ |