line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# /=====================================================================\ # |
2
|
|
|
|
|
|
|
# | LaTeXML::Post | # |
3
|
|
|
|
|
|
|
# | PostProcessing driver | # |
4
|
|
|
|
|
|
|
# |=====================================================================| # |
5
|
|
|
|
|
|
|
# | Part of LaTeXML: | # |
6
|
|
|
|
|
|
|
# | Public domain software, produced as part of work done by the | # |
7
|
|
|
|
|
|
|
# | United States Government & not subject to copyright in the US. | # |
8
|
|
|
|
|
|
|
# |---------------------------------------------------------------------| # |
9
|
|
|
|
|
|
|
# | Bruce Miller #_# | # |
10
|
|
|
|
|
|
|
# | http://dlmf.nist.gov/LaTeXML/ (o o) | # |
11
|
|
|
|
|
|
|
# \=========================================================ooo==U==ooo=/ # |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
package LaTeXML::Post; |
14
|
1
|
|
|
1
|
|
3859
|
use strict; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
24
|
|
15
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
18
|
|
16
|
1
|
|
|
1
|
|
447
|
use Time::HiRes; |
|
1
|
|
|
|
|
953
|
|
|
1
|
|
|
|
|
4
|
|
17
|
1
|
|
|
1
|
|
394
|
use LaTeXML::Util::Radix; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
101
|
|
18
|
1
|
|
|
1
|
|
492
|
use Encode; |
|
1
|
|
|
|
|
7155
|
|
|
1
|
|
|
|
|
58
|
|
19
|
1
|
|
|
1
|
|
4
|
use base qw(Exporter); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
53
|
|
20
|
1
|
|
|
1
|
|
3
|
use base qw(LaTeXML::Common::Object); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
395
|
|
21
|
|
|
|
|
|
|
use LaTeXML::Global; |
22
|
|
|
|
|
|
|
use LaTeXML::Common::Error; |
23
|
|
|
|
|
|
|
use LaTeXML::Core::State; |
24
|
|
|
|
|
|
|
our @EXPORT = (@LaTeXML::Common::Error::EXPORT); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub new { |
27
|
|
|
|
|
|
|
my ($class, %options) = @_; |
28
|
|
|
|
|
|
|
my $self = bless { status => {}, %options }, $class; |
29
|
|
|
|
|
|
|
$$self{verbosity} = 0 unless defined $$self{verbosity}; |
30
|
|
|
|
|
|
|
# TEMPORARY HACK!!!! |
31
|
|
|
|
|
|
|
# Create a State object, essentially only to hold verbosity (for now) |
32
|
|
|
|
|
|
|
# so that Errors can be reported, managed and recorded |
33
|
|
|
|
|
|
|
# Eventually will be a "real" State (or other configuration object) |
34
|
|
|
|
|
|
|
$$self{state} = LaTeXML::Core::State->new(); |
35
|
|
|
|
|
|
|
$$self{state}->assignValue(VERBOSITY => $$self{verbosity}); |
36
|
|
|
|
|
|
|
return $self; } |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
#====================================================================== |
39
|
|
|
|
|
|
|
sub ProcessChain { |
40
|
|
|
|
|
|
|
my ($self, $doc, @postprocessors) = @_; |
41
|
|
|
|
|
|
|
return $self->withState(sub { |
42
|
|
|
|
|
|
|
return $self->ProcessChain_internal($doc, @postprocessors); }); } |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
sub ProcessChain_internal { |
45
|
|
|
|
|
|
|
my ($self, $doc, @postprocessors) = @_; |
46
|
|
|
|
|
|
|
local $LaTeXML::POST = $self; |
47
|
|
|
|
|
|
|
local $LaTeXML::Post::NOTEINFO = undef; |
48
|
|
|
|
|
|
|
local $LaTeXML::Post::DOCUMENT = $doc; |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
my @docs = ($doc); |
51
|
|
|
|
|
|
|
NoteBegin("post-processing"); |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
foreach my $processor (@postprocessors) { |
54
|
|
|
|
|
|
|
local $LaTeXML::Post::PROCESSOR = $processor; |
55
|
|
|
|
|
|
|
my @newdocs = (); |
56
|
|
|
|
|
|
|
foreach my $doc (@docs) { |
57
|
|
|
|
|
|
|
local $LaTeXML::Post::DOCUMENT = $doc; |
58
|
|
|
|
|
|
|
if (my @nodes = grep { $_ } $processor->toProcess($doc)) { # If there are nodes to process |
59
|
|
|
|
|
|
|
my $n = scalar(@nodes); |
60
|
|
|
|
|
|
|
my $msg = join(' ', $processor->getName || '', |
61
|
|
|
|
|
|
|
$doc->siteRelativeDestination || '', |
62
|
|
|
|
|
|
|
($n > 1 ? "$n to process" : 'processing')); |
63
|
|
|
|
|
|
|
NoteBegin($msg); |
64
|
|
|
|
|
|
|
push(@newdocs, $processor->process($doc, @nodes)); |
65
|
|
|
|
|
|
|
NoteEnd($msg); } |
66
|
|
|
|
|
|
|
else { |
67
|
|
|
|
|
|
|
push(@newdocs, $doc); } } |
68
|
|
|
|
|
|
|
@docs = @newdocs; } |
69
|
|
|
|
|
|
|
NoteEnd("post-processing"); |
70
|
|
|
|
|
|
|
return @docs; } |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
## HACK!!! |
73
|
|
|
|
|
|
|
## This is a copy of withState from LaTeXML::Core.pm |
74
|
|
|
|
|
|
|
## This should eventually be in a higher level, common class |
75
|
|
|
|
|
|
|
## using a common State or configuration object |
76
|
|
|
|
|
|
|
## in order to wrap ALL processing. |
77
|
|
|
|
|
|
|
sub withState { |
78
|
|
|
|
|
|
|
my ($self, $closure) = @_; |
79
|
|
|
|
|
|
|
local $STATE = $$self{state}; |
80
|
|
|
|
|
|
|
# And, set fancy error handler for ANY die! |
81
|
|
|
|
|
|
|
local $SIG{__DIE__} = \&LaTeXML::Common::Error::perl_die_handler; |
82
|
|
|
|
|
|
|
local $SIG{INT} = \&LaTeXML::Common::Error::perl_interrupt_handler; |
83
|
|
|
|
|
|
|
local $SIG{__WARN__} = \&LaTeXML::Common::Error::perl_warn_handler; |
84
|
|
|
|
|
|
|
local $SIG{'ALRM'} = \&LaTeXML::Common::Error::perl_timeout_handler; |
85
|
|
|
|
|
|
|
local $SIG{'TERM'} = \&LaTeXML::Common::Error::perl_terminate_handler; |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
local $LaTeXML::DUAL_BRANCH = ''; |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
return &$closure($STATE); } |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
sub getStatusCode { |
92
|
|
|
|
|
|
|
my ($self) = @_; |
93
|
|
|
|
|
|
|
return $$self{state}->getStatusCode; } |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
sub getStatusMessage { |
96
|
|
|
|
|
|
|
my ($self) = @_; |
97
|
|
|
|
|
|
|
return $$self{state}->getStatusMessage; } |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
#====================================================================== |
100
|
|
|
|
|
|
|
# "Global" Post processing services |
101
|
|
|
|
|
|
|
#====================================================================== |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# Return a sorter appropriate for lang (if Unicode::Collate::Locale available), |
104
|
|
|
|
|
|
|
# or an undifferentiated Unicode sorter (if only Unicode::Collate is available), |
105
|
|
|
|
|
|
|
# or just a dumb stand-in for perl's sort |
106
|
|
|
|
|
|
|
sub getsorter { |
107
|
|
|
|
|
|
|
my ($self, $lang) = @_; |
108
|
|
|
|
|
|
|
my $collator; |
109
|
|
|
|
|
|
|
if ($collator = $$self{collatorcache}{$lang}) { } |
110
|
|
|
|
|
|
|
elsif ($collator = eval { |
111
|
|
|
|
|
|
|
local $LaTeXML::IGNORE_ERRORS = 1; |
112
|
|
|
|
|
|
|
require 'Unicode/Collate/Locale.pm'; |
113
|
|
|
|
|
|
|
Unicode::Collate::Locale->new( |
114
|
|
|
|
|
|
|
locale => $lang, |
115
|
|
|
|
|
|
|
variable => 'non-ignorable', # I think; at least space shouldn't be ignored |
116
|
|
|
|
|
|
|
upper_before_lower => 1); }) { } |
117
|
|
|
|
|
|
|
elsif ($collator = eval { |
118
|
|
|
|
|
|
|
local $LaTeXML::IGNORE_ERRORS = 1; |
119
|
|
|
|
|
|
|
require 'Unicode/Collate.pm'; |
120
|
|
|
|
|
|
|
Unicode::Collate->new( |
121
|
|
|
|
|
|
|
variable => 'non-ignorable', # I think; at least space shouldn't be ignored |
122
|
|
|
|
|
|
|
upper_before_lower => 1); }) { |
123
|
|
|
|
|
|
|
Info('expected', 'Unicode::Collate::Locale', undef, |
124
|
|
|
|
|
|
|
"No Unicode::Collate::Locale found;", |
125
|
|
|
|
|
|
|
"using Unicode::Collate; ignoring language='$lang'"); } |
126
|
|
|
|
|
|
|
else { |
127
|
|
|
|
|
|
|
# Otherwise, just use primitive codepoint ordering. |
128
|
|
|
|
|
|
|
$collator = LaTeXML::Post::DumbCollator->new(); |
129
|
|
|
|
|
|
|
Info('expected', 'Unicode::Collate::Locale', undef, |
130
|
|
|
|
|
|
|
"No Unicode::Collate::Locale or Unicode::Collate", |
131
|
|
|
|
|
|
|
"using perl's sort; ignoring language='$lang'"); } |
132
|
|
|
|
|
|
|
$$self{collatorcache}{$lang} = $collator; |
133
|
|
|
|
|
|
|
return $collator; } |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
136
|
|
|
|
|
|
|
package LaTeXML::Post::DumbCollator; |
137
|
|
|
|
|
|
|
use strict; |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub new { |
140
|
|
|
|
|
|
|
my ($class) = @_; |
141
|
|
|
|
|
|
|
return bless {}, $class; } |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub sort { |
144
|
|
|
|
|
|
|
my ($self, @things) = @_; |
145
|
|
|
|
|
|
|
return (sort @things); } |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
148
|
|
|
|
|
|
|
package LaTeXML::Post::Processor; |
149
|
|
|
|
|
|
|
use strict; |
150
|
|
|
|
|
|
|
use LaTeXML::Post; |
151
|
|
|
|
|
|
|
use LaTeXML::Common::Error; |
152
|
|
|
|
|
|
|
use LaTeXML::Common::XML; |
153
|
|
|
|
|
|
|
use LaTeXML::Util::Pathname; |
154
|
|
|
|
|
|
|
use base qw(LaTeXML::Common::Object); |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# An Abstract Post Processor |
157
|
|
|
|
|
|
|
sub new { |
158
|
|
|
|
|
|
|
my ($class, %options) = @_; |
159
|
|
|
|
|
|
|
my $self = bless {%options}, $class; |
160
|
|
|
|
|
|
|
$$self{verbosity} = 0 unless defined $$self{verbosity}; |
161
|
|
|
|
|
|
|
$$self{resource_directory} = $options{resource_directory}; |
162
|
|
|
|
|
|
|
$$self{resource_prefix} = $options{resource_prefix}; |
163
|
|
|
|
|
|
|
my $name = $class; $name =~ s/^LaTeXML::Post:://; |
164
|
|
|
|
|
|
|
$$self{name} = $name; |
165
|
|
|
|
|
|
|
return $self; } |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
sub getName { |
168
|
|
|
|
|
|
|
my ($self) = @_; |
169
|
|
|
|
|
|
|
return $$self{name}; } |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# Return the nodes to be processed; by default the document element. |
172
|
|
|
|
|
|
|
# This allows processors to focus on specific kinds of nodes, |
173
|
|
|
|
|
|
|
# or to skip processing if there are none to process. |
174
|
|
|
|
|
|
|
sub toProcess { |
175
|
|
|
|
|
|
|
my ($self, $doc) = @_; |
176
|
|
|
|
|
|
|
return $doc->getDocumentElement; } |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# This must be defined to do whatever processing is needed to @toprocess nodes. |
179
|
|
|
|
|
|
|
sub process { |
180
|
|
|
|
|
|
|
my ($self, $doc, @toprocess) = @_; |
181
|
|
|
|
|
|
|
Fatal("misdefined", $self, $doc, "This post-processor is abstract; does not implement ->process"); |
182
|
|
|
|
|
|
|
return $doc; } |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
#====================================================================== |
185
|
|
|
|
|
|
|
# Some postprocessors will want to create a bunch of "resource"s, |
186
|
|
|
|
|
|
|
# such as generated or transformed image files, or other data files. |
187
|
|
|
|
|
|
|
# These should return a pathname, relative to the document's destination, |
188
|
|
|
|
|
|
|
# for storing a resource associated with $node. |
189
|
|
|
|
|
|
|
# Will use the Post option resource_directory |
190
|
|
|
|
|
|
|
sub desiredResourcePathname { |
191
|
|
|
|
|
|
|
my ($self, $doc, $node, $source, $type) = @_; |
192
|
|
|
|
|
|
|
return; } |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# Ideally this would return a pathname relative to the document |
195
|
|
|
|
|
|
|
# but I think we've accommodated absolute ones. |
196
|
|
|
|
|
|
|
sub generateResourcePathname { |
197
|
|
|
|
|
|
|
my ($self, $doc, $node, $source, $type) = @_; |
198
|
|
|
|
|
|
|
my $subdir = $$self{resource_directory} || ''; |
199
|
|
|
|
|
|
|
my $prefix = $$self{resource_prefix} || "x"; |
200
|
|
|
|
|
|
|
my $counter = join('_', "_max", $subdir, $prefix, "counter_"); |
201
|
|
|
|
|
|
|
my $n = $doc->cacheLookup($counter) || 0; |
202
|
|
|
|
|
|
|
my $name = $prefix . ++$n; |
203
|
|
|
|
|
|
|
$doc->cacheStore($counter, $n); |
204
|
|
|
|
|
|
|
return pathname_make(dir => $subdir, name => $name, type => $type); } |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
207
|
|
|
|
|
|
|
package LaTeXML::Post::MathProcessor; |
208
|
|
|
|
|
|
|
use strict; |
209
|
|
|
|
|
|
|
use LaTeXML::Post; |
210
|
|
|
|
|
|
|
use LaTeXML::Common::Error; |
211
|
|
|
|
|
|
|
use base qw(LaTeXML::Post::Processor); |
212
|
|
|
|
|
|
|
use LaTeXML::Common::XML; |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
# This is an abstract class; A complete MathProcessor will need to define: |
215
|
|
|
|
|
|
|
# $self->convertNode($doc,$xmath) |
216
|
|
|
|
|
|
|
# to generate the converted math node |
217
|
|
|
|
|
|
|
# $self->combineParallel($doc,$math,$primary,@secondaries) |
218
|
|
|
|
|
|
|
# to combine the $primary (the result of $self's conversion of $math) |
219
|
|
|
|
|
|
|
# with the results of other math processors to create the |
220
|
|
|
|
|
|
|
# parallel markup appropriate for this processor's markup. |
221
|
|
|
|
|
|
|
# $self->rawIDSuffix returns a short string to append to id's for nodes |
222
|
|
|
|
|
|
|
# using this markup. |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# Top level processing finds and converts all top-level math nodes. |
225
|
|
|
|
|
|
|
# Any nested mathnodes would only appear within ltx:XMText; |
226
|
|
|
|
|
|
|
# postprocessors must handle nested math as appropriate (but see convertXMTextContent) |
227
|
|
|
|
|
|
|
# Invokes preprocess on each before doing the conversion in case |
228
|
|
|
|
|
|
|
# analysis is needed. |
229
|
|
|
|
|
|
|
sub toProcess { |
230
|
|
|
|
|
|
|
my ($self, $doc) = @_; |
231
|
|
|
|
|
|
|
return $doc->findnodes('//ltx:Math[not(ancestor::ltx:Math)]'); } |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
sub process { |
234
|
|
|
|
|
|
|
my ($self, $doc, @maths) = @_; |
235
|
|
|
|
|
|
|
local $LaTeXML::Post::MATHPROCESSOR = $self; |
236
|
|
|
|
|
|
|
$doc->markXMNodeVisibility; |
237
|
|
|
|
|
|
|
$self->preprocess($doc, @maths); |
238
|
|
|
|
|
|
|
if ($$self{parallel}) { |
239
|
|
|
|
|
|
|
my @secondaries = @{ $$self{secondary_processors} }; |
240
|
|
|
|
|
|
|
# What's the right test for when cross-referencing should be done? |
241
|
|
|
|
|
|
|
# For now: only when the primary and some secondary can cross-ref |
242
|
|
|
|
|
|
|
# (otherwise, end up with peculiar structures?) |
243
|
|
|
|
|
|
|
my ($proc1, @ignore) = grep { $_->can('addCrossref') } @secondaries; |
244
|
|
|
|
|
|
|
if ($self->can('addCrossref') && $proc1) { |
245
|
|
|
|
|
|
|
$$self{crossreferencing} = 1; # We'll need ID's! |
246
|
|
|
|
|
|
|
$$proc1{crossreferencing} = 1; } |
247
|
|
|
|
|
|
|
foreach my $proc (@secondaries) { |
248
|
|
|
|
|
|
|
local $LaTeXML::Post::MATHPROCESSOR = $proc; |
249
|
|
|
|
|
|
|
$proc->preprocess($doc, @maths); } } |
250
|
|
|
|
|
|
|
# Re-Fetch the math nodes, in case preprocessing has messed them up!!! |
251
|
|
|
|
|
|
|
@maths = $self->toProcess($doc); |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
## Do in reverse, since (in LaTeXML) we allow math nested within text within math. |
254
|
|
|
|
|
|
|
## So, we want to converted any nested expressions first, so they get carried along |
255
|
|
|
|
|
|
|
## with the outer ones. |
256
|
|
|
|
|
|
|
my $n = 0; |
257
|
|
|
|
|
|
|
foreach my $math (reverse(@maths)) { |
258
|
|
|
|
|
|
|
# If parent is MathBranch, which branch number is it? |
259
|
|
|
|
|
|
|
# (note: the MathBranch will be in a ltx:MathFork, with a ltx:Math being 1st child) |
260
|
|
|
|
|
|
|
my @preceding = $doc->findnodes("parent::ltx:MathBranch/preceding-sibling::*", $math); |
261
|
|
|
|
|
|
|
local $LaTeXML::Post::MathProcessor::FORK = scalar(@preceding); |
262
|
|
|
|
|
|
|
$self->processNode($doc, $math); |
263
|
|
|
|
|
|
|
$n++; } |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
# Experimentally, cross reference ??? (or clearer name?) |
266
|
|
|
|
|
|
|
if ($$self{parallel}) { |
267
|
|
|
|
|
|
|
# There could be various strategies when there are more than 2 parallel conversions, |
268
|
|
|
|
|
|
|
# eg a cycle or something.... |
269
|
|
|
|
|
|
|
# Here, we simply take the first two processors that know how to addCrossref |
270
|
|
|
|
|
|
|
# and connect their nodes to each other. |
271
|
|
|
|
|
|
|
my ($proc1, $proc2, @ignore) |
272
|
|
|
|
|
|
|
= grep { $_->can('addCrossref') } $self, @{ $$self{secondary_processors} }; |
273
|
|
|
|
|
|
|
if ($proc1 && $proc2) { |
274
|
|
|
|
|
|
|
# First, prepare a list of all Math id's, in document order, to simplify crossreferencing |
275
|
|
|
|
|
|
|
my $ids = {}; |
276
|
|
|
|
|
|
|
my $pos = 0; |
277
|
|
|
|
|
|
|
foreach my $n ($doc->findnodes('descendant-or-self::ltx:Math/descendant::*[@xml:id]')) { |
278
|
|
|
|
|
|
|
$$ids{ $n->getAttribute('xml:id') } = $pos++; } |
279
|
|
|
|
|
|
|
$$proc1{crossreferencing_ids} = $ids; |
280
|
|
|
|
|
|
|
$$proc2{crossreferencing_ids} = $ids; |
281
|
|
|
|
|
|
|
# Now do cross referencing |
282
|
|
|
|
|
|
|
$proc1->addCrossrefs($doc, $proc2); |
283
|
|
|
|
|
|
|
$proc2->addCrossrefs($doc, $proc1); } } |
284
|
|
|
|
|
|
|
NoteProgressDetailed(" [converted $n Maths]"); |
285
|
|
|
|
|
|
|
return $doc; } |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
# Make THIS MathProcessor the primary branch (of whatever parallel markup it supports), |
288
|
|
|
|
|
|
|
# and make all of the @moreprocessors be secondary ones. |
289
|
|
|
|
|
|
|
sub setParallel { |
290
|
|
|
|
|
|
|
my ($self, @moreprocessors) = @_; |
291
|
|
|
|
|
|
|
if (@moreprocessors) { |
292
|
|
|
|
|
|
|
$$self{parallel} = 1; |
293
|
|
|
|
|
|
|
map { $$_{is_secondary} = 1 } @moreprocessors; # Mark the others as secondary |
294
|
|
|
|
|
|
|
$$self{secondary_processors} = [@moreprocessors]; |
295
|
|
|
|
|
|
|
$$self{name} .= '[w/' . join('+', map { $_->getName } @moreprocessors) . ']'; } |
296
|
|
|
|
|
|
|
else { |
297
|
|
|
|
|
|
|
$$self{parallel} = 0; } |
298
|
|
|
|
|
|
|
return; } |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
# Optional; if you want to do anything before translation |
301
|
|
|
|
|
|
|
sub preprocess { |
302
|
|
|
|
|
|
|
my ($self, $doc, @nodes) = @_; |
303
|
|
|
|
|
|
|
return; } |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
# $self->processNode($doc,$mathnode) is the top-level conversion |
306
|
|
|
|
|
|
|
# It converts the XMath within $mathnode, and adds it to the $mathnode, |
307
|
|
|
|
|
|
|
# This invokes $self->convertNode($doc,$xmath) to get the conversion. |
308
|
|
|
|
|
|
|
sub processNode { |
309
|
|
|
|
|
|
|
my ($self, $doc, $math) = @_; |
310
|
|
|
|
|
|
|
my $xmath = $doc->findnode('ltx:XMath', $math); |
311
|
|
|
|
|
|
|
return unless $xmath; # Nothing to convert if there's no XMath ... ! |
312
|
|
|
|
|
|
|
local $LaTeXML::Post::MATHPROCESSOR = $self; |
313
|
|
|
|
|
|
|
my $conversion; |
314
|
|
|
|
|
|
|
# XMath will be removed (LATER!), but mark its ids as reusable. |
315
|
|
|
|
|
|
|
$doc->preremoveNodes($xmath); |
316
|
|
|
|
|
|
|
if ($$self{parallel}) { |
317
|
|
|
|
|
|
|
my $primary = $self->convertNode($doc, $xmath); |
318
|
|
|
|
|
|
|
my @secondaries = (); |
319
|
|
|
|
|
|
|
foreach my $proc (@{ $$self{secondary_processors} }) { |
320
|
|
|
|
|
|
|
local $LaTeXML::Post::MATHPROCESSOR = $proc; |
321
|
|
|
|
|
|
|
my $secondary = $proc->convertNode($doc, $xmath); |
322
|
|
|
|
|
|
|
# IF it is (first) image, copy image attributes to ltx:Math ??? |
323
|
|
|
|
|
|
|
$self->maybeSetMathImage($math, $secondary); |
324
|
|
|
|
|
|
|
push(@secondaries, $secondary); } |
325
|
|
|
|
|
|
|
$conversion = $self->combineParallel($doc, $xmath, $primary, @secondaries); } |
326
|
|
|
|
|
|
|
else { |
327
|
|
|
|
|
|
|
$conversion = $self->convertNode($doc, $xmath); |
328
|
|
|
|
|
|
|
$self->maybeSetMathImage($math, $conversion); } |
329
|
|
|
|
|
|
|
# we now REMOVE the ltx:XMath from the ltx:Math, and whitespace |
330
|
|
|
|
|
|
|
# (if there's an XMath PostProcessing module, it will add it back, with appropriate id's) |
331
|
|
|
|
|
|
|
if (my $xml = $$conversion{xml}) { |
332
|
|
|
|
|
|
|
$$conversion{xml} = $self->outerWrapper($doc, $xmath, $xml); } |
333
|
|
|
|
|
|
|
$doc->removeNodes($xmath); |
334
|
|
|
|
|
|
|
# NOTE: Unless XMath is the primary, (preserving the XMath, w/no IDSuffix) |
335
|
|
|
|
|
|
|
# we've got to remove the id's from the XMath, since the primary will get same id's |
336
|
|
|
|
|
|
|
# and (some versions) of libxml2 complain! |
337
|
|
|
|
|
|
|
if ($$conversion{mimetype} && ($$conversion{mimetype} ne 'application/x-latexml')) { |
338
|
|
|
|
|
|
|
map { $_->removeAttribute('xml:id') } |
339
|
|
|
|
|
|
|
$doc->findnodes('descendant-or-self::*[@xml:id]', $xmath); } |
340
|
|
|
|
|
|
|
$doc->removeBlankNodes($math); |
341
|
|
|
|
|
|
|
if (my $new = $$conversion{xml}) { |
342
|
|
|
|
|
|
|
$doc->addNodes($math, $new); } |
343
|
|
|
|
|
|
|
# else ? |
344
|
|
|
|
|
|
|
return; } |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
sub maybeSetMathImage { |
347
|
|
|
|
|
|
|
my ($self, $math, $conversion) = @_; |
348
|
|
|
|
|
|
|
if ((($$conversion{mimetype} || '') =~ /^image\//) # Got an image? |
349
|
|
|
|
|
|
|
&& !$math->getAttribute('imagesrc')) { # and it's the first one |
350
|
|
|
|
|
|
|
if (my $src = $$conversion{src}) { |
351
|
|
|
|
|
|
|
$math->setAttribute(imagesrc => $src); |
352
|
|
|
|
|
|
|
$math->setAttribute(imagewidth => $$conversion{width}); |
353
|
|
|
|
|
|
|
$math->setAttribute(imageheight => $$conversion{height}); |
354
|
|
|
|
|
|
|
$math->setAttribute(imagedepth => $$conversion{depth}); } } |
355
|
|
|
|
|
|
|
return; } |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
# NOTE: Sort out how parallel & outerWrapper should work. |
358
|
|
|
|
|
|
|
# It probably ought to be that if the conversion is being embedded in |
359
|
|
|
|
|
|
|
# something from another namespace, it needs the wrapper. |
360
|
|
|
|
|
|
|
# ie. when mixing parallel markups, NOT just at the top level, although certainly there too. |
361
|
|
|
|
|
|
|
# |
362
|
|
|
|
|
|
|
# This should wrap the resulting conversion with m:math or om:OMA or whatever appropriate? |
363
|
|
|
|
|
|
|
sub outerWrapper { |
364
|
|
|
|
|
|
|
my ($self, $doc, $xmath, $conversion) = @_; |
365
|
|
|
|
|
|
|
return $conversion; } |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
# This should proably be from the core of the current ->processNode |
368
|
|
|
|
|
|
|
sub convertNode { |
369
|
|
|
|
|
|
|
my ($self, $doc, $node) = @_; |
370
|
|
|
|
|
|
|
Fatal('misdefined', (ref $self), undef, |
371
|
|
|
|
|
|
|
"Abstract package: math conversion has not been defined for this MathProcessor"); |
372
|
|
|
|
|
|
|
return; } |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
# This should be implemented by potential Primaries |
375
|
|
|
|
|
|
|
# Maybe the caller of this should check the namespaces, and call wrapper if needed? |
376
|
|
|
|
|
|
|
sub combineParallel { |
377
|
|
|
|
|
|
|
my ($self, $doc, $xmath, $primary, @secondaries) = @_; |
378
|
|
|
|
|
|
|
LaTeXML::Post::Error('misdefined', (ref $self), undef, |
379
|
|
|
|
|
|
|
"Abstract package: combining parallel markup has not been defined for this MathProcessor", |
380
|
|
|
|
|
|
|
"dropping the extra markup from: " . join(',', map { $$_{processor} } @secondaries)); |
381
|
|
|
|
|
|
|
return $primary; } |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
# A helper for converting XMText |
384
|
|
|
|
|
|
|
# ltx:XMText escapes back to general ltx markup; the only element within XMath that does. |
385
|
|
|
|
|
|
|
# BUT it can contain nested ltx:Math! |
386
|
|
|
|
|
|
|
# When converting to (potentially parallel markup, coarse-grained), |
387
|
|
|
|
|
|
|
# the non-math needs to be duplicated, but with the ID's modified, |
388
|
|
|
|
|
|
|
# AND the nested math needs to be converted to ONLY the current target's markup |
389
|
|
|
|
|
|
|
# NOT parallel within each nested math, although it should still be cross-referencable to others! |
390
|
|
|
|
|
|
|
# moreover, the math will need the outerWrapper. |
391
|
|
|
|
|
|
|
my $NBSP = pack('U', 0xA0); # CONSTANT |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
sub convertXMTextContent { |
394
|
|
|
|
|
|
|
my ($self, $doc, $convertspaces, @nodes) = @_; |
395
|
|
|
|
|
|
|
my @result = (); |
396
|
|
|
|
|
|
|
foreach my $node (@nodes) { |
397
|
|
|
|
|
|
|
if ($node->nodeType == XML_TEXT_NODE) { |
398
|
|
|
|
|
|
|
my $string = $node->textContent; |
399
|
|
|
|
|
|
|
if ($convertspaces) { |
400
|
|
|
|
|
|
|
$string =~ s/^\s+/$NBSP/; $string =~ s/\s+$/$NBSP/; } |
401
|
|
|
|
|
|
|
push(@result, $string); } |
402
|
|
|
|
|
|
|
else { |
403
|
|
|
|
|
|
|
my $tag = $doc->getQName($node); |
404
|
|
|
|
|
|
|
if ($tag eq 'ltx:XMath') { |
405
|
|
|
|
|
|
|
my $conversion = $self->convertNode($doc, $node); |
406
|
|
|
|
|
|
|
my $xml = $$conversion{xml}; |
407
|
|
|
|
|
|
|
# And if no xml ???? |
408
|
|
|
|
|
|
|
push(@result, $self->outerWrapper($doc, $node, $xml)); } |
409
|
|
|
|
|
|
|
else { |
410
|
|
|
|
|
|
|
my %attr = (); |
411
|
|
|
|
|
|
|
foreach my $attr ($node->attributes) { |
412
|
|
|
|
|
|
|
my $atype = $attr->nodeType; |
413
|
|
|
|
|
|
|
if ($atype == XML_ATTRIBUTE_NODE) { |
414
|
|
|
|
|
|
|
my $key = $attr->nodeName; |
415
|
|
|
|
|
|
|
my $value = $attr->getValue; |
416
|
|
|
|
|
|
|
if ($key =~ /^_/) { } # don't copy internal attributes ??? |
417
|
|
|
|
|
|
|
elsif ($key eq 'xml:id') { } # ignore; we'll handle fragid??? |
418
|
|
|
|
|
|
|
elsif ($key eq 'fragid') { |
419
|
|
|
|
|
|
|
my $id = $doc->uniquifyID($value, $self->IDSuffix); |
420
|
|
|
|
|
|
|
$attr{'xml:id'} = $id; } |
421
|
|
|
|
|
|
|
else { |
422
|
|
|
|
|
|
|
$attr{$key} = $attr->value; } } } |
423
|
|
|
|
|
|
|
# Probably should invoke associateNode ??? |
424
|
|
|
|
|
|
|
push(@result, |
425
|
|
|
|
|
|
|
[$tag, {%attr}, $self->convertXMTextContent($doc, $convertspaces, $node->childNodes)]); } } } |
426
|
|
|
|
|
|
|
return @result; } |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
# When converting an XMath node (with an id) to some other format, |
429
|
|
|
|
|
|
|
# we will generate an id for the new node. |
430
|
|
|
|
|
|
|
# This method returns a suffix to be added to the XMath id. |
431
|
|
|
|
|
|
|
# The primary format gets the id's unchanged, but secondary ones get a suffix (eg. ".pmml") |
432
|
|
|
|
|
|
|
sub IDSuffix { |
433
|
|
|
|
|
|
|
my ($self) = @_; |
434
|
|
|
|
|
|
|
return ($$self{is_secondary} ? $self->rawIDSuffix : ''); } |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
sub rawIDSuffix { |
437
|
|
|
|
|
|
|
return ''; } |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
# In order to do cross-referencing betweeen formats, and to relate semantic/presentation |
440
|
|
|
|
|
|
|
# information to content/presentation nodes (resp), we want to associate each |
441
|
|
|
|
|
|
|
# generated node (MathML, OpenMath,...) with a "source" XMath node "responsible" for it's generation. |
442
|
|
|
|
|
|
|
# This is often the "current" XMath node that was being converted, but sometimes |
443
|
|
|
|
|
|
|
# * the containing XMDual (which makes more sense when we've generated a "container") |
444
|
|
|
|
|
|
|
# * the containing XMDual's semantic operator (makes more sense when we're generating |
445
|
|
|
|
|
|
|
# tokens that are only visible from the presentation branch) |
446
|
|
|
|
|
|
|
sub associateNode { |
447
|
|
|
|
|
|
|
my ($self, $node, $currentnode, $noxref) = @_; |
448
|
|
|
|
|
|
|
my $r = ref $node; |
449
|
|
|
|
|
|
|
return unless $currentnode && $r && ($r eq 'ARRAY' || $r eq 'XML::LibXML::Element'); |
450
|
|
|
|
|
|
|
my $document = $LaTeXML::Post::DOCUMENT; |
451
|
|
|
|
|
|
|
# Check if already associated with a source node |
452
|
|
|
|
|
|
|
my $isarray = ref $node eq 'ARRAY'; |
453
|
|
|
|
|
|
|
# What kind of branch are we generating? |
454
|
|
|
|
|
|
|
my $ispresentation = $self->rawIDSuffix eq '.pmml'; # TEMPORARY HACK: BAAAAD method! |
455
|
|
|
|
|
|
|
my $iscontainer = 0; |
456
|
|
|
|
|
|
|
if ($isarray) { |
457
|
|
|
|
|
|
|
return if $$node[1]{'_sourced'}; |
458
|
|
|
|
|
|
|
$$node[1]{'_sourced'} = 1; |
459
|
|
|
|
|
|
|
my ($tag, $attr, @children) = @$node; |
460
|
|
|
|
|
|
|
$iscontainer = grep { ref $_ } @children; } |
461
|
|
|
|
|
|
|
else { |
462
|
|
|
|
|
|
|
return if $node->getAttribute('_sourced'); |
463
|
|
|
|
|
|
|
$node->setAttribute('_sourced' => 1); |
464
|
|
|
|
|
|
|
$iscontainer = scalar(element_nodes($node)); } |
465
|
|
|
|
|
|
|
my $sourcenode = $currentnode; |
466
|
|
|
|
|
|
|
# If the generated node is a "container" (non-token!), use the container as source |
467
|
|
|
|
|
|
|
if ($iscontainer) { |
468
|
|
|
|
|
|
|
if (my $container = $document->findnode('ancestor-or-self::ltx:XMDual[1]', $sourcenode)) { |
469
|
|
|
|
|
|
|
$sourcenode = $container; } } |
470
|
|
|
|
|
|
|
# If the current node is appropriately visible, use it. |
471
|
|
|
|
|
|
|
elsif ($currentnode->getAttribute(($ispresentation ? '_cvis' : '_pvis'))) { } |
472
|
|
|
|
|
|
|
# Else (current node isn't visible); try to find content OPERATOR |
473
|
|
|
|
|
|
|
elsif (my $container = $document->findnode('ancestor-or-self::ltx:XMDual[1]', $sourcenode)) { |
474
|
|
|
|
|
|
|
my ($op) = element_nodes($container); |
475
|
|
|
|
|
|
|
my $q = $document->getQName($op) || 'unknown'; |
476
|
|
|
|
|
|
|
if ($q eq 'ltx:XMTok') { } |
477
|
|
|
|
|
|
|
elsif ($q eq 'ltx:XMApp') { |
478
|
|
|
|
|
|
|
($op) = element_nodes($op); |
479
|
|
|
|
|
|
|
if ($document->getQName($op) eq 'ltx:XMRef') { |
480
|
|
|
|
|
|
|
$op = $document->realizeXMNode($op); } } |
481
|
|
|
|
|
|
|
if ($op && !$op->getAttribute('_pvis')) { |
482
|
|
|
|
|
|
|
$sourcenode = $op; } |
483
|
|
|
|
|
|
|
else { |
484
|
|
|
|
|
|
|
$sourcenode = $container; } } |
485
|
|
|
|
|
|
|
# If we're intending to cross-reference, then source & generated nodes will need ID's |
486
|
|
|
|
|
|
|
if ($$self{crossreferencing}) { |
487
|
|
|
|
|
|
|
if (!$noxref && !$sourcenode->getAttribute('fragid')) { # If no ID, but need one |
488
|
|
|
|
|
|
|
$document->generateNodeID($sourcenode, '', 1); } # but the ID is reusable |
489
|
|
|
|
|
|
|
if (my $sourceid = $sourcenode->getAttribute('fragid')) { # If source has ID |
490
|
|
|
|
|
|
|
my $nodeid = $currentnode->getAttribute('fragid') || $sourceid; |
491
|
|
|
|
|
|
|
my $id = $document->uniquifyID($nodeid, $self->IDSuffix); |
492
|
|
|
|
|
|
|
if ($isarray) { |
493
|
|
|
|
|
|
|
$$node[1]{'xml:id'} = $id; } |
494
|
|
|
|
|
|
|
else { |
495
|
|
|
|
|
|
|
$node->setAttribute('xml:id' => $id); } |
496
|
|
|
|
|
|
|
push(@{ $$self{convertedIDs}{$sourceid} }, $id) unless $noxref; } } |
497
|
|
|
|
|
|
|
$self->associateNodeHook($node, $sourcenode, $noxref); |
498
|
|
|
|
|
|
|
if ($isarray) { # Array represented |
499
|
|
|
|
|
|
|
map { $self->associateNode($_, $currentnode, $noxref) } @$node[2 .. $#$node]; } |
500
|
|
|
|
|
|
|
else { # LibXML node |
501
|
|
|
|
|
|
|
map { $self->associateNode($_, $currentnode, $noxref) } element_nodes($node); } |
502
|
|
|
|
|
|
|
return; } |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
# Customization hook for adding other attributes to the generated math nodes. |
505
|
|
|
|
|
|
|
sub associateNodeHook { |
506
|
|
|
|
|
|
|
my ($self, $node, $sourcenode, $noxref) = @_; |
507
|
|
|
|
|
|
|
return; } |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
sub shownode { |
510
|
|
|
|
|
|
|
my ($node, $level) = @_; |
511
|
|
|
|
|
|
|
$level = 0 unless defined $level; |
512
|
|
|
|
|
|
|
my $ref = ref $node; |
513
|
|
|
|
|
|
|
if ($ref eq 'ARRAY') { |
514
|
|
|
|
|
|
|
my ($tag, $attr, @children) = @$node; |
515
|
|
|
|
|
|
|
return "\n" . (' ' x $level) |
516
|
|
|
|
|
|
|
. '[' . $tag . ',{' . join(',', map { $_ . '=>' . $$attr{$_} } sort keys %$attr) . '},' |
517
|
|
|
|
|
|
|
. join(',', map { shownode($_, $level + 1) } @children) . ']'; } |
518
|
|
|
|
|
|
|
elsif ($ref =~ /^XML/) { |
519
|
|
|
|
|
|
|
return $node->toString; } |
520
|
|
|
|
|
|
|
else { |
521
|
|
|
|
|
|
|
return "$node"; } } |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
# Add backref linkages (eg. xref) onto the nodes that $self created (converted from XMath) |
524
|
|
|
|
|
|
|
# to reference those that $otherprocessor created. |
525
|
|
|
|
|
|
|
# NOTE: Subclass MUST define addCrossref($node,$xref_id) to add the |
526
|
|
|
|
|
|
|
# id of the "Other Interesting Node" to the (array represented) xml $node |
527
|
|
|
|
|
|
|
# in whatever fashion the markup for that processor uses. |
528
|
|
|
|
|
|
|
# |
529
|
|
|
|
|
|
|
# This may be another useful place to add a hook? |
530
|
|
|
|
|
|
|
# It would provide the list of cross-refenced nodes in document order |
531
|
|
|
|
|
|
|
# This would allow deciding whether or not to copy foreign attributes or other interesting things |
532
|
|
|
|
|
|
|
sub addCrossrefs { |
533
|
|
|
|
|
|
|
my ($self, $doc, $otherprocessor) = @_; |
534
|
|
|
|
|
|
|
my $selfs_map = $$self{convertedIDs}; |
535
|
|
|
|
|
|
|
my $others_map = $$otherprocessor{convertedIDs}; |
536
|
|
|
|
|
|
|
my $xrefids = $$self{crossreferencing_ids}; |
537
|
|
|
|
|
|
|
foreach my $xid (keys %$selfs_map) { # For each Math id that $self converted |
538
|
|
|
|
|
|
|
if (my $other_ids = $$others_map{$xid}) { # Did $other also convert those ids? |
539
|
|
|
|
|
|
|
my $xref_id = $$other_ids[0]; |
540
|
|
|
|
|
|
|
if (scalar(@$other_ids) > 1) { # Find 1st in document order! (order is cached) |
541
|
|
|
|
|
|
|
($xref_id) = sort { $$xrefids{$a} <=> $$xrefids{$b} } @$other_ids; } |
542
|
|
|
|
|
|
|
foreach my $id (@{ $$selfs_map{$xid} }) { # look at each node $self created from $xid |
543
|
|
|
|
|
|
|
if (my $node = $doc->findNodeByID($id)) { # If we find a node, |
544
|
|
|
|
|
|
|
$self->addCrossref($node, $xref_id); } } } } # add a crossref from it to $others's node |
545
|
|
|
|
|
|
|
return; } |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
package LaTeXML::Post::Document; |
550
|
|
|
|
|
|
|
use strict; |
551
|
|
|
|
|
|
|
use LaTeXML::Common::XML; |
552
|
|
|
|
|
|
|
use LaTeXML::Util::Pathname; |
553
|
|
|
|
|
|
|
use LaTeXML::Util::Radix; |
554
|
|
|
|
|
|
|
use DB_File; |
555
|
|
|
|
|
|
|
use Unicode::Normalize; |
556
|
|
|
|
|
|
|
use LaTeXML::Post; # to import error handling... |
557
|
|
|
|
|
|
|
use LaTeXML::Common::Error; |
558
|
|
|
|
|
|
|
use base qw(LaTeXML::Common::Object); |
559
|
|
|
|
|
|
|
our $NSURI = "http://dlmf.nist.gov/LaTeXML"; |
560
|
|
|
|
|
|
|
our $XPATH = LaTeXML::Common::XML::XPath->new(ltx => $NSURI); |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
# Useful options: |
563
|
|
|
|
|
|
|
# destination = the ultimate destination file for this document to be written. |
564
|
|
|
|
|
|
|
# destinationDirectory = the directory it will be stored in (derived from $destination) |
565
|
|
|
|
|
|
|
# siteDirectory = the root directory of where the entire site will be contained |
566
|
|
|
|
|
|
|
# namespaces = a hash of namespace prefix => namespace uri |
567
|
|
|
|
|
|
|
# namespaceURIs = reverse hash of above. |
568
|
|
|
|
|
|
|
# nocache = a boolean, disables storing of permanent LaTeXML.cache |
569
|
|
|
|
|
|
|
# the cache is used to remember things like image conversions from previous runs. |
570
|
|
|
|
|
|
|
# searchpaths = array of paths to search for other resources |
571
|
|
|
|
|
|
|
# Note that these may not be LaTeXML documents (maybe html or ....) |
572
|
|
|
|
|
|
|
sub new { |
573
|
|
|
|
|
|
|
my ($class, $xmldoc, %options) = @_; |
574
|
|
|
|
|
|
|
my $self = $class->new_internal($xmldoc, %options); |
575
|
|
|
|
|
|
|
$self->setDocument_internal($xmldoc); |
576
|
|
|
|
|
|
|
return $self; } |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
sub new_internal { |
579
|
|
|
|
|
|
|
my ($class, $xmldoc, %options) = @_; |
580
|
|
|
|
|
|
|
my %data = (); |
581
|
|
|
|
|
|
|
if (ref $class) { # Cloning! |
582
|
|
|
|
|
|
|
map { $data{$_} = $$class{$_} } keys %$class; |
583
|
|
|
|
|
|
|
$class = ref $class; } |
584
|
|
|
|
|
|
|
map { $data{$_} = $options{$_} } keys %options; # These override. |
585
|
|
|
|
|
|
|
if ((defined $options{destination}) && (!defined $options{destinationDirectory})) { |
586
|
|
|
|
|
|
|
my ($dir, $name, $ext) = pathname_split($data{destination}); |
587
|
|
|
|
|
|
|
$data{destinationDirectory} = $dir || '.'; } |
588
|
|
|
|
|
|
|
# Check consistency of siteDirectory (providing there's a destinationDirectory) |
589
|
|
|
|
|
|
|
if ($data{destinationDirectory}) { |
590
|
|
|
|
|
|
|
if ($data{siteDirectory}) { |
591
|
|
|
|
|
|
|
Fatal('unexpected', $data{destinationDirectory}, undef, |
592
|
|
|
|
|
|
|
"The destination directory ($data{destinationDirectory})" |
593
|
|
|
|
|
|
|
. " must be within the siteDirectory ($data{siteDirectory})") |
594
|
|
|
|
|
|
|
unless pathname_is_contained($data{destinationDirectory}, $data{siteDirectory}); } |
595
|
|
|
|
|
|
|
else { |
596
|
|
|
|
|
|
|
$data{siteDirectory} = $data{destinationDirectory}; } } |
597
|
|
|
|
|
|
|
# Start, at least, with our own namespaces. |
598
|
|
|
|
|
|
|
$data{namespaces} = { ltx => $NSURI } unless $data{namespaces}; |
599
|
|
|
|
|
|
|
$data{namespaceURIs} = { $NSURI => 'ltx' } unless $data{namespaceURIs}; |
600
|
|
|
|
|
|
|
$data{idcache} = {}; |
601
|
|
|
|
|
|
|
$data{idcache_reusable} = {}; |
602
|
|
|
|
|
|
|
$data{idcache_reserve} = {}; |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
my $self = bless {%data}, $class; |
605
|
|
|
|
|
|
|
return $self; } |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
sub newFromFile { |
608
|
|
|
|
|
|
|
my ($class, $source, %options) = @_; |
609
|
|
|
|
|
|
|
$options{source} = $source; |
610
|
|
|
|
|
|
|
if (!$options{sourceDirectory}) { |
611
|
|
|
|
|
|
|
my ($dir, $name, $ext) = pathname_split($source); |
612
|
|
|
|
|
|
|
$options{sourceDirectory} = $dir || '.'; } |
613
|
|
|
|
|
|
|
my $doc = $class->new(LaTeXML::Common::XML::Parser->new()->parseFile($source), %options); |
614
|
|
|
|
|
|
|
$doc->validate if $$doc{validate}; |
615
|
|
|
|
|
|
|
return $doc; } |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
sub newFromString { |
618
|
|
|
|
|
|
|
my ($class, $string, %options) = @_; |
619
|
|
|
|
|
|
|
$options{sourceDirectory} = '.' unless $options{sourceDirectory}; |
620
|
|
|
|
|
|
|
my $doc = $class->new(LaTeXML::Common::XML::Parser->new()->parseString($string), %options); |
621
|
|
|
|
|
|
|
$doc->validate if $$doc{validate}; |
622
|
|
|
|
|
|
|
return $doc; } |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
sub newFromSTDIN { |
625
|
|
|
|
|
|
|
my ($class, %options) = @_; |
626
|
|
|
|
|
|
|
my $string; |
627
|
|
|
|
|
|
|
{ local $/ = undef; $string = <>; } |
628
|
|
|
|
|
|
|
$options{sourceDirectory} = '.' unless $options{sourceDirectory}; |
629
|
|
|
|
|
|
|
my $doc = $class->new(LaTeXML::Common::XML::Parser->new()->parseString($string), %options); |
630
|
|
|
|
|
|
|
$doc->validate if $$doc{validate}; |
631
|
|
|
|
|
|
|
return $doc; } |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
#====================================================================== |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
# This is for creating essentially "sub documents" |
636
|
|
|
|
|
|
|
# that are in some sense children of $self, possibly removed or cloned from it. |
637
|
|
|
|
|
|
|
# And they are presumably LaTeXML documents |
638
|
|
|
|
|
|
|
sub newDocument { |
639
|
|
|
|
|
|
|
my ($self, $root, %options) = @_; |
640
|
|
|
|
|
|
|
my $clone_suffix = $options{clone_suffix}; |
641
|
|
|
|
|
|
|
delete $options{clone_suffix}; |
642
|
|
|
|
|
|
|
my $doc = $self->new_internal(undef, %options); |
643
|
|
|
|
|
|
|
$doc->setDocument_internal($root, clone_suffix => $clone_suffix); |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
if (my $root_id = $self->getDocumentElement->getAttribute('xml:id')) { |
646
|
|
|
|
|
|
|
$$doc{split_from_id} = $root_id; } |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
# Copy any processing instructions. |
649
|
|
|
|
|
|
|
foreach my $pi ($self->findnodes(".//processing-instruction('latexml')")) { |
650
|
|
|
|
|
|
|
$doc->getDocument->appendChild($pi->cloneNode); } |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
# And any resource elements |
653
|
|
|
|
|
|
|
if (my @resources = $self->findnodes("descendant::ltx:resource")) { |
654
|
|
|
|
|
|
|
$doc->addNodes($doc->getDocumentElement, @resources); } # cloning, as needed... |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
# If new document has no date, try to add one |
657
|
|
|
|
|
|
|
$doc->addDate($self); |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
# And copy class from the top-level document; This is risky... |
660
|
|
|
|
|
|
|
# We want to preserve global document style information |
661
|
|
|
|
|
|
|
# But some may refer specifically to the document, and NOT to the parts? |
662
|
|
|
|
|
|
|
if (my $class = $self->getDocumentElement->getAttribute('class')) { |
663
|
|
|
|
|
|
|
my $root = $doc->getDocumentElement; |
664
|
|
|
|
|
|
|
my $oclass = $root->getAttribute('class'); |
665
|
|
|
|
|
|
|
$root->setAttribute(class => ($oclass ? $oclass . ' ' . $class : $class)); } |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
# Finally, return the new document. |
668
|
|
|
|
|
|
|
return $doc; } |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
sub setDocument_internal { |
671
|
|
|
|
|
|
|
my ($self, $root, %options) = @_; |
672
|
|
|
|
|
|
|
# Build the document's XML |
673
|
|
|
|
|
|
|
my $roottype = ref $root; |
674
|
|
|
|
|
|
|
if ($roottype eq 'LaTeXML::Core::Document') { |
675
|
|
|
|
|
|
|
$root = $root->getDocument; |
676
|
|
|
|
|
|
|
$roottype = ref $root; } |
677
|
|
|
|
|
|
|
if (my $clone_suffix = $options{clone_suffix}) { |
678
|
|
|
|
|
|
|
if ($roottype eq 'XML::LibXML::Document') { |
679
|
|
|
|
|
|
|
Fatal('internal', 'unimplemented', undef, |
680
|
|
|
|
|
|
|
"Have not yet implemented cloning for entire documents"); } |
681
|
|
|
|
|
|
|
# Just make a clone, and then insert that. |
682
|
|
|
|
|
|
|
$root = $self->cloneNode($root, $clone_suffix); } |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
if ($roottype eq 'XML::LibXML::Document') { |
685
|
|
|
|
|
|
|
$$self{document} = $root; |
686
|
|
|
|
|
|
|
foreach my $node ($self->findnodes("//*[\@xml:id]")) { # Now record all ID's |
687
|
|
|
|
|
|
|
$$self{idcache}{ $node->getAttribute('xml:id') } = $node; } |
688
|
|
|
|
|
|
|
# Fetch any additional namespaces from the root |
689
|
|
|
|
|
|
|
foreach my $ns ($root->documentElement->getNamespaces) { |
690
|
|
|
|
|
|
|
my ($prefix, $uri) = ($ns->getLocalName, $ns->getData); |
691
|
|
|
|
|
|
|
if ($prefix) { |
692
|
|
|
|
|
|
|
$$self{namespaces}{$prefix} = $uri unless $$self{namespaces}{$prefix}; |
693
|
|
|
|
|
|
|
$$self{namespaceURIs}{$uri} = $prefix unless $$self{namespaceURIs}{$uri}; } } |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
# Extract data from latexml's ProcessingInstructions |
696
|
|
|
|
|
|
|
# I'd like to provide structured access to the PI's for those modules that need them, |
697
|
|
|
|
|
|
|
# but it isn't quite clear what that api should be. |
698
|
|
|
|
|
|
|
$$self{processingInstructions} = |
699
|
|
|
|
|
|
|
[map { $_->textContent } $XPATH->findnodes('.//processing-instruction("latexml")', $root)]; |
700
|
|
|
|
|
|
|
# Combine specified paths with any from the PI's |
701
|
|
|
|
|
|
|
my @paths = (); |
702
|
|
|
|
|
|
|
@paths = @{ $$self{searchpaths} } if $$self{searchpaths}; |
703
|
|
|
|
|
|
|
foreach my $pi (@{ $$self{processingInstructions} }) { |
704
|
|
|
|
|
|
|
if ($pi =~ /^\s*searchpaths\s*=\s*([\"\'])(.*?)\1\s*$/) { |
705
|
|
|
|
|
|
|
push(@paths, split(',', $2)); } } |
706
|
|
|
|
|
|
|
push(@paths, pathname_absolute($$self{sourceDirectory})) if $$self{sourceDirectory}; |
707
|
|
|
|
|
|
|
$$self{searchpaths} = [@paths]; } |
708
|
|
|
|
|
|
|
elsif ($roottype eq 'XML::LibXML::Element') { |
709
|
|
|
|
|
|
|
$$self{document} = XML::LibXML::Document->new("1.0", "UTF-8"); |
710
|
|
|
|
|
|
|
# Assume we've got any namespaces already ? |
711
|
|
|
|
|
|
|
if (my $parent = $self->findnode('ancestor::*[@id][1]', $root)) { |
712
|
|
|
|
|
|
|
$$self{parent_id} = $parent->getAttribute('xml:id'); } |
713
|
|
|
|
|
|
|
# if no cloning requested, we can just plug the node directly in. |
714
|
|
|
|
|
|
|
# (otherwise, we should use addNodes?) |
715
|
|
|
|
|
|
|
# Seems that only importNode (NOT adopt) works correctly, |
716
|
|
|
|
|
|
|
# PROVIDED we also set the namespace. |
717
|
|
|
|
|
|
|
$$self{document}->setDocumentElement($$self{document}->importNode($root)); |
718
|
|
|
|
|
|
|
# $$self{document}->documentElement->setNamespace($root->namespaceURI, $root->prefix, 1); |
719
|
|
|
|
|
|
|
$root->setNamespace($root->namespaceURI, $root->prefix, 1); |
720
|
|
|
|
|
|
|
foreach my $node ($self->findnodes("//*[\@xml:id]")) { # Now record all ID's |
721
|
|
|
|
|
|
|
$$self{idcache}{ $node->getAttribute('xml:id') } = $node; } } |
722
|
|
|
|
|
|
|
elsif ($roottype eq 'ARRAY') { |
723
|
|
|
|
|
|
|
$$self{document} = XML::LibXML::Document->new("1.0", "UTF-8"); |
724
|
|
|
|
|
|
|
my ($tag, $attributes, @children) = @$root; |
725
|
|
|
|
|
|
|
my ($prefix, $localname) = $tag =~ /^(.*):(.*)$/; |
726
|
|
|
|
|
|
|
my $nsuri = $$self{namespaces}{$prefix}; |
727
|
|
|
|
|
|
|
my $node = $$self{document}->createElementNS($nsuri, $localname); |
728
|
|
|
|
|
|
|
$$self{document}->setDocumentElement($node); |
729
|
|
|
|
|
|
|
map { $$attributes{$_} && $node->setAttribute($_ => $$attributes{$_}) } keys %$attributes |
730
|
|
|
|
|
|
|
if $attributes; |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
if (my $id = $$attributes{'xml:id'}) { |
733
|
|
|
|
|
|
|
$self->recordID($id => $node); } |
734
|
|
|
|
|
|
|
$self->addNodes($node, @children); } |
735
|
|
|
|
|
|
|
else { |
736
|
|
|
|
|
|
|
Fatal('unexpected', $root, undef, "Dont know how to use '$root' as document element"); } |
737
|
|
|
|
|
|
|
return $self; } |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
our @MonthNames = (qw( January February March April May June |
740
|
|
|
|
|
|
|
July August September October November December)); |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
sub addDate { |
743
|
|
|
|
|
|
|
my ($self, $fromdoc) = @_; |
744
|
|
|
|
|
|
|
if (!$self->findnodes('ltx:date', $self->getDocumentElement)) { |
745
|
|
|
|
|
|
|
my @dates; |
746
|
|
|
|
|
|
|
# $fromdoc's document has some, so copy them. |
747
|
|
|
|
|
|
|
if ($fromdoc && (@dates = $fromdoc->findnodes('ltx:date', $fromdoc->getDocumentElement))) { |
748
|
|
|
|
|
|
|
$self->addNodes($self->getDocumentElement, @dates); } |
749
|
|
|
|
|
|
|
else { |
750
|
|
|
|
|
|
|
my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time()); |
751
|
|
|
|
|
|
|
$self->addNodes($self->getDocumentElement, |
752
|
|
|
|
|
|
|
['ltx:date', { role => 'creation' }, |
753
|
|
|
|
|
|
|
$MonthNames[$mon] . " " . $mday . ", " . (1900 + $year)]); } } |
754
|
|
|
|
|
|
|
return; } |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
#====================================================================== |
757
|
|
|
|
|
|
|
# Accessors |
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
sub getDocument { |
760
|
|
|
|
|
|
|
my ($self) = @_; |
761
|
|
|
|
|
|
|
return $$self{document}; } |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
sub getDocumentElement { |
764
|
|
|
|
|
|
|
my ($self) = @_; |
765
|
|
|
|
|
|
|
return $$self{document}->documentElement; } |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
sub getSource { |
768
|
|
|
|
|
|
|
my ($self) = @_; |
769
|
|
|
|
|
|
|
return $$self{source}; } |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
sub getSourceDirectory { |
772
|
|
|
|
|
|
|
my ($self) = @_; |
773
|
|
|
|
|
|
|
return $$self{sourceDirectory} || '.'; } |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
sub getSearchPaths { |
776
|
|
|
|
|
|
|
my ($self) = @_; |
777
|
|
|
|
|
|
|
return @{ $$self{searchpaths} }; } |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
sub getDestination { |
780
|
|
|
|
|
|
|
my ($self) = @_; |
781
|
|
|
|
|
|
|
return $$self{destination}; } |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
sub getDestinationDirectory { |
784
|
|
|
|
|
|
|
my ($self) = @_; |
785
|
|
|
|
|
|
|
return $$self{destinationDirectory}; } |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
sub getSiteDirectory { |
788
|
|
|
|
|
|
|
my ($self) = @_; |
789
|
|
|
|
|
|
|
return $$self{siteDirectory}; } |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
# Given an absolute pathname in the document destination directory, |
792
|
|
|
|
|
|
|
# return the corresponding pathname relative to the site directory (they maybe different!). |
793
|
|
|
|
|
|
|
sub siteRelativePathname { |
794
|
|
|
|
|
|
|
my ($self, $pathname) = @_; |
795
|
|
|
|
|
|
|
return (defined $pathname ? pathname_relative($pathname, $$self{siteDirectory}) : undef); } |
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
sub siteRelativeDestination { |
798
|
|
|
|
|
|
|
my ($self) = @_; |
799
|
|
|
|
|
|
|
return (defined $$self{destination} |
800
|
|
|
|
|
|
|
? pathname_relative($$self{destination}, $$self{siteDirectory}) |
801
|
|
|
|
|
|
|
: undef); } |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
sub getParentDocument { |
804
|
|
|
|
|
|
|
my ($self) = @_; |
805
|
|
|
|
|
|
|
return $$self{parentDocument}; } |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
sub getAncestorDocument { |
808
|
|
|
|
|
|
|
my ($self) = @_; |
809
|
|
|
|
|
|
|
my ($doc, $d) = $self; |
810
|
|
|
|
|
|
|
while ($d = $$doc{parentDocument}) { |
811
|
|
|
|
|
|
|
$doc = $d; } |
812
|
|
|
|
|
|
|
return $doc; } |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
sub toString { |
815
|
|
|
|
|
|
|
my ($self) = @_; |
816
|
|
|
|
|
|
|
return $$self{document}->toString(1); } |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
sub getDestinationExtension { |
819
|
|
|
|
|
|
|
my ($self) = @_; |
820
|
|
|
|
|
|
|
return ($$self{destination} =~ /\.([^\.\/]*)$/ ? $1 : undef); } |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
sub checkDestination { |
823
|
|
|
|
|
|
|
my ($self, $reldest) = @_; |
824
|
|
|
|
|
|
|
# make absolute (if not already absolute), hopefully in destination directory. |
825
|
|
|
|
|
|
|
my $dest = pathname_absolute($reldest, $self->getDestinationDirectory); |
826
|
|
|
|
|
|
|
if (my $destdir = pathname_directory($dest)) { |
827
|
|
|
|
|
|
|
pathname_mkdir($destdir) |
828
|
|
|
|
|
|
|
or return Fatal("I/O", $destdir, undef, |
829
|
|
|
|
|
|
|
"Could not create directory $destdir for $reldest: $!"); } |
830
|
|
|
|
|
|
|
return $dest; } |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
sub stringify { |
833
|
|
|
|
|
|
|
my ($self) = @_; |
834
|
|
|
|
|
|
|
return 'Post::Document[' . $self->siteRelativeDestination . ']'; } |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
#====================================================================== |
837
|
|
|
|
|
|
|
sub validate { |
838
|
|
|
|
|
|
|
my ($self) = @_; |
839
|
|
|
|
|
|
|
# Check for a RelaxNGSchema PI |
840
|
|
|
|
|
|
|
my $schema; |
841
|
|
|
|
|
|
|
foreach my $pi (@{ $$self{processingInstructions} }) { |
842
|
|
|
|
|
|
|
if ($pi =~ /^\s*RelaxNGSchema\s*=\s*([\"\'])(.*?)\1\s*$/) { |
843
|
|
|
|
|
|
|
$schema = $2; } } |
844
|
|
|
|
|
|
|
if ($schema) { # Validate using rng |
845
|
|
|
|
|
|
|
my $rng = LaTeXML::Common::XML::RelaxNG->new($schema, searchpaths => [$self->getSearchPaths]); |
846
|
|
|
|
|
|
|
LaTeXML::Post::Error('I/O', $schema, undef, "Failed to load RelaxNG schema $schema" . "Response was: $@") |
847
|
|
|
|
|
|
|
unless $rng; |
848
|
|
|
|
|
|
|
my $v = eval { |
849
|
|
|
|
|
|
|
local $LaTeXML::IGNORE_ERRORS = 1; |
850
|
|
|
|
|
|
|
$rng->validate($$self{document}); }; |
851
|
|
|
|
|
|
|
LaTeXML::Post::Error("malformed", 'document', undef, |
852
|
|
|
|
|
|
|
"Document fails RelaxNG validation (" . $schema . ")", |
853
|
|
|
|
|
|
|
"Validation reports: " . $@) if $@ || !defined $v; } |
854
|
|
|
|
|
|
|
elsif (my $decldtd = $$self{document}->internalSubset) { # Else look for DTD Declaration |
855
|
|
|
|
|
|
|
my $dtd = XML::LibXML::Dtd->new($decldtd->publicId, $decldtd->systemId); |
856
|
|
|
|
|
|
|
if (!$dtd) { |
857
|
|
|
|
|
|
|
LaTeXML::Post::Error("I/O", $decldtd->publicId, undef, |
858
|
|
|
|
|
|
|
"Failed to load DTD " . $decldtd->publicId . " at " . $decldtd->systemId, |
859
|
|
|
|
|
|
|
"skipping validation"); } |
860
|
|
|
|
|
|
|
else { |
861
|
|
|
|
|
|
|
my $v = eval { |
862
|
|
|
|
|
|
|
local $LaTeXML::IGNORE_ERRORS = 1; |
863
|
|
|
|
|
|
|
$$self{document}->validate($dtd); }; |
864
|
|
|
|
|
|
|
LaTeXML::Post::Error("malformed", 'document', undef, |
865
|
|
|
|
|
|
|
"Document failed DTD validation (" . $decldtd->systemId . ")", |
866
|
|
|
|
|
|
|
"Validation reports: " . $@) if $@ || !defined $v; } } |
867
|
|
|
|
|
|
|
else { # Nothing found to validate with |
868
|
|
|
|
|
|
|
LaTeXML::Post::Warn("expected", 'schema', undef, |
869
|
|
|
|
|
|
|
"No Schema or DTD found for this document"); } |
870
|
|
|
|
|
|
|
return; } |
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
sub idcheck { |
873
|
|
|
|
|
|
|
my ($self) = @_; |
874
|
|
|
|
|
|
|
my %idcache = (); |
875
|
|
|
|
|
|
|
my %dups = (); |
876
|
|
|
|
|
|
|
my %missing = (); |
877
|
|
|
|
|
|
|
foreach my $node ($self->findnodes("//*[\@xml:id]")) { |
878
|
|
|
|
|
|
|
my $id = $node->getAttribute('xml:id'); |
879
|
|
|
|
|
|
|
$dups{$id} = 1 if $idcache{$id}; |
880
|
|
|
|
|
|
|
$idcache{$id} = 1; } |
881
|
|
|
|
|
|
|
foreach my $id (keys %{ $$self{idcache} }) { |
882
|
|
|
|
|
|
|
$missing{$id} = 1 unless $idcache{$id}; } |
883
|
|
|
|
|
|
|
LaTeXML::Post::Warn("unexpected", 'ids', undef, |
884
|
|
|
|
|
|
|
"IDs were duplicated in cache for " . $self->siteRelativeDestination, |
885
|
|
|
|
|
|
|
join(',', keys %dups)) |
886
|
|
|
|
|
|
|
if keys %dups; |
887
|
|
|
|
|
|
|
LaTeXML::Post::Warn("expected", 'ids', undef, "IDs were cached for " . $self->siteRelativeDestination |
888
|
|
|
|
|
|
|
. " but not in document", |
889
|
|
|
|
|
|
|
join(',', keys %missing)) |
890
|
|
|
|
|
|
|
if keys %missing; |
891
|
|
|
|
|
|
|
return; } |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
#====================================================================== |
894
|
|
|
|
|
|
|
sub findnodes { |
895
|
|
|
|
|
|
|
my ($self, $path, $node) = @_; |
896
|
|
|
|
|
|
|
return $XPATH->findnodes($path, $node || $$self{document}); } |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
# Similar but returns only 1st node |
899
|
|
|
|
|
|
|
sub findnode { |
900
|
|
|
|
|
|
|
my ($self, $path, $node) = @_; |
901
|
|
|
|
|
|
|
my ($first) = $XPATH->findnodes($path, $node || $$self{document}); |
902
|
|
|
|
|
|
|
return $first; } |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
sub findvalue { |
905
|
|
|
|
|
|
|
my ($self, $path, $node) = @_; |
906
|
|
|
|
|
|
|
return $XPATH->findvalue($path, $node || $$self{document}); } |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
sub addNamespace { |
909
|
|
|
|
|
|
|
my ($self, $nsuri, $prefix) = @_; |
910
|
|
|
|
|
|
|
if (!$$self{namespaces}{$prefix} || ($$self{namespaces}{$prefix} ne $nsuri) |
911
|
|
|
|
|
|
|
|| (($self->getDocumentElement->lookupNamespacePrefix($nsuri) || '') ne $prefix)) { |
912
|
|
|
|
|
|
|
$$self{namespaces}{$prefix} = $nsuri; |
913
|
|
|
|
|
|
|
$$self{namespaceURIs}{$nsuri} = $prefix; |
914
|
|
|
|
|
|
|
$XPATH->registerNS($prefix => $nsuri); |
915
|
|
|
|
|
|
|
$self->getDocumentElement->setNamespace($nsuri, $prefix, 0); } |
916
|
|
|
|
|
|
|
return; } |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
use Carp; |
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
sub getQName { |
921
|
|
|
|
|
|
|
my ($self, $node) = @_; |
922
|
|
|
|
|
|
|
if (ref $node eq 'ARRAY') { |
923
|
|
|
|
|
|
|
return $$node[0]; } |
924
|
|
|
|
|
|
|
elsif (ref $node eq 'XML::LibXML::Element') { |
925
|
|
|
|
|
|
|
my $nsuri = $node->namespaceURI; |
926
|
|
|
|
|
|
|
if (!$nsuri) { # No namespace at all??? |
927
|
|
|
|
|
|
|
if ($node->nodeType == XML_ELEMENT_NODE) { |
928
|
|
|
|
|
|
|
return $node->localname; } |
929
|
|
|
|
|
|
|
else { |
930
|
|
|
|
|
|
|
return; } } |
931
|
|
|
|
|
|
|
elsif (my $prefix = $$self{namespaceURIs}{$nsuri}) { |
932
|
|
|
|
|
|
|
return $prefix . ":" . $node->localname; } |
933
|
|
|
|
|
|
|
else { |
934
|
|
|
|
|
|
|
# Hasn't got one; we'll create a prefix for internal use. |
935
|
|
|
|
|
|
|
my $prefix = "_ns" . (1 + scalar(grep { /^_ns\d+$/ } keys %{ $$self{namespaces} })); |
936
|
|
|
|
|
|
|
# Register it, but Don't add it to the document!!! (or xpath, for that matter) |
937
|
|
|
|
|
|
|
$$self{namespaces}{$prefix} = $nsuri; |
938
|
|
|
|
|
|
|
$$self{namespaceURIs}{$nsuri} = $prefix; |
939
|
|
|
|
|
|
|
return $prefix . ":" . $node->localname; } } |
940
|
|
|
|
|
|
|
else { |
941
|
|
|
|
|
|
|
# confess "What's this? $node\n"; |
942
|
|
|
|
|
|
|
return; } } |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
#====================================================================== |
945
|
|
|
|
|
|
|
# ADD nodes to $node in the document $self. |
946
|
|
|
|
|
|
|
# This takes a convenient recursive reprsentation for xml: |
947
|
|
|
|
|
|
|
# data = string | [$tagname, {attr=>value,..}, @children...] |
948
|
|
|
|
|
|
|
# The $tagname should have a namespace prefix whose URI has been |
949
|
|
|
|
|
|
|
# registered with addNamespace. |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
# Note that we're currently ignoring duplicated ids. |
952
|
|
|
|
|
|
|
# these should only happen from rearrangement and copying of document fragments |
953
|
|
|
|
|
|
|
# with embedded bits of math in them, which have those XMTok/XMRef pairs. |
954
|
|
|
|
|
|
|
# If those are the cases, we should end up finding the original id'd item, anyway, right? |
955
|
|
|
|
|
|
|
# |
956
|
|
|
|
|
|
|
# NOTE that only XML::LibXML's addNewChild deals cleanly with namespaces |
957
|
|
|
|
|
|
|
# and since there is only an "add" (ie. append) version (not prepend, insert after, etc) |
958
|
|
|
|
|
|
|
# we have to orient everything towards appending. |
959
|
|
|
|
|
|
|
# In particular, see the perversity in the following few methods. |
960
|
|
|
|
|
|
|
sub addNodes { |
961
|
|
|
|
|
|
|
my ($self, $node, @data) = @_; |
962
|
|
|
|
|
|
|
foreach my $child (@data) { |
963
|
|
|
|
|
|
|
if (ref $child eq 'ARRAY') { |
964
|
|
|
|
|
|
|
my ($tag, $attributes, @children) = @$child; |
965
|
|
|
|
|
|
|
if ($tag eq '_Fragment_') { |
966
|
|
|
|
|
|
|
my $indent; # Derive indentation from indentation of $node |
967
|
|
|
|
|
|
|
if (my $pre = $node->previousSibling) { |
968
|
|
|
|
|
|
|
if (($pre->nodeType == XML_TEXT_NODE) && (($pre = $pre->textContent) =~ /^\s*$/)) { |
969
|
|
|
|
|
|
|
$indent = $pre . ' '; } } |
970
|
|
|
|
|
|
|
if ($indent) { |
971
|
|
|
|
|
|
|
$self->addNodes($node, map { ($indent, $_) } @children); } |
972
|
|
|
|
|
|
|
else { |
973
|
|
|
|
|
|
|
$self->addNodes($node, @children); } } |
974
|
|
|
|
|
|
|
else { |
975
|
|
|
|
|
|
|
my ($prefix, $localname) = $tag =~ /^(.*):(.*)$/; |
976
|
|
|
|
|
|
|
my $nsuri = $prefix && $$self{namespaces}{$prefix}; |
977
|
|
|
|
|
|
|
LaTeXML::Post::Warn('expected', 'namespace', undef, "No namespace on '$tag'") unless $nsuri; |
978
|
|
|
|
|
|
|
my $new; |
979
|
|
|
|
|
|
|
if (ref $node eq 'LibXML::XML::Document') { |
980
|
|
|
|
|
|
|
$new = $node->createElementNS($nsuri, $localname); |
981
|
|
|
|
|
|
|
$node->setDocumentElement($new); } |
982
|
|
|
|
|
|
|
else { |
983
|
|
|
|
|
|
|
$new = $node->addNewChild($nsuri, $localname); } |
984
|
|
|
|
|
|
|
if ($attributes) { |
985
|
|
|
|
|
|
|
foreach my $key (sort keys %$attributes) { |
986
|
|
|
|
|
|
|
next unless defined $$attributes{$key}; |
987
|
|
|
|
|
|
|
next if $key =~ /^_/; # Ignore internal attributes |
988
|
|
|
|
|
|
|
my ($attrprefix, $attrname) = $key =~ /^(.*):(.*)$/; |
989
|
|
|
|
|
|
|
my $value = $$attributes{$key}; |
990
|
|
|
|
|
|
|
if ($key eq 'xml:id') { |
991
|
|
|
|
|
|
|
if (defined $$self{idcache}{$value}) { # Duplicated ID ?!?! |
992
|
|
|
|
|
|
|
my $newid = $self->uniquifyID($value); |
993
|
|
|
|
|
|
|
Info('unexpected', 'duplicate_id', undef, |
994
|
|
|
|
|
|
|
"Duplicated id=$value using $newid " . ($$self{destination} || '')); |
995
|
|
|
|
|
|
|
$value = $newid; } |
996
|
|
|
|
|
|
|
$self->recordID($value => $new); |
997
|
|
|
|
|
|
|
$new->setAttribute($key, $value); } |
998
|
|
|
|
|
|
|
elsif ($attrprefix && ($attrprefix ne 'xml')) { |
999
|
|
|
|
|
|
|
my $attrnsuri = $attrprefix && $$self{namespaces}{$attrprefix}; |
1000
|
|
|
|
|
|
|
$new->setAttributeNS($attrnsuri, $key, $$attributes{$key}); } |
1001
|
|
|
|
|
|
|
else { |
1002
|
|
|
|
|
|
|
$new->setAttribute($key, $$attributes{$key}); } } } |
1003
|
|
|
|
|
|
|
$self->addNodes($new, @children); } } |
1004
|
|
|
|
|
|
|
elsif ((ref $child) =~ /^XML::LibXML::/) { |
1005
|
|
|
|
|
|
|
my $type = $child->nodeType; |
1006
|
|
|
|
|
|
|
if ($type == XML_ELEMENT_NODE) { |
1007
|
|
|
|
|
|
|
# Note: this isn't actually much slower than $node->appendChild($child) ! |
1008
|
|
|
|
|
|
|
my $nsuri = $child->namespaceURI; |
1009
|
|
|
|
|
|
|
my $localname = $child->localname; |
1010
|
|
|
|
|
|
|
my $new; |
1011
|
|
|
|
|
|
|
if (ref $node eq 'LibXML::XML::Document') { |
1012
|
|
|
|
|
|
|
$new = $node->createElementNS($nsuri, $localname); |
1013
|
|
|
|
|
|
|
$node->setDocumentElement($new); } |
1014
|
|
|
|
|
|
|
else { |
1015
|
|
|
|
|
|
|
$new = $node->addNewChild($nsuri, $localname); } |
1016
|
|
|
|
|
|
|
foreach my $attr ($child->attributes) { |
1017
|
|
|
|
|
|
|
my $atype = $attr->nodeType; |
1018
|
|
|
|
|
|
|
if ($atype == XML_ATTRIBUTE_NODE) { |
1019
|
|
|
|
|
|
|
my $key = $attr->nodeName; |
1020
|
|
|
|
|
|
|
if ($key =~ /^_/) { } # don't copy internal attributes |
1021
|
|
|
|
|
|
|
elsif ($key eq 'xml:id') { |
1022
|
|
|
|
|
|
|
my $value = $attr->getValue; |
1023
|
|
|
|
|
|
|
my $old; |
1024
|
|
|
|
|
|
|
if ((defined($old = $$self{idcache}{$value})) # if xml:id was already used |
1025
|
|
|
|
|
|
|
&& !$old->isSameNode($child)) { # and the node was a different one |
1026
|
|
|
|
|
|
|
my $newid = $self->uniquifyID($value); |
1027
|
|
|
|
|
|
|
Info('unexpected', 'duplicate_id', undef, |
1028
|
|
|
|
|
|
|
"Duplicated id=$value using $newid " . ($$self{destination} || '')); |
1029
|
|
|
|
|
|
|
$value = $newid; } |
1030
|
|
|
|
|
|
|
$self->recordID($value => $new); |
1031
|
|
|
|
|
|
|
$new->setAttribute($key, $value); } |
1032
|
|
|
|
|
|
|
elsif (my $ns = $attr->namespaceURI) { |
1033
|
|
|
|
|
|
|
$new->setAttributeNS($ns, $attr->name, $attr->getValue); } |
1034
|
|
|
|
|
|
|
else { |
1035
|
|
|
|
|
|
|
$new->setAttribute($attr->localname, $attr->getValue); } } |
1036
|
|
|
|
|
|
|
} |
1037
|
|
|
|
|
|
|
$self->addNodes($new, $child->childNodes); } |
1038
|
|
|
|
|
|
|
elsif ($type == XML_DOCUMENT_FRAG_NODE) { |
1039
|
|
|
|
|
|
|
$self->addNodes($node, $child->childNodes); } |
1040
|
|
|
|
|
|
|
elsif ($type == XML_TEXT_NODE) { |
1041
|
|
|
|
|
|
|
$node->appendTextNode($child->textContent); } |
1042
|
|
|
|
|
|
|
} |
1043
|
|
|
|
|
|
|
elsif (ref $child) { |
1044
|
|
|
|
|
|
|
LaTeXML::Post::Warn('misdefined', $child, undef, "Dont know how to add $child to $node; ignoring"); } |
1045
|
|
|
|
|
|
|
elsif (defined $child) { |
1046
|
|
|
|
|
|
|
$node->appendTextNode($child); } } |
1047
|
|
|
|
|
|
|
return; } |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
# Remove @nodes from the document |
1050
|
|
|
|
|
|
|
# Allow the nodes to be array form with possibly nested XML that needs to be removed. |
1051
|
|
|
|
|
|
|
sub removeNodes { |
1052
|
|
|
|
|
|
|
my ($self, @nodes) = @_; |
1053
|
|
|
|
|
|
|
foreach my $node (@nodes) { |
1054
|
|
|
|
|
|
|
my $ref = ref $node; |
1055
|
|
|
|
|
|
|
if (!$ref) { } |
1056
|
|
|
|
|
|
|
elsif ($ref eq 'ARRAY') { |
1057
|
|
|
|
|
|
|
my ($t, $a, @n) = @$node; |
1058
|
|
|
|
|
|
|
if (my $id = $$a{'xml:id'}) { |
1059
|
|
|
|
|
|
|
if ($$self{idcache}{$id}) { |
1060
|
|
|
|
|
|
|
delete $$self{idcache}{$id}; } } |
1061
|
|
|
|
|
|
|
$self->removeNodes(@n); } |
1062
|
|
|
|
|
|
|
elsif ($ref =~ /^XML::LibXML::/) { |
1063
|
|
|
|
|
|
|
if ($node->nodeType == XML_ELEMENT_NODE) { |
1064
|
|
|
|
|
|
|
foreach my $idd ($self->findnodes("descendant-or-self::*[\@xml:id]", $node)) { |
1065
|
|
|
|
|
|
|
my $id = $idd->getAttribute('xml:id'); |
1066
|
|
|
|
|
|
|
if ($$self{idcache}{$id}) { |
1067
|
|
|
|
|
|
|
delete $$self{idcache}{$id}; } } } |
1068
|
|
|
|
|
|
|
$node->unlinkNode; } } |
1069
|
|
|
|
|
|
|
return; } |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
# These nodes will be removed, but later |
1072
|
|
|
|
|
|
|
# So mark all id's in these trees as reusable |
1073
|
|
|
|
|
|
|
sub preremoveNodes { |
1074
|
|
|
|
|
|
|
my ($self, @nodes) = @_; |
1075
|
|
|
|
|
|
|
foreach my $node (@nodes) { |
1076
|
|
|
|
|
|
|
my $ref = ref $node; |
1077
|
|
|
|
|
|
|
if (!$ref) { } |
1078
|
|
|
|
|
|
|
elsif ($ref eq 'ARRAY') { |
1079
|
|
|
|
|
|
|
my ($t, $a, @n) = @$node; |
1080
|
|
|
|
|
|
|
if (my $id = $$a{'xml:id'}) { |
1081
|
|
|
|
|
|
|
$$self{idcache_reusable}{$id} = 1; } |
1082
|
|
|
|
|
|
|
$self->preremoveNodes(@n); } |
1083
|
|
|
|
|
|
|
elsif ($ref =~ /^XML::LibXML::/) { |
1084
|
|
|
|
|
|
|
if ($node->nodeType == XML_ELEMENT_NODE) { |
1085
|
|
|
|
|
|
|
foreach my $idd ($self->findnodes("descendant-or-self::*[\@xml:id]", $node)) { |
1086
|
|
|
|
|
|
|
my $id = $idd->getAttribute('xml:id'); |
1087
|
|
|
|
|
|
|
$$self{idcache_reusable}{$id} = 1; } } } } |
1088
|
|
|
|
|
|
|
return; } |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
sub removeBlankNodes { |
1091
|
|
|
|
|
|
|
my ($self, $node) = @_; |
1092
|
|
|
|
|
|
|
my $n = 0; |
1093
|
|
|
|
|
|
|
foreach my $child ($node->childNodes) { |
1094
|
|
|
|
|
|
|
if (($child->nodeType == XML_TEXT_NODE) && ($child->textContent =~ /^\s*$/)) { |
1095
|
|
|
|
|
|
|
$node->removeChild($child); $n++; } } |
1096
|
|
|
|
|
|
|
return $n; } |
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
# Replace $node by @replacements in the document |
1099
|
|
|
|
|
|
|
sub replaceNode { |
1100
|
|
|
|
|
|
|
my ($self, $node, @replacements) = @_; |
1101
|
|
|
|
|
|
|
my ($parent, $following) = ($node->parentNode, undef); |
1102
|
|
|
|
|
|
|
# Note that since we can only append new stuff, we've got to remove the following first. |
1103
|
|
|
|
|
|
|
my @save = (); |
1104
|
|
|
|
|
|
|
while (($following = $parent->lastChild) && ($$following != $$node)) { # Remove & Save following siblings. |
1105
|
|
|
|
|
|
|
unshift(@save, $parent->removeChild($following)); } |
1106
|
|
|
|
|
|
|
$self->removeNodes($node); |
1107
|
|
|
|
|
|
|
$self->addNodes($parent, @replacements); |
1108
|
|
|
|
|
|
|
map { $parent->appendChild($_) } @save; # Put these back. |
1109
|
|
|
|
|
|
|
return; } |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
# Put @nodes at the beginning of $node. |
1112
|
|
|
|
|
|
|
sub prependNodes { |
1113
|
|
|
|
|
|
|
my ($self, $node, @nodes) = @_; |
1114
|
|
|
|
|
|
|
my @save = (); |
1115
|
|
|
|
|
|
|
# Note that since we can only append new stuff, we've got to remove the following first. |
1116
|
|
|
|
|
|
|
while (my $last = $node->lastChild) { # Remove, but save, all children |
1117
|
|
|
|
|
|
|
unshift(@save, $node->removeChild($last)); } |
1118
|
|
|
|
|
|
|
$self->addNodes($node, @nodes); # Now, add the new nodes. |
1119
|
|
|
|
|
|
|
map { $node->appendChild($_) } @save; # Put these back. |
1120
|
|
|
|
|
|
|
return; } |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
# Clone a node, but adjusting it so that it has unique id's. |
1123
|
|
|
|
|
|
|
# $document->cloneNode($node) or ->cloneNode($node,$idsuffix) |
1124
|
|
|
|
|
|
|
# This clones the node and adjusts any xml:id's within it to be unique. |
1125
|
|
|
|
|
|
|
# Any idref's to those ids will be changed to the new id values. |
1126
|
|
|
|
|
|
|
# If $idsuffix is supplied, it can be a simple string to append to the ids; |
1127
|
|
|
|
|
|
|
# else can be a function of the id to modify it. |
1128
|
|
|
|
|
|
|
# Then each $id is checked to see whether it is unique; If needed, |
1129
|
|
|
|
|
|
|
# one or more letters are appended, until a new id is found. |
1130
|
|
|
|
|
|
|
sub cloneNode { |
1131
|
|
|
|
|
|
|
my ($self, $node, $idsuffix, %options) = @_; |
1132
|
|
|
|
|
|
|
return $node unless ref $node; |
1133
|
|
|
|
|
|
|
my $copy = $node->cloneNode(1); |
1134
|
|
|
|
|
|
|
my $nocache = $options{nocache}; |
1135
|
|
|
|
|
|
|
#### $idsuffix = '' unless defined $idsuffix; |
1136
|
|
|
|
|
|
|
# Find all id's defined in the copy and change the id. |
1137
|
|
|
|
|
|
|
my %idmap = (); |
1138
|
|
|
|
|
|
|
foreach my $n ($self->findnodes('descendant-or-self::*[@xml:id]', $copy)) { |
1139
|
|
|
|
|
|
|
my $id = $n->getAttribute('xml:id'); |
1140
|
|
|
|
|
|
|
my $newid = $self->uniquifyID($id, $idsuffix); |
1141
|
|
|
|
|
|
|
$idmap{$id} = $newid; |
1142
|
|
|
|
|
|
|
$self->recordID($newid => $n) unless $nocache; |
1143
|
|
|
|
|
|
|
$n->setAttribute('xml:id' => $newid); |
1144
|
|
|
|
|
|
|
if (my $fragid = $n->getAttribute('fragid')) { # GACK!! |
1145
|
|
|
|
|
|
|
$n->setAttribute(fragid => substr($newid, length($id) - length($fragid))); } } |
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
# Now, replace all REFERENCES to those modified ids. |
1148
|
|
|
|
|
|
|
foreach my $n ($self->findnodes('descendant-or-self::*[@idref]', $copy)) { |
1149
|
|
|
|
|
|
|
if (my $id = $idmap{ $n->getAttribute('idref') }) { |
1150
|
|
|
|
|
|
|
$n->setAttribute(idref => $id); } } # use id or fragid? |
1151
|
|
|
|
|
|
|
# Finally, we probably shouldn't have any labels attributes in here either |
1152
|
|
|
|
|
|
|
foreach my $n ($self->findnodes('descendant-or-self::*[@labels]', $copy)) { |
1153
|
|
|
|
|
|
|
$n->removeAttribute('labels'); } |
1154
|
|
|
|
|
|
|
# And, if we're relocating the node across documents, |
1155
|
|
|
|
|
|
|
# we may need to patch relative pathnames! |
1156
|
|
|
|
|
|
|
# ????? Something to think about in the future... |
1157
|
|
|
|
|
|
|
# if(my $base = $options{basepathname}){ |
1158
|
|
|
|
|
|
|
# foreach my $n ($self->findnodes('descendant::*/@graphic or descendant::*/@href', $copy)) { |
1159
|
|
|
|
|
|
|
# $n->setvalue(relocate($n->value,$base)); }} |
1160
|
|
|
|
|
|
|
return $copy; } |
1161
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
sub cloneNodes { |
1163
|
|
|
|
|
|
|
my ($self, @nodes) = @_; |
1164
|
|
|
|
|
|
|
return map { $self->cloneNode($_) } @nodes; } |
1165
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
sub addSSValues { |
1167
|
|
|
|
|
|
|
my ($self, $node, $key, $values) = @_; |
1168
|
|
|
|
|
|
|
$values = $values->toAttribute if ref $values; |
1169
|
|
|
|
|
|
|
if ((defined $values) && ($values ne '')) { # Skip if `empty'; but 0 is OK! |
1170
|
|
|
|
|
|
|
my @values = split(/\s/, $values); |
1171
|
|
|
|
|
|
|
if (my $oldvalues = $node->getAttribute($key)) { # previous values? |
1172
|
|
|
|
|
|
|
my @old = split(/\s/, $oldvalues); |
1173
|
|
|
|
|
|
|
foreach my $new (@values) { |
1174
|
|
|
|
|
|
|
push(@old, $new) unless grep { $_ eq $new } @old; } |
1175
|
|
|
|
|
|
|
$node->setAttribute($key => join(' ', sort @old)); } |
1176
|
|
|
|
|
|
|
else { |
1177
|
|
|
|
|
|
|
$node->setAttribute($key => join(' ', sort @values)); } } |
1178
|
|
|
|
|
|
|
return; } |
1179
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
sub addClass { |
1181
|
|
|
|
|
|
|
my ($self, $node, $class) = @_; |
1182
|
|
|
|
|
|
|
return $self->addSSValues($node, class => $class); } |
1183
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
#====================================================================== |
1185
|
|
|
|
|
|
|
# DUPLICATED from Core::Document...(see discussion there) |
1186
|
|
|
|
|
|
|
# Decorations on one side of an XMDual should be attributed to the |
1187
|
|
|
|
|
|
|
# parent node on the other side (see ->associateIDs) |
1188
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
sub markXMNodeVisibility { |
1190
|
|
|
|
|
|
|
my ($self) = @_; |
1191
|
|
|
|
|
|
|
foreach my $math ($self->findnodes('//ltx:XMath/*')) { |
1192
|
|
|
|
|
|
|
$self->markXMNodeVisibility_aux($math, 1, 1); } |
1193
|
|
|
|
|
|
|
return; } |
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
sub markXMNodeVisibility_aux { |
1196
|
|
|
|
|
|
|
my ($self, $node, $cvis, $pvis) = @_; |
1197
|
|
|
|
|
|
|
return unless $node; |
1198
|
|
|
|
|
|
|
my $qname = $self->getQName($node); |
1199
|
|
|
|
|
|
|
return if (!$cvis || $node->getAttribute('_cvis')) && (!$pvis || $node->getAttribute('_pvis')); |
1200
|
|
|
|
|
|
|
$node->setAttribute('_cvis' => 1) if $cvis; |
1201
|
|
|
|
|
|
|
$node->setAttribute('_pvis' => 1) if $pvis; |
1202
|
|
|
|
|
|
|
if ($qname eq 'ltx:XMDual') { |
1203
|
|
|
|
|
|
|
my ($c, $p) = element_nodes($node); |
1204
|
|
|
|
|
|
|
$self->markXMNodeVisibility_aux($c, 1, 0) if $cvis; |
1205
|
|
|
|
|
|
|
$self->markXMNodeVisibility_aux($p, 0, 1) if $pvis; } |
1206
|
|
|
|
|
|
|
elsif ($qname eq 'ltx:XMRef') { |
1207
|
|
|
|
|
|
|
# $self->markXMNodeVisibility_aux($self->realizeXMNode($node),$cvis,$pvis); } |
1208
|
|
|
|
|
|
|
my $id = $node->getAttribute('idref'); |
1209
|
|
|
|
|
|
|
$self->markXMNodeVisibility_aux($self->findNodeByID($id), $cvis, $pvis); } |
1210
|
|
|
|
|
|
|
else { |
1211
|
|
|
|
|
|
|
foreach my $child (element_nodes($node)) { |
1212
|
|
|
|
|
|
|
$self->markXMNodeVisibility_aux($child, $cvis, $pvis); } } |
1213
|
|
|
|
|
|
|
return; } |
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
#====================================================================== |
1216
|
|
|
|
|
|
|
# Given a list of nodes (or node constructors [tag,attr,content...]) |
1217
|
|
|
|
|
|
|
# conjoin given a conjunction like ',' or a pair like [',', ' and '] |
1218
|
|
|
|
|
|
|
sub conjoin { |
1219
|
|
|
|
|
|
|
my ($self, $conjunction, @nodes) = @_; |
1220
|
|
|
|
|
|
|
my ($comma, $and) = ($conjunction, $conjunction); |
1221
|
|
|
|
|
|
|
($comma, $and) = @$conjunction if ref $conjunction; |
1222
|
|
|
|
|
|
|
my $n = scalar(@nodes); |
1223
|
|
|
|
|
|
|
if ($n < 2) { |
1224
|
|
|
|
|
|
|
return @nodes; } |
1225
|
|
|
|
|
|
|
else { |
1226
|
|
|
|
|
|
|
my @foo = (); |
1227
|
|
|
|
|
|
|
push(@foo, shift(@nodes)); |
1228
|
|
|
|
|
|
|
while ($nodes[1]) { |
1229
|
|
|
|
|
|
|
push(@foo, $comma, shift(@nodes)); } |
1230
|
|
|
|
|
|
|
push(@foo, $and, shift(@nodes)); |
1231
|
|
|
|
|
|
|
return @foo; } } |
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
# Find the initial letter in a string, or *. |
1234
|
|
|
|
|
|
|
# Uses unicode decomposition to reduce accented characters to A-Z |
1235
|
|
|
|
|
|
|
# If $force is true, skips any non-letter initials |
1236
|
|
|
|
|
|
|
sub initial { |
1237
|
|
|
|
|
|
|
my ($self, $string, $force) = @_; |
1238
|
|
|
|
|
|
|
$string = NFD($string); # Decompose accents, etc. |
1239
|
|
|
|
|
|
|
$string =~ s/^\s+//gs; |
1240
|
|
|
|
|
|
|
$string =~ s/^[^a-zA-Z]*// if $force; |
1241
|
|
|
|
|
|
|
return ($string =~ /^([a-zA-Z])/ ? uc($1) : '*'); } |
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
# This would typically be called to normalize the leading/trailing whitespace of nodes |
1244
|
|
|
|
|
|
|
# that take mixed markup. WE SHOULDN'T BE DOING THIS. We need to NOT add "ignorable whitespace" |
1245
|
|
|
|
|
|
|
# to nodes that CAN HAVE mixed content. otherwise we don't know if it is ignorable! |
1246
|
|
|
|
|
|
|
sub trimChildNodes { |
1247
|
|
|
|
|
|
|
my ($self, $node) = @_; |
1248
|
|
|
|
|
|
|
if (!$node) { |
1249
|
|
|
|
|
|
|
return (); } |
1250
|
|
|
|
|
|
|
elsif (!ref $node) { |
1251
|
|
|
|
|
|
|
return ($node); } |
1252
|
|
|
|
|
|
|
elsif (my @children = $node->childNodes) { |
1253
|
|
|
|
|
|
|
if ($children[0]->nodeType == XML_TEXT_NODE) { |
1254
|
|
|
|
|
|
|
my $s = $children[0]->data; |
1255
|
|
|
|
|
|
|
$s =~ s/^\s+//; |
1256
|
|
|
|
|
|
|
if ($s) { |
1257
|
|
|
|
|
|
|
$children[0]->setData($s); } |
1258
|
|
|
|
|
|
|
else { |
1259
|
|
|
|
|
|
|
shift(@children); } } |
1260
|
|
|
|
|
|
|
if ($children[-1]->nodeType == XML_TEXT_NODE) { |
1261
|
|
|
|
|
|
|
my $s = $children[-1]->data; |
1262
|
|
|
|
|
|
|
$s =~ s/\s+$//; |
1263
|
|
|
|
|
|
|
if ($s) { |
1264
|
|
|
|
|
|
|
$children[-1]->setData($s); } |
1265
|
|
|
|
|
|
|
else { |
1266
|
|
|
|
|
|
|
pop(@children); } } |
1267
|
|
|
|
|
|
|
return @children; } |
1268
|
|
|
|
|
|
|
else { |
1269
|
|
|
|
|
|
|
return (); } } |
1270
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
sub unisort { |
1272
|
|
|
|
|
|
|
my ($self, @keys) = @_; |
1273
|
|
|
|
|
|
|
# Get a (possibly cached) sorter from POST appropriate for this document's language |
1274
|
|
|
|
|
|
|
my $lang = $self->getDocumentElement->getAttribute('xml:lang') || 'en'; |
1275
|
|
|
|
|
|
|
return $LaTeXML::POST->getsorter($lang)->sort(@keys); } |
1276
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
#====================================================================== |
1278
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
sub addNavigation { |
1280
|
|
|
|
|
|
|
my ($self, $relation, $id) = @_; |
1281
|
|
|
|
|
|
|
return if $self->findnode('//ltx:navigation/ltx:ref[@rel="' . $relation . '"][@idref="' . $id . '"]'); |
1282
|
|
|
|
|
|
|
my $ref = ['ltx:ref', { idref => $id, rel => $relation, show => 'toctitle' }]; |
1283
|
|
|
|
|
|
|
if (my $nav = $self->findnode('//ltx:navigation')) { |
1284
|
|
|
|
|
|
|
$self->addNodes($nav, $ref); } |
1285
|
|
|
|
|
|
|
else { |
1286
|
|
|
|
|
|
|
$self->addNodes($self->getDocumentElement, ['ltx:navigation', {}, $ref]); } |
1287
|
|
|
|
|
|
|
return; } |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
#====================================================================== |
1290
|
|
|
|
|
|
|
# Support for ID's |
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
sub recordID { |
1293
|
|
|
|
|
|
|
my ($self, $id, $node) = @_; |
1294
|
|
|
|
|
|
|
# make an issue if already there? |
1295
|
|
|
|
|
|
|
$$self{idcache}{$id} = $node; |
1296
|
|
|
|
|
|
|
delete $$self{idcache_reserve}{$id}; # And no longer reserved |
1297
|
|
|
|
|
|
|
delete $$self{idcache_reusable}{$id}; # or reusable |
1298
|
|
|
|
|
|
|
return; } |
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
sub findNodeByID { |
1301
|
|
|
|
|
|
|
my ($self, $id) = @_; |
1302
|
|
|
|
|
|
|
my $node = $$self{idcache}{$id}; |
1303
|
|
|
|
|
|
|
return $$self{idcache}{$id}; } |
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
sub realizeXMNode { |
1306
|
|
|
|
|
|
|
my ($self, $node) = @_; |
1307
|
|
|
|
|
|
|
if ($self->getQName($node) eq 'ltx:XMRef') { |
1308
|
|
|
|
|
|
|
my $id = $node->getAttribute('idref'); |
1309
|
|
|
|
|
|
|
if (my $realnode = $self->findNodeByID($id)) { |
1310
|
|
|
|
|
|
|
#print STDERR "REALIZE $id => $realnode\n"; |
1311
|
|
|
|
|
|
|
return $realnode; } |
1312
|
|
|
|
|
|
|
else { |
1313
|
|
|
|
|
|
|
Fatal('expected', 'id', undef, "Cannot find a node with xml:id='$id'"); |
1314
|
|
|
|
|
|
|
return; } } |
1315
|
|
|
|
|
|
|
else { |
1316
|
|
|
|
|
|
|
return $node; } } |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
sub uniquifyID { |
1319
|
|
|
|
|
|
|
my ($self, $baseid, $suffix) = @_; |
1320
|
|
|
|
|
|
|
my $id = $baseid; |
1321
|
|
|
|
|
|
|
$id = (ref $suffix eq 'CODE' ? &$suffix($id) : $id . $suffix) if defined $suffix; |
1322
|
|
|
|
|
|
|
my $cachekey = $id; |
1323
|
|
|
|
|
|
|
while (($$self{idcache}{$id} || $$self{idcache_reserve}{$id}) && !$$self{idcache_reusable}{$id}) { |
1324
|
|
|
|
|
|
|
$id = $baseid . radix_alpha(++$$self{idcache_clashes}{$cachekey}); |
1325
|
|
|
|
|
|
|
$id = (ref $suffix eq 'CODE' ? &$suffix($id) : $id . $suffix) if defined $suffix; } |
1326
|
|
|
|
|
|
|
delete $$self{idcache_reusable}{$id}; # $id is no longer reusable |
1327
|
|
|
|
|
|
|
$$self{idcache_reserve}{$id} = 1; # and we'll consider it reserved until recorded. |
1328
|
|
|
|
|
|
|
return $id; } |
1329
|
|
|
|
|
|
|
|
1330
|
|
|
|
|
|
|
# Generate, add and register an xml:id for $node. |
1331
|
|
|
|
|
|
|
# Unless it already has an id, the created id will |
1332
|
|
|
|
|
|
|
# be "structured" relative to it's parent using $prefix |
1333
|
|
|
|
|
|
|
sub generateNodeID { |
1334
|
|
|
|
|
|
|
my ($self, $node, $prefix, $reusable) = @_; |
1335
|
|
|
|
|
|
|
my $id = $node->getAttribute('xml:id'); |
1336
|
|
|
|
|
|
|
return $id if $id; |
1337
|
|
|
|
|
|
|
# Find the closest parent with an ID |
1338
|
|
|
|
|
|
|
my ($parent, $pid, $n) = ($node->parentNode, undef, undef); |
1339
|
|
|
|
|
|
|
while ($parent && !($pid = $parent->getAttribute('xml:id'))) { |
1340
|
|
|
|
|
|
|
$parent = $parent->parentNode; } |
1341
|
|
|
|
|
|
|
# Now find the next unused id relative to the parent id, as "prefix" |
1342
|
|
|
|
|
|
|
$pid .= '.' if $pid; |
1343
|
|
|
|
|
|
|
for ($n = 1 ; ($id = $pid . $prefix . $n) |
1344
|
|
|
|
|
|
|
&& ($$self{idcache}{$id} || $$self{idcache_reserved}{$id}) ; $n++) { } |
1345
|
|
|
|
|
|
|
$node->setAttribute('xml:id' => $id); |
1346
|
|
|
|
|
|
|
$$self{idcache}{$id} = $node; |
1347
|
|
|
|
|
|
|
$$self{idcache_reusable}{$id} = $reusable; |
1348
|
|
|
|
|
|
|
# If we've already been scanned, and have fragid's, create one here, too. |
1349
|
|
|
|
|
|
|
if (my $fragid = $parent && $parent->getAttribute('fragid')) { |
1350
|
|
|
|
|
|
|
$node->setAttribute(fragid => $fragid . '.' . $prefix . $n); } |
1351
|
|
|
|
|
|
|
return $id; } |
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
#====================================================================== |
1354
|
|
|
|
|
|
|
# adjust_latexml_doctype($doc,"Foo","Bar") => |
1355
|
|
|
|
|
|
|
#
|
1356
|
|
|
|
|
|
|
# "http://dlmf.nist.gov/LaTeXML/LaTeXML-Foo-Bar.dtd"> |
1357
|
|
|
|
|
|
|
sub adjust_latexml_doctype { |
1358
|
|
|
|
|
|
|
my ($self, @additions) = @_; |
1359
|
|
|
|
|
|
|
my $doc = $$self{document}; |
1360
|
|
|
|
|
|
|
if (my $dtd = $doc->internalSubset) { |
1361
|
|
|
|
|
|
|
if ($dtd->toString |
1362
|
|
|
|
|
|
|
=~ /^$/) { |
1363
|
|
|
|
|
|
|
my ($root, $parts, $system) = ($1, $3, $5); |
1364
|
|
|
|
|
|
|
my ($type, @addns) = split(/ \+ /, $parts); |
1365
|
|
|
|
|
|
|
my %addns = (); |
1366
|
|
|
|
|
|
|
map { $addns{$_} = 1 } @addns, @additions; |
1367
|
|
|
|
|
|
|
@addns = sort keys %addns; |
1368
|
|
|
|
|
|
|
my $publicid = join(' + ', "-//NIST LaTeXML//LaTeXML $type", @addns); |
1369
|
|
|
|
|
|
|
my $systemid = join('-', "http://dlmf.nist.gov/LaTeXML/LaTeXML", @addns) . ".dtd"; |
1370
|
|
|
|
|
|
|
$doc->removeInternalSubset; # Apparently we've got to remove it first. |
1371
|
|
|
|
|
|
|
$doc->createInternalSubset($root, $publicid, $systemid); } } |
1372
|
|
|
|
|
|
|
return; } |
1373
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
#====================================================================== |
1375
|
|
|
|
|
|
|
# Cache support: storage of data from previous run. |
1376
|
|
|
|
|
|
|
# ? |
1377
|
|
|
|
|
|
|
|
1378
|
|
|
|
|
|
|
# cacheFile as parameter ???? |
1379
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
sub cacheLookup { |
1381
|
|
|
|
|
|
|
my ($self, $key) = @_; |
1382
|
|
|
|
|
|
|
$self->openCache; |
1383
|
|
|
|
|
|
|
return $$self{cache}{$key}; } |
1384
|
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
|
sub cacheStore { |
1386
|
|
|
|
|
|
|
my ($self, $key, $value) = @_; |
1387
|
|
|
|
|
|
|
$self->openCache; |
1388
|
|
|
|
|
|
|
if (defined $value) { |
1389
|
|
|
|
|
|
|
$$self{cache}{$key} = $value; } |
1390
|
|
|
|
|
|
|
else { |
1391
|
|
|
|
|
|
|
delete $$self{cache}{$key}; } |
1392
|
|
|
|
|
|
|
return; } |
1393
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
sub openCache { |
1395
|
|
|
|
|
|
|
my ($self) = @_; |
1396
|
|
|
|
|
|
|
if (!$$self{cache}) { |
1397
|
|
|
|
|
|
|
$$self{cache} = {}; |
1398
|
|
|
|
|
|
|
my $dbfile = $self->checkDestination("LaTeXML.cache"); |
1399
|
|
|
|
|
|
|
tie %{ $$self{cache} }, 'DB_File', $dbfile, O_RDWR | O_CREAT |
1400
|
|
|
|
|
|
|
or return Fatal('internal', 'db', undef, |
1401
|
|
|
|
|
|
|
"Couldn't create DB cache for " . $self->getDestination, |
1402
|
|
|
|
|
|
|
"Message was: " . $!, |
1403
|
|
|
|
|
|
|
(-f $dbfile ? "\n(possibly incompatible db format?)" : '')); |
1404
|
|
|
|
|
|
|
} |
1405
|
|
|
|
|
|
|
return; } |
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
sub closeCache { |
1408
|
|
|
|
|
|
|
my ($self) = @_; |
1409
|
|
|
|
|
|
|
if ($$self{cache}) { |
1410
|
|
|
|
|
|
|
untie %{ $$self{cache} }; |
1411
|
|
|
|
|
|
|
$$self{cache} = undef; } |
1412
|
|
|
|
|
|
|
return; } |
1413
|
|
|
|
|
|
|
|
1414
|
|
|
|
|
|
|
1; |
1415
|
|
|
|
|
|
|
#====================================================================== |
1416
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
__END__ |