File Coverage

blib/lib/LaTeXML/Post.pm
Criterion Covered Total %
statement 21 21 100.0
branch n/a
condition n/a
subroutine 7 7 100.0
pod n/a
total 28 28 100.0


line stmt bran cond sub pod time code
1             # /=====================================================================\ #
2             # | LaTeXML::Post | #
3             # | PostProcessing driver | #
4             # |=====================================================================| #
5             # | Part of LaTeXML: | #
6             # | Public domain software, produced as part of work done by the | #
7             # | United States Government & not subject to copyright in the US. | #
8             # |---------------------------------------------------------------------| #
9             # | Bruce Miller #_# | #
10             # | http://dlmf.nist.gov/LaTeXML/ (o o) | #
11             # \=========================================================ooo==U==ooo=/ #
12              
13             package LaTeXML::Post;
14 1     1   4865 use strict;
  1         1  
  1         26  
15 1     1   3 use warnings;
  1         1  
  1         24  
16 1     1   505 use Time::HiRes;
  1         1095  
  1         3  
17 1     1   423 use LaTeXML::Util::Radix;
  1         2  
  1         112  
18 1     1   472 use Encode;
  1         10901  
  1         108  
19 1     1   9 use base qw(Exporter);
  1         2  
  1         104  
20 1     1   7 use base qw(LaTeXML::Common::Object);
  1         2  
  1         550  
21             use LaTeXML::Global;
22             use LaTeXML::Common::Error;
23             use LaTeXML::Core::State;
24             our @EXPORT = (@LaTeXML::Common::Error::EXPORT);
25              
26             sub new {
27             my ($class, %options) = @_;
28             my $self = bless { status => {}, %options }, $class;
29             $$self{verbosity} = 0 unless defined $$self{verbosity};
30             # TEMPORARY HACK!!!!
31             # Create a State object, essentially only to hold verbosity (for now)
32             # so that Errors can be reported, managed and recorded
33             # Eventually will be a "real" State (or other configuration object)
34             $$self{state} = LaTeXML::Core::State->new();
35             $$self{state}->assignValue(VERBOSITY => $$self{verbosity});
36             return $self; }
37              
38             #======================================================================
39             sub ProcessChain {
40             my ($self, $doc, @postprocessors) = @_;
41             return $self->withState(sub {
42             return $self->ProcessChain_internal($doc, @postprocessors); }); }
43              
44             sub ProcessChain_internal {
45             my ($self, $doc, @postprocessors) = @_;
46             local $LaTeXML::POST = $self;
47             local $LaTeXML::Post::NOTEINFO = undef;
48             local $LaTeXML::Post::DOCUMENT = $doc;
49              
50             my @docs = ($doc);
51             NoteBegin("post-processing");
52              
53             foreach my $processor (@postprocessors) {
54             local $LaTeXML::Post::PROCESSOR = $processor;
55             my @newdocs = ();
56             foreach my $doc (@docs) {
57             local $LaTeXML::Post::DOCUMENT = $doc;
58             if (my @nodes = grep { $_ } $processor->toProcess($doc)) { # If there are nodes to process
59             my $n = scalar(@nodes);
60             my $msg = join(' ', $processor->getName || '',
61             $doc->siteRelativeDestination || '',
62             ($n > 1 ? "$n to process" : 'processing'));
63             NoteBegin($msg);
64             push(@newdocs, $processor->process($doc, @nodes));
65             NoteEnd($msg); }
66             else {
67             push(@newdocs, $doc); } }
68             @docs = @newdocs; }
69             NoteEnd("post-processing");
70             return @docs; }
71              
72             ## HACK!!!
73             ## This is a copy of withState from LaTeXML::Core.pm
74             ## This should eventually be in a higher level, common class
75             ## using a common State or configuration object
76             ## in order to wrap ALL processing.
77             sub withState {
78             my ($self, $closure) = @_;
79             local $STATE = $$self{state};
80             # And, set fancy error handler for ANY die!
81             local $SIG{__DIE__} = \&LaTeXML::Common::Error::perl_die_handler;
82             local $SIG{INT} = \&LaTeXML::Common::Error::perl_interrupt_handler;
83             local $SIG{__WARN__} = \&LaTeXML::Common::Error::perl_warn_handler;
84             local $SIG{'ALRM'} = \&LaTeXML::Common::Error::perl_timeout_handler;
85             local $SIG{'TERM'} = \&LaTeXML::Common::Error::perl_terminate_handler;
86              
87             local $LaTeXML::DUAL_BRANCH = '';
88              
89             return &$closure($STATE); }
90              
91             sub getStatusCode {
92             my ($self) = @_;
93             return $$self{state}->getStatusCode; }
94              
95             sub getStatusMessage {
96             my ($self) = @_;
97             return $$self{state}->getStatusMessage; }
98              
99             #======================================================================
100             # "Global" Post processing services
101             #======================================================================
102              
103             # Return a sorter appropriate for lang (if Unicode::Collate::Locale available),
104             # or an undifferentiated Unicode sorter (if only Unicode::Collate is available),
105             # or just a dumb stand-in for perl's sort
106             sub getsorter {
107             my ($self, $lang) = @_;
108             my $collator;
109             if ($collator = $$self{collatorcache}{$lang}) { }
110             elsif ($collator = eval {
111             local $LaTeXML::IGNORE_ERRORS = 1;
112             require 'Unicode/Collate/Locale.pm';
113             Unicode::Collate::Locale->new(
114             locale => $lang,
115             variable => 'non-ignorable', # I think; at least space shouldn't be ignored
116             upper_before_lower => 1); }) { }
117             elsif ($collator = eval {
118             local $LaTeXML::IGNORE_ERRORS = 1;
119             require 'Unicode/Collate.pm';
120             Unicode::Collate->new(
121             variable => 'non-ignorable', # I think; at least space shouldn't be ignored
122             upper_before_lower => 1); }) {
123             Info('expected', 'Unicode::Collate::Locale', undef,
124             "No Unicode::Collate::Locale found;",
125             "using Unicode::Collate; ignoring language='$lang'"); }
126             else {
127             # Otherwise, just use primitive codepoint ordering.
128             $collator = LaTeXML::Post::DumbCollator->new();
129             Info('expected', 'Unicode::Collate::Locale', undef,
130             "No Unicode::Collate::Locale or Unicode::Collate",
131             "using perl's sort; ignoring language='$lang'"); }
132             $$self{collatorcache}{$lang} = $collator;
133             return $collator; }
134              
135             #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
136             package LaTeXML::Post::DumbCollator;
137             use strict;
138              
139             sub new {
140             my ($class) = @_;
141             return bless {}, $class; }
142              
143             sub sort {
144             my ($self, @things) = @_;
145             return (sort @things); }
146              
147             #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
148             package LaTeXML::Post::Processor;
149             use strict;
150             use LaTeXML::Post;
151             use LaTeXML::Common::Error;
152             use LaTeXML::Common::XML;
153             use LaTeXML::Util::Pathname;
154             use base qw(LaTeXML::Common::Object);
155              
156             # An Abstract Post Processor
157             sub new {
158             my ($class, %options) = @_;
159             my $self = bless {%options}, $class;
160             $$self{verbosity} = 0 unless defined $$self{verbosity};
161             $$self{resource_directory} = $options{resource_directory};
162             $$self{resource_prefix} = $options{resource_prefix};
163             my $name = $class; $name =~ s/^LaTeXML::Post:://;
164             $$self{name} = $name;
165             return $self; }
166              
167             sub getName {
168             my ($self) = @_;
169             return $$self{name}; }
170              
171             # Return the nodes to be processed; by default the document element.
172             # This allows processors to focus on specific kinds of nodes,
173             # or to skip processing if there are none to process.
174             sub toProcess {
175             my ($self, $doc) = @_;
176             return $doc->getDocumentElement; }
177              
178             # This must be defined to do whatever processing is needed to @toprocess nodes.
179             sub process {
180             my ($self, $doc, @toprocess) = @_;
181             Fatal("misdefined", $self, $doc, "This post-processor is abstract; does not implement ->process");
182             return $doc; }
183              
184             #======================================================================
185             # Some postprocessors will want to create a bunch of "resource"s,
186             # such as generated or transformed image files, or other data files.
187             # These should return a pathname, relative to the document's destination,
188             # for storing a resource associated with $node.
189             # Will use the Post option resource_directory
190             sub desiredResourcePathname {
191             my ($self, $doc, $node, $source, $type) = @_;
192             return; }
193              
194             # Ideally this would return a pathname relative to the document
195             # but I think we've accommodated absolute ones.
196             sub generateResourcePathname {
197             my ($self, $doc, $node, $source, $type) = @_;
198             my $subdir = $$self{resource_directory} || '';
199             my $prefix = $$self{resource_prefix} || "x";
200             my $counter = join('_', "_max", $subdir, $prefix, "counter_");
201             my $n = $doc->cacheLookup($counter) || 0;
202             my $name = $prefix . ++$n;
203             $doc->cacheStore($counter, $n);
204             return pathname_make(dir => $subdir, name => $name, type => $type); }
205              
206             #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
207             package LaTeXML::Post::MathProcessor;
208             use strict;
209             use LaTeXML::Post;
210             use LaTeXML::Common::Error;
211             use base qw(LaTeXML::Post::Processor);
212             use LaTeXML::Common::XML;
213              
214             # This is an abstract class; A complete MathProcessor will need to define:
215             # $self->convertNode($doc,$xmath)
216             # to generate the converted math node
217             # $self->combineParallel($doc,$math,$primary,@secondaries)
218             # to combine the $primary (the result of $self's conversion of $math)
219             # with the results of other math processors to create the
220             # parallel markup appropriate for this processor's markup.
221             # $self->rawIDSuffix returns a short string to append to id's for nodes
222             # using this markup.
223              
224             # Top level processing finds and converts all top-level math nodes.
225             # Any nested mathnodes would only appear within ltx:XMText;
226             # postprocessors must handle nested math as appropriate (but see convertXMTextContent)
227             # Invokes preprocess on each before doing the conversion in case
228             # analysis is needed.
229             sub toProcess {
230             my ($self, $doc) = @_;
231             return $doc->findnodes('//ltx:Math[not(ancestor::ltx:Math)]'); }
232              
233             sub process {
234             my ($self, $doc, @maths) = @_;
235             local $LaTeXML::Post::MATHPROCESSOR = $self;
236             $doc->markXMNodeVisibility;
237             $self->preprocess($doc, @maths);
238             if ($$self{parallel}) {
239             my @secondaries = @{ $$self{secondary_processors} };
240             # What's the right test for when cross-referencing should be done?
241             # For now: only when the primary and some secondary can cross-ref
242             # (otherwise, end up with peculiar structures?)
243             my ($proc1, @ignore) = grep { $_->can('addCrossref') } @secondaries;
244             if ($self->can('addCrossref') && $proc1) {
245             $$self{crossreferencing} = 1; # We'll need ID's!
246             $$proc1{crossreferencing} = 1; }
247             foreach my $proc (@secondaries) {
248             local $LaTeXML::Post::MATHPROCESSOR = $proc;
249             $proc->preprocess($doc, @maths); } }
250             # Re-Fetch the math nodes, in case preprocessing has messed them up!!!
251             @maths = $self->toProcess($doc);
252              
253             ## Do in reverse, since (in LaTeXML) we allow math nested within text within math.
254             ## So, we want to converted any nested expressions first, so they get carried along
255             ## with the outer ones.
256             my $n = 0;
257             foreach my $math (reverse(@maths)) {
258             # If parent is MathBranch, which branch number is it?
259             # (note: the MathBranch will be in a ltx:MathFork, with a ltx:Math being 1st child)
260             my @preceding = $doc->findnodes("parent::ltx:MathBranch/preceding-sibling::*", $math);
261             local $LaTeXML::Post::MathProcessor::FORK = scalar(@preceding);
262             $self->processNode($doc, $math);
263             $n++; }
264              
265             # Experimentally, cross reference ??? (or clearer name?)
266             if ($$self{parallel}) {
267             # There could be various strategies when there are more than 2 parallel conversions,
268             # eg a cycle or something....
269             # Here, we simply take the first two processors that know how to addCrossref
270             # and connect their nodes to each other.
271             my ($proc1, $proc2, @ignore)
272             = grep { $_->can('addCrossref') } $self, @{ $$self{secondary_processors} };
273             if ($proc1 && $proc2) {
274             # First, prepare a list of all Math id's, in document order, to simplify crossreferencing
275             my $ids = {};
276             my $pos = 0;
277             foreach my $n ($doc->findnodes('descendant-or-self::ltx:Math/descendant::*[@xml:id]')) {
278             $$ids{ $n->getAttribute('xml:id') } = $pos++; }
279             $$proc1{crossreferencing_ids} = $ids;
280             $$proc2{crossreferencing_ids} = $ids;
281             # Now do cross referencing
282             $proc1->addCrossrefs($doc, $proc2);
283             $proc2->addCrossrefs($doc, $proc1); } }
284             NoteProgressDetailed(" [converted $n Maths]");
285             return $doc; }
286              
287             # Make THIS MathProcessor the primary branch (of whatever parallel markup it supports),
288             # and make all of the @moreprocessors be secondary ones.
289             sub setParallel {
290             my ($self, @moreprocessors) = @_;
291             if (@moreprocessors) {
292             $$self{parallel} = 1;
293             map { $$_{is_secondary} = 1 } @moreprocessors; # Mark the others as secondary
294             $$self{secondary_processors} = [@moreprocessors];
295             $$self{name} .= '[w/' . join('+', map { $_->getName } @moreprocessors) . ']'; }
296             else {
297             $$self{parallel} = 0; }
298             return; }
299              
300             # Optional; if you want to do anything before translation
301             sub preprocess {
302             my ($self, $doc, @nodes) = @_;
303             return; }
304              
305             # $self->processNode($doc,$mathnode) is the top-level conversion
306             # It converts the XMath within $mathnode, and adds it to the $mathnode,
307             # This invokes $self->convertNode($doc,$xmath) to get the conversion.
308             sub processNode {
309             my ($self, $doc, $math) = @_;
310             my $xmath = $doc->findnode('ltx:XMath', $math);
311             return unless $xmath; # Nothing to convert if there's no XMath ... !
312             local $LaTeXML::Post::MATHPROCESSOR = $self;
313             my $conversion;
314             # XMath will be removed (LATER!), but mark its ids as reusable.
315             $doc->preremoveNodes($xmath);
316             if ($$self{parallel}) {
317             my $primary = $self->convertNode($doc, $xmath);
318             my @secondaries = ();
319             foreach my $proc (@{ $$self{secondary_processors} }) {
320             local $LaTeXML::Post::MATHPROCESSOR = $proc;
321             my $secondary = $proc->convertNode($doc, $xmath);
322             # IF it is (first) image, copy image attributes to ltx:Math ???
323             $self->maybeSetMathImage($math, $secondary);
324             push(@secondaries, $secondary); }
325             $conversion = $self->combineParallel($doc, $xmath, $primary, @secondaries); }
326             else {
327             $conversion = $self->convertNode($doc, $xmath);
328             $self->maybeSetMathImage($math, $conversion); }
329             # we now REMOVE the ltx:XMath from the ltx:Math, and whitespace
330             # (if there's an XMath PostProcessing module, it will add it back, with appropriate id's)
331             if (my $xml = $$conversion{xml}) {
332             $$conversion{xml} = $self->outerWrapper($doc, $xmath, $xml); }
333             $doc->removeNodes($xmath);
334             # NOTE: Unless XMath is the primary, (preserving the XMath, w/no IDSuffix)
335             # we've got to remove the id's from the XMath, since the primary will get same id's
336             # and (some versions) of libxml2 complain!
337             if ($$conversion{mimetype} && ($$conversion{mimetype} ne 'application/x-latexml')) {
338             map { $_->removeAttribute('xml:id') }
339             $doc->findnodes('descendant-or-self::*[@xml:id]', $xmath); }
340             $doc->removeBlankNodes($math);
341             if (my $new = $$conversion{xml}) {
342             $doc->addNodes($math, $new); }
343             # else ?
344             return; }
345              
346             sub maybeSetMathImage {
347             my ($self, $math, $conversion) = @_;
348             if ((($$conversion{mimetype} || '') =~ /^image\//) # Got an image?
349             && !$math->getAttribute('imagesrc')) { # and it's the first one
350             if (my $src = $$conversion{src}) {
351             $math->setAttribute(imagesrc => $src);
352             $math->setAttribute(imagewidth => $$conversion{width});
353             $math->setAttribute(imageheight => $$conversion{height});
354             $math->setAttribute(imagedepth => $$conversion{depth}); } }
355             return; }
356              
357             # NOTE: Sort out how parallel & outerWrapper should work.
358             # It probably ought to be that if the conversion is being embedded in
359             # something from another namespace, it needs the wrapper.
360             # ie. when mixing parallel markups, NOT just at the top level, although certainly there too.
361             #
362             # This should wrap the resulting conversion with m:math or om:OMA or whatever appropriate?
363             sub outerWrapper {
364             my ($self, $doc, $xmath, $conversion) = @_;
365             return $conversion; }
366              
367             # This should proably be from the core of the current ->processNode
368             sub convertNode {
369             my ($self, $doc, $node) = @_;
370             Fatal('misdefined', (ref $self), undef,
371             "Abstract package: math conversion has not been defined for this MathProcessor");
372             return; }
373              
374             # This should be implemented by potential Primaries
375             # Maybe the caller of this should check the namespaces, and call wrapper if needed?
376             sub combineParallel {
377             my ($self, $doc, $xmath, $primary, @secondaries) = @_;
378             LaTeXML::Post::Error('misdefined', (ref $self), undef,
379             "Abstract package: combining parallel markup has not been defined for this MathProcessor",
380             "dropping the extra markup from: " . join(',', map { $$_{processor} } @secondaries));
381             return $primary; }
382              
383             # A helper for converting XMText
384             # ltx:XMText escapes back to general ltx markup; the only element within XMath that does.
385             # BUT it can contain nested ltx:Math!
386             # When converting to (potentially parallel markup, coarse-grained),
387             # the non-math needs to be duplicated, but with the ID's modified,
388             # AND the nested math needs to be converted to ONLY the current target's markup
389             # NOT parallel within each nested math, although it should still be cross-referencable to others!
390             # moreover, the math will need the outerWrapper.
391             my $NBSP = pack('U', 0xA0); # CONSTANT
392              
393             sub convertXMTextContent {
394             my ($self, $doc, $convertspaces, @nodes) = @_;
395             my @result = ();
396             foreach my $node (@nodes) {
397             if ($node->nodeType == XML_TEXT_NODE) {
398             my $string = $node->textContent;
399             if ($convertspaces) {
400             $string =~ s/^\s+/$NBSP/; $string =~ s/\s+$/$NBSP/; }
401             push(@result, $string); }
402             else {
403             my $tag = $doc->getQName($node);
404             if ($tag eq 'ltx:XMath') {
405             my $conversion = $self->convertNode($doc, $node);
406             my $xml = $$conversion{xml};
407             # And if no xml ????
408             push(@result, $self->outerWrapper($doc, $node, $xml)); }
409             else {
410             my %attr = ();
411             foreach my $attr ($node->attributes) {
412             my $atype = $attr->nodeType;
413             if ($atype == XML_ATTRIBUTE_NODE) {
414             my $key = $attr->nodeName;
415             my $value = $attr->getValue;
416             if ($key =~ /^_/) { } # don't copy internal attributes ???
417             elsif ($key eq 'xml:id') { } # ignore; we'll handle fragid???
418             elsif ($key eq 'fragid') {
419             my $id = $doc->uniquifyID($value, $self->IDSuffix);
420             $attr{'xml:id'} = $id; }
421             else {
422             $attr{$key} = $attr->value; } } }
423             # Probably should invoke associateNode ???
424             push(@result,
425             [$tag, {%attr}, $self->convertXMTextContent($doc, $convertspaces, $node->childNodes)]); } } }
426             return @result; }
427              
428             # When converting an XMath node (with an id) to some other format,
429             # we will generate an id for the new node.
430             # This method returns a suffix to be added to the XMath id.
431             # The primary format gets the id's unchanged, but secondary ones get a suffix (eg. ".pmml")
432             sub IDSuffix {
433             my ($self) = @_;
434             return ($$self{is_secondary} ? $self->rawIDSuffix : ''); }
435              
436             sub rawIDSuffix {
437             return ''; }
438              
439             # In order to do cross-referencing betweeen formats, and to relate semantic/presentation
440             # information to content/presentation nodes (resp), we want to associate each
441             # generated node (MathML, OpenMath,...) with a "source" XMath node "responsible" for it's generation.
442             # This is often the "current" XMath node that was being converted, but sometimes
443             # * the containing XMDual (which makes more sense when we've generated a "container")
444             # * the containing XMDual's semantic operator (makes more sense when we're generating
445             # tokens that are only visible from the presentation branch)
446             sub associateNode {
447             my ($self, $node, $currentnode, $noxref) = @_;
448             my $r = ref $node;
449             return unless $currentnode && $r && ($r eq 'ARRAY' || $r eq 'XML::LibXML::Element');
450             my $document = $LaTeXML::Post::DOCUMENT;
451             # Check if already associated with a source node
452             my $isarray = ref $node eq 'ARRAY';
453             # What kind of branch are we generating?
454             my $ispresentation = $self->rawIDSuffix eq '.pmml'; # TEMPORARY HACK: BAAAAD method!
455             my $iscontainer = 0;
456             if ($isarray) {
457             return if $$node[1]{'_sourced'};
458             $$node[1]{'_sourced'} = 1;
459             my ($tag, $attr, @children) = @$node;
460             $iscontainer = grep { ref $_ } @children; }
461             else {
462             return if $node->getAttribute('_sourced');
463             $node->setAttribute('_sourced' => 1);
464             $iscontainer = scalar(element_nodes($node)); }
465             my $sourcenode = $currentnode;
466             # If the generated node is a "container" (non-token!), use the container as source
467             if ($iscontainer) {
468             if (my $container = $document->findnode('ancestor-or-self::ltx:XMDual[1]', $sourcenode)) {
469             $sourcenode = $container; } }
470             # If the current node is appropriately visible, use it.
471             elsif ($currentnode->getAttribute(($ispresentation ? '_cvis' : '_pvis'))) { }
472             # Else (current node isn't visible); try to find content OPERATOR
473             elsif (my $container = $document->findnode('ancestor-or-self::ltx:XMDual[1]', $sourcenode)) {
474             my ($op) = element_nodes($container);
475             my $q = $document->getQName($op) || 'unknown';
476             if ($q eq 'ltx:XMTok') { }
477             elsif ($q eq 'ltx:XMApp') {
478             ($op) = element_nodes($op);
479             if ($document->getQName($op) eq 'ltx:XMRef') {
480             $op = $document->realizeXMNode($op); } }
481             if ($op && !$op->getAttribute('_pvis')) {
482             $sourcenode = $op; }
483             else {
484             $sourcenode = $container; } }
485             # If we're intending to cross-reference, then source & generated nodes will need ID's
486             if ($$self{crossreferencing}) {
487             if (!$noxref && !$sourcenode->getAttribute('fragid')) { # If no ID, but need one
488             $document->generateNodeID($sourcenode, '', 1); } # but the ID is reusable
489             if (my $sourceid = $sourcenode->getAttribute('fragid')) { # If source has ID
490             my $nodeid = $currentnode->getAttribute('fragid') || $sourceid;
491             my $id = $document->uniquifyID($nodeid, $self->IDSuffix);
492             if ($isarray) {
493             $$node[1]{'xml:id'} = $id; }
494             else {
495             $node->setAttribute('xml:id' => $id); }
496             push(@{ $$self{convertedIDs}{$sourceid} }, $id) unless $noxref; } }
497             $self->associateNodeHook($node, $sourcenode, $noxref);
498             if ($isarray) { # Array represented
499             map { $self->associateNode($_, $currentnode, $noxref) } @$node[2 .. $#$node]; }
500             else { # LibXML node
501             map { $self->associateNode($_, $currentnode, $noxref) } element_nodes($node); }
502             return; }
503              
504             # Customization hook for adding other attributes to the generated math nodes.
505             sub associateNodeHook {
506             my ($self, $node, $sourcenode, $noxref) = @_;
507             return; }
508              
509             sub shownode {
510             my ($node, $level) = @_;
511             $level = 0 unless defined $level;
512             my $ref = ref $node;
513             if ($ref eq 'ARRAY') {
514             my ($tag, $attr, @children) = @$node;
515             return "\n" . (' ' x $level)
516             . '[' . $tag . ',{' . join(',', map { $_ . '=>' . $$attr{$_} } sort keys %$attr) . '},'
517             . join(',', map { shownode($_, $level + 1) } @children) . ']'; }
518             elsif ($ref =~ /^XML/) {
519             return $node->toString; }
520             else {
521             return "$node"; } }
522              
523             # Add backref linkages (eg. xref) onto the nodes that $self created (converted from XMath)
524             # to reference those that $otherprocessor created.
525             # NOTE: Subclass MUST define addCrossref($node,$xref_id) to add the
526             # id of the "Other Interesting Node" to the (array represented) xml $node
527             # in whatever fashion the markup for that processor uses.
528             #
529             # This may be another useful place to add a hook?
530             # It would provide the list of cross-refenced nodes in document order
531             # This would allow deciding whether or not to copy foreign attributes or other interesting things
532             sub addCrossrefs {
533             my ($self, $doc, $otherprocessor) = @_;
534             my $selfs_map = $$self{convertedIDs};
535             my $others_map = $$otherprocessor{convertedIDs};
536             my $xrefids = $$self{crossreferencing_ids};
537             foreach my $xid (keys %$selfs_map) { # For each Math id that $self converted
538             if (my $other_ids = $$others_map{$xid}) { # Did $other also convert those ids?
539             my $xref_id = $$other_ids[0];
540             if (scalar(@$other_ids) > 1) { # Find 1st in document order! (order is cached)
541             ($xref_id) = sort { $$xrefids{$a} <=> $$xrefids{$b} } @$other_ids; }
542             foreach my $id (@{ $$selfs_map{$xid} }) { # look at each node $self created from $xid
543             if (my $node = $doc->findNodeByID($id)) { # If we find a node,
544             $self->addCrossref($node, $xref_id); } } } } # add a crossref from it to $others's node
545             return; }
546              
547             #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
548              
549             package LaTeXML::Post::Document;
550             use strict;
551             use LaTeXML::Common::XML;
552             use LaTeXML::Util::Pathname;
553             use LaTeXML::Util::Radix;
554             use DB_File;
555             use Unicode::Normalize;
556             use LaTeXML::Post; # to import error handling...
557             use LaTeXML::Common::Error;
558             use base qw(LaTeXML::Common::Object);
559             our $NSURI = "http://dlmf.nist.gov/LaTeXML";
560             our $XPATH = LaTeXML::Common::XML::XPath->new(ltx => $NSURI);
561              
562             # Useful options:
563             # destination = the ultimate destination file for this document to be written.
564             # destinationDirectory = the directory it will be stored in (derived from $destination)
565             # siteDirectory = the root directory of where the entire site will be contained
566             # namespaces = a hash of namespace prefix => namespace uri
567             # namespaceURIs = reverse hash of above.
568             # nocache = a boolean, disables storing of permanent LaTeXML.cache
569             # the cache is used to remember things like image conversions from previous runs.
570             # searchpaths = array of paths to search for other resources
571             # Note that these may not be LaTeXML documents (maybe html or ....)
572             sub new {
573             my ($class, $xmldoc, %options) = @_;
574             my $self = $class->new_internal($xmldoc, %options);
575             $self->setDocument_internal($xmldoc);
576             return $self; }
577              
578             sub new_internal {
579             my ($class, $xmldoc, %options) = @_;
580             my %data = ();
581             if (ref $class) { # Cloning!
582             map { $data{$_} = $$class{$_} } keys %$class;
583             $class = ref $class; }
584             map { $data{$_} = $options{$_} } keys %options; # These override.
585             if ((defined $options{destination}) && (!defined $options{destinationDirectory})) {
586             my ($dir, $name, $ext) = pathname_split($data{destination});
587             $data{destinationDirectory} = $dir || '.'; }
588             # Check consistency of siteDirectory (providing there's a destinationDirectory)
589             if ($data{destinationDirectory}) {
590             if ($data{siteDirectory}) {
591             Fatal('unexpected', $data{destinationDirectory}, undef,
592             "The destination directory ($data{destinationDirectory})"
593             . " must be within the siteDirectory ($data{siteDirectory})")
594             unless pathname_is_contained($data{destinationDirectory}, $data{siteDirectory}); }
595             else {
596             $data{siteDirectory} = $data{destinationDirectory}; } }
597             # Start, at least, with our own namespaces.
598             $data{namespaces} = { ltx => $NSURI } unless $data{namespaces};
599             $data{namespaceURIs} = { $NSURI => 'ltx' } unless $data{namespaceURIs};
600             $data{idcache} = {};
601             $data{idcache_reusable} = {};
602             $data{idcache_reserve} = {};
603              
604             my $self = bless {%data}, $class;
605             return $self; }
606              
607             sub newFromFile {
608             my ($class, $source, %options) = @_;
609             $options{source} = $source;
610             if (!$options{sourceDirectory}) {
611             my ($dir, $name, $ext) = pathname_split($source);
612             $options{sourceDirectory} = $dir || '.'; }
613             my $doc = $class->new(LaTeXML::Common::XML::Parser->new()->parseFile($source), %options);
614             $doc->validate if $$doc{validate};
615             return $doc; }
616              
617             sub newFromString {
618             my ($class, $string, %options) = @_;
619             $options{sourceDirectory} = '.' unless $options{sourceDirectory};
620             my $doc = $class->new(LaTeXML::Common::XML::Parser->new()->parseString($string), %options);
621             $doc->validate if $$doc{validate};
622             return $doc; }
623              
624             sub newFromSTDIN {
625             my ($class, %options) = @_;
626             my $string;
627             { local $/ = undef; $string = <>; }
628             $options{sourceDirectory} = '.' unless $options{sourceDirectory};
629             my $doc = $class->new(LaTeXML::Common::XML::Parser->new()->parseString($string), %options);
630             $doc->validate if $$doc{validate};
631             return $doc; }
632              
633             #======================================================================
634              
635             # This is for creating essentially "sub documents"
636             # that are in some sense children of $self, possibly removed or cloned from it.
637             # And they are presumably LaTeXML documents
638             sub newDocument {
639             my ($self, $root, %options) = @_;
640             my $clone_suffix = $options{clone_suffix};
641             delete $options{clone_suffix};
642             my $doc = $self->new_internal(undef, %options);
643             $doc->setDocument_internal($root, clone_suffix => $clone_suffix);
644              
645             if (my $root_id = $self->getDocumentElement->getAttribute('xml:id')) {
646             $$doc{split_from_id} = $root_id; }
647              
648             # Copy any processing instructions.
649             foreach my $pi ($self->findnodes(".//processing-instruction('latexml')")) {
650             $doc->getDocument->appendChild($pi->cloneNode); }
651              
652             # And any resource elements
653             if (my @resources = $self->findnodes("descendant::ltx:resource")) {
654             $doc->addNodes($doc->getDocumentElement, @resources); } # cloning, as needed...
655              
656             # If new document has no date, try to add one
657             $doc->addDate($self);
658              
659             # And copy class from the top-level document; This is risky...
660             # We want to preserve global document style information
661             # But some may refer specifically to the document, and NOT to the parts?
662             if (my $class = $self->getDocumentElement->getAttribute('class')) {
663             my $root = $doc->getDocumentElement;
664             my $oclass = $root->getAttribute('class');
665             $root->setAttribute(class => ($oclass ? $oclass . ' ' . $class : $class)); }
666              
667             # Finally, return the new document.
668             return $doc; }
669              
670             sub setDocument_internal {
671             my ($self, $root, %options) = @_;
672             # Build the document's XML
673             my $roottype = ref $root;
674             if ($roottype eq 'LaTeXML::Core::Document') {
675             $root = $root->getDocument;
676             $roottype = ref $root; }
677             if (my $clone_suffix = $options{clone_suffix}) {
678             if ($roottype eq 'XML::LibXML::Document') {
679             Fatal('internal', 'unimplemented', undef,
680             "Have not yet implemented cloning for entire documents"); }
681             # Just make a clone, and then insert that.
682             $root = $self->cloneNode($root, $clone_suffix); }
683              
684             if ($roottype eq 'XML::LibXML::Document') {
685             $$self{document} = $root;
686             foreach my $node ($self->findnodes("//*[\@xml:id]")) { # Now record all ID's
687             $$self{idcache}{ $node->getAttribute('xml:id') } = $node; }
688             # Fetch any additional namespaces from the root
689             foreach my $ns ($root->documentElement->getNamespaces) {
690             my ($prefix, $uri) = ($ns->getLocalName, $ns->getData);
691             if ($prefix) {
692             $$self{namespaces}{$prefix} = $uri unless $$self{namespaces}{$prefix};
693             $$self{namespaceURIs}{$uri} = $prefix unless $$self{namespaceURIs}{$uri}; } }
694              
695             # Extract data from latexml's ProcessingInstructions
696             # I'd like to provide structured access to the PI's for those modules that need them,
697             # but it isn't quite clear what that api should be.
698             $$self{processingInstructions} =
699             [map { $_->textContent } $XPATH->findnodes('.//processing-instruction("latexml")', $root)];
700             # Combine specified paths with any from the PI's
701             my @paths = ();
702             @paths = @{ $$self{searchpaths} } if $$self{searchpaths};
703             foreach my $pi (@{ $$self{processingInstructions} }) {
704             if ($pi =~ /^\s*searchpaths\s*=\s*([\"\'])(.*?)\1\s*$/) {
705             push(@paths, split(',', $2)); } }
706             push(@paths, pathname_absolute($$self{sourceDirectory})) if $$self{sourceDirectory};
707             $$self{searchpaths} = [@paths]; }
708             elsif ($roottype eq 'XML::LibXML::Element') {
709             $$self{document} = XML::LibXML::Document->new("1.0", "UTF-8");
710             # Assume we've got any namespaces already ?
711             if (my $parent = $self->findnode('ancestor::*[@id][1]', $root)) {
712             $$self{parent_id} = $parent->getAttribute('xml:id'); }
713             # if no cloning requested, we can just plug the node directly in.
714             # (otherwise, we should use addNodes?)
715             # Seems that only importNode (NOT adopt) works correctly,
716             # PROVIDED we also set the namespace.
717             $$self{document}->setDocumentElement($$self{document}->importNode($root));
718             # $$self{document}->documentElement->setNamespace($root->namespaceURI, $root->prefix, 1);
719             $root->setNamespace($root->namespaceURI, $root->prefix, 1);
720             foreach my $node ($self->findnodes("//*[\@xml:id]")) { # Now record all ID's
721             $$self{idcache}{ $node->getAttribute('xml:id') } = $node; } }
722             elsif ($roottype eq 'ARRAY') {
723             $$self{document} = XML::LibXML::Document->new("1.0", "UTF-8");
724             my ($tag, $attributes, @children) = @$root;
725             my ($prefix, $localname) = $tag =~ /^(.*):(.*)$/;
726             my $nsuri = $$self{namespaces}{$prefix};
727             my $node = $$self{document}->createElementNS($nsuri, $localname);
728             $$self{document}->setDocumentElement($node);
729             map { $$attributes{$_} && $node->setAttribute($_ => $$attributes{$_}) } keys %$attributes
730             if $attributes;
731              
732             if (my $id = $$attributes{'xml:id'}) {
733             $self->recordID($id => $node); }
734             $self->addNodes($node, @children); }
735             else {
736             Fatal('unexpected', $root, undef, "Dont know how to use '$root' as document element"); }
737             return $self; }
738              
739             our @MonthNames = (qw( January February March April May June
740             July August September October November December));
741              
742             sub addDate {
743             my ($self, $fromdoc) = @_;
744             if (!$self->findnodes('ltx:date', $self->getDocumentElement)) {
745             my @dates;
746             # $fromdoc's document has some, so copy them.
747             if ($fromdoc && (@dates = $fromdoc->findnodes('ltx:date', $fromdoc->getDocumentElement))) {
748             $self->addNodes($self->getDocumentElement, @dates); }
749             else {
750             my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time());
751             $self->addNodes($self->getDocumentElement,
752             ['ltx:date', { role => 'creation' },
753             $MonthNames[$mon] . " " . $mday . ", " . (1900 + $year)]); } }
754             return; }
755              
756             #======================================================================
757             # Accessors
758              
759             sub getDocument {
760             my ($self) = @_;
761             return $$self{document}; }
762              
763             sub getDocumentElement {
764             my ($self) = @_;
765             return $$self{document}->documentElement; }
766              
767             sub getSource {
768             my ($self) = @_;
769             return $$self{source}; }
770              
771             sub getSourceDirectory {
772             my ($self) = @_;
773             return $$self{sourceDirectory} || '.'; }
774              
775             sub getSearchPaths {
776             my ($self) = @_;
777             return @{ $$self{searchpaths} }; }
778              
779             sub getDestination {
780             my ($self) = @_;
781             return $$self{destination}; }
782              
783             sub getDestinationDirectory {
784             my ($self) = @_;
785             return $$self{destinationDirectory}; }
786              
787             sub getSiteDirectory {
788             my ($self) = @_;
789             return $$self{siteDirectory}; }
790              
791             # Given an absolute pathname in the document destination directory,
792             # return the corresponding pathname relative to the site directory (they maybe different!).
793             sub siteRelativePathname {
794             my ($self, $pathname) = @_;
795             return (defined $pathname ? pathname_relative($pathname, $$self{siteDirectory}) : undef); }
796              
797             sub siteRelativeDestination {
798             my ($self) = @_;
799             return (defined $$self{destination}
800             ? pathname_relative($$self{destination}, $$self{siteDirectory})
801             : undef); }
802              
803             sub getParentDocument {
804             my ($self) = @_;
805             return $$self{parentDocument}; }
806              
807             sub getAncestorDocument {
808             my ($self) = @_;
809             my ($doc, $d) = $self;
810             while ($d = $$doc{parentDocument}) {
811             $doc = $d; }
812             return $doc; }
813              
814             sub toString {
815             my ($self) = @_;
816             return $$self{document}->toString(1); }
817              
818             sub getDestinationExtension {
819             my ($self) = @_;
820             return ($$self{destination} =~ /\.([^\.\/]*)$/ ? $1 : undef); }
821              
822             sub checkDestination {
823             my ($self, $reldest) = @_;
824             # make absolute (if not already absolute), hopefully in destination directory.
825             my $dest = pathname_absolute($reldest, $self->getDestinationDirectory);
826             if (my $destdir = pathname_directory($dest)) {
827             pathname_mkdir($destdir)
828             or return Fatal("I/O", $destdir, undef,
829             "Could not create directory $destdir for $reldest: $!"); }
830             return $dest; }
831              
832             sub stringify {
833             my ($self) = @_;
834             return 'Post::Document[' . $self->siteRelativeDestination . ']'; }
835              
836             #======================================================================
837             sub validate {
838             my ($self) = @_;
839             # Check for a RelaxNGSchema PI
840             my $schema;
841             foreach my $pi (@{ $$self{processingInstructions} }) {
842             if ($pi =~ /^\s*RelaxNGSchema\s*=\s*([\"\'])(.*?)\1\s*$/) {
843             $schema = $2; } }
844             if ($schema) { # Validate using rng
845             my $rng = LaTeXML::Common::XML::RelaxNG->new($schema, searchpaths => [$self->getSearchPaths]);
846             LaTeXML::Post::Error('I/O', $schema, undef, "Failed to load RelaxNG schema $schema" . "Response was: $@")
847             unless $rng;
848             my $v = eval {
849             local $LaTeXML::IGNORE_ERRORS = 1;
850             $rng->validate($$self{document}); };
851             LaTeXML::Post::Error("malformed", 'document', undef,
852             "Document fails RelaxNG validation (" . $schema . ")",
853             "Validation reports: " . $@) if $@ || !defined $v; }
854             elsif (my $decldtd = $$self{document}->internalSubset) { # Else look for DTD Declaration
855             my $dtd = XML::LibXML::Dtd->new($decldtd->publicId, $decldtd->systemId);
856             if (!$dtd) {
857             LaTeXML::Post::Error("I/O", $decldtd->publicId, undef,
858             "Failed to load DTD " . $decldtd->publicId . " at " . $decldtd->systemId,
859             "skipping validation"); }
860             else {
861             my $v = eval {
862             local $LaTeXML::IGNORE_ERRORS = 1;
863             $$self{document}->validate($dtd); };
864             LaTeXML::Post::Error("malformed", 'document', undef,
865             "Document failed DTD validation (" . $decldtd->systemId . ")",
866             "Validation reports: " . $@) if $@ || !defined $v; } }
867             else { # Nothing found to validate with
868             LaTeXML::Post::Warn("expected", 'schema', undef,
869             "No Schema or DTD found for this document"); }
870             return; }
871              
872             sub idcheck {
873             my ($self) = @_;
874             my %idcache = ();
875             my %dups = ();
876             my %missing = ();
877             foreach my $node ($self->findnodes("//*[\@xml:id]")) {
878             my $id = $node->getAttribute('xml:id');
879             $dups{$id} = 1 if $idcache{$id};
880             $idcache{$id} = 1; }
881             foreach my $id (keys %{ $$self{idcache} }) {
882             $missing{$id} = 1 unless $idcache{$id}; }
883             LaTeXML::Post::Warn("unexpected", 'ids', undef,
884             "IDs were duplicated in cache for " . $self->siteRelativeDestination,
885             join(',', keys %dups))
886             if keys %dups;
887             LaTeXML::Post::Warn("expected", 'ids', undef, "IDs were cached for " . $self->siteRelativeDestination
888             . " but not in document",
889             join(',', keys %missing))
890             if keys %missing;
891             return; }
892              
893             #======================================================================
894             sub findnodes {
895             my ($self, $path, $node) = @_;
896             return $XPATH->findnodes($path, $node || $$self{document}); }
897              
898             # Similar but returns only 1st node
899             sub findnode {
900             my ($self, $path, $node) = @_;
901             my ($first) = $XPATH->findnodes($path, $node || $$self{document});
902             return $first; }
903              
904             sub findvalue {
905             my ($self, $path, $node) = @_;
906             return $XPATH->findvalue($path, $node || $$self{document}); }
907              
908             sub addNamespace {
909             my ($self, $nsuri, $prefix) = @_;
910             if (!$$self{namespaces}{$prefix} || ($$self{namespaces}{$prefix} ne $nsuri)
911             || (($self->getDocumentElement->lookupNamespacePrefix($nsuri) || '') ne $prefix)) {
912             $$self{namespaces}{$prefix} = $nsuri;
913             $$self{namespaceURIs}{$nsuri} = $prefix;
914             $XPATH->registerNS($prefix => $nsuri);
915             $self->getDocumentElement->setNamespace($nsuri, $prefix, 0); }
916             return; }
917              
918             use Carp;
919              
920             sub getQName {
921             my ($self, $node) = @_;
922             if (ref $node eq 'ARRAY') {
923             return $$node[0]; }
924             elsif (ref $node eq 'XML::LibXML::Element') {
925             my $nsuri = $node->namespaceURI;
926             if (!$nsuri) { # No namespace at all???
927             if ($node->nodeType == XML_ELEMENT_NODE) {
928             return $node->localname; }
929             else {
930             return; } }
931             elsif (my $prefix = $$self{namespaceURIs}{$nsuri}) {
932             return $prefix . ":" . $node->localname; }
933             else {
934             # Hasn't got one; we'll create a prefix for internal use.
935             my $prefix = "_ns" . (1 + scalar(grep { /^_ns\d+$/ } keys %{ $$self{namespaces} }));
936             # Register it, but Don't add it to the document!!! (or xpath, for that matter)
937             $$self{namespaces}{$prefix} = $nsuri;
938             $$self{namespaceURIs}{$nsuri} = $prefix;
939             return $prefix . ":" . $node->localname; } }
940             else {
941             # confess "What's this? $node\n";
942             return; } }
943              
944             #======================================================================
945             # ADD nodes to $node in the document $self.
946             # This takes a convenient recursive reprsentation for xml:
947             # data = string | [$tagname, {attr=>value,..}, @children...]
948             # The $tagname should have a namespace prefix whose URI has been
949             # registered with addNamespace.
950              
951             # Note that we're currently ignoring duplicated ids.
952             # these should only happen from rearrangement and copying of document fragments
953             # with embedded bits of math in them, which have those XMTok/XMRef pairs.
954             # If those are the cases, we should end up finding the original id'd item, anyway, right?
955             #
956             # NOTE that only XML::LibXML's addNewChild deals cleanly with namespaces
957             # and since there is only an "add" (ie. append) version (not prepend, insert after, etc)
958             # we have to orient everything towards appending.
959             # In particular, see the perversity in the following few methods.
960             sub addNodes {
961             my ($self, $node, @data) = @_;
962             foreach my $child (@data) {
963             if (ref $child eq 'ARRAY') {
964             my ($tag, $attributes, @children) = @$child;
965             if ($tag eq '_Fragment_') {
966             my $indent; # Derive indentation from indentation of $node
967             if (my $pre = $node->previousSibling) {
968             if (($pre->nodeType == XML_TEXT_NODE) && (($pre = $pre->textContent) =~ /^\s*$/)) {
969             $indent = $pre . ' '; } }
970             if ($indent) {
971             $self->addNodes($node, map { ($indent, $_) } @children); }
972             else {
973             $self->addNodes($node, @children); } }
974             else {
975             my ($prefix, $localname) = $tag =~ /^(.*):(.*)$/;
976             my $nsuri = $prefix && $$self{namespaces}{$prefix};
977             LaTeXML::Post::Warn('expected', 'namespace', undef, "No namespace on '$tag'") unless $nsuri;
978             my $new;
979             if (ref $node eq 'LibXML::XML::Document') {
980             $new = $node->createElementNS($nsuri, $localname);
981             $node->setDocumentElement($new); }
982             else {
983             $new = $node->addNewChild($nsuri, $localname); }
984             if ($attributes) {
985             foreach my $key (sort keys %$attributes) {
986             next unless defined $$attributes{$key};
987             next if $key =~ /^_/; # Ignore internal attributes
988             my ($attrprefix, $attrname) = $key =~ /^(.*):(.*)$/;
989             my $value = $$attributes{$key};
990             if ($key eq 'xml:id') {
991             if (defined $$self{idcache}{$value}) { # Duplicated ID ?!?!
992             my $newid = $self->uniquifyID($value);
993             Info('unexpected', 'duplicate_id', undef,
994             "Duplicated id=$value using $newid " . ($$self{destination} || ''));
995             $value = $newid; }
996             $self->recordID($value => $new);
997             $new->setAttribute($key, $value); }
998             elsif ($attrprefix && ($attrprefix ne 'xml')) {
999             my $attrnsuri = $attrprefix && $$self{namespaces}{$attrprefix};
1000             $new->setAttributeNS($attrnsuri, $key, $$attributes{$key}); }
1001             else {
1002             $new->setAttribute($key, $$attributes{$key}); } } }
1003             $self->addNodes($new, @children); } }
1004             elsif ((ref $child) =~ /^XML::LibXML::/) {
1005             my $type = $child->nodeType;
1006             if ($type == XML_ELEMENT_NODE) {
1007             # Note: this isn't actually much slower than $node->appendChild($child) !
1008             my $nsuri = $child->namespaceURI;
1009             my $localname = $child->localname;
1010             my $new;
1011             if (ref $node eq 'LibXML::XML::Document') {
1012             $new = $node->createElementNS($nsuri, $localname);
1013             $node->setDocumentElement($new); }
1014             else {
1015             $new = $node->addNewChild($nsuri, $localname); }
1016             foreach my $attr ($child->attributes) {
1017             my $atype = $attr->nodeType;
1018             if ($atype == XML_ATTRIBUTE_NODE) {
1019             my $key = $attr->nodeName;
1020             if ($key =~ /^_/) { } # don't copy internal attributes
1021             elsif ($key eq 'xml:id') {
1022             my $value = $attr->getValue;
1023             my $old;
1024             if ((defined($old = $$self{idcache}{$value})) # if xml:id was already used
1025             && !$old->isSameNode($child)) { # and the node was a different one
1026             my $newid = $self->uniquifyID($value);
1027             Info('unexpected', 'duplicate_id', undef,
1028             "Duplicated id=$value using $newid " . ($$self{destination} || ''));
1029             $value = $newid; }
1030             $self->recordID($value => $new);
1031             $new->setAttribute($key, $value); }
1032             elsif (my $ns = $attr->namespaceURI) {
1033             $new->setAttributeNS($ns, $attr->name, $attr->getValue); }
1034             else {
1035             $new->setAttribute($attr->localname, $attr->getValue); } }
1036             }
1037             $self->addNodes($new, $child->childNodes); }
1038             elsif ($type == XML_DOCUMENT_FRAG_NODE) {
1039             $self->addNodes($node, $child->childNodes); }
1040             elsif ($type == XML_TEXT_NODE) {
1041             $node->appendTextNode($child->textContent); }
1042             }
1043             elsif (ref $child) {
1044             LaTeXML::Post::Warn('misdefined', $child, undef, "Dont know how to add $child to $node; ignoring"); }
1045             elsif (defined $child) {
1046             $node->appendTextNode($child); } }
1047             return; }
1048              
1049             # Remove @nodes from the document
1050             # Allow the nodes to be array form with possibly nested XML that needs to be removed.
1051             sub removeNodes {
1052             my ($self, @nodes) = @_;
1053             foreach my $node (@nodes) {
1054             my $ref = ref $node;
1055             if (!$ref) { }
1056             elsif ($ref eq 'ARRAY') {
1057             my ($t, $a, @n) = @$node;
1058             if (my $id = $$a{'xml:id'}) {
1059             if ($$self{idcache}{$id}) {
1060             delete $$self{idcache}{$id}; } }
1061             $self->removeNodes(@n); }
1062             elsif ($ref =~ /^XML::LibXML::/) {
1063             if ($node->nodeType == XML_ELEMENT_NODE) {
1064             foreach my $idd ($self->findnodes("descendant-or-self::*[\@xml:id]", $node)) {
1065             my $id = $idd->getAttribute('xml:id');
1066             if ($$self{idcache}{$id}) {
1067             delete $$self{idcache}{$id}; } } }
1068             $node->unlinkNode; } }
1069             return; }
1070              
1071             # These nodes will be removed, but later
1072             # So mark all id's in these trees as reusable
1073             sub preremoveNodes {
1074             my ($self, @nodes) = @_;
1075             foreach my $node (@nodes) {
1076             my $ref = ref $node;
1077             if (!$ref) { }
1078             elsif ($ref eq 'ARRAY') {
1079             my ($t, $a, @n) = @$node;
1080             if (my $id = $$a{'xml:id'}) {
1081             $$self{idcache_reusable}{$id} = 1; }
1082             $self->preremoveNodes(@n); }
1083             elsif ($ref =~ /^XML::LibXML::/) {
1084             if ($node->nodeType == XML_ELEMENT_NODE) {
1085             foreach my $idd ($self->findnodes("descendant-or-self::*[\@xml:id]", $node)) {
1086             my $id = $idd->getAttribute('xml:id');
1087             $$self{idcache_reusable}{$id} = 1; } } } }
1088             return; }
1089              
1090             sub removeBlankNodes {
1091             my ($self, $node) = @_;
1092             my $n = 0;
1093             foreach my $child ($node->childNodes) {
1094             if (($child->nodeType == XML_TEXT_NODE) && ($child->textContent =~ /^\s*$/)) {
1095             $node->removeChild($child); $n++; } }
1096             return $n; }
1097              
1098             # Replace $node by @replacements in the document
1099             sub replaceNode {
1100             my ($self, $node, @replacements) = @_;
1101             my ($parent, $following) = ($node->parentNode, undef);
1102             # Note that since we can only append new stuff, we've got to remove the following first.
1103             my @save = ();
1104             while (($following = $parent->lastChild) && ($$following != $$node)) { # Remove & Save following siblings.
1105             unshift(@save, $parent->removeChild($following)); }
1106             $self->removeNodes($node);
1107             $self->addNodes($parent, @replacements);
1108             map { $parent->appendChild($_) } @save; # Put these back.
1109             return; }
1110              
1111             # Put @nodes at the beginning of $node.
1112             sub prependNodes {
1113             my ($self, $node, @nodes) = @_;
1114             my @save = ();
1115             # Note that since we can only append new stuff, we've got to remove the following first.
1116             while (my $last = $node->lastChild) { # Remove, but save, all children
1117             unshift(@save, $node->removeChild($last)); }
1118             $self->addNodes($node, @nodes); # Now, add the new nodes.
1119             map { $node->appendChild($_) } @save; # Put these back.
1120             return; }
1121              
1122             # Clone a node, but adjusting it so that it has unique id's.
1123             # $document->cloneNode($node) or ->cloneNode($node,$idsuffix)
1124             # This clones the node and adjusts any xml:id's within it to be unique.
1125             # Any idref's to those ids will be changed to the new id values.
1126             # If $idsuffix is supplied, it can be a simple string to append to the ids;
1127             # else can be a function of the id to modify it.
1128             # Then each $id is checked to see whether it is unique; If needed,
1129             # one or more letters are appended, until a new id is found.
1130             sub cloneNode {
1131             my ($self, $node, $idsuffix, %options) = @_;
1132             return $node unless ref $node;
1133             my $copy = $node->cloneNode(1);
1134             my $nocache = $options{nocache};
1135             #### $idsuffix = '' unless defined $idsuffix;
1136             # Find all id's defined in the copy and change the id.
1137             my %idmap = ();
1138             foreach my $n ($self->findnodes('descendant-or-self::*[@xml:id]', $copy)) {
1139             my $id = $n->getAttribute('xml:id');
1140             my $newid = $self->uniquifyID($id, $idsuffix);
1141             $idmap{$id} = $newid;
1142             $self->recordID($newid => $n) unless $nocache;
1143             $n->setAttribute('xml:id' => $newid);
1144             if (my $fragid = $n->getAttribute('fragid')) { # GACK!!
1145             $n->setAttribute(fragid => substr($newid, length($id) - length($fragid))); } }
1146              
1147             # Now, replace all REFERENCES to those modified ids.
1148             foreach my $n ($self->findnodes('descendant-or-self::*[@idref]', $copy)) {
1149             if (my $id = $idmap{ $n->getAttribute('idref') }) {
1150             $n->setAttribute(idref => $id); } } # use id or fragid?
1151             # Finally, we probably shouldn't have any labels attributes in here either
1152             foreach my $n ($self->findnodes('descendant-or-self::*[@labels]', $copy)) {
1153             $n->removeAttribute('labels'); }
1154             # And, if we're relocating the node across documents,
1155             # we may need to patch relative pathnames!
1156             # ????? Something to think about in the future...
1157             # if(my $base = $options{basepathname}){
1158             # foreach my $n ($self->findnodes('descendant::*/@graphic or descendant::*/@href', $copy)) {
1159             # $n->setvalue(relocate($n->value,$base)); }}
1160             return $copy; }
1161              
1162             sub cloneNodes {
1163             my ($self, @nodes) = @_;
1164             return map { $self->cloneNode($_) } @nodes; }
1165              
1166             sub addSSValues {
1167             my ($self, $node, $key, $values) = @_;
1168             $values = $values->toAttribute if ref $values;
1169             if ((defined $values) && ($values ne '')) { # Skip if `empty'; but 0 is OK!
1170             my @values = split(/\s/, $values);
1171             if (my $oldvalues = $node->getAttribute($key)) { # previous values?
1172             my @old = split(/\s/, $oldvalues);
1173             foreach my $new (@values) {
1174             push(@old, $new) unless grep { $_ eq $new } @old; }
1175             $node->setAttribute($key => join(' ', sort @old)); }
1176             else {
1177             $node->setAttribute($key => join(' ', sort @values)); } }
1178             return; }
1179              
1180             sub addClass {
1181             my ($self, $node, $class) = @_;
1182             return $self->addSSValues($node, class => $class); }
1183              
1184             #======================================================================
1185             # DUPLICATED from Core::Document...(see discussion there)
1186             # Decorations on one side of an XMDual should be attributed to the
1187             # parent node on the other side (see ->associateIDs)
1188              
1189             sub markXMNodeVisibility {
1190             my ($self) = @_;
1191             foreach my $math ($self->findnodes('//ltx:XMath/*')) {
1192             $self->markXMNodeVisibility_aux($math, 1, 1); }
1193             return; }
1194              
1195             sub markXMNodeVisibility_aux {
1196             my ($self, $node, $cvis, $pvis) = @_;
1197             return unless $node;
1198             my $qname = $self->getQName($node);
1199             return if (!$cvis || $node->getAttribute('_cvis')) && (!$pvis || $node->getAttribute('_pvis'));
1200             $node->setAttribute('_cvis' => 1) if $cvis;
1201             $node->setAttribute('_pvis' => 1) if $pvis;
1202             if ($qname eq 'ltx:XMDual') {
1203             my ($c, $p) = element_nodes($node);
1204             $self->markXMNodeVisibility_aux($c, 1, 0) if $cvis;
1205             $self->markXMNodeVisibility_aux($p, 0, 1) if $pvis; }
1206             elsif ($qname eq 'ltx:XMRef') {
1207             # $self->markXMNodeVisibility_aux($self->realizeXMNode($node),$cvis,$pvis); }
1208             my $id = $node->getAttribute('idref');
1209             $self->markXMNodeVisibility_aux($self->findNodeByID($id), $cvis, $pvis); }
1210             else {
1211             foreach my $child (element_nodes($node)) {
1212             $self->markXMNodeVisibility_aux($child, $cvis, $pvis); } }
1213             return; }
1214              
1215             #======================================================================
1216             # Given a list of nodes (or node constructors [tag,attr,content...])
1217             # conjoin given a conjunction like ',' or a pair like [',', ' and ']
1218             sub conjoin {
1219             my ($self, $conjunction, @nodes) = @_;
1220             my ($comma, $and) = ($conjunction, $conjunction);
1221             ($comma, $and) = @$conjunction if ref $conjunction;
1222             my $n = scalar(@nodes);
1223             if ($n < 2) {
1224             return @nodes; }
1225             else {
1226             my @foo = ();
1227             push(@foo, shift(@nodes));
1228             while ($nodes[1]) {
1229             push(@foo, $comma, shift(@nodes)); }
1230             push(@foo, $and, shift(@nodes));
1231             return @foo; } }
1232              
1233             # Find the initial letter in a string, or *.
1234             # Uses unicode decomposition to reduce accented characters to A-Z
1235             # If $force is true, skips any non-letter initials
1236             sub initial {
1237             my ($self, $string, $force) = @_;
1238             $string = NFD($string); # Decompose accents, etc.
1239             $string =~ s/^\s+//gs;
1240             $string =~ s/^[^a-zA-Z]*// if $force;
1241             return ($string =~ /^([a-zA-Z])/ ? uc($1) : '*'); }
1242              
1243             # This would typically be called to normalize the leading/trailing whitespace of nodes
1244             # that take mixed markup. WE SHOULDN'T BE DOING THIS. We need to NOT add "ignorable whitespace"
1245             # to nodes that CAN HAVE mixed content. otherwise we don't know if it is ignorable!
1246             sub trimChildNodes {
1247             my ($self, $node) = @_;
1248             if (!$node) {
1249             return (); }
1250             elsif (!ref $node) {
1251             return ($node); }
1252             elsif (my @children = $node->childNodes) {
1253             if ($children[0]->nodeType == XML_TEXT_NODE) {
1254             my $s = $children[0]->data;
1255             $s =~ s/^\s+//;
1256             if ($s) {
1257             $children[0]->setData($s); }
1258             else {
1259             shift(@children); } }
1260             if ($children[-1]->nodeType == XML_TEXT_NODE) {
1261             my $s = $children[-1]->data;
1262             $s =~ s/\s+$//;
1263             if ($s) {
1264             $children[-1]->setData($s); }
1265             else {
1266             pop(@children); } }
1267             return @children; }
1268             else {
1269             return (); } }
1270              
1271             sub unisort {
1272             my ($self, @keys) = @_;
1273             # Get a (possibly cached) sorter from POST appropriate for this document's language
1274             my $lang = $self->getDocumentElement->getAttribute('xml:lang') || 'en';
1275             return $LaTeXML::POST->getsorter($lang)->sort(@keys); }
1276              
1277             #======================================================================
1278              
1279             sub addNavigation {
1280             my ($self, $relation, $id) = @_;
1281             return if $self->findnode('//ltx:navigation/ltx:ref[@rel="' . $relation . '"][@idref="' . $id . '"]');
1282             my $ref = ['ltx:ref', { idref => $id, rel => $relation, show => 'toctitle' }];
1283             if (my $nav = $self->findnode('//ltx:navigation')) {
1284             $self->addNodes($nav, $ref); }
1285             else {
1286             $self->addNodes($self->getDocumentElement, ['ltx:navigation', {}, $ref]); }
1287             return; }
1288              
1289             #======================================================================
1290             # Support for ID's
1291              
1292             sub recordID {
1293             my ($self, $id, $node) = @_;
1294             # make an issue if already there?
1295             $$self{idcache}{$id} = $node;
1296             delete $$self{idcache_reserve}{$id}; # And no longer reserved
1297             delete $$self{idcache_reusable}{$id}; # or reusable
1298             return; }
1299              
1300             sub findNodeByID {
1301             my ($self, $id) = @_;
1302             my $node = $$self{idcache}{$id};
1303             return $$self{idcache}{$id}; }
1304              
1305             sub realizeXMNode {
1306             my ($self, $node) = @_;
1307             if ($self->getQName($node) eq 'ltx:XMRef') {
1308             my $id = $node->getAttribute('idref');
1309             if (my $realnode = $self->findNodeByID($id)) {
1310             #print STDERR "REALIZE $id => $realnode\n";
1311             return $realnode; }
1312             else {
1313             Fatal('expected', 'id', undef, "Cannot find a node with xml:id='$id'");
1314             return; } }
1315             else {
1316             return $node; } }
1317              
1318             sub uniquifyID {
1319             my ($self, $baseid, $suffix) = @_;
1320             my $id = $baseid;
1321             $id = (ref $suffix eq 'CODE' ? &$suffix($id) : $id . $suffix) if defined $suffix;
1322             my $cachekey = $id;
1323             while (($$self{idcache}{$id} || $$self{idcache_reserve}{$id}) && !$$self{idcache_reusable}{$id}) {
1324             $id = $baseid . radix_alpha(++$$self{idcache_clashes}{$cachekey});
1325             $id = (ref $suffix eq 'CODE' ? &$suffix($id) : $id . $suffix) if defined $suffix; }
1326             delete $$self{idcache_reusable}{$id}; # $id is no longer reusable
1327             $$self{idcache_reserve}{$id} = 1; # and we'll consider it reserved until recorded.
1328             return $id; }
1329              
1330             # Generate, add and register an xml:id for $node.
1331             # Unless it already has an id, the created id will
1332             # be "structured" relative to it's parent using $prefix
1333             sub generateNodeID {
1334             my ($self, $node, $prefix, $reusable) = @_;
1335             my $id = $node->getAttribute('xml:id');
1336             return $id if $id;
1337             # Find the closest parent with an ID
1338             my ($parent, $pid, $n) = ($node->parentNode, undef, undef);
1339             while ($parent && !($pid = $parent->getAttribute('xml:id'))) {
1340             $parent = $parent->parentNode; }
1341             # Now find the next unused id relative to the parent id, as "prefix"
1342             $pid .= '.' if $pid;
1343             for ($n = 1 ; ($id = $pid . $prefix . $n)
1344             && ($$self{idcache}{$id} || $$self{idcache_reserved}{$id}) ; $n++) { }
1345             $node->setAttribute('xml:id' => $id);
1346             $$self{idcache}{$id} = $node;
1347             $$self{idcache_reusable}{$id} = $reusable;
1348             # If we've already been scanned, and have fragid's, create one here, too.
1349             if (my $fragid = $parent && $parent->getAttribute('fragid')) {
1350             $node->setAttribute(fragid => $fragid . '.' . $prefix . $n); }
1351             return $id; }
1352              
1353             #======================================================================
1354             # adjust_latexml_doctype($doc,"Foo","Bar") =>
1355             #
1356             # "http://dlmf.nist.gov/LaTeXML/LaTeXML-Foo-Bar.dtd">
1357             sub adjust_latexml_doctype {
1358             my ($self, @additions) = @_;
1359             my $doc = $$self{document};
1360             if (my $dtd = $doc->internalSubset) {
1361             if ($dtd->toString
1362             =~ /^$/) {
1363             my ($root, $parts, $system) = ($1, $3, $5);
1364             my ($type, @addns) = split(/ \+ /, $parts);
1365             my %addns = ();
1366             map { $addns{$_} = 1 } @addns, @additions;
1367             @addns = sort keys %addns;
1368             my $publicid = join(' + ', "-//NIST LaTeXML//LaTeXML $type", @addns);
1369             my $systemid = join('-', "http://dlmf.nist.gov/LaTeXML/LaTeXML", @addns) . ".dtd";
1370             $doc->removeInternalSubset; # Apparently we've got to remove it first.
1371             $doc->createInternalSubset($root, $publicid, $systemid); } }
1372             return; }
1373              
1374             #======================================================================
1375             # Cache support: storage of data from previous run.
1376             # ?
1377              
1378             # cacheFile as parameter ????
1379              
1380             sub cacheLookup {
1381             my ($self, $key) = @_;
1382             $self->openCache;
1383             return $$self{cache}{$key}; }
1384              
1385             sub cacheStore {
1386             my ($self, $key, $value) = @_;
1387             $self->openCache;
1388             if (defined $value) {
1389             $$self{cache}{$key} = $value; }
1390             else {
1391             delete $$self{cache}{$key}; }
1392             return; }
1393              
1394             sub openCache {
1395             my ($self) = @_;
1396             if (!$$self{cache}) {
1397             $$self{cache} = {};
1398             my $dbfile = $self->checkDestination("LaTeXML.cache");
1399             tie %{ $$self{cache} }, 'DB_File', $dbfile, O_RDWR | O_CREAT
1400             or return Fatal('internal', 'db', undef,
1401             "Couldn't create DB cache for " . $self->getDestination,
1402             "Message was: " . $!,
1403             (-f $dbfile ? "\n(possibly incompatible db format?)" : ''));
1404             }
1405             return; }
1406              
1407             sub closeCache {
1408             my ($self) = @_;
1409             if ($$self{cache}) {
1410             untie %{ $$self{cache} };
1411             $$self{cache} = undef; }
1412             return; }
1413              
1414             1;
1415             #======================================================================
1416              
1417             __END__