File Coverage

blib/lib/PDF/Builder/Content/Text.pm
Criterion Covered Total %
statement 219 2150 10.1
branch 92 1158 7.9
condition 45 602 7.4
subroutine 19 49 38.7
pod 16 17 94.1
total 391 3976 9.8


line stmt bran cond sub pod time code
1             package PDF::Builder::Content::Text;
2              
3 39     39   289 use base 'PDF::Builder::Content';
  39         101  
  39         5294  
4              
5 39     39   274 use strict;
  39         93  
  39         1092  
6 39     39   200 use warnings;
  39         94  
  39         10141  
7 39     39   381 use Carp;
  39         95  
  39         2941  
8 39     39   313 use List::Util qw(min max);
  39         88  
  39         2605  
9 39     39   270 use version;
  39         113  
  39         472  
10             #use Data::Dumper; # for debugging
11             # $Data::Dumper::Sortkeys = 1; # hash keys in sorted order
12             # print Dumper(var); usage of Dumper
13            
14             # >>>>>>>>>>>>>>>>>> CRITICAL !!!! <<<<<<<<<<<<<<<<<<<<<<
15             # when update column() tags and CSS with new/changed support, also update
16             # Column_docs.pm (immediately) and perhaps #195 list (AT release).
17             # any examples/ changes update Examples on website (AT release)
18              
19             our $VERSION = '3.028'; # VERSION
20             our $LAST_UPDATE = '3.028'; # manually update whenever code is changed
21              
22             my $TextMarkdown = '1.000031'; # minimum version of Text::Markdown;
23             #my $TextMultiMarkdown = '1.005'; # TBD minimum version of Text::MultiMarkdown;
24             my $HTMLTreeBldr = '5.07'; # minimum version of HTML::TreeBuilder
25              
26             =head1 NAME
27              
28             PDF::Builder::Content::Text - Additional specialized text-related formatting methods
29              
30             Inherits from L<PDF::Builder::Content>
31              
32             B<Note:> If you have used some of these methods in PDF::Builder with a
33             I<graphics>
34             type object (e.g., $page->gfx()->method()), you may have to change to a I<text>
35             type object (e.g., $page->text()->method()).
36              
37             =head1 METHODS
38              
39             =cut
40              
41             sub new {
42 19     19 1 69 my ($class) = @_;
43 19         136 my $self = $class->SUPER::new(@_);
44 19         127 $self->textstart();
45 19         59 return $self;
46             }
47              
48             =head2 Single Lines from a String
49              
50             =head3 text_left
51              
52             $width = $content->text_left($text, %opts)
53              
54             =over
55              
56             Alias for C<text>. Implemented for symmetry, for those who use a lot of
57             C<text_center> and C<text_right>, and desire a matching C<text_left>.
58              
59             Adds text to the page (left justified), at the current position.
60             Note that there is no maximum width, and nothing to keep you from overflowing
61             the physical page on the right!
62             The width used (in points) is B<returned>.
63              
64             =back
65              
66             =cut
67              
68             sub text_left {
69 0     0 1 0 my ($self, $text, @opts) = @_;
70              
71             # override any stray 'align' that got through to here
72 0         0 return $self->text($text, @opts, 'align'=>'l');
73             }
74              
75             =head3 text_center
76              
77             $width = $content->text_center($text, %opts)
78              
79             =over
80              
81             As C<text>, but I<centered> on the current point.
82              
83             Adds text to the page (centered).
84             The width used (in points) is B<returned>.
85              
86             =back
87              
88             =cut
89              
90             sub text_center {
91 6     6 1 31 my ($self, $text, @opts) = @_;
92              
93             # override any stray 'align' that got through to here
94 6         35 return $self->text($text, @opts, 'align'=>'c');
95             }
96              
97             =head3 text_right
98              
99             $width = $content->text_right($text, %opts)
100              
101             =over
102              
103             As C<text>, but right-aligned to the current point.
104              
105             Adds text to the page (right justified).
106             Note that there is no maximum width, and nothing to keep you from overflowing
107             the physical page on the left!
108             The width used (in points) is B<returned>.
109              
110             =back
111              
112             =cut
113              
114             sub text_right {
115 3     3 1 16 my ($self, $text, @opts) = @_;
116              
117             # override any stray 'align' that got through to here
118 3         16 return $self->text($text, @opts, 'align'=>'r');
119             }
120              
121             =head3 text_justified
122              
123             $width = $content->text_justified($text, $width, %opts)
124              
125             =over
126            
127             As C<text>, but stretches text using C<wordspace>, C<charspace>, and (as a
128             last resort) C<hscale>, to fill the desired
129             (available) C<$width>. Note that if the desired width is I<less> than the
130             natural width taken by the text, it will be I<condensed> to fit, using the
131             same three routines.
132              
133             The unchanged C<$width> is B<returned>, unless there was some reason to
134             change it (e.g., overflow).
135              
136             B<Options:>
137              
138             =over
139              
140             =item 'nocs' => value
141              
142             If this option value is 1 (default 0), do B<not> use any intercharacter
143             spacing. This is useful for connected characters, such as fonts for Arabic,
144             Devanagari, Latin cursive handwriting, etc. You don't want to add additional
145             space between characters during justification, which would disconnect them.
146              
147             I<Word> (interword) spacing values (explicit or default) are doubled if
148             nocs is 1. This is to make up for the lack of added/subtracted intercharacter
149             spacing.
150              
151             =item 'wordsp' => value
152              
153             The percentage of one space character (default 100) that is the maximum amount
154             to add to (each) interword spacing to expand the line.
155             If C<nocs> is 1, double C<value>.
156              
157             =item 'charsp' => value
158              
159             If adding interword space didn't do enough, the percentage of one em (default
160             100) that is the maximum amount to add to (each) intercharacter spacing to
161             further expand the line.
162             If C<nocs> is 1, force C<value> to 0.
163              
164             =item 'wordspa' => value
165              
166             If adding intercharacter space didn't do enough, the percentage of one space
167             character (default 100) that is the maximum I<additional> amount to add to
168             (each) interword spacing to further expand the line.
169             If C<nocs> is 1, double C<value>.
170              
171             =item 'charspa' => value
172              
173             If adding more interword space didn't do enough, the percentage of one em
174             (default 100) that is the maximum I<additional> amount to add to (each)
175             intercharacter spacing to further expand the line.
176             If C<nocs> is 1, force C<value> to 0.
177              
178             =item 'condw' => value
179              
180             The percentage of one space character (default 25) that is the maximum amount
181             to subtract from (each) interword spacing to condense the line.
182             If C<nocs> is 1, double C<value>.
183              
184             =item 'condc' => value
185              
186             If removing interword space didn't do enough, the percentage of one em
187             (default 10) that is the maximum amount to subtract from (each) intercharacter
188             spacing to further condense the line.
189             If C<nocs> is 1, force C<value> to 0.
190              
191             =back
192              
193             If expansion (or reduction) wordspace and charspace changes didn't do enough
194             to make the line fit the desired width, use C<hscale()> to finish expanding or
195             condensing the line to fit.
196              
197             =back
198              
199             =cut
200              
201             sub text_justified {
202 4     4 1 23 my ($self, $text, $width, %opts) = @_;
203             # copy dashed option names to the preferred undashed names
204 4 50 33     23 if (defined $opts{'-wordsp'} && !defined $opts{'wordsp'}) { $opts{'wordsp'} = delete($opts{'-wordsp'}); }
  0         0  
205 4 50 33     17 if (defined $opts{'-charsp'} && !defined $opts{'charsp'}) { $opts{'charsp'} = delete($opts{'-charsp'}); }
  0         0  
206 4 50 33     20 if (defined $opts{'-wordspa'} && !defined $opts{'wordspa'}) { $opts{'wordspa'} = delete($opts{'-wordspa'}); }
  0         0  
207 4 50 33     21 if (defined $opts{'-charspa'} && !defined $opts{'charspa'}) { $opts{'charspa'} = delete($opts{'-charspa'}); }
  0         0  
208 4 50 33     19 if (defined $opts{'-condw'} && !defined $opts{'condw'}) { $opts{'condw'} = delete($opts{'-condw'}); }
  0         0  
209 4 50 33     18 if (defined $opts{'-condc'} && !defined $opts{'condc'}) { $opts{'condc'} = delete($opts{'-condc'}); }
  0         0  
210 4 50 33     16 if (defined $opts{'-nocs'} && !defined $opts{'nocs'}) { $opts{'nocs'} = delete($opts{'-nocs'}); }
  0         0  
211              
212             # optional parameters to control how expansion or condensation are done
213             # 1. expand interword space up to 100% of 1 space
214 4 50       30 my $wordsp = defined($opts{'wordsp'})? $opts{'wordsp'}: 100;
215             # 2. expand intercharacter space up to 100% of 1em
216 4 50       15 my $charsp = defined($opts{'charsp'})? $opts{'charsp'}: 100;
217             # 3. expand interword space up to another 100% of 1 space
218 4 50       14 my $wordspa = defined($opts{'wordspa'})? $opts{'wordspa'}: 100;
219             # 4. expand intercharacter space up to another 100% of 1em
220 4 50       12 my $charspa = defined($opts{'charspa'})? $opts{'charspa'}: 100;
221             # 5. condense interword space up to 25% of 1 space
222 4 50       17 my $condw = defined($opts{'condw'})? $opts{'condw'}: 25;
223             # 6. condense intercharacter space up to 10% of 1em
224 4 50       16 my $condc = defined($opts{'condc'})? $opts{'condc'}: 10;
225             # 7. if still short or long, hscale()
226              
227 4 50       12 my $nocs = defined($opts{'nocs'})? $opts{'nocs'}: 0;
228 4 50       14 if ($nocs) {
229 0         0 $charsp = $charspa = $condc = 0;
230 0         0 $wordsp *= 2;
231 0         0 $wordspa *= 2;
232 0         0 $condw *= 2;
233             }
234              
235             # with original wordspace, charspace, and hscale settings
236             # note that we do NOT change any existing charspace here
237 4         20 my $length = $self->advancewidth($text, %opts);
238 4         14 my $overage = $length - $width; # > 0, raw text is too wide, < 0, narrow
239              
240 4         10 my ($i, @chars, $val, $limit);
241 4         20 my $hs = $self->hscale(); # save old settings and reset to 0
242 4         23 my $ws = $self->wordspace();
243 4         16 my $cs = $self->charspace();
244 4         20 $self->hscale(100); $self->wordspace(0); $self->charspace(0);
  4         16  
  4         18  
245              
246             # not near perfect fit? not within .1 pt of fitting
247 4 50       20 if (abs($overage) > 0.1) {
248              
249             # how many interword spaces can we change with wordspace?
250 4         10 my $num_spaces = 0;
251             # how many intercharacter spaces can be added to or removed?
252 4         6 my $num_chars = -1;
253 4         30 @chars = split //, $text;
254 4         20 for ($i=0; $i<scalar @chars; $i++) {
255 78 100       151 if ($chars[$i] eq ' ') { $num_spaces++; } # TBD other whitespace?
  16         22  
256 78         140 $num_chars++; # count spaces as characters, too
257             }
258 4         16 my $em = $self->advancewidth('M');
259 4         17 my $sp = $self->advancewidth(' ');
260              
261 4 50       16 if ($overage > 0) {
262             # too wide: need to condense it
263             # 1. subtract from interword space, up to -$condw/100 $sp
264 0 0 0     0 if ($overage > 0 && $num_spaces > 0 && $condw > 0) {
      0        
265 0         0 $val = $overage/$num_spaces;
266 0         0 $limit = $condw/100*$sp;
267 0 0       0 if ($val > $limit) { $val = $limit; }
  0         0  
268 0         0 $self->wordspace(-$val);
269 0         0 $overage -= $val*$num_spaces;
270             }
271             # 2. subtract from intercharacter space, up to -$condc/100 $em
272 0 0 0     0 if ($overage > 0 && $num_chars > 0 && $condc > 0) {
      0        
273 0         0 $val = $overage/$num_chars;
274 0         0 $limit = $condc/100*$em;
275 0 0       0 if ($val > $limit) { $val = $limit; }
  0         0  
276 0         0 $self->charspace(-$val);
277 0         0 $overage -= $val*$num_chars;
278             }
279             # 3. nothing more to do than scale down with hscale()
280             } else {
281             # too narrow: need to expand it (usual case)
282 4         11 $overage = -$overage; # working with positive value is easier
283             # 1. add to interword space, up to $wordsp/100 $sp
284 4 50 33     39 if ($overage > 0 && $num_spaces > 0 && $wordsp > 0) {
      33        
285 4         12 $val = $overage/$num_spaces;
286 4         11 $limit = $wordsp/100*$sp;
287 4 100       17 if ($val > $limit) { $val = $limit; }
  1         3  
288 4         17 $self->wordspace($val);
289 4         12 $overage -= $val*$num_spaces;
290             }
291             # 2. add to intercharacter space, up to $charsp/100 $em
292 4 50 66     28 if ($overage > 0 && $num_chars > 0 && $charsp > 0) {
      66        
293 1         3 $val = $overage/$num_chars;
294 1         3 $limit = $charsp/100*$em;
295 1 50       4 if ($val > $limit) { $val = $limit; }
  0         0  
296 1         5 $self->charspace($val);
297 1         3 $overage -= $val*$num_chars;
298             }
299             # 3. add to interword space, up to $wordspa/100 $sp additional
300 4 0 33     21 if ($overage > 0 && $num_spaces > 0 && $wordspa > 0) {
      33        
301 0         0 $val = $overage/$num_spaces;
302 0         0 $limit = $wordspa/100*$sp;
303 0 0       0 if ($val > $limit) { $val = $limit; }
  0         0  
304 0         0 $self->wordspace($val+$self->wordspace());
305 0         0 $overage -= $val*$num_spaces;
306             }
307             # 4. add to intercharacter space, up to $charspa/100 $em additional
308 4 0 33     18 if ($overage > 0 && $num_chars > 0 && $charspa > 0) {
      33        
309 0         0 $val = $overage/$num_chars;
310 0         0 $limit = $charspa/100*$em;
311 0 0       0 if ($val > $limit) { $val = $limit; }
  0         0  
312 0         0 $self->charspace($val+$self->charspace());
313 0         0 $overage -= $val*$num_chars;
314             }
315             # 5. nothing more to do than scale up with hscale()
316             }
317              
318             # last ditch effort to fill the line: use hscale()
319             # temporarily resets hscale to expand width of line to match $width
320             # wordspace and charspace are already (temporarily) at max/min
321 4 50       33 if ($overage > 0.1) {
322 0         0 $self->hscale(100*($width/$self->advancewidth($text, %opts)));
323             }
324              
325             } # original $overage was not near 0
326             # do the output, with wordspace, charspace, and possibly hscale changed
327             # override any stray 'align' that got through to here
328 4         31 $self->text($text, %opts, 'align'=>'l');
329              
330             # restore settings
331 4         19 $self->hscale($hs); $self->wordspace($ws); $self->charspace($cs);
  4         19  
  4         15  
332              
333 4         32 return $width;
334             } # end of text_justified()
335              
336             =head2 Multiple Lines from a String
337              
338             The string is split at regular blanks (spaces), x20, to find the longest
339             substring that will fit the C<$width>.
340             If a single word is longer than C<$width>, it will overflow.
341             To stay strictly within the desired bounds, set the option
342             C<spillover>=E<gt>0 to disallow spillover.
343              
344             =head3 Hyphenation
345              
346             If hyphenation is enabled, those methods which split up a string into multiple
347             lines (the "text fill", paragraph, and section methods) will attempt to split
348             up the word that overflows the line, in order to pack the text even more
349             tightly ("greedy" line splitting). There are a number of controls over where a
350             word may be split, but note that there is nothing language-specific (i.e.,
351             following a given language's rules for where a word may be split). This is left
352             to other packages.
353              
354             There are hard coded minimums of 2 letters before the split, and 2 letters after
355             the split. See C<Hyphenate_basic.pm>. Note that neither hyphenation nor simple
356             line splitting makes any attempt to prevent widows and orphans, prevent
357             splitting of the last word in a column or page, or otherwise engage in
358             more desirable I<paragraph shaping>.
359              
360             =over
361              
362             =item 'hyphenate' => value
363              
364             0: no hyphenation (B<default>), 1: do basic hyphenation. Always allows
365             splitting at a soft hyphen (\xAD). Unicode hyphen (U+2010) and non-splitting
366             hyphen (U+2011) are ignored as split points.
367              
368             =item 'spHH' => value
369              
370             0: do I<not> split at a hard hyphen (x\2D), 1: I<OK to split> (B<default>)
371              
372             =item 'spOP' => value
373              
374             0: do I<not> split after most punctuation, 1: I<OK to split> (B<default>)
375              
376             =item 'spDR' => value
377              
378             0: do I<not> split after a run of one or more digits, 1: I<OK to split> (B<default>)
379              
380             =item 'spLR' => value
381              
382             0: do I<not> split after a run of one or more ASCII letters, 1: I<OK to split> (B<default>)
383              
384             =item 'spCC' => value
385              
386             0: do I<not> split in camelCase between a lowercase letter and an
387             uppercase letter, 1: I<OK to split> (B<default>)
388              
389             =item 'spRB' => value
390              
391             0: do I<not> split on a Required Blank (&nbsp;), is B<default>.
392             1: I<OK to split on Required Blank.> Try to avoid this; it is a desperation
393             move!
394              
395             =item 'spFS' => value
396              
397             0: do I<not> split where it will I<just> fit (middle of word!), is B<default>.
398             1: I<OK to split to just fit the available space.> Try to avoid this; it is a
399             super desperation move, and the split will probably make no linguistic sense!
400              
401             =item 'min_prefix' => value
402              
403             Minimum number of letters I<before> word split point (hyphenation point).
404             The B<default> is 2.
405              
406             =item 'min_suffix' => value
407              
408             Minimum number of letters I<after> word split point (hyphenation point).
409             The B<default> is 3.
410              
411             =back
412              
413             =head3 Methods
414              
415             =cut
416              
417             # splits input text (on spaces) into words, glues them back together until
418             # have filled desired (available) width. return the new line and remaining
419             # text. runs of spaces should be preserved. if the first word of a line does
420             # not fit within the alloted space, and cannot be split short enough, just
421             # accept the overflow.
422             sub _text_fill_line {
423 20     20   69 my ($self, $text, $width, $over, %opts) = @_;
424             # copy dashed option names to the preferred undashed names
425 20 50 33     72 if (defined $opts{'-hyphenate'} && !defined $opts{'hyphenate'}) { $opts{'hyphenate'} = delete($opts{'-hyphenate'}); }
  0         0  
426 20 50 33     83 if (defined $opts{'-lang'} && !defined $opts{'lang'}) { $opts{'lang'} = delete($opts{'-lang'}); }
  0         0  
427 20 50 33     66 if (defined $opts{'-nosplit'} && !defined $opts{'nosplit'}) { $opts{'nosplit'} = delete($opts{'-nosplit'}); }
  0         0  
428              
429             # options of interest
430 20 50       54 my $hyphenate = defined($opts{'hyphenate'})? $opts{'hyphenate'}: 0; # default off
431             #my $lang = defined($opts{'lang'})? $opts{'lang'}: 'en'; # English rules by default
432 20         39 my $lang = 'basic';
433             #my $nosplit = defined($opts{'nosplit'})? $opts{'nosplit'}: ''; # indexes NOT to split at, given
434             # as string of integers
435             # my @noSplit = split /[,\s]+/, $nosplit; # normally empty array
436             # 1. indexes start at 0 (split after character N not permitted)
437             # 2. SHYs (soft hyphens) should be skipped
438             # 3. need to map entire string's indexes to each word under
439             # consideration for splitting (hyphenation)
440              
441             # TBD should we consider any non-ASCII spaces?
442             # don't split on non-breaking space (required blank).
443 20         135 my @txt = split(/\x20/, $text);
444 20         44 my @line = ();
445 20         41 local $"; # intent is that reset of separator ($") is local to block
446 20         42 $"=' '; ## no critic
447 20         37 my $lastWord = ''; # the one that didn't quite fit
448 20         35 my $overflowed = 0;
449              
450 20         53 while (@txt) {
451             # build up @line from @txt array until overfills line.
452             # need to remove SHYs (soft hyphens) at this point.
453 119         226 $lastWord = shift @txt; # preserve any SHYs in the word
454 119         270 push @line, (_removeSHY($lastWord));
455             # one space between each element of line, like join(' ', @line)
456 119         513 $overflowed = $self->advancewidth("@line", %opts) > $width;
457 119 100       400 last if $overflowed;
458             }
459             # if overflowed, and overflow not allowed, remove the last word added,
460             # unless single word in line and we're not going to attempt word splitting.
461 20 100 66     96 if ($overflowed && !$over) {
462 13 50 33     92 if ($hyphenate && @line == 1 || @line > 1) {
      33        
463 13         29 pop @line; # discard last (or only) word
464 13         44 unshift @txt,$lastWord; # restore with SHYs intact
465             }
466             # if not hyphenating (splitting words), just leave oversized
467             # single-word line. if hyphenating, could have empty @line.
468             }
469              
470 20         63 my $Txt = "@txt"; # remaining text to put on next line
471 20         64 my $Line = "@line"; # line that fits, but not yet with any split word
472             # may be empty if first word in line overflows
473              
474             # if we try to hyphenate, try splitting up that last word that
475             # broke the camel's back. otherwise, will return $Line and $Txt as is.
476 20 50 33     62 if ($hyphenate && $overflowed) {
477 0         0 my $space;
478             # @line is current whole word list of line, does NOT overflow because
479             # $lastWord was removed. it may be empty if the first word tried was
480             # too long. @txt is whole word list of the remaining words to be output
481             # (includes $lastWord as its first word).
482             #
483             # we want to try splitting $lastWord into short enough left fragment
484             # (with right fragment remainder as first word of next line). if we
485             # fail to do so, just leave whole word as first word of next line, IF
486             # @line was not empty. if @line was empty, accept the overflow and
487             # output $lastWord as @line and remove it from @txt.
488 0 0       0 if (@line) {
489             # line not empty. $space is width for word fragment, not
490             # including blank after previous last word of @line.
491 0         0 $space = $width - $self->advancewidth("@line ", %opts);
492             } else {
493             # line empty (first word too long, and we can try hyphenating).
494             # $space is entire $width available for left fragment.
495 0         0 $space = $width;
496             }
497              
498 0 0       0 if ($space > 0) {
499 0         0 my ($wordLeft, $wordRight);
500             # @line is word(s) (if any) currently fitting within $width.
501             # @txt is remaining words unused in this line. $lastWord is first
502             # word of @txt. $space is width remaining to fill in line.
503 0         0 $wordLeft = ''; $wordRight = $lastWord; # fallbacks
  0         0  
504              
505             # if there is an error in Hyphenate_$lang, the message may be
506             # that the splitWord() function can't be found. debug errors by
507             # hard coding the require and splitWord() calls.
508              
509             ## test that Hyphenate_$lang exists. if not, use Hyphenate_en
510             ## TBD: if Hyphenate_$lang is not found, should we fall back to
511             ## English (en) rules, or turn off hyphenation, or do limited
512             ## hyphenation (nothing language-specific)?
513             # only Hyphenate_basic. leave language support to other packages
514 0         0 require PDF::Builder::Content::Hyphenate_basic;
515             #eval "require PDF::Builder::Content::Hyphenate_$lang";
516             #if ($@) {
517             #print "something went wrong with require eval: $@\n";
518             #$lang = 'en'; # perlmonks 27443 fall back to English
519             #require PDF::Builder::Content::Hyphenate_en;
520             #}
521 0         0 ($wordLeft,$wordRight) = PDF::Builder::Content::Hyphenate_basic::splitWord($self, $lastWord, $space, %opts);
522             #eval '($wordLeft,$wordRight) = PDF::Builder::Content::Hyphenate_'.$lang.'::splitWord($self, "$lastWord", $space, %opts)';
523 0 0       0 if ($@) { print "something went wrong with eval: $@\n"; }
  0         0  
524              
525             # $wordLeft is left fragment of $lastWord that fits in $space.
526             # it might be empty '' if couldn't get a small enough piece. it
527             # includes a hyphen, but no leading space, and can be added to
528             # @line.
529             # $wordRight is the remainder of $lastWord (right fragment) that
530             # didn't fit. it might be the entire $lastWord. it shouldn't be
531             # empty, since the whole point of the exercise is that $lastWord
532             # didn't fit in the remaining space. it will replace the first
533             # element of @txt (there should be at least one).
534            
535             # see if have a small enough left fragment of $lastWord to append
536             # to @line. neither left nor right Word should have full $lastWord,
537             # and both cannot be empty. it is highly unlikely that $wordLeft
538             # will be the full $lastWord, but quite possible that it is empty
539             # and $wordRight is $lastWord.
540              
541 0 0       0 if (!@line) {
542             # special case of empty line. if $wordLeft is empty and
543             # $wordRight is presumably the entire $lastWord, use $wordRight
544             # for the line and remove it ($lastWord) from @txt.
545 0 0       0 if ($wordLeft eq '') {
546 0         0 @line = ($wordRight); # probably overflows $width.
547 0         0 shift @txt; # remove $lastWord from @txt.
548             } else {
549             # $wordLeft fragment fits $width.
550 0         0 @line = ($wordLeft); # should fit $width.
551 0         0 shift @txt; # replace first element of @txt ($lastWord)
552 0         0 unshift @txt, $wordRight;
553             }
554             } else {
555             # usual case of some words already in @line. if $wordLeft is
556             # empty and $wordRight is entire $lastWord, we're done here.
557             # if $wordLeft has something, append it to line and replace
558             # first element of @txt with $wordRight (unless empty, which
559             # shouldn't happen).
560 0 0       0 if ($wordLeft eq '') {
561             # was unable to split $lastWord into short enough fragment.
562             # leave @line (already has words) and @txt alone.
563             } else {
564 0         0 push @line, ($wordLeft); # should fit $space.
565 0         0 shift @txt; # replace first element of @txt (was $lastWord)
566 0 0       0 unshift @txt, $wordRight if $wordRight ne '';
567             }
568             }
569              
570             # rebuild $Line and $Txt, in case they were altered.
571 0         0 $Txt = "@txt";
572 0         0 $Line = "@line";
573             } # there was $space available to try to fit a word fragment
574             } # we had an overflow to clean up, and hyphenation (word splitting) OK
575 20         117 return ($Line, $Txt);
576             }
577              
578             # remove soft hyphens (SHYs) from a word. assume is always #173 (good for
579             # Latin-1, CP-1252, UTF-8; might not work for some encodings) TBD
580             sub _removeSHY {
581 119     119   251 my ($word) = @_;
582              
583 119         313 my @chars = split //, $word;
584 119         196 my $out = '';
585 119         218 foreach (@chars) {
586 357 50       684 next if ord($_) == 173;
587 357         574 $out .= $_;
588             }
589 119         341 return $out;
590             }
591              
592             =head4 text_fill_left, text_fill
593              
594             ($width, $leftover) = $content->text_fill_left($string, $width, %opts)
595              
596             =over
597              
598             Fill a line of 'width' with as much text as will fit,
599             and outputs it left justified.
600             The width actually used, and the leftover text (that didn't fit),
601             are B<returned>.
602              
603             =back
604              
605             ($width, $leftover) = $content->text_fill($string, $width, %opts)
606              
607             =over
608              
609             Alias for text_fill_left().
610              
611             =back
612              
613             =cut
614              
615             sub text_fill_left {
616 10     10 1 38 my ($self, $text, $width, %opts) = @_;
617             # copy dashed option names to preferred undashed names
618 10 50 33     59 if (defined $opts{'-spillover'} && !defined $opts{'spillover'}) { $opts{'spillover'} = delete($opts{'-spillover'}); }
  10         35  
619              
620             # 0 = overflow past right margin NOT allowed; 1 = allowed
621 10   33     52 my $over = defined($opts{'spillover'}) && $opts{'spillover'} == 1;
622 10 50       31 $over = 0 if $over eq '';
623 10         42 my ($line, $ret) = $self->_text_fill_line($text, $width, $over, %opts);
624             # override any stray 'align' that got through to here
625 10         57 $width = $self->text($line, %opts, 'align'=>'l');
626 10         78 return ($width, $ret);
627             }
628              
629             sub text_fill {
630 0     0 1 0 my $self = shift;
631 0         0 return $self->text_fill_left(@_);
632             }
633              
634             =head4 text_fill_center
635              
636             ($width, $leftover) = $content->text_fill_center($string, $width, %opts)
637              
638             =over
639              
640             Fill a line of 'width' with as much text as will fit,
641             and outputs it centered.
642             The width actually used, and the leftover text (that didn't fit),
643             are B<returned>.
644              
645             =back
646              
647             =cut
648              
649             sub text_fill_center {
650 2     2 1 6 my ($self, $text, $width, %opts) = @_;
651             # copy dashed option names to preferred undashed names
652 2 50 33     11 if (defined $opts{'-spillover'} && !defined $opts{'spillover'}) { $opts{'spillover'} = delete($opts{'-spillover'}); }
  2         4  
653              
654             # 0 = overflow past right margin NOT allowed; 1 = allowed
655 2   33     8 my $over = defined($opts{'spillover'}) && $opts{'spillover'} == 1;
656 2 50       5 $over = 0 if $over eq '';
657 2         8 my ($line, $ret) = $self->_text_fill_line($text, $width, $over, %opts);
658 2         10 $width = $self->text_center($line, %opts);
659 2         6 return ($width, $ret);
660             }
661              
662             =head4 text_fill_right
663              
664             ($width, $leftover) = $content->text_fill_right($string, $width, %opts)
665              
666             =over
667              
668             Fill a line of 'width' with as much text as will fit,
669             and outputs it right justified.
670             The width actually used, and the leftover text (that didn't fit),
671             are B<returned>.
672              
673             =back
674              
675             =cut
676              
677             sub text_fill_right {
678 2     2 1 7 my ($self, $text, $width, %opts) = @_;
679             # copy dashed option names to preferred undashed names
680 2 50 33     8 if (defined $opts{'-spillover'} && !defined $opts{'spillover'}) { $opts{'spillover'} = delete($opts{'-spillover'}); }
  2         6  
681              
682             # 0 = overflow past right margin NOT allowed; 1 = allowed
683 2   33     8 my $over = defined($opts{'spillover'}) && $opts{'spillover'} == 1;
684 2 50       4 $over = 0 if $over eq '';
685 2         8 my ($line, $ret) = $self->_text_fill_line($text, $width, $over, %opts);
686 2         8 $width = $self->text_right($line, %opts);
687 2         7 return ($width, $ret);
688             }
689              
690             =head4 text_fill_justified
691              
692             ($width, $leftover) = $content->text_fill_justified($string, $width, %opts)
693              
694             =over
695              
696             Fill a line of 'width' with as much text as will fit,
697             and outputs it fully justified (stretched or condensed).
698             The width actually used, and the leftover text (that didn't fit),
699             are B<returned>.
700              
701             Note that the entire line is fit to the available
702             width via a call to C<text_justified>.
703             See C<text_justified> for options to control stretch and condense.
704             The last line is unjustified (normal size) and left aligned by default,
705             although the option
706              
707             B<Options:>
708              
709             =over
710              
711             =item 'last_align' => place
712              
713             where place is 'left' (default), 'center', or 'right' (may be shortened to
714             first letter) allows you to specify the alignment of the last line output.
715              
716             =back
717              
718             =back
719              
720             =cut
721              
722             sub text_fill_justified {
723 6     6 1 26 my ($self, $text, $width, %opts) = @_;
724             # copy dashed option names to preferred undashed names
725 6 100 66     35 if (defined $opts{'-last_align'} && !defined $opts{'last_align'}) { $opts{'last_align'} = delete($opts{'-last_align'}); }
  4         43  
726 6 50 33     31 if (defined $opts{'-spillover'} && !defined $opts{'spillover'}) { $opts{'spillover'} = delete($opts{'-spillover'}); }
  6         15  
727              
728 6         12 my $align = 'l'; # default left align last line
729 6 100       14 if (defined($opts{'last_align'})) {
730 4 50       35 if ($opts{'last_align'} =~ m/^l/i) { $align = 'l'; }
  0 100       0  
    50          
731 2         6 elsif ($opts{'last_align'} =~ m/^c/i) { $align = 'c'; }
732 2         5 elsif ($opts{'last_align'} =~ m/^r/i) { $align = 'r'; }
733 0         0 else { warn "Unknown last_align for justified fill, 'left' used\n"; }
734             }
735              
736             # 0 = overflow past right margin NOT allowed; 1 = allowed
737 6   33     29 my $over = defined($opts{'spillover'}) && $opts{'spillover'} == 1;
738 6 50       19 $over = 0 if $over eq '';
739 6         31 my ($line, $ret) = $self->_text_fill_line($text, $width, $over, %opts);
740             # if last line, use $align (don't justify)
741 6 100       22 if ($ret eq '') {
742 3         12 my $lw = $self->advancewidth($line, %opts);
743             # override any stray 'align' that got through to here
744 3 100       16 if ($align eq 'l') {
    100          
745 1         5 $width = $self->text($line, %opts, 'align'=>'l');
746             } elsif ($align eq 'c') {
747 1         8 $width = $self->text($line, 'indent' => ($width-$lw)/2, %opts, 'align'=>'l');
748             } else { # 'r'
749 1         8 $width = $self->text($line, 'indent' => ($width-$lw), %opts, 'align'=>'l');
750             }
751             } else {
752 3         18 $width = $self->text_justified($line, $width, %opts);
753             }
754 6         30 return ($width, $ret);
755             }
756              
757             =head2 Larger Text Segments
758              
759             =head3 paragraph
760              
761             ($overflow_text, $unused_height) = $txt->paragraph($text, $width,$height, $continue, %opts)
762              
763             ($overflow_text, $unused_height) = $txt->paragraph($text, $width,$height, %opts)
764              
765             $overflow_text = $txt->paragraph($text, $width,$height, $continue, %opts)
766              
767             $overflow_text = $txt->paragraph($text, $width,$height, %opts)
768              
769             =over
770              
771             Print a single string into a rectangular area on the page, of given width and
772             maximum height. The baseline of the first (top) line is at the current text
773             position.
774              
775             Apply the text within the rectangle and B<return> any leftover text (if could
776             not fit all of it within the rectangle). If called in an array context, the
777             unused height is also B<returned> (may be 0 or negative if it just filled the
778             rectangle).
779              
780             C<$continue> is optional, with a default value of 0. An C<%opts> list may be
781             given after the fixed parameters, whether or not C<$continue> is explicitly
782             given.
783              
784             If C<$continue> is 1, the first line does B<not> get special treatment for
785             indenting or outdenting, because we're printing the continuation of the
786             paragraph that was interrupted earlier. If it's 0, the first line may be
787             indented or outdented.
788              
789             B<Options:>
790              
791             =over
792              
793             =item 'pndnt' => $indent
794              
795             Give the amount of indent (positive) or outdent (negative, for "hanging")
796             for paragraph first lines. The unit is I<ems>.
797             This setting is ignored for centered text.
798              
799             =item 'align' => $choice
800              
801             C<$choice> is 'justified', 'right', 'center', 'left'; the default is 'left'.
802             See C<text_justified> call for options to control how a line is expanded or
803             condensed if C<$choice> is 'justified'. C<$choice> may be shortened to the
804             first letter.
805              
806             =item 'last_align' => place
807              
808             where place is 'left' (default), 'center', or 'right' (may be shortened to
809             first letter) allows you to specify the alignment of the last line output,
810             but applies only when C<align> is 'justified'.
811              
812             =item 'underline' => $distance
813              
814             =item 'underline' => [ $distance, $thickness, ... ]
815              
816             If a scalar, distance below baseline,
817             else array reference with pairs of distance and line thickness.
818              
819             =item 'spillover' => $over
820              
821             Controls if words in a line which exceed the given width should be
822             "spilled over" the bounds, or if a new line should be used for this word.
823              
824             C<$over> is 1 or 0, with the default 1 (spills over the width).
825              
826             =back
827              
828             B<Example:>
829              
830             $txt->font($font,$fontsize);
831             $txt->leading($leading);
832             $txt->translate($x,$y);
833             $overflow = $txt->paragraph( 'long paragraph here ...',
834             $width,
835             $y+$leading-$bottom_margin );
836              
837             B<Note:> if you need to change any text treatment I<within> a paragraph
838             (B<bold> or I<italicized> text, for instance), this can not handle it. Only
839             plain text (all the same font, size, etc.) can be typeset with C<paragraph()>.
840             Also, there is currently very limited line splitting (hyphenation) to better
841             fit to a given width, and nothing is done for "widows and orphans".
842              
843             =back
844              
845             =cut
846              
847             # TBD for LTR languages, does indenting on left make sense for right justified?
848             # TBD for bidi/RTL languages, should indenting be on right?
849              
850             sub paragraph {
851 12     12 1 96 my ($self, $text, $width,$height, @optsA) = @_;
852             # if odd number of elements in optsA, it contains $continue flag and
853             # remainder is %opts. if even, paragraph is being called PDF::API2 style
854             # with no $continue (default to 0).
855 12         26 my $continue = 0;
856 12 50       50 if (@optsA % 2) {
857 12         35 $continue = splice(@optsA, 0, 1);
858             }
859 12         47 my %opts = @optsA;
860              
861             # copy dashed option names to preferred undashed names
862 12 100 66     58 if (defined $opts{'-align'} && !defined $opts{'align'}) { $opts{'align'} = delete($opts{'-align'}); }
  5         18  
863 12 50 33     49 if (defined $opts{'-pndnt'} && !defined $opts{'pndnt'}) { $opts{'pndnt'} = delete($opts{'-pndnt'}); }
  0         0  
864              
865 12         27 my @line = ();
866 12         21 my $nwidth = 0;
867 12         40 my $leading = $self->leading();
868 12         26 my $align = 'l'; # default left
869 12 100       43 if (defined($opts{'align'})) {
870 5 50       45 if ($opts{'align'} =~ /^l/i) { $align = 'l'; }
  0 100       0  
    100          
    50          
871 1         14 elsif ($opts{'align'} =~ /^c/i) { $align = 'c'; }
872 1         2 elsif ($opts{'align'} =~ /^r/i) { $align = 'r'; }
873 3         10 elsif ($opts{'align'} =~ /^j/i) { $align = 'j'; }
874 0         0 else { warn "Unknown align value for paragraph(), 'left' used\n"; }
875             } # default stays at 'l'
876 12 50       39 my $indent = defined($opts{'pndnt'})? $opts{'pndnt'}: 0;
877 12 100       45 if ($align eq 'c') { $indent = 0; } # indent/outdent makes no sense centered
  1         1  
878 12         29 my $first_line = !$continue;
879 12         27 my $lw;
880 12         79 my $em = $self->advancewidth('M');
881              
882 12         44 while (length($text) > 0) { # more text to go...
883             # indent == 0 (flush) all lines normal width
884             # indent (>0) first line moved in on left, subsequent normal width
885             # outdent (<0) first line is normal width, subsequent moved in on left
886 20         36 $lw = $width;
887 20 50 33     66 if ($indent > 0 && $first_line) { $lw -= $indent*$em; }
  0         0  
888 20 50 33     69 if ($indent < 0 && !$first_line) { $lw += $indent*$em; }
  0         0  
889             # now, need to indent (move line start) right for 'l' and 'j'
890 20 0 0     50 if ($lw < $width && ($align eq 'l' || $align eq 'j')) {
      33        
891 0         0 $self->cr($leading); # go UP one line
892             # 88*10 text space units per em, negative to right for TJ
893 0         0 $self->nl(88*abs($indent)); # come down to right line and move right
894             }
895              
896 20 100       81 if ($align eq 'j') {
    100          
    100          
897 6         30 ($nwidth,$text) = $self->text_fill_justified($text, $lw, %opts);
898             } elsif ($align eq 'r') {
899 2         6 ($nwidth,$text) = $self->text_fill_right($text, $lw, %opts);
900             } elsif ($align eq 'c') {
901 2         8 ($nwidth,$text) = $self->text_fill_center($text, $lw, %opts);
902             } else { # 'l'
903 10         47 ($nwidth,$text) = $self->text_fill_left($text, $lw, %opts);
904             }
905              
906 20         90 $self->nl();
907 20         62 $first_line = 0;
908              
909             # bail out and just return remaining $text if run out of vertical space
910 20 100       88 last if ($height -= $leading) < 0;
911             }
912              
913 12 100       45 if (wantarray) {
914             # paragraph() called in the context of returning an array
915 6         31 return ($text, $height);
916             }
917 6         32 return $text;
918             }
919              
920             =head3 section, paragraphs
921              
922             ($overflow_text, $continue, $unused_height) = $txt->section($text, $width,$height, $continue, %opts)
923              
924             $overflow_text = $txt->section($text, $width,$height, $continue, %opts)
925              
926             =over
927              
928             The C<$text> contains a string with one or more paragraphs C<$width> wide,
929             starting at the current text position, with a newline \n between each
930             paragraph. Each paragraph is output (see C<paragraph>) until the C<$height>
931             limit is met (a partial paragraph may be at the bottom). Whatever wasn't
932             output, will be B<returned>.
933             If called in an array context, the
934             unused height and the paragraph "continue" flag are also B<returned>.
935              
936             C<$continue> is 0 for the first call of section(), and then use the value
937             returned from the previous call (1 if a paragraph was cut in the middle) to
938             prevent unwanted indenting or outdenting of the first line being printed.
939              
940             B<Options:>
941              
942             =over
943              
944             =item 'pvgap' => $vertical
945              
946             Additional vertical space (unit: pt) between paragraphs (default 0).
947             Note that this space will also be added after the last paragraph printed,
948             B<unless> you give a negative value. The |pvgap| is the value used (positive);
949             negative tells C<section> I<not> to add the gap (space) after the last
950             paragraph in the section.
951              
952             =back
953              
954             See C<paragraph> for other C<%opts> you can use, such as C<align> and C<pndnt>.
955              
956             B<Alternate name:> paragraphs
957              
958             This is for compatibility with PDF::API2, which renamed C<section>.
959              
960             =back
961              
962             =cut
963              
964             # alias for compatibility
965             sub paragraphs {
966 1     1 1 13 return section(@_);
967             }
968              
969             sub section {
970 2     2 1 21 my ($self, $text, $width,$height, $continue, %opts) = @_;
971             # copy dashed option names to preferred undashed names
972 2 50 33     16 if (defined $opts{'-pvgap'} && !defined $opts{'pvgap'}) { $opts{'pvgap'} = delete($opts{'-pvgap'}); }
  0         0  
973              
974 2         33 my $overflow = ''; # text to return if height fills up
975 2 50       11 my $pvgap = defined($opts{'pvgap'})? $opts{'pvgap'}: 0;
976 2 50       11 my $pvgapFlag = ($pvgap >= 0)?1 :0;
977 2         4 $pvgap = abs($pvgap);
978             # $continue =0 if fresh paragraph, or =1 if continuing one cut in middle
979              
980 2         11 my @paras = split(/\n/, $text);
981 2         12 for (my $i=0; $i<@paras; $i++) {
982 6         16 my $para = $paras[$i];
983             # regardless of whether we've run out of space vertically, we will
984             # loop through all the paragraphs requested
985            
986             # already seen inability to output more text?
987             # just put unused text back together into the string
988             # $continue should stay 1
989 6 50       21 if (length($overflow) > 0) {
990 0         0 $overflow .= "\n" . $para;
991 0         0 next;
992             }
993 6         26 ($para, $height) = $self->paragraph($para, $width,$height, $continue, %opts);
994 6         17 $continue = 0;
995 6 100       22 if (length($para) > 0) {
996             # we cut a paragraph in half. set flag that continuation doesn't
997             # get indented/outdented (continue current left margin)
998 2         6 $overflow .= $para;
999 2         5 $continue = 1;
1000             }
1001              
1002             # inter-paragraph vertical space? (0 length $para means that the
1003             # entire paragraph was consumed)
1004             # note that the last paragraph will also get the extra space after it
1005             # and first paragraph did not
1006             # if this is the last paragraph in the section, still want a gap to
1007             # the next section's starting paragraph, so can't simply omit gap.
1008             # however, want to avoid a pending gap (Td) if that's the last of all.
1009 6 0 66     43 if (length($para) == 0 && $pvgap != 0 &&
      0        
      33        
1010             ($i < scalar(@paras)-1 || $pvgapFlag)) {
1011             # move DOWN page by pvgap amount (is > 0)
1012 0         0 $self->cr(-$pvgap); # creates pending Td command
1013 0         0 $height -= $pvgap;
1014             }
1015             }
1016              
1017 2 50       9 if (wantarray) {
1018             # section() called in the context of returning an array
1019 0         0 return ($overflow, $continue, $height);
1020             }
1021 2         15 return $overflow;
1022             }
1023              
1024             =head3 textlabel
1025              
1026             $width = $txt->textlabel($x,$y, $font, $size, $text, %opts)
1027              
1028             =over
1029              
1030             Place a line of text at an arbitrary C<[$x,$y]> on the page, with various text
1031             settings (treatments) specified in the call.
1032              
1033             =over
1034              
1035             =item $font
1036              
1037             A previously created font.
1038              
1039             =item $size
1040              
1041             The font size (points).
1042              
1043             =item $text
1044              
1045             The text to be printed (a single line).
1046              
1047             =back
1048              
1049             B<Options:>
1050              
1051             =over
1052              
1053             =item 'rotate' => $deg
1054              
1055             Rotate C<$deg> degrees counterclockwise from due East.
1056              
1057             =item 'color' => $cspec
1058              
1059             A color name or permitted spec, such as C<#CCE840>, for the character I<fill>.
1060              
1061             =item 'strokecolor' => $cspec
1062              
1063             A color name or permitted spec, such as C<#CCE840>, for the character I<outline>.
1064              
1065             =item 'charspace' => $cdist
1066              
1067             Additional distance between characters.
1068              
1069             =item 'wordspace' => $wdist
1070              
1071             Additional distance between words.
1072              
1073             =item 'hscale' => $hfactor
1074              
1075             Horizontal scaling mode (percentage of normal, default is 100).
1076              
1077             =item 'render' => $mode
1078              
1079             Character rendering mode (outline only, fill only, etc.). See C<render> call.
1080              
1081             =item 'left' => 1
1082              
1083             Left align on the given point. This is the default.
1084              
1085             =item 'center' => 1
1086              
1087             Center the text on the given point.
1088              
1089             =item 'right' => 1
1090              
1091             Right align on the given point.
1092              
1093             =item 'align' => $placement
1094              
1095             Alternate to left, center, and right. C<$placement> is 'left' (default),
1096             'center', or 'right'.
1097              
1098             =back
1099              
1100             Other options available to C<text>, such as underlining, can be used here.
1101              
1102             The width used (in points) is B<returned>.
1103              
1104             =back
1105              
1106             B<Please note> that C<textlabel()> was not designed to interoperate with other
1107             text operations. It is a standalone operation, and does I<not> leave a "next
1108             write" position (or any other setting) for another C<text> mode operation. A
1109             following write will likely be at C<(0,0)>, and not at the expected location.
1110              
1111             C<textlabel()> is intended as an "all in one" convenience function for single
1112             lines of text, such as a label on some
1113             graphics, and not as part of putting down multiple pieces of text. It I<is>
1114             possible to figure out the position of a following write (either C<textlabel>
1115             or C<text>) by adding the returned width to the original position's I<x> value
1116             (assuming left-justified positioning).
1117              
1118             =cut
1119              
1120             sub textlabel {
1121 0     0 1   my ($self, $x,$y, $font, $size, $text, %opts) = @_;
1122             # copy dashed option names to preferred undashed names
1123 0 0 0       if (defined $opts{'-rotate'} && !defined $opts{'rotate'}) { $opts{'rotate'} = delete($opts{'-rotate'}); }
  0            
1124 0 0 0       if (defined $opts{'-color'} && !defined $opts{'color'}) { $opts{'color'} = delete($opts{'-color'}); }
  0            
1125 0 0 0       if (defined $opts{'-strokecolor'} && !defined $opts{'strokecolor'}) { $opts{'strokecolor'} = delete($opts{'-strokecolor'}); }
  0            
1126 0 0 0       if (defined $opts{'-charspace'} && !defined $opts{'charspace'}) { $opts{'charspace'} = delete($opts{'-charspace'}); }
  0            
1127 0 0 0       if (defined $opts{'-hscale'} && !defined $opts{'hscale'}) { $opts{'hscale'} = delete($opts{'-hscale'}); }
  0            
1128 0 0 0       if (defined $opts{'-wordspace'} && !defined $opts{'wordspace'}) { $opts{'wordspace'} = delete($opts{'-wordspace'}); }
  0            
1129 0 0 0       if (defined $opts{'-render'} && !defined $opts{'render'}) { $opts{'render'} = delete($opts{'-render'}); }
  0            
1130 0 0 0       if (defined $opts{'-right'} && !defined $opts{'right'}) { $opts{'right'} = delete($opts{'-right'}); }
  0            
1131 0 0 0       if (defined $opts{'-center'} && !defined $opts{'center'}) { $opts{'center'} = delete($opts{'-center'}); }
  0            
1132 0 0 0       if (defined $opts{'-left'} && !defined $opts{'left'}) { $opts{'left'} = delete($opts{'-left'}); }
  0            
1133 0 0 0       if (defined $opts{'-align'} && !defined $opts{'align'}) { $opts{'align'} = delete($opts{'-align'}); }
  0            
1134 0           my $wht;
1135              
1136 0           my %trans_opts = ( 'translate' => [$x,$y] );
1137 0           my %text_state = ();
1138 0 0         $trans_opts{'rotate'} = $opts{'rotate'} if defined($opts{'rotate'});
1139              
1140 0           my $wastext = $self->_in_text_object();
1141 0 0         if ($wastext) {
1142 0           %text_state = $self->textstate();
1143 0           $self->textend();
1144             }
1145 0           $self->save();
1146 0           $self->textstart();
1147              
1148 0           $self->transform(%trans_opts);
1149              
1150 0 0         $self->fillcolor(ref($opts{'color'}) ? @{$opts{'color'}} : $opts{'color'}) if defined($opts{'color'});
  0 0          
1151 0 0         $self->strokecolor(ref($opts{'strokecolor'}) ? @{$opts{'strokecolor'}} : $opts{'strokecolor'}) if defined($opts{'strokecolor'});
  0 0          
1152              
1153 0           $self->font($font, $size);
1154              
1155 0 0         $self->charspace($opts{'charspace'}) if defined($opts{'charspace'});
1156 0 0         $self->hscale($opts{'hscale'}) if defined($opts{'hscale'});
1157 0 0         $self->wordspace($opts{'wordspace'}) if defined($opts{'wordspace'});
1158 0 0         $self->render($opts{'render'}) if defined($opts{'render'});
1159              
1160 0 0 0       if (defined($opts{'right'}) && $opts{'right'} ||
    0 0        
    0 0        
      0        
      0        
      0        
      0        
      0        
      0        
1161             defined($opts{'align'}) && $opts{'align'} =~ /^r/i) {
1162 0           $wht = $self->text_right($text, %opts);
1163             } elsif (defined($opts{'center'}) && $opts{'center'} ||
1164             defined($opts{'align'}) && $opts{'align'} =~ /^c/i) {
1165 0           $wht = $self->text_center($text, %opts);
1166             } elsif (defined($opts{'left'}) && $opts{'left'} ||
1167             defined($opts{'align'}) && $opts{'align'} =~ /^l/i) {
1168             # override any stray 'align' that got through to here
1169 0           $wht = $self->text($text, %opts, 'align'=>'l'); # explicitly left aligned
1170             } else {
1171             # override any stray 'align' that got through to here
1172 0           $wht = $self->text($text, %opts, 'align'=>'l'); # left aligned by default
1173             }
1174              
1175 0           $self->textend();
1176 0           $self->restore();
1177              
1178 0 0         if ($wastext) {
1179 0           $self->textstart();
1180 0           $self->textstate(%text_state);
1181             }
1182 0           return $wht;
1183             }
1184            
1185             # --------------------- start of column() section ---------------------------
1186             # WARNING: be sure to keep in synch with changes to POD elsewhere, especially
1187             # Column_docs.pm
1188              
1189             =head3 column
1190              
1191             See L<PDF::Builder::Content::Column_docs> for documentation.
1192              
1193             =cut
1194              
1195             # TBD, future:
1196             # * = not official HTML5 or CSS (i.e., an extension)
1197             # perhaps 3.029?
1198             # arbitrary paragraph shapes (path)
1199             # at a minimum, hyphenate-basic usage including &SHY;
1200             # <img>, <sup>, <sub>, <pre>, <nobr>, <br>, <dl>/<dt>/<dd>, <center>*
1201             # <big>*, <bigger>*, <smaller>*, <small>
1202             # <cite>, <q>, <code>, <kbd>, <samp>, <var>
1203             # CSS _expand* to call hscale() and/or condensed/expanded type in get_font()
1204             # (if not doing synfont() call)
1205             # CSS text transform, such as uppercase and lowercase flavors
1206             # CSS em and ex sizes relative to current font size (like %),
1207             # other absolute sizes such as in, cm, mm, px (?)
1208             #
1209             # TBD link page numbers: currently nothing shown ($page_numbers = 0)
1210             # add <_link_page> text</_link_page> inserted BEFORE </_ref>
1211             # page_numbers=1 " on page $fpn" (internal) " on [page $fpn]" (external)
1212             # =2 " on this page" " on previous page" "on following page" etc
1213             # permits user to choose formatting CSS that often will be a bit different
1214             # from rest of link text, such as Roman while link text is italic
1215             # consider $extname of some sort for external links not just [ ] e.g.,
1216             # " on page [$extname $fpn]" extname not necessarily same as file name
1217             # link to id already knows ppn and fpn. link to #p could use an additional
1218             # pass for forward references to get the $fpn. link to ##ND ? might be
1219             # able to determine physical and forrmatted page numbers
1220             # local override (attribute, {&...}) of page_numbers to repair problem areas
1221             #
1222             # possibly...
1223             # <abbr>, <base>, <wbr>
1224             # <article>, <aside>, <section> as predefined page areas?
1225             #
1226             # extensions to HTML and CSS...
1227             # <_sc>* preprocess: around runs of lowercase put <span style="font-size: 80%;
1228             # expand: 110%"> and fold to UPPER CASE. this is post-mytext creation!
1229             # <_pc>* (Petite case) like <sc> but 1ex font-size, expand 120%
1230             # <_dc>* drop caps
1231             # <_ovl>* overline (similar to underline) using CSS text-decoration: overline
1232             # <_k>* kern text (shift left or right) with CSS _kern, or general
1233             # positioning: ability to form (La)TeX logo through character positioning
1234             # what to do at HTML level? x+/- %fs, y+/- %fs
1235             # also useful for <sup>4</sup><sub>2</sub>He notation
1236             # <_vfrac>* vulgar fraction, using sup, sup, kern
1237             # HTML attributes to tune (force end) of something, such as early </sc>
1238             # after X words and/or end of line. flag to ignore next </sc> coming up,
1239             # or just make self-closing with children?
1240             # <_endc>* force end of column here (at this y, while still filling line)
1241             # e.g., to prevent an orphan. optional conditional (e.g., less than 1"
1242             # of vertical space left in column)
1243             # <_keep>* material to keep together, such as headings and paragraph text
1244             # leading (line-height) as a dimension instead of a ratio, convert to ratio
1245             #
1246             # 3.030 or later?
1247             # left/right auto margins? <center> may need this
1248             # Text::KnuthLiang hyphenation
1249             # <hyp>*, <nohyp>* control hypenation in a word (and remember
1250             # rules when see this word again)
1251             # <lang>* define language of a span of text, for hyphenation/audio purposes
1252             # Knuth-Plass paragraph shaping (with proper hyphenation)
1253             # HarfBuzz::Shaper for ligatures, callout of specific glyphs (not entities),
1254             # RTL and non-Western language support. <bdi>, <bdo>
1255             # <nolig></nolig>* forbid ligatures in this range
1256             # <lig gid='nnn'> </lig>* replace character(s) by a ligature
1257             # <alt gid='nnn'> </alt>* replace character(s) by alternate glyph
1258             # such as a swash. font-dependent
1259             # <eqn>* (needs image support, SVG processing)
1260              
1261             sub column {
1262 0     0 1   my ($self, $page, $text, $grfx, $markup, $txt, %opts) = @_;
1263 0           my $pdf = $self->{' api'}->{' FM'}->{' pdf'};
1264              
1265 0           my $rc = 0; # so far, a normal call with input completely consumed
1266 0           my $unused = undef;
1267             # array[1] will be consolidated CSS from any <style> tags
1268 0           my ($x, $y);
1269              
1270 0           my $font_size = 12; # basic default, override with font-size
1271             #if ($text->{' fontsize'} > 0) { $font_size = $text->{' fontsize'}; } # already defined font size?
1272 0 0         if (defined $opts{'font_size'}) { $font_size = $opts{'font_size'}; }
  0            
1273            
1274 0           my $leading = 1.125; # basic default, override with line-height
1275 0 0         if (defined $opts{'leading'}) { $leading=$opts{'leading'}; }
  0            
1276 0           my $marker_width = 1*$font_size; # 2em space for list markers
1277 0           my $marker_gap = $font_size; # 1em space between list marker and item
1278 0 0         if (defined $opts{'marker_width'}) { $marker_width=$opts{'marker_width'}; }
  0            
1279 0 0         if (defined $opts{'marker_gap'}) { $marker_gap=$opts{'marker_gap'}; }
  0            
1280 0           my $page_numbers = 0; # default: formatted pgno not used in links (TBD)
1281             #if (defined $opts{'page_numbers'}) { $page_numbers=$opts{'page_numbers'}; }
1282              
1283 0           my $restore = 0; # restore text state and color at end
1284 0 0         if (defined $opts{'restore'}) { $restore = $opts{'restore'}; }
  0            
1285 0           my @entry_state = (); # font state, color and graphics color
1286 0           push @entry_state, $text->{' font'}; # initially may be undef, then hashref
1287 0           push @entry_state, $text->{' fontsize'}; # initially 0
1288 0           push @entry_state, $text->{' fillcolor'}; # an arrayref, often single number or string
1289 0           push @entry_state, $text->{' strokecolor'}; # an arrayref, often single number or string
1290 0 0 0       if (defined $grfx && ref($grfx) =~ m/^PDF::Builder::Content=HASH/){
1291             # we have a valid grfx, so can use its values
1292 0           push @entry_state, $grfx->{' fillcolor'}; # an array, often single number or string
1293 0           push @entry_state, $grfx->{' strokecolor'}; # an array, often single number or string
1294             } else {
1295             # no grfx, so use undef for values
1296 0           push @entry_state, undef;
1297 0           push @entry_state, undef;
1298             }
1299              
1300             # fallback CSS properties, inserted at array[0]
1301 0           my $default_css = _default_css($pdf, $text, $font_size, $leading, %opts); # per-tag properties
1302             # dump @mytext list within designated column @outline
1303             # for now, the outline is a simple rectangle
1304 0           my $outline_color = 'none'; # optional outline of the column
1305 0 0         $outline_color = $opts{'outline'} if defined $opts{'outline'};
1306              
1307             # define coordinates of column, currently just 'rect' rectangle, but
1308             # in future could be very elaborate
1309 0           my @outline = _get_column_outline($grfx, $outline_color, %opts);
1310 0           my ($col_min_x, $col_min_y, $col_max_x, $col_max_y) =
1311             _get_col_extents(@outline);
1312 0           my $start_y = $col_max_y; # default is at top of column
1313 0           my $topCol = 1; # paragraph is at top of column, don't use margin-top
1314 0 0         $start_y = $opts{'start_y'} if defined $opts{'start_y'};
1315 0 0         if ($start_y != $col_max_y) {
1316             # topCol reset to 0 b/c not at top of column
1317 0           $topCol = 0; # go ahead with any extra top margin
1318             }
1319              
1320             # 'page' parameters
1321 0           my ($pass_count, $max_passes, $ppn, $extfilepath, $fpn, $LR, $bind);
1322 0           $ppn = $extfilepath = $fpn = undef;
1323             # physical page number 1,2,..., filepath/name/ext for THIS output,
1324             # formatted page number string (all for link creation)
1325 0           $LR = 'R'; # for now, just right-hand page
1326 0           $bind = 0; # for now, offset column by 0 points to "outside" of page
1327 0 0         if (defined $opts{'page'}) {
1328 0 0 0       if (!( ref($opts{'page'}) eq 'ARRAY' &&
1329             7 == @{$opts{'page'}} )) {
1330 0           carp "page not anonymous array of length 7, ignored.";
1331             } else {
1332 0           $pass_count = $opts{'page'}->[0];
1333 0           $max_passes = $opts{'page'}->[1];
1334 0           $ppn = $opts{'page'}->[2];
1335 0 0 0       if (defined $ppn && $ppn !~ /^[1-9]\d*$/) {
1336 0           carp "physical page number must be integer > 0";
1337 0           $ppn = 1;
1338             }
1339 0           $extfilepath = $opts{'page'}->[3];
1340             # external name for THIS output (other docs can link to it)
1341             # undef OK, if will never link to this from outside. this name
1342             # is the path and name of this output file in its FINAL home,
1343             # not necessarily where it is created!
1344 0           $fpn = $opts{'page'}->[4];
1345             # formatted page string (THIS page)
1346 0           $LR = $opts{'page'}->[5];
1347 0 0         if (!defined $LR) { $LR = 'R'; }
  0            
1348 0 0 0       if (defined $LR && $LR ne 'L' && $LR ne 'R') {
      0        
1349 0           carp "LR setting should be L or R. force to R";
1350 0           $LR = 'R';
1351             }
1352             # TBD handle 'L' and 'R', for now ignore $LR
1353 0           $bind = $opts{'page'}->[6];
1354             # TBD for now, ignore $bind
1355             }
1356             } else {
1357             # for situations where $opts{'page'} is not passed in because
1358             # we're not doing links and similar. some will be used.
1359 0           $pass_count = 1;
1360 0           $max_passes = 1;
1361 0           $ppn = 1;
1362 0           $extfilepath = '';
1363 0           $fpn = '1';
1364 0           $LR = 'R';
1365 0           $bind = 0;
1366             }
1367              
1368             # what is the state of %state parameter (hashref $state)
1369 0           my $state = undef; # OK, but xrefs and other links disallowed!
1370             # TBD everywhere $state used, check if defined!
1371             # disable all the _ref and _reft stuff if no state
1372 0 0 0       if (defined $opts{'state'} && ref($opts{'state'}) eq 'HASH') {
1373 0           $state = $opts{'state'};
1374             # TBD {source} {target} {params} to read in, write out
1375             # before first pass of first PDF (if multiple), external initialize
1376             }
1377              
1378             # what is the content of $text: string, array, or array of hashes?
1379             # (or already set up, per 'pre' markup)
1380             # break up text into array of hashes so we have one common input
1381 0           my @mytext = _break_text($txt, $markup, %opts,'page_numbers'=>$page_numbers);
1382 0           unshift @mytext, $default_css;
1383              
1384             # each element of mytext is an anonymous hash, with members text=>text
1385             # content, font_size, color, font, variants, etc.
1386             #
1387             # if markup=pre, it's already in final form (array of hashes)
1388             # if none, separate out paragraphs into array of hashes
1389             # if md1 or md2, convert to HTML (error if no converter)
1390             # if html, need to interpret (error if no converter)
1391             # finally, resulting array of hashes is interpreted and fit in column
1392             # process style attributes, tag attributes, style tags, column() options,
1393             # and fixed default attributes in that order to fill in each tag's
1394             # attribute list. on exit from tag, set attributes to restore settings
1395 0           @mytext = _tag_attributes($markup, @mytext);
1396 0           _check_CSS_properties(@mytext);
1397              
1398 0           ($rc, $start_y, $unused) = _output_text($start_y, $col_min_y, \@outline, $pdf, $page, $text, $grfx, $restore, $topCol, $font_size, $markup, $marker_width, $marker_gap, $leading, $opts{'page'}, $page_numbers, $pass_count, $max_passes, $state, @mytext);
1399              
1400 0 0         if ($rc > 1) {
1401             # restore = 2 request restore to @entry_state for rc=0, 3 for 1
1402 0           $text->{' font'} = $entry_state[0];
1403 0           $text->{' fontsize'} = $entry_state[1];
1404 0           $text->{' fillcolor'} = $entry_state[2];
1405 0           $text->{' strokecolor'} = $entry_state[3];
1406 0 0 0       if (defined $grfx && ref($grfx) =~ m/^PDF::Builder::Content=HASH/){
1407             # we have a valid grfx, so can use its values
1408 0           $grfx->{' fillcolor'} = $entry_state[4];
1409 0           $grfx->{' strokecolor'} = $entry_state[5];
1410             } else {
1411             # no grfx, so do nothing
1412             }
1413 0           $rc -= 2;
1414             }
1415              
1416 0           return ($rc, $start_y, $unused);
1417             } # end of column()
1418              
1419             # set up an element containing all the default settings, as well as those
1420             # passed in by column() parameters and options. this is generated once for
1421             # each call to column, in case any parameters or options change.
1422             sub _default_css {
1423 0     0     my ($pdf, $text, $font_size, $leading, %opts) = @_;
1424              
1425             # font size is known
1426             # if user wishes to set font OUTSIDE of column
1427             # if FontManager called outside column() and wish to inherit settings for
1428             # face, style, weight, color (fill), 'font_info'=>'-fm-'
1429             # if FontManager NOT used to set font externally, can just inherit font
1430             # (don't know what it is), current font = -external-. all styles and
1431             # weights are this one font
1432             # otherwise, 'font_info'=>'face:style:weight:color' where style = italic
1433             # or normal, weight = bold or normal, color = a color name e.g., black.
1434             # this face must be known to FontManager
1435             # as last resort, if font not set outside of column, FontManager default
1436 0           my (@cur_font, @cur_color, $current_color);
1437 0 0         if (!defined $opts{'font_info'}) {
1438             # default action: -fm-
1439 0           $opts{'font_info'} = '-fm-';
1440             }
1441             # override any predefined font
1442 0 0         if ($opts{'font_info'} eq '-fm-') {
    0          
1443             # use whatever FontManager thinks is the default font
1444 0           $pdf->get_font('face'=>'default'); # set current font to default
1445 0           @cur_font = $pdf->get_font();
1446 0           $cur_font[1] = $cur_font[2] = 0; # no italic or bold
1447             # use [0..2] of returned array
1448             } elsif ($opts{'font_info'} eq '-ext-') {
1449             # requesting preloaded font, as '-external-'
1450             # there IS a predefined font, from somewhere, to use?
1451 0 0         if ($pdf->get_external_font($text)) {
1452             # failed to find a predefined font. use default
1453 0           $pdf->get_font('face'=>'default'); # set current font to default
1454             }
1455 0           @cur_font = $pdf->get_font(); # use [0..2] of returned array,
1456             # either predefined -external- font, or default font
1457             } else {
1458             # explicitly given font must be KNOWN to FontManager
1459             # family:style:weight:color (normal/0/italic/1, normal/0/bold/1)
1460 0           @cur_font = split /:/, $opts{'font_info'};
1461             # add normal style and weight if not given
1462 0 0         if (@cur_font == 2) { push @cur_font, 0; }
  0            
1463 0 0         if (@cur_font == 1) { push @cur_font, 0,0; }
  0            
1464 0 0         if ("$cur_font[1]" eq 'normal') { $cur_font[1] = 0; }
  0            
1465 0 0         if ("$cur_font[1]" eq 'italic') { $cur_font[1] = 1; }
  0            
1466 0 0         if ("$cur_font[2]" eq 'normal') { $cur_font[2] = 0; }
  0            
1467 0 0         if ("$cur_font[2]" eq 'bold' ) { $cur_font[2] = 1; }
  0            
1468             # set the current font
1469 0 0         if (@cur_font == 4) { $text->fillcolor($cur_font[3]); } # color
  0            
1470 0           $pdf->get_font('face'=>$cur_font[0],
1471             'italic'=>$cur_font[1],
1472             'bold'=>$cur_font[2]);
1473 0           @cur_font = $pdf->get_font();
1474             }
1475             # @cur_font should have (at least) face, italic 0/1, bold 0/1
1476             # to load into 'body' properties later
1477              
1478 0           @cur_color = $text->fillcolor();
1479             # if (defined $opts{'font_color'}) {
1480             # # request override of current text color on entry
1481             # @cur_color = ($opts{'font_color'});
1482             # }
1483 0 0         if (@cur_color == 1) {
1484             # 'name', '#rrggbb' etc. suitable for CSS usage
1485             # TBD: single gray scale value s/b changed to '#rrggbb'
1486             # (might be 0..1, 0..100, 0..ff)? 0 = black
1487 0           $current_color = $cur_color[0];
1488             } else {
1489             # returned an array of values, unsuitable for CSS
1490             # TBD: 3 values 0..1 turn into #rrggbb
1491             # TBD: 3 values 0..100 turn into #rrggbb
1492             # TBD: 3 values 0..ff turn into #rrggbb
1493             # TBD: 4 values like 3, but CMYK
1494             # for now, default to 'black'
1495 0           $current_color = 'black';
1496             }
1497              
1498 0           my %style;
1499 0           $style{'tag'} = 'defaults';
1500 0           $style{'text'} = '';
1501              
1502 0           $style{'body'} = {};
1503 0           $style{'p'} = {};
1504 0           $style{'ol'} = {};
1505 0           $style{'ul'} = {};
1506 0           $style{'_sl'} = {};
1507 0           $style{'h1'} = {};
1508 0           $style{'h2'} = {};
1509 0           $style{'h3'} = {};
1510 0           $style{'h4'} = {};
1511 0           $style{'h5'} = {};
1512 0           $style{'h6'} = {};
1513 0           $style{'i'} = {};
1514 0           $style{'em'} = {};
1515 0           $style{'b'} = {};
1516 0           $style{'strong'} = {};
1517 0           $style{'code'} = {};
1518 0           $style{'hr'} = {};
1519 0           $style{'a'} = {};
1520 0           $style{'_ref'} = {};
1521 0           $style{'_reft'} = {}; # no visible content
1522 0           $style{'_nameddest'} = {}; # no visible content
1523              
1524 0           $style{'body'}->{'font-size'} = $font_size; # must be in points
1525 0           $style{'body'}->{'_parent-fs'} = $font_size; # carry current value
1526 0           $style{'body'}->{'line-height'} = $leading;
1527              
1528             # HARD-CODED default for paragraph indent, top margin
1529 0           my $para = [ 1, 1*$font_size, 0 ];
1530             # if font_size changes, change indentation
1531             # REVISED default if 'para' option given
1532 0 0         if (defined $opts{'para'}) {
1533             #$para->[0] # flag: 0 = <p> is normal top of paragraph (with indent
1534             # and margin), 1 = at top of column, so suppress extra top margin
1535             # (and reset once past this first line)
1536 0           $para->[1] = $opts{'para'}->[0]; # indentation
1537 0           $para->[2] = $opts{'para'}->[1]; # extra top margin
1538             }
1539             # $para flag determines whether these settings are used or ignored (=1,
1540             # we are at the top of a column, ignore text-indent and margin-top)
1541             # set paragraph CSS defaults, may be overridden below
1542 0           $style{'p'}->{'text-indent'} = $para->[1];
1543 0           $style{'p'}->{'margin-top'} = $para->[2];
1544              
1545 0           my $color = $current_color; # text default color
1546 0 0         $color = $opts{'color'} if defined $opts{'color'};
1547 0           $style{'body'}->{'color'} = $color;
1548              
1549             # now for fixed settings
1550 0           $style{'body'}->{'font-family'} = $cur_font[0]; # face
1551 0 0         $style{'body'}->{'font-style'} = $cur_font[1]? 'italic': 'normal';
1552             # TBD future: multiple gradations of weight, numeric and named
1553 0 0         $style{'body'}->{'font-weight'} = $cur_font[2]? 'bold': 'normal';
1554             #$style{'body'}->{'font-variant'} = 'normal'; # small-caps, petite caps
1555             # TBD future: optical size select subfont, slant separate from italic flagm,
1556             # stretch amount (expand/condense)
1557             # TBD future: 'margin' consolidated entry
1558 0           $style{'body'}->{'margin-top'} = '0';
1559 0           $style{'body'}->{'margin-right'} = '0';
1560 0           $style{'body'}->{'margin-bottom'} = '0';
1561 0           $style{'body'}->{'margin-left'} = '0';
1562 0           $style{'body'}->{'_left'} = '0';
1563 0           $style{'body'}->{'_left_nest'} = '0';
1564 0           $style{'body'}->{'_right'} = '0';
1565 0           $style{'body'}->{'text-indent'} = '0';
1566 0           $style{'body'}->{'text-align'} = 'left';
1567             #$style{'body'}->{'text-transform'} = 'none'; # capitalize, uppercase, lowercase
1568             #$style{'body'}->{'border-style'} = 'none'; # solid, dotted, dashed... TBD
1569             #$style{'body'}->{'border-width'} = '1pt';
1570             #$style{'body'}->{'border-color'} = 'inherit';
1571             # TBD border-* individually specify for top/right/bottom/left
1572             # also 'border' consolidated entry
1573 0           $style{'body'}->{'text-decoration'} = 'none';
1574 0           $style{'body'}->{'display'} = 'block';
1575 0           $style{'body'}->{'width'} = '-1'; # used for <hr> length in pts, -1 is full column
1576 0           $style{'body'}->{'height'} = '-1'; # used for <hr> size (thickness) in pts
1577 0           $style{'body'}->{'_href'} = '';
1578 0           $style{'body'}->{'_marker-before'} = '';
1579 0           $style{'body'}->{'_marker-after'} = '.';
1580 0           $style{'body'}->{'_marker-color'} = '';
1581 0           $style{'body'}->{'_marker-font'} = '';
1582 0           $style{'body'}->{'_marker-size'} = $font_size;
1583 0           $style{'body'}->{'_marker-style'} = '';
1584 0           $style{'body'}->{'_marker-text'} = '';
1585 0           $style{'body'}->{'_marker-weight'} = '';
1586 0           $style{'body'}->{'_marker-align'} = 'right';
1587              
1588 0           $style{'p'}->{'display'} = 'block';
1589 0           $style{'font'}->{'display'} = 'inline';
1590 0           $style{'span'}->{'display'} = 'inline';
1591              
1592 0           $style{'ul'}->{'list-style-type'} = '.u';
1593             # disc, circle, square, box, none
1594 0           $style{'ul'}->{'list-style-position'} = 'outside'; # or inside or numeric
1595 0           $style{'ul'}->{'display'} = 'block';
1596             # TBD future: padding and padding-*
1597 0           $style{'ul'}->{'margin-top'} = '50%'; # relative to text's font-size
1598 0           $style{'ul'}->{'margin-bottom'} = '50%';
1599 0           $style{'ul'}->{'_marker-font'} = 'ZapfDingbats';
1600 0           $style{'ul'}->{'_marker-style'} = 'normal';
1601 0           $style{'ul'}->{'_marker-weight'} = 'bold';
1602 0           $style{'ul'}->{'_marker-size'} = "50%";
1603 0           $style{'ul'}->{'_marker-align'} = "right";
1604 0           $style{'_sl'}->{'list-style-type'} = 'none';
1605 0           $style{'_sl'}->{'list-style-position'} = 'outside'; # or inside or numeric
1606 0           $style{'_sl'}->{'display'} = 'block';
1607 0           $style{'_sl'}->{'margin-top'} = '50%'; # relative to text's font-size
1608 0           $style{'_sl'}->{'margin-bottom'} = '50%';
1609 0           $style{'ol'}->{'list-style-type'} = '.o';
1610             # decimal, lower-roman, upper-roman, lower-alpha, upper-alpha, none
1611             # arabic is synonym for decimal
1612 0           $style{'ol'}->{'list-style-position'} = 'outside'; # or inside or numeric
1613 0           $style{'ol'}->{'display'} = 'block';
1614 0           $style{'ol'}->{'margin-top'} = '50%'; # relative to text's font-size
1615 0           $style{'ol'}->{'margin-bottom'} = '50%';
1616 0           $style{'ol'}->{'_marker-before'} = ''; # content to add before marker
1617 0           $style{'ol'}->{'_marker-after'} = '.'; # content to add after marker
1618 0           $style{'ol'}->{'_marker-font'} = ''; # unchanged
1619 0           $style{'ol'}->{'_marker-style'} = 'normal';
1620 0           $style{'ol'}->{'_marker-weight'} = 'bold';
1621 0           $style{'ol'}->{'_marker-size'} = '100%';
1622 0           $style{'ol'}->{'_marker-align'} = "right";
1623 0           $style{'li'}->{'display'} = 'inline'; # should inherit from ul or ol
1624             # marker is block, forcing new line, and li immediately follows
1625              
1626             #$style{'h6'}->{'text-transform'} = 'uppercase'; # heading this level CAPS
1627 0           $style{'h6'}->{'font-weight'} = 'bold'; # all headings bold
1628 0           $style{'h6'}->{'font-size'} = '75%'; # % of original font-size
1629 0           $style{'h6'}->{'margin-top'} = '106%'; # relative to the font-size
1630 0           $style{'h6'}->{'margin-bottom'} = '80%'; # relative to the font-size
1631 0           $style{'h6'}->{'display'} = 'block'; # block (start on new line)
1632              
1633 0           $style{'h5'}->{'font-weight'} = 'bold';
1634 0           $style{'h5'}->{'font-size'} = '85%';
1635 0           $style{'h5'}->{'margin-top'} = '95%';
1636 0           $style{'h5'}->{'margin-bottom'} = '71%';
1637 0           $style{'h5'}->{'display'} = 'block';
1638              
1639 0           $style{'h4'}->{'font-weight'} = 'bold';
1640 0           $style{'h4'}->{'font-size'} = '95%';
1641 0           $style{'h4'}->{'margin-top'} = '82%';
1642 0           $style{'h4'}->{'margin-bottom'} = '61%';
1643 0           $style{'h4'}->{'display'} = 'block';
1644              
1645 0           $style{'h3'}->{'font-weight'} = 'bold';
1646 0           $style{'h3'}->{'font-size'} = '115%';
1647 0           $style{'h3'}->{'margin-top'} = '68%';
1648 0           $style{'h3'}->{'margin-bottom'} = '51%';
1649 0           $style{'h3'}->{'display'} = 'block';
1650              
1651 0           $style{'h2'}->{'font-weight'} = 'bold';
1652 0           $style{'h2'}->{'font-size'} = '150%';
1653 0           $style{'h2'}->{'margin-top'} = '54%';
1654 0           $style{'h2'}->{'margin-bottom'} = '40%';
1655 0           $style{'h2'}->{'display'} = 'block';
1656              
1657 0           $style{'h1'}->{'font-weight'} = 'bold';
1658 0           $style{'h1'}->{'font-size'} = '200%';
1659 0           $style{'h1'}->{'margin-top'} = '40%';
1660 0           $style{'h1'}->{'margin-bottom'} = '30%';
1661 0           $style{'h1'}->{'display'} = 'block';
1662              
1663 0           $style{'i'}->{'font-style'} = 'italic';
1664 0           $style{'i'}->{'display'} = 'inline';
1665 0           $style{'b'}->{'font-weight'} = 'bold';
1666 0           $style{'b'}->{'display'} = 'inline';
1667 0           $style{'em'}->{'font-style'} = 'italic';
1668 0           $style{'em'}->{'display'} = 'inline';
1669 0           $style{'strong'}->{'font-weight'} = 'bold';
1670 0           $style{'strong'}->{'display'} = 'inline';
1671 0           $style{'code'}->{'display'} = 'inline';
1672 0           $style{'code'}->{'font-family'} = 'Courier'; # TBD why does ' default-constant' fail?
1673 0           $style{'code'}->{'font-size'} = '85%';
1674              
1675 0           $style{'u'}->{'display'} = 'inline';
1676 0           $style{'u'}->{'text-decoration'} = 'underline';
1677 0           $style{'ins'}->{'display'} = 'inline';
1678 0           $style{'ins'}->{'text-decoration'} = 'underline';
1679              
1680 0           $style{'s'}->{'display'} = 'inline';
1681 0           $style{'s'}->{'text-decoration'} = 'line-through';
1682 0           $style{'strike'}->{'display'} = 'inline';
1683 0           $style{'strike'}->{'text-decoration'} = 'line-through';
1684 0           $style{'del'}->{'display'} = 'inline';
1685 0           $style{'del'}->{'text-decoration'} = 'line-through';
1686              
1687             # non-standard tag for overline TBD
1688             #$style{'_ovl'}->{'display'} = 'inline';
1689             #$style{'_ovl'}->{'text-decoration'} = 'overline';
1690            
1691             # non-standard tag for kerning (+ font-size fraction to move left, - right)
1692             # e.g., for vulgar fraction adjust / and denominator <sub> TBD
1693             #$style{'_k'}->{'display'} = 'inline';
1694             #$style{'_k'}->{'_kern'} = '0.2';
1695              
1696 0           $style{'hr'}->{'display'} = 'block';
1697 0           $style{'hr'}->{'height'} = '0.5'; # 1/2 pt default thickness
1698 0           $style{'hr'}->{'width'} = '-1'; # default width is full column
1699 0           $style{'hr'}->{'margin-top'} = '100%';
1700 0           $style{'hr'}->{'margin-bottom'} = '100%';
1701              
1702 0           $style{'blockquote'}->{'display'} = 'block';
1703 0           $style{'blockquote'}->{'margin-top'} = '56%';
1704 0           $style{'blockquote'}->{'margin-bottom'} = '56%';
1705 0           $style{'blockquote'}->{'margin-left'} = '300%'; # want 3em TBD
1706 0           $style{'blockquote'}->{'margin-right'} = '300%';
1707 0           $style{'blockquote'}->{'line-height'} = '1.00'; # close spacing
1708 0           $style{'blockquote'}->{'font-size'} = '80%'; # smaller type
1709              
1710             # only browser (URL) applies here, so leave browser style
1711             # other links changed to '_ref', with its own style
1712 0           $style{'a'}->{'text-decoration'} = 'underline'; # browser style
1713             # none, underline, overline, line-through or a combination
1714             # separated by spaces
1715 0           $style{'a'}->{'color'} = 'blue';
1716 0           $style{'a'}->{'display'} = 'inline';
1717 0           $style{'a'}->{'_href'} = '';
1718              
1719 0           $style{'_ref'}->{'color'} = '#660066'; # default link for xrefs
1720 0           $style{'_ref'}->{'font-style'} = 'italic';
1721 0           $style{'_ref'}->{'display'} = 'inline';
1722             # <_reft> and <_nameddest> no visible content, so no styling
1723              
1724             #$style{'sc'}->{'font-size'} = '80%'; # smaller type TBD
1725             #$style{'sc'}->{'_expand'} = '110%'; # wider type TBD _expand
1726             #likewise for pc (petite caps) TBD
1727              
1728 0           $style{'_marker'}->{'display'} = 'block';
1729 0           $style{'_marker'}->{'text-align'} = 'right'; # overwrite with _marker-align
1730             # _marker-align defaulted 'right' in 'ul' and 'ol', N/A in '_sl'
1731             # can set properties in <ol> or <ul> to apply to entire list (inherited)
1732             # this is why unique CSS names _marker-* is needed rather than std names
1733            
1734 0           return \%style;
1735             } # end of _default_css()
1736              
1737             # make sure each tag's attributes are proper property names
1738             # consolidate attributes and style attribute (if any)
1739             # mark empty tags (no explicit end tag will be found)
1740             #
1741             # also insert <_marker> tag before every <li> lacking an explicit one
1742             sub _tag_attributes {
1743 0     0     my ($markup, @mytext) = @_;
1744            
1745             # start at [2], so defaults and styles skipped
1746 0           for (my $el=2; $el < @mytext; $el++) {
1747 0 0         if (ref($mytext[$el]) ne 'HASH') { next; }
  0            
1748 0 0         if ($mytext[$el]->{'tag'} eq '') { next; }
  0            
1749              
1750 0           my $tag = lc($mytext[$el]->{'tag'});
1751 0 0         if (!defined $tag) { next; }
  0            
1752 0 0         if ($tag =~ m#^/#) { next; }
  0            
1753              
1754             # we have a tag that might have one or more attributes that may
1755             # need to be renamed as a CSS property
1756 0 0         if ($tag eq 'font') {
    0          
    0          
    0          
    0          
    0          
1757 0 0         if (defined $mytext[$el]->{'face'}) {
1758 0           $mytext[$el]->{'font-family'} = delete($mytext[$el]->{'face'});
1759             }
1760 0 0         if (defined $mytext[$el]->{'size'}) {
1761 0           $mytext[$el]->{'font-size'} = delete($mytext[$el]->{'size'});
1762             # TBD some sizes may need to be converted to points. for now,
1763             # assume is a bare number (pt), pt, or % like font-size CSS
1764             }
1765             } elsif ($tag eq 'ol') {
1766 0 0         if (defined $mytext[$el]->{'type'}) {
1767 0           $mytext[$el]->{'list-style-type'} = delete($mytext[$el]->{'type'});
1768             }
1769             # note that list-style-type would be aAiI1
1770             # 'start' left unchanged
1771             } elsif ($tag eq 'ul') {
1772 0 0         if (defined $mytext[$el]->{'type'}) {
1773 0           $mytext[$el]->{'list-style-type'} = delete($mytext[$el]->{'type'});
1774             }
1775             } elsif ($tag eq 'li') {
1776             #if (defined $mytext[$el]->{'type'}) {
1777             # $mytext[$el]->{'list-style-type'} = delete($mytext[$el]->{'type'});
1778             #}
1779             # 'value' left unchanged, to be used by <_marker> before this <li>
1780             # 'type' to be used by <_marker> (both, under <ol> only)
1781             } elsif ($tag eq 'a') {
1782 0 0         if (defined $mytext[$el]->{'href'}) {
1783 0           $mytext[$el]->{'_href'} = delete($mytext[$el]->{'href'});
1784             }
1785             } elsif ($tag eq 'hr') {
1786 0 0         if (defined $mytext[$el]->{'size'}) {
1787 0           $mytext[$el]->{'height'} = delete($mytext[$el]->{'size'});
1788             }
1789             }
1790             # add any additional tag attributes -> CSS property here
1791            
1792             # process any style attribute and override attribute values
1793 0 0         if (defined $mytext[$el]->{'style'}) {
1794 0           my $style_attr = _process_style_string({}, $mytext[$el]->{'style'});
1795             # hash of property_name => value pairs
1796 0           foreach (keys %$style_attr) {
1797             # create or override any existing property by this name
1798 0           $mytext[$el]->{$_} = $style_attr->{$_};
1799             }
1800             }
1801              
1802             # list-style-type for ol/ul/li needs fleshing out
1803 0 0         if (defined $mytext[$el]->{'list-style-type'}) {
1804 0 0         if ($mytext[$el]->{'list-style-type'} eq '1') {
    0          
    0          
    0          
    0          
    0          
    0          
1805 0           $mytext[$el]->{'list-style-type'} = 'decimal';
1806             } elsif ($mytext[$el]->{'list-style-type'} eq 'A') {
1807 0           $mytext[$el]->{'list-style-type'} = 'upper-alpha';
1808             } elsif ($mytext[$el]->{'list-style-type'} eq 'a') {
1809 0           $mytext[$el]->{'list-style-type'} = 'lower-alpha';
1810             } elsif ($mytext[$el]->{'list-style-type'} eq 'I') {
1811 0           $mytext[$el]->{'list-style-type'} = 'upper-roman';
1812             } elsif ($mytext[$el]->{'list-style-type'} eq 'i') {
1813 0           $mytext[$el]->{'list-style-type'} = 'lower-roman';
1814             } elsif ($mytext[$el]->{'list-style-type'} eq 'upper-latin') {
1815 0           $mytext[$el]->{'list-style-type'} = 'upper-alpha';
1816             } elsif ($mytext[$el]->{'list-style-type'} eq 'lower-latin') {
1817 0           $mytext[$el]->{'list-style-type'} = 'lower-alpha';
1818             }
1819             # note that there are dozens more valid order list formats that
1820             # are NOT currenty supported (TBD). also, although upper/lower-
1821             # latin is valid, the code is expecting alpha
1822             }
1823              
1824             # VOID elements (br, hr, img, area, base, col, embed, input,
1825             # link, meta, source, track, wbr) do not have a separate end
1826             # tag (no children). also incude style and defaults in this list in
1827             # case a stray one shows up (does not have an end tag). this is NOT
1828             # really "self-closing", although the terms are often used
1829             # interchangeably.
1830 0 0 0       if ($tag eq 'br' || $tag eq 'hr' || $tag eq 'img' || $tag eq 'area' ||
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
1831             $tag eq 'base' || $tag eq 'col' || $tag eq 'embed' ||
1832             $tag eq 'input' || $tag eq 'link' || $tag eq 'meta' ||
1833             $tag eq 'source' || $tag eq 'track' || $tag eq 'wbr' ||
1834             $tag eq 'defaults' || $tag eq 'style') {
1835 0           $mytext[$el]->{'empty_element'} = 1;
1836             }
1837              
1838             # 'next' to here
1839             } # for loop through all user-defined elements
1840 0           return @mytext;
1841             } # end of _tag_attributes()
1842              
1843             # go through <style> tags (element 1) and all element style tags (elements 2+)
1844             # and find any bogus CSS property names. assume anything built into the code
1845             # (defaults, etc.) is legitimate -- this is only for user-supplied CSS.
1846             sub _check_CSS_properties {
1847 0     0     my @mytext = @_;
1848              
1849 0           my ($tag, $style, $stylehash);
1850 0           my @supported_properties = qw(
1851             color font-size line-height margin-top margin-right margin-bottom
1852             margin-left text-indent text-align font-family font-weight font-style
1853             display height width text-decoration _marker-before _marker-after
1854             _marker-color _marker-font _marker-size _marker-style _marker-text
1855             _marker-weight _marker-align list-style-type list-style-position
1856             );
1857              
1858             # 1. element 0 is default CSS, no need to check.
1859             # element 1 is user-supplied <style> tags and style=> column() option.
1860             # should be tag=>'style' and 'text'=>''
1861 0           foreach my $tagname (keys %{ $mytext[1] }) {
  0            
1862 0 0         if ($tagname eq 'tag') { next; }
  0            
1863 0 0         if ($tagname eq 'text') { next; }
  0            
1864             #print "tagname <$tagname> check\n";
1865 0           foreach my $propname (keys %{ $mytext[1]->{$tagname} }) {
  0            
1866             #print "checking <$tagname> property '$propname'\n";
1867 0           my $found = 0;
1868 0           for (my $sup=0; $sup < @supported_properties; $sup++) {
1869 0 0         if ($propname eq $supported_properties[$sup]) {
1870 0           $found = 1;
1871 0           last;
1872             }
1873             }
1874 0 0         if (!$found) {
1875 0           print STDERR "Warning: CSS property name '$propname' found in style option or <style>\n is either invalid, or is unsupported by PDF::Builder.\n";
1876             }
1877             #my $style_string = $mytext[1]->{$sel}; TBD check value
1878             }
1879             }
1880            
1881             # 2. elements 2 and up are tags and text. check tags for style attribute
1882             # and check property names there
1883 0           for (my $el = 2; $el < @mytext; $el++) {
1884 0           $tag = $mytext[$el]->{'tag'};
1885 0 0 0       if ($tag eq '' || substr($tag, 0, 1) eq '/') { next; }
  0            
1886 0           $style = $mytext[$el]->{'style'};
1887 0 0         if (!defined $style) { next; }
  0            
1888              
1889 0           $stylehash = _process_style_string({}, $style);
1890             # look at each defined property. do we support it?
1891 0           foreach (keys %$stylehash) {
1892 0           my $propname = $_;
1893 0           my $found = 0;
1894 0           for (my $sup=0; $sup < @supported_properties; $sup++) {
1895 0 0         if ($propname eq $supported_properties[$sup]) {
1896 0           $found = 1;
1897 0           last;
1898             }
1899             }
1900 0 0         if (!$found) {
1901 0           print STDERR "Warning: CSS property name '$propname' found in element $el (tag <$tag>)\n";
1902 0           print STDERR " style is either invalid, or is unsupported by PDF::Builder.\n";
1903             }
1904             }
1905             # TBD stylehash->$_ check values here
1906             }
1907            
1908 0           return;
1909             } # end of _check_CSS_properties
1910              
1911             # the workhorse of the library: output text (modified by tags) in @mytext
1912             sub _output_text {
1913 0     0     my ($start_y, $min_y, $outl, $pdf, $page, $text, $grfx, $restore, $topCol,
1914             $font_size, $markup, $marker_width, $marker_gap, $leading, $optpage,
1915             $page_numbers, $pass_count, $max_passes, $state, @mytext)
1916             = @_;
1917 0           my @outline = @$outl;
1918              
1919             # 'page' in opts, for cross references and left-right paging
1920 0           my $pc = 1;
1921 0           my $mp = 1;
1922 0           my $ppn = 1;
1923 0           my $filename = '';
1924 0           my $fpn = '1';
1925 0           my $LR = 'R';
1926 0           my $bind = 0; # global item
1927 0 0         if (defined $optpage) {
1928 0           ($pc, $mp, $ppn, $filename, $fpn, $LR, $bind) = @$optpage;
1929             }
1930              
1931             # start_y is the lowest extent of the previous line, or the highest point
1932             # of the column outline, and is where we start the next one.
1933             # min_y is the lowest y available within the column outline, outl.
1934             # pdf is the pdf top-level object.
1935             # text is the text context.
1936             # para is a flag that we are at the top of a column (no margin-top added).
1937             # font_size is the default font size to use.
1938             # markup is 'html', 'pre' etc. in case you need to do something different
1939             # marker_width is width (pt) of list markers (right justify within)
1940             # marker_gap is space (pt) between list marker and item text
1941             # leading is the default leading ratio to use.
1942             # mytext is the array of hashes containing tags, attributes, and text.
1943            
1944 0           my ($start_x, $x,$y, $width, $endx); # current position of text
1945 0           my ($asc, $desc, $desc_leading);
1946 0           my $next_y = $start_y;
1947             # we loop to fill next line, starting with a y position baseline set when
1948             # encounter the next text, and know the font, font_size, and thus the
1949             # ascender/descender extents (which may grow). from that we can find
1950             # the next baseline (which could be moved downwards).
1951             # we loop until we either run out of input text, or run out of column
1952 0           my $need_line = 1; # need to start a new line? always 'yes' (1) on
1953             # call to column(). set to 'yes' if tag is for a block
1954             # level display (treat like a paragraph)
1955 0           my $add_x = 0; # amount to add for indent
1956 0           my $add_y = 0; # amount to drop for first line's top margin
1957 0           my @line_extents = (); # for dealing with changes to vertical extents
1958             # changes mid-line
1959              
1960 0           my $start = 1; # counter for ordered lists
1961 0           my $list_depth_u = 0; # nesting level of ul
1962 0           my $list_depth_s = 0; # nesting level of _sl
1963 0           my $list_depth_o = 0; # nesting level of ol
1964 0           my $list_marker = ''; # li marker text
1965 0           my $reversed_ol = 0; # count down from start
1966              
1967 0           my $phrase='';
1968 0           my $remainder='';
1969 0           my $desired_x; # leave undef, is correction for need_line reset of x
1970 0           my @vmargin = (0, 0); # build up largest vertical margin (most negative and most positive)
1971 0           my $current_prop = _init_current_prop(); # determine if a property has
1972             # changed and PDF::Builder routines need calling. see
1973             # _init_current_prop() for list of properties
1974 0           my @properties = ({}); # stack of properties from tags
1975 0           _update_properties($properties[0], $mytext[0], 'body');
1976 0           _update_properties($properties[0], $mytext[1], 'body');
1977 0           my $call_get_font = 0;
1978 0           my %bad_tags; # keep track of invalid HTML tags
1979 0           my $x_adj = 0; # ul, ol list marker move left from right-align position
1980 0           my $y_adj = 0; # ul list marker elevation
1981              
1982             # mytext[0] should be default css values
1983             # mytext[1] should be any <style> tags (consolidated) plus opts 'style'
1984             # user input tags/text start at mytext[2]
1985              
1986             # starting available space, will be updated as new line needed
1987 0           ($start_x,$y, $width) = _get_baseline($start_y, @outline);
1988              
1989 0           for (my $el = 2; $el < scalar @mytext; $el++) {
1990             # discard any empty elements
1991 0 0         if (ref($mytext[$el]) ne 'HASH') { next; }
  0            
1992 0 0         if (!keys %{$mytext[$el]}) { next; }
  0            
  0            
1993            
1994 0 0         if ($mytext[$el]->{'tag'} ne '') {
1995             # tags/end-tags
1996             # should be a tag or end-tag element defined
1997             # for the most part, just set properties at stack top. sometimes
1998             # special actions need to be taken, with actual output (e.g.,
1999             # <hr> or <img>). remember that the properties stack includes
2000             # any units (%, pt, etc.), while current_prop has been converted
2001             # to points.
2002 0           my $tag = lc($mytext[$el]->{'tag'});
2003              
2004             # ================ <tag> tags ==========================
2005 0 0         if (substr($tag, 0, 1) ne '/') {
2006             # take care of 'beginning' tags. dup the top of the properties
2007             # stack, update properties in the stack top element. note that
2008             # current_prop usually isn't updated until the text is being
2009             # processed. some tags need some special processing if they
2010             # do something that isn't just a property change
2011              
2012             # watch for INK HERE where PDF needs to be told to change
2013              
2014             # properties stack new element ---------------------------------
2015             # 1. dup the top of the properties stack for a new set of
2016             # properties to be modified by attributes and CSS
2017 0           push @properties, {};
2018 0           foreach (keys %{$properties[-2]}) {
  0            
2019 0           $properties[-1]->{$_} = $properties[-2]->{$_};
2020             }
2021             # current_prop is still previous text's properties
2022             # 1a. "drop" any property which should not be inherited
2023             # unless value is 'inherit' (explicit inheritance, TBD)
2024             # width (used by <hr>), margin-*, TBD: border-*,
2025             # background-*, perhaps others. if list gets long enough,
2026             # put in separate routine.
2027 0           $properties[-1]->{'width'} = 0; # used for <hr>
2028 0           $properties[-1]->{'height'} = 0; # used for <hr>
2029 0           $properties[-1]->{'margin-top'} = 0;
2030 0           $properties[-1]->{'margin-bottom'} = 0;
2031 0           $properties[-1]->{'margin-left'} = 0;
2032 0           $properties[-1]->{'margin-right'} = 0;
2033             # 1b. unless first entry, save parent's font-size (points)
2034 0 0         if (@properties > 1) {
2035 0           $properties[-1]->{'_parent-fs'} = $properties[-2]->{'font-size'};
2036             } else {
2037             # very first tag in list, no parent (use body.font-size) should be points
2038 0           $properties[-1]->{'_parent-fs'} = $mytext[0]->{'body'}->{'font-size'};
2039             $properties[-1]->{'_parent-fs'} = $mytext[1]->{'body'}->{'font-size'}
2040 0 0         if defined $mytext[1]->{'body'}->{'font-size'};
2041             # strip off any 'pt' unit and leave as bare number
2042 0           $properties[-1]->{'_parent-fs'} =~ s/pt$//;
2043             }
2044              
2045             # 2. update properties top with element [0] (default CSS)
2046             # per $tag
2047 0           _update_properties($properties[-1], $mytext[0], $tag);
2048              
2049             # 3. update properties top with element [1] (styles CSS)
2050             # per $tag
2051 0           _update_properties($properties[-1], $mytext[1], $tag);
2052              
2053             # 4. update properties top with element [1] per any .class
2054             # (styles CSS, which is only one with .class selectors)
2055 0 0         if (defined $mytext[$el]->{'class'}) {
2056             _update_properties($properties[-1], $mytext[1],
2057 0           '.'.$mytext[$el]->{'class'});
2058             }
2059            
2060             # 5. update properties top with element [1] per any #id
2061             # (styles CSS, which is only one with #id selectors)
2062 0 0         if (defined $mytext[$el]->{'id'}) {
2063             _update_properties($properties[-1], $mytext[1],
2064 0           '#'.$mytext[$el]->{'id'});
2065             }
2066            
2067             # 6. update properties top with any tag/style attributes.
2068             # these come from the tag itself: its attributes,
2069             # overridden by any style attribute. these are the
2070             # highest priority properties. everything copied over to
2071             # the stack top, but anything not a real property will end
2072             # up not being used.
2073 0           _update_properties($properties[-1], $mytext[$el]);
2074             # 6a. 3.028 and 3.029 releases, allow text-height as alias
2075             # for line-height (currently only multiplier of font size)
2076 0 0         if (defined $properties[-1]->{'text-height'}) {
2077             $properties[-1]->{'line-height'} =
2078 0           delete $properties[-1]->{'text-height'}; }
2079            
2080             # 7. update size properties to be simply bare points, rather than e.g., 75%
2081             # remember that $current_prop->{'font-size'} init -1, is what was last written to PDF
2082             # current font size (pt) before properties applied
2083 0           my $fs = $properties[-1]->{'_parent-fs'}; # old font size (should always be one, in points > 0)
2084 0           $fs = $properties[-1]->{'font-size'} = _size2pt($properties[-1]->{'font-size'}, $fs, 'usage'=>'font-size');
2085 0 0         $fs = $font_size if $fs == -1; # just in case a -1 sneaks through, $font_size
2086             # should default to 12, override with 'font_size'=>value
2087              
2088 0           $properties[-1]->{'margin-top'} = _size2pt($properties[-1]->{'margin-top'}, $fs, 'usage'=>'margin-top');
2089 0           $properties[-1]->{'margin-right'} = _size2pt($properties[-1]->{'margin-right'}, $fs, 'usage'=>'margin-right');
2090 0           $properties[-1]->{'margin-bottom'} = _size2pt($properties[-1]->{'margin-bottom'}, $fs, 'usage'=>'margin-bottom');
2091 0           $properties[-1]->{'margin-left'} = _size2pt($properties[-1]->{'margin-left'}, $fs, 'usage'=>'margin-left');
2092             # border-* width (TBD, with border to set all four)
2093             # padding-* (TBD, with padding to set all four)
2094             # width = length of <hr> in pts
2095 0           $properties[-1]->{'width'} = _size2pt($properties[-1]->{'width'}, $fs, 'usage'=>'width');
2096             # height (thickness/size of <hr>) in pts
2097 0           $properties[-1]->{'height'} = _size2pt($properties[-1]->{'height'}, $fs, 'usage'=>'height');
2098 0           $properties[-1]->{'text-indent'} = _size2pt($properties[-1]->{'text-indent'}, $fs, 'usage'=>'text-indent');
2099 0           $properties[-1]->{'_marker-size'} = _size2pt($properties[-1]->{'_marker-size'}, $fs, 'usage'=>'_marker-size');
2100             # TBD should inside and outside be set to point values here?
2101 0 0 0       if (defined $properties[-1]->{'list-style-position'} &&
      0        
2102             $properties[-1]->{'list-style-position'} ne 'inside' &&
2103             $properties[-1]->{'list-style-position'} ne 'outside') {
2104 0           $properties[-1]->{'list-style-position'} = _size2pt($properties[-1]->{'list-style-position'}, $fs,
2105             'parent_size'=>$marker_width + $marker_gap, 'usage'=>'list-style-position');
2106             }
2107              
2108             # update current_prop hash -------------------------------------
2109             # properties stack already updated
2110             # some current_prop must be updated here, such as stroke
2111             # color for <hr>, font-size for top and bottom margins
2112              
2113             # block level elements -----------------------------------------
2114 0 0         if ($properties[-1]->{'display'} eq 'block') {
2115 0           $need_line = 1;
2116 0           $start_y = $next_y;
2117 0           $add_x = $add_y = 0;
2118             # block display with a non-zero top margin and/or bottom
2119             # margin... set skip to larger of the two.
2120             # when text is ready to be output, figure both any new
2121             # top margin (for that text) and compare to the existing
2122             # bottom margin (in points) saved at the end of the previous
2123             # text.
2124             # if paragraph and is marked as a continuation (i.e., spanned two columns),
2125             # suppress indent (below) and suppress top margin by setting topCol flag
2126 0 0 0       my $pcont = ($tag eq 'p' && defined $mytext[$el]->{'cont'} && $mytext[$el]->{'cont'})? 1: 0;
2127 0 0         $topCol = 1 if $pcont;
2128 0           $vmargin[0] = min($vmargin[0], $properties[-1]->{'margin-top'});
2129 0           $vmargin[1] = max($vmargin[1], $properties[-1]->{'margin-top'});
2130             # now that need_line etc. has been set due to block display,
2131             # change stack top into 'inline'
2132 0           $properties[-1]->{'display'} = 'inline';
2133             }
2134              
2135             # handle specific kinds of tags' special processing
2136             # if no code for a tag, yet uncommented, it's supported
2137             # (just no special processing at this point)
2138             # in many cases, all that was needed was to set properties,
2139             # and normal text output takes care of the rest
2140             #
2141 0 0         if ($tag eq 'p') {
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
2142             # indent for start of paragraph
2143 0           $add_x = $properties[-1]->{'text-indent'}; # indent by para indent amount
2144 0           $add_y = 0;
2145             # p with cont=>1 is continuation of paragraph in new column
2146             # no indent and no top margin... just start a new line
2147 0 0 0       if (defined $mytext[$el]->{'cont'} && $mytext[$el]->{'cont'}) {
2148 0           $add_x = $add_y = 0;
2149             }
2150             } elsif ($tag eq 'i') {
2151             } elsif ($tag eq 'em') {
2152             } elsif ($tag eq 'b') {
2153             } elsif ($tag eq 'strong') {
2154             } elsif ($tag eq 'font') { # face already renamed to
2155             # font-family, size already renamed to font-size, color
2156             } elsif ($tag eq 'span') {
2157             # needs style= or <style> to be useful
2158             } elsif ($tag eq 'ul') {
2159 0           $list_depth_u++; # for selecting default marker text
2160             # indent each list level by same amount (initially 0)
2161 0           $properties[-1]->{'_left'} = $properties[-1]->{'_left_nest'};
2162             # next list to be nested will start here
2163 0           $properties[-1]->{'_left_nest'} += $marker_width+$marker_gap;
2164             } elsif ($tag eq '_sl') {
2165 0           $list_depth_s++; # for indent level
2166             # indent each list level by same amount (initially 0)
2167 0           $properties[-1]->{'_left'} = $properties[-1]->{'_left_nest'};
2168             # next list to be nested will start here
2169 0           $properties[-1]->{'_left_nest'} += $marker_width+$marker_gap;
2170             } elsif ($tag eq 'ol') {
2171             # save any existing start and reversed_ol values
2172 0           $properties[-2]->{'_start'} = $start; # current start
2173 0           $properties[-2]->{'_reversed_ol'} = $reversed_ol; # cur flag
2174              
2175 0           $start = 1;
2176 0 0         if (defined $mytext[$el]->{'start'}) {
2177 0           $start = $mytext[$el]->{'start'};
2178             }
2179 0 0         if (defined $mytext[$el]->{'reversed'}) {
2180 0           $reversed_ol = 1;
2181             } else {
2182 0           $reversed_ol = 0;
2183             }
2184 0           $list_depth_o++; # for selecting default marker format
2185             # indent each list level by same amount (initially 0)
2186 0           $properties[-1]->{'_left'} = $properties[-1]->{'_left_nest'};
2187 0           $properties[-1]->{'_left_nest'} += $marker_width+$marker_gap;
2188             #} elsif ($tag eq 'img') { # hspace and vspace already
2189             # margins, width, height
2190             # TBD for 3.029 currently ignored
2191             } elsif ($tag eq 'a') {
2192             # no special treatment at this point
2193             #} elsif ($tag eq 'pre') {
2194             # white-space etc. no consolidating whitespace
2195             # TBD for 3.029 currently ignored
2196             } elsif ($tag eq 'code') { # font-family sans-serif + constant width 75% font-size
2197             } elsif ($tag eq 'blockquote') {
2198             } elsif ($tag eq 'li') {
2199             # where to start <li> text
2200             # after /marker, $x is in desired place
2201             # set its new _left for subsequent lines
2202 0 0         if ($properties[-1]->{'list-style-position'} eq 'inside') {
    0          
2203             # _left unchanged
2204             } elsif ($properties[-1]->{'list-style-position'} eq 'outside') {
2205             # li's copy of _left, should be reset at /li
2206 0           $properties[-1]->{'_left'} += $marker_width+$marker_gap;
2207             } else {
2208             # extension to CSS (should already be in pts)
2209 0           $properties[-1]->{'_left'} += $properties[-1]->{'list-style-position'};
2210             }
2211             } elsif ($tag eq 'h1') { # TBD align
2212             # treat headings as paragraphs
2213             } elsif ($tag eq 'h2') {
2214             } elsif ($tag eq 'h3') {
2215             } elsif ($tag eq 'h4') {
2216             } elsif ($tag eq 'h5') {
2217             } elsif ($tag eq 'h6') {
2218             } elsif ($tag eq 'hr') {
2219             # actually draw a horizontal line INK HERE
2220 0           $start_y = $next_y;
2221            
2222             # drop down page by any pending vertical margin spacing
2223 0 0 0       if ($vmargin[0] != 0 || $vmargin[1] != 0) {
2224 0 0         if (!$topCol) {
2225 0           $start_y -= ($vmargin[0]+$vmargin[1]);
2226             }
2227 0           @vmargin = (0, 0); # reset counters
2228             }
2229 0           $topCol = 0; # for rest of column do not suppress vertical margin
2230              
2231 0           my $oldcolor = $grfx->strokecolor();
2232 0           $grfx->strokecolor($properties[-1]->{'color'});
2233 0           my $oldlinewidth = $grfx->linewidth();
2234 0   0       my $thickness = $properties[-1]->{'height'} || 1; # HTML size attribute
2235 0           $grfx->linewidth($thickness);
2236 0           my $y = $start_y - $thickness/2;
2237 0           ($start_x,$y, $width) = _get_baseline($y, @outline);
2238 0           $x = $start_x + $properties[-1]->{'_left'};
2239 0           $width -= $properties[-1]->{'_left'} + $properties[-1]->{'_right'}; # default full width
2240 0           my $available = $width; # full width amount
2241             # if there is a requested width, use the smaller of the two
2242             # TBD future, width as % of possible baseline,
2243             # center or right aligned, explicit units (pt default)
2244 0 0 0       if ($properties[-1]->{'width'} > 0 && # default to use full width is -1
2245             $properties[-1]->{'width'} < $width) {
2246 0           $width = $properties[-1]->{'width'}; # reduced width amount
2247             }
2248 0           my $align = 'center';
2249 0 0         if (defined $mytext[$el]->{'align'}) {
2250 0           $align = lc($mytext[$el]->{'align'});
2251             }
2252 0 0         if ($align eq 'left') {
    0          
2253             # no change to x
2254             } elsif ($align eq 'right') {
2255 0           $x += ($available-$width);
2256             } else {
2257 0 0         if ($align ne 'center') {
2258 0           carp "<hr> align not 'left', 'center', or 'right'. Ignored.";
2259 0           $align = 'center';
2260             }
2261 0           $x += ($available-$width)/2;
2262             }
2263 0           $endx = $x + $width;
2264              
2265 0           $grfx->move($x, $y);
2266 0           $grfx->hline($endx);
2267 0           $grfx->stroke();
2268 0           $y -= $thickness/2;
2269 0           $next_y = $y;
2270             # empty (self closing) tag, so won't go through a /hr to set bottom margin.
2271             # is in empty tag list, so will get proper treatment
2272            
2273             # restore changed values
2274 0           $grfx->linewidth($oldlinewidth);
2275 0           $grfx->strokecolor($oldcolor);
2276             #} elsif ($tag eq 'br') { # TBD force new line
2277             #} elsif ($tag eq 'sup') { # TBD
2278             #} elsif ($tag eq 'sub') { # TBD
2279             } elsif ($tag eq 'u') {
2280             } elsif ($tag eq 'ins') {
2281             } elsif ($tag eq 's') {
2282             } elsif ($tag eq 'strike') {
2283             } elsif ($tag eq 'del') {
2284            
2285             # tags maybe some time in the future TBD
2286             #} elsif ($tag eq 'address') { # inline formatting
2287             #} elsif ($tag eq 'article') { # discrete section
2288             #} elsif ($tag eq 'aside') { # discrete section
2289             #} elsif ($tag eq 'base') {
2290             #} elsif ($tag eq 'basefont') {
2291             #} elsif ($tag eq 'big') { # font-size 125%
2292             # already taken care of head, body
2293             #} elsif ($tag eq 'canvas') {
2294             #} elsif ($tag eq 'caption') {
2295             #} elsif ($tag eq 'center') { # margin-left/right auto
2296             #} elsif ($tag eq 'cite') { # quotes, face?
2297             #} elsif ($tag eq 'dl') { # similar to ul/li
2298             #} elsif ($tag eq 'dt') {
2299             #} elsif ($tag eq 'dd') {
2300             #} elsif ($tag eq 'div') { # requires width, height, left, etc.
2301             #} elsif ($tag eq 'figure') {
2302             #} elsif ($tag eq 'figcap') {
2303             #} elsif ($tag eq 'footer') { # discrete section
2304             #} elsif ($tag eq 'header') { # discrete section
2305             #} elsif ($tag eq 'kbd') { # font-family sans-serif +
2306             # constant width 75% font-size
2307             #} elsif ($tag eq 'mark') {
2308             #} elsif ($tag eq 'nav') { # discrete section
2309             #} elsif ($tag eq 'nobr') { # treat all spaces within as NBSPs?
2310             #} elsif ($tag eq 'q') { # ldquo/rdquo quotes around
2311             #} elsif ($tag eq 'samp') { # font-family sans-serif +
2312             # constant width 75% font-size
2313             #} elsif ($tag eq 'section') { # discrete section
2314             #} elsif ($tag eq 'small') { # font-size 75%
2315             #} elsif ($tag eq 'summary') { # discrete section
2316             } elsif ($tag eq 'style') {
2317             # sometimes some stray empty style tags seem to come
2318             # through... can be ignored
2319             } elsif ($tag eq '_marker') {
2320             # at this point, all properties are set in usual way. only
2321             # tasks remaining are to 1) determine the text,
2322             # 2) set CSS properties to default marker conventions.
2323             # 3) override text, color, etc. from _marker-* properties.
2324             # 4) if not left justified, set reference x location
2325             #
2326             # paragraph, but label depends on parent (list-style-type)
2327             # type and value attributes can override parent
2328             # list-style-type and start
2329 0 0 0       if (defined $properties[-1]->{'_marker-text'} &&
2330             $properties[-1]->{'_marker-text'} ne '') {
2331             # explicitly-defined _marker-text overrides all else
2332 0           $list_marker = $properties[-1]->{'_marker-text'};
2333             } else {
2334             # li's 'value', if any. li is at el+3.
2335             # TBD check if parent is ol? (current_list top == o)
2336 0 0         if (defined $mytext[$el+3]->{'value'}) {
2337 0           $start = $mytext[$el+3]->{'value'};
2338             }
2339             # li's 'list-style-type', if any (was 'type'). li is at el+3.
2340             # TBD does this only apply to <ol>? check?
2341 0 0         if (defined $mytext[$el+3]->{'type'}) {
2342             $properties[-1]->{'list-style-type'} =
2343 0           $mytext[$el+3]->{'type'};
2344             }
2345             # determine li marker
2346             $list_marker = _marker(
2347             $properties[-1]->{'list-style-type'},
2348             $list_depth_u, $list_depth_o, $list_depth_s,
2349             $start,
2350             $properties[-1]->{'_marker-before'},
2351 0           $properties[-1]->{'_marker-after'});
2352 0 0         if (substr($list_marker, 0, 1) eq '.') {
2353             # it's a bullet character (or '')
2354             } else {
2355             # fully formatted ordered list item
2356 0 0         if ($reversed_ol) {
2357 0           $start--;
2358             } else {
2359 0           $start++;
2360             }
2361             }
2362             # starting at _left, position x for marker LJ, CJ, or RJ
2363             # WITHIN _left to _left+marker_width
2364 0           $desired_x = $start_x + $properties[-1]->{'_left'};
2365 0 0         if ($properties[-1]->{'_marker-align'} eq 'left') {
    0          
2366             # should already be at _left
2367 0           $properties[-1]->{'text-align'} = 'left';
2368             } elsif ($properties[-1]->{'_marker-align'} eq 'center') {
2369 0           $desired_x += $marker_width/2;
2370 0           $properties[-1]->{'text-align'} = 'center';
2371             } else { # right (default)
2372 0           $desired_x += $marker_width;
2373 0           $properties[-1]->{'text-align'} = 'right';
2374             }
2375              
2376             # dl: variable length marker width, minimum size given,
2377             # which is where dd left margin is
2378             # handle dl/dt/dd separately from ul/ol/_sl
2379             }
2380              
2381             # list_marker is set
2382 0 0 0       if ($list_marker eq '.none' || $list_marker =~ /^ *$/) {
2383             # list_marker '' or ' ' or '.none': don't reset
2384             # properties as it generates redundant color, font,
2385             # size, etc. changes because no ink laid down
2386             } else {
2387             # issue property changes when necessary
2388 0           my $fs = $properties[-1]->{'font-size'};
2389             # override any other property with corresponding _marker-*
2390             # properties-to-PDF-calls have NOT yet been done
2391 0 0 0       if (defined $properties[-1]->{'_marker-color'} &&
2392             $properties[-1]->{'_marker-color'} ne '') {
2393             $properties[-1]->{'color'} =
2394 0           $properties[-1]->{'_marker-color'};
2395             }
2396 0 0 0       if (defined $properties[-1]->{'_marker-font'} &&
2397             $properties[-1]->{'_marker-font'} ne '') {
2398             $properties[-1]->{'font-family'} =
2399 0           $properties[-1]->{'_marker-font'};
2400             }
2401 0 0 0       if (defined $properties[-1]->{'_marker-style'} &&
2402             $properties[-1]->{'_marker-style'} ne '') {
2403             $properties[-1]->{'font-style'} =
2404 0           $properties[-1]->{'_marker-style'};
2405             }
2406 0 0 0       if (defined $properties[-1]->{'_marker-size'} &&
2407             $properties[-1]->{'_marker-size'} ne '') {
2408             $properties[-1]->{'font-size'} =
2409 0           $properties[-1]->{'_marker-size'};
2410             }
2411 0 0 0       if (defined $properties[-1]->{'_marker-weight'} &&
2412             $properties[-1]->{'_marker-weight'} ne '') {
2413             $properties[-1]->{'font-weight'} =
2414 0           $properties[-1]->{'_marker-weight'};
2415             }
2416             # _marker-align is not a standard CSS property
2417            
2418             # finally, update the text within the _marker
2419 0 0         if ($list_marker ne '') {
2420             # list marker should be nonblank for <ol> and <ul>,
2421             # empty for <_sl> (just leave marker text alone)
2422            
2423             # output the marker. x,y is the upper left baseline of
2424             # the <li> text, so text_right() the marker
2425 0 0         if ($list_marker =~ m/^\./) {
2426             # it's a symbol for <ul>. 50% size, +y by 33% size
2427             # TBD url image and other character symbols
2428             # (possibly in other than Zapf Dingbats).
2429 0 0         if ($list_marker eq '.disc') {
    0          
    0          
    0          
    0          
2430 0           $list_marker = chr(108); # 'l'
2431             } elsif ($list_marker eq '.circle') {
2432 0           $list_marker = chr(109); # 'm'
2433             } elsif ($list_marker eq '.square') {
2434 0           $list_marker = chr(110); # 'n'
2435             } elsif ($list_marker eq '.box') {
2436 0           $list_marker = chr(111); # non-standard 'o'
2437             } elsif ($list_marker eq '.none') {
2438 0           $list_marker = '';
2439             }
2440            
2441             # ul defaults
2442 0           $x_adj = $y_adj = 0;
2443 0 0         if ($list_marker ne '') {
2444             # x_adj (- to left) .3em+2pt for gap marker to text
2445             #$x_adj = -(0.3 * $fs + 2);
2446             # figure y_adj for ul marker (raise, since smaller)
2447             # TBD: new CSS to set adjustments
2448 0           $y_adj = -0.33*_size2pt($properties[-1]->{'font-size'}, $fs, 'usage'=>'list marker raise')/$fs + 0.33;
2449 0           $y_adj *= $fs;
2450             } else {
2451             # empty text
2452             }
2453             } else {
2454             # it's a formatted count for <ol>
2455             # ol defaults
2456             # x_adj (- to left) .3em for gap marker to text
2457             #$x_adj = -(0.3 * $fs);
2458             }
2459              
2460             } else {
2461             # '' list-marker for _sl, leave as is so no output
2462             # no change to font attributes
2463             }
2464             # insert list_marker into text field at $el+1 and end
2465             # of marker at $el+2. no need to change $el.
2466             # IF existing text not empty or blank, leave alone!
2467 0 0         if ($mytext[$el+1]->{'text'} =~ /^ *$/) {
2468 0           $mytext[$el+1]->{'text'} = $list_marker;
2469             }
2470             } # list marker NOT to be skipped
2471 0           $list_marker = '';
2472              
2473             #} elsif ($tag eq '_ovl') { # TBD
2474             #} elsif ($tag eq '_k') { # TBD
2475             } elsif ($tag eq '_move') {
2476             # move left or right on current baseline, per 'x' and/or
2477             # 'dx' attribute values
2478             # TBD: consider y/dy positioning too, would need to adjust
2479             # baseline to new y before getting fresh start_x and x
2480             # first, we need valid $x and $y. if left by the previous
2481             # write, use them. otherwise need to start at the left edge
2482             # of the column (start_x) and y on the baseline
2483 0 0         if (!defined $y) {
2484 0           $y = $start_y - 8.196;
2485             }
2486 0           ($start_x,$y, $width) = _get_baseline($y, @outline);
2487 0 0         if (!defined $x) {
2488 0           $x = $start_x;
2489             }
2490             # need to increase x and decrease width by any
2491             # left margin amount
2492 0           $x = $start_x + $properties[-1]->{'_left'};
2493 0           $width -= $properties[-1]->{'_left'} + $properties[-1]->{'_right'};
2494 0           $endx = $start_x + $width;
2495 0           my ($attr, $attrv, $attru);
2496             # handle "x" attribute first (absolute positioning),
2497             # leaving $x at the new position. no check on going beyond
2498             # either end of the line.
2499 0 0         if (defined $mytext[$el]->{'x'}) {
2500             # 'x' attribute given, treat as move relative to start_x
2501 0           $attr = $mytext[$el]->{'x'};
2502             # TBD: a more rigorous number check
2503 0 0         if ($attr =~ m/^(-?[\d.]+)(pt$|%$|$)/i) {
2504 0           $attrv = $1;
2505 0           $attru = $2;
2506 0 0         if ($attru eq '%') {
2507 0           $x = $start_x + $attrv/100*$width; # % of width
2508             } else {
2509 0           $x = $start_x + $attrv; # pts
2510             }
2511             } # if can't match pattern, x remains unchanged
2512             }
2513             # now handle "dx" attribute (relative positioning),
2514             # leaving $x at the new position. no check on going beyond
2515             # either end of the line.
2516 0 0         if (defined $mytext[$el]->{'dx'}) {
2517             # 'dx' attribute given, treat as move relative to where
2518             # 'x' left it (if given), else relative to current x
2519 0           $attr = $mytext[$el]->{'dx'};
2520             # TBD: a more rigorous number check
2521 0 0         if ($attr =~ m/^(-?[\d.]+)(pt$|%$|$)/i) {
2522 0           $attrv = $1;
2523 0           $attru = $2;
2524 0 0         if ($attru eq '%') {
2525 0           $x += $attrv/100*$width; # % of width
2526             } else {
2527 0           $x += $attrv; # pts
2528             }
2529             } # if can't match pattern, x remains unchanged
2530             }
2531             # allow <0 or >width to go beyond baseline at user's risk
2532             # (likely to be cut off if exceed line end on right, who
2533             # knows what will happen on the left)
2534 0           $text->translate($x, $y);
2535             # any pending need_line will reset x to start_x, so save
2536             # desired x (otherwise is undef)
2537 0           $desired_x = $x;
2538             # HTML::TreeBuilder may have left a /_move tag. problem?
2539              
2540             } elsif ($tag eq '_ref') {
2541             # cross reference tag tgtid= fit=
2542             # $mytext[$el] is this tag, $el+1 is link text (update
2543             # from target if empty or undefined), so there IS a
2544             # child text and end tag for _ref
2545             # add 'annot' info to link text field. output only current
2546             # text of link, save link data for very end.
2547 0           my ($tgtid, $fit, $title);
2548 0           $tgtid = $mytext[$el]->{'tgtid'}; # required!
2549 0 0         if (!defined $tgtid) { croak "<_ref> missing tgtid=."; }
  0            
2550 0           $fit = $mytext[$el]->{'fit'}; # optional
2551 0   0       $fit //= ''; # use default fit
2552 0           $title = $mytext[$el]->{'title'}; # optional
2553 0   0       $title //= '';
2554 0 0         $title = "[no title given]" if $title eq '';
2555             # if no title, try to get from target
2556             # TBD override of page_numbers
2557              
2558 0           my ($tfn, $tppn, $tid);
2559             # first, #id convert to just id (only at beginning), or
2560             # #p-x-y[-z] split into #p and fit
2561 0 0         if ($tgtid =~ /^#[^#]/) {
2562             # starts with single #
2563 0           my @fields = split /-/, $tgtid;
2564             # if size 1, is just #id or #p
2565 0 0 0       if (@fields == 1) {
    0          
2566             # if just #p, see if p is integer 1+
2567 0 0         if ($tgtid =~ /^#[1-9]\d*$/) {
2568             # is #p so leave $tgtid as is
2569             } else {
2570             # is #id -- strip off leading #
2571 0           $tgtid = substr($tgtid, 1);
2572             }
2573             } elsif (@fields == 3 || @fields == 4) {
2574             # possibly #p-x-y-z default z = null
2575             # only checking if p is integer 1+
2576             # TBD check if x and y are numbers >= 0
2577             # TBD check if z is number > 0 or 'null' or 'undef'
2578 0 0         if ($fields[0] =~ /^#[1-9]\d*$/) {
2579             # is #p so build $fit
2580 0           $tgtid = $fields[0];
2581 0 0         if (@fields == 3) { push @fields, 'null'; }
  0            
2582 0 0         if ($fields[3] eq 'undef') { $fields[3] = 'null'; }
  0            
2583 0           $fit = "xyz,$fields[1],$fields[2],$fields[3]";
2584             } else {
2585             # is #id -- strip off leading #
2586 0           $tgtid = substr($tgtid, 1);
2587             }
2588             } else {
2589             # wrong number of fields, is just #id
2590             # so strip off leading #
2591 0           $tgtid = substr($tgtid, 1);
2592             }
2593             }
2594              
2595             # split up tgtid into various fields
2596 0 0         if ($tgtid =~ /##/) {
    0          
2597             # external link's file, and ppn of target
2598 0           ($tfn, $tppn) = split /##/, $tgtid;
2599 0   0       $tfn //= '';
2600 0           $tid = "##$tppn";
2601             } elsif ($tgtid =~ /#/) {
2602             # external link's file, and Named Destination
2603 0           ($tfn, $tppn) = split /#/, $tgtid;
2604 0   0       $tfn //= '';
2605 0           $tid = "#$tppn";
2606             } else {
2607             # an id=
2608 0           $tfn = ''; # internal link only
2609 0           $tppn = -1; # unknown at this time
2610 0           $tid = $tgtid;
2611             }
2612              
2613             # add a new array entry to xrefs, or update existing one
2614             # knowing title, fit, tid, tfn, tppn from <_ref>
2615             # sptr = pointer (ref) to this entry in xrefs
2616             # tptr = pointer (ref) to matching target in xreft
2617 0           my $sindex = $state->{'sindex'};
2618 0           my ($sptr, $tfpn, $tptr);
2619 0 0 0       if ($pass_count == 1 && defined $sindex) {
2620             # add new entry at $sindex
2621 0           $state->{'xrefs'}->[$sindex] = {};
2622             # ptr to hash {id} and its siblings (see Builder.pm)
2623 0           $sptr = $state->{'xrefs'}->[$sindex];
2624             # the following items should never change after the
2625             # first pass
2626             # it's possible that this _ref is totally self-contained
2627             # and does not refer to any target id
2628 0           $sptr->{'id'} = $tid;
2629 0           $sptr->{'fit'} = $fit;
2630 0           $sptr->{'tfn'} = $tfn;
2631             # items that CAN change between passes
2632 0           $sptr->{'title'} = $title;
2633 0           $sptr->{'tx'} = 0;
2634 0           $sptr->{'ty'} = 0;
2635 0           $sptr->{'tfpn'} = '';
2636             } else {
2637             # entry already exists, at $sindex
2638             # update anything that might change pass-to-pass
2639             # set 'changed' flag only if updated AFTER this pass's
2640             # title text and other_pg have been laid down.
2641             # if $page_numbers == 2, a change in ppn's either
2642             # source or target is of concern TBD
2643 0 0         $sptr = $state->{'xrefs'}->[$sindex] if defined $sindex;
2644             # nothing in this section to warrant changed flag
2645             # and we're about to output a fresh copy of link text
2646             # and 'other_pg' text
2647             }
2648              
2649             # whether pass 1 initialization or pass 2+ update
2650             # the following can change without forcing another pass
2651             #
2652 0           $sptr->{'tppn'} = $tppn;
2653 0           $sptr->{'sppn'} = $ppn;
2654              
2655             # have we found this target id already?
2656 0 0         if (defined $state->{'xreft'}{'_reft'}{$tid}) {
2657 0           $tptr = $state->{'xreft'}{'_reft'}{$tid};
2658             } else {
2659 0           $tptr = undef; # just to be certain
2660             }
2661              
2662 0 0         if (defined $tptr) {
2663             # does the title need an update from target?
2664 0 0 0       if ($sptr->{'title'} eq '[no title given]' &&
2665             $tptr->{'title'} ne '[no title given]') {
2666 0           $sptr->{'title'} = $tptr->{'title'};
2667             # no need to mark as changed, as about to output
2668             # the link text (title, other_pg)
2669             #$state->{'changed_target'}->{$tid} = 1;
2670             # update child text
2671 0           $mytext[$el+1]{'text'} = $sptr->{'title'};
2672             }
2673              
2674             # other fields that may change
2675 0           $sptr->{'tx'} = $tptr->{'tx'};
2676 0           $sptr->{'ty'} = $tptr->{'ty'};
2677 0           $sptr->{'tfpn'} = $tptr->{'tfpn'}; # affects other_pg
2678             # other fields that may be overridden by target
2679             $sptr->{'tppn'} = $tptr->{'tppn'}
2680 0 0         if ($sptr->{'tppn'} == -1); # affects other_pg
2681 0           $sptr->{'tag'} = $tptr->{'tag'};
2682             }
2683             # TBD figure 'other_pg' text when actually output it,
2684             # and update field and set flag if changed (pass > 1)
2685             # once know sppn and tppn (in same PDF) and
2686             # $page_numbers > 0. note that a _ref can override
2687             # the global page_numbers with its own (e.g., to
2688             # force = 1 'on page N' when global == 2)
2689 0           $sptr->{'other_pg'} = $sptr->{'prev_other_pg'} = ''; # TBD
2690             #
2691             # Note that Named Destinations do not get a page
2692             # designation output (no "on page $" etc.) regardless
2693             # of $page_numbers setting. TBD what about internal jumps?
2694             # may not know page of an external jump.
2695              
2696             # via 'annot' flag tell title text to grab rectangle corners
2697             # and stick in {'click'} area array. may be multiple such
2698             # rectangles (click areas) if text wraps. also determine
2699             # 'other_pg' string and update entry (TBD)
2700 0           $sptr->{'click'} = [];
2701             # TBD title that includes embedded tags to support
2702 0           $mytext[$el+1]->{'annot'} = $sindex;
2703              
2704 0           $state->{'sindex'} = ++$sindex;
2705              
2706             } elsif ($tag eq '_reft') {
2707             # cross reference target tag id=
2708             # for markdown, only target available
2709 0           my $id = $mytext[$el]->{'id'}; # required!
2710 0 0         if (!defined $id) { croak "<_reft> missing id=."; }
  0            
2711 0           my $title = $mytext[$el]->{'title'}; # optional
2712             # code handling id= and checking tag_lists from here on out
2713             # to deal with <_reft>
2714              
2715             } elsif ($tag eq '_nameddest') {
2716             # define a Named Destination at this point
2717             # possibly a fit attribute is defined
2718 0           my $name = $mytext[$el]->{'name'}; # required!
2719 0 0         if (!defined $name) { croak "<_nameddest> missing name=."; }
  0            
2720 0           my $fit = $mytext[$el]->{'fit'}; #optional
2721 0   0       $fit //= '';
2722              
2723 0           my $ptr = $state->{'nameddest'};
2724 0           $ptr->{$name} = {};
2725 0           $ptr->{$name}{'fit'} = $fit;
2726 0           $ptr->{$name}{'ppn'} = $ppn; # this and following can change
2727 0           $ptr->{$name}{'x'} = $x; # on subsequent passes
2728 0           $ptr->{$name}{'y'} = $y;
2729              
2730             # special directives such as (TBD)
2731             # <_endc> force end of column here (while still filling line)
2732             # e.g., to prevent an orphan
2733             # <_nolig></_nolig> forbid ligatures in this range
2734             # <_lig gid='nnn'>c</_lig> replace character(s) by a ligature
2735             # <_alt gid='nnn'>c</_alt> replace character(s) by alternate
2736             # glyph such as a swash. font-dependent
2737             # <_hyp>, <_nohyp> control hypenation in a word (and remember
2738             # rules when see this word again)
2739              
2740             } else {
2741             # unsupported or invalid tag found
2742             # keep list of those found, error message once per tag
2743             # per column() call
2744 0 0         if (!defined $bad_tags{$tag}) {
2745 0           print STDERR "Warning: tag <$tag> either invalid or currently unsupported by PDF::Builder.\n";
2746 0           $bad_tags{$tag} = 1;
2747             }
2748             # treat as <span>
2749 0           $tag = $mytext[$el]->{'tag'} = 'span';
2750             }
2751              
2752             # any common post-tag work -------------------------------------
2753             # does this tag have an id attribute, and is it in one or
2754             # more of the watch lists to add to references?
2755             # _reft tags already checked that id= given
2756 0 0 0       if (defined $state && exists $mytext[$el]->{'id'}) {
2757 0           my $id = $mytext[$el]->{'id'};
2758             # might have a title, too
2759 0           my $title = $mytext[$el]->{'title'}; # optional (_reft)
2760 0 0         $title = '' if !defined $title;
2761             # if no title in source or target tags, will have to
2762             # look at child text of various tags
2763            
2764             # yes, it has an id. now check against lists
2765             # this tag will produce an entry in xreft for each list
2766             # that it is in TBD find way to consolidate into one?
2767 0           my %tag_lists = %{$state->{'tag_lists'}};
  0            
2768             # will contain at least _reft list with _reft tag
2769             # goes into xreft/listname/id structure
2770 0           foreach my $list (keys %tag_lists) { # _reft, TOC, etc
2771 0           my @tags = @{$tag_lists{$list}}; # tags to check
  0            
2772 0           foreach my $xtag (@tags) {
2773 0 0         if ($tag eq $xtag) {
2774             # this tag (with id=) is being used by target
2775             # list $list (e.g., '_reft')
2776             # add (or update) this tag's data into the $list
2777 0           my $tptr;
2778              
2779 0           $tptr = $state->{'xreft'}->{$list}->{$id};
2780 0 0         if (!defined $tptr) {
2781 0           $state->{'xreft'}->{$list}->{$id} = {};
2782 0           $tptr = $state->{'xreft'}->{$list}->{$id};
2783             # add new entry or overwrites old one
2784             # perhaps pass > 1 see if $id already exists
2785             # these three should never change on update
2786 0           $tptr->{'tfn'} = $filename;
2787 0           $tptr->{'title'} = $title;
2788 0           $tptr->{'tag'} = $tag;
2789             # if title empty, look for child text
2790             # use this title if no title= on <_ref>
2791 0 0         if ($title eq '') {
2792             # heading has child text, add others
2793             # as useful
2794 0 0 0       if ($tag =~ /^h\d$/ ||
      0        
2795             $tag eq '_part' ||
2796             $tag eq '_chap') {
2797 0           $title = _get_child_text(
2798             \@mytext, $el );
2799             # might still be ''
2800             }
2801 0           $tptr->{'title'} = $title;
2802             }
2803             } # add a new id= to xreft, or update existing
2804             # these may change from pass to pass
2805 0           $tptr->{'tppn'} = $ppn;
2806 0           $tptr->{'tfpn'} = $fpn;
2807 0   0       $tptr->{'tx'} = $x//0; # sometimes undef
2808 0           $tptr->{'ty'} = $y;
2809             # done creating or updating an entry
2810              
2811             # every link source using this id gets update
2812             # and "changed" flag set for visible text change
2813 0           for (my $sindex=0;
2814 0           $sindex < scalar(@{$state->{'xrefs'}});
2815             $sindex++) {
2816 0 0         if ($state->{'xrefs'}->[$sindex]->{'id'} eq $id) {
2817             # yes, link source exists. update it and
2818             # set flag if need another pass
2819 0           my $another_pass = 0;
2820 0           my $sptr = $state->{'xrefs'}->[$sindex];
2821 0 0 0       if ($sptr->{'title'} eq '[no title given]' &&
2822             $tptr->{'title'} ne '[no title given]') {
2823 0           $sptr->{'title'} = $tptr->{'title'};
2824 0           $another_pass = 1;
2825             }
2826             # 'other_pg' determined elsewhere
2827 0 0         $state->{'changed_target'}{$id} = 1
2828             if $another_pass;
2829              
2830             # other fields in xrefs to update
2831             # from xreft entry
2832 0           $sptr->{'tx'} = $tptr->{'tx'};
2833 0           $sptr->{'ty'} = $tptr->{'ty'};
2834 0           $sptr->{'tag'} = $tptr->{'tag'};
2835             $sptr->{'tfn'} = $tptr->{'tfn'}
2836 0 0         if $sptr->{'tfn'} eq '';
2837             $sptr->{'tfpn'} = $tptr->{'tfpn'}
2838 0 0         if $sptr->{'tfpn'} eq '';
2839             $sptr->{'tppn'} = $tptr->{'tppn'}
2840 0 0         if $sptr->{'tppn'} < 1;
2841              
2842             } # link source targeting this id
2843             } # loop sindex through all link sources
2844             } # found a tag of interest in a list
2845             } # check against list of tags
2846             } # search through target tag lists
2847             } # tag with id= see if wanted for target lists
2848            
2849 0 0         if (defined $mytext[$el]->{'empty_element'}) {
2850             # empty/void tag, no end tag, pop property stack
2851             # as this tag's actions have already been taken
2852             # update bottom margin. display already reset to 'inline'
2853 0           $vmargin[0] = min($vmargin[0], $properties[-1]->{'margin-bottom'});
2854 0           $vmargin[1] = max($vmargin[1], $properties[-1]->{'margin-bottom'});
2855              
2856 0           pop @properties;
2857             # should revert any changed font-size
2858 0           splice(@mytext, $el, 1);
2859 0           $el--; # end of loop will advance $el
2860             # no text as child of this tag, whatever it does, it has
2861             # to be completely handled in this section
2862             }
2863              
2864             # end of handling starting tags <tag>
2865              
2866             # ================ </tags> end tags ======================
2867             } else {
2868             # take care of 'end' tags. some end tags need some special
2869             # processing if they do something that isn't just a
2870             # property change. current_prop should be up to date.
2871 0           $tag = lc(substr($tag, 1)); # discard /
2872              
2873             # note that current_prop should be all up to date by the
2874             # time you hit the end tag
2875             # this tag post-processing is BEFORE vertical margins and
2876             # popping of properties stack for this and nested tags
2877             # processing specific to specific end tags ---------------------
2878 0 0         if ($tag eq 'ul') {
    0          
    0          
    0          
2879 0           $list_depth_u--;
2880             } elsif ($tag eq '_sl') {
2881 0           $list_depth_s--;
2882             } elsif ($tag eq 'ol') {
2883 0           $list_depth_o--;
2884             # restore any saved start and reversed_ol values
2885 0           $start = $properties[-2]->{'_start'}; # current start
2886 0           $reversed_ol = $properties[-2]->{'_reversed_ol'}; # cur flag
2887             } elsif ($tag eq '_marker') {
2888             # bump x position past gap to li start (li is inline)
2889 0           $x = $start_x + $properties[-1]->{'_left'} +
2890             $marker_width + $marker_gap;
2891 0           $text->translate($x, $y);
2892 0           $desired_x = $x;
2893             }
2894              
2895             # ready to pick larger of top and bottom margins (block display)
2896             # block display element end (including paragraphs)
2897             # start next material on new line
2898 0 0         if ($current_prop->{'display'} eq 'block') {
2899 0           $need_line = 1;
2900 0           $start_y = $next_y;
2901 0           $add_x = $add_y = 0;
2902             # now that need_line, etc. are set, make inline
2903 0           $current_prop->{'display'} = 'inline';
2904 0           $vmargin[0] = min($vmargin[0], $properties[-1]->{'margin-bottom'});
2905 0           $vmargin[1] = max($vmargin[1], $properties[-1]->{'margin-bottom'});
2906             }
2907              
2908             # pop properties stack and remove element ----------------------
2909             # last step is to pop the properties stack and remove this
2910             # element, its start tag, and everything in-between. adjust
2911             # $el and loop again.
2912 0           for (my $first = $el-1; $first>1; $first--) {
2913             # looking for a tag matching $tag
2914 0 0 0       if ($mytext[$first]->{'text'} eq '' &&
2915             $mytext[$first]->{'tag'} eq $tag) {
2916             # found it at $first
2917 0           my $len = $el - $first + 1;
2918 0           splice(@mytext, $first, $len);
2919 0           $el -= $len; # end of loop will advance $el
2920 0           pop @properties;
2921             # restore current font size
2922 0           last;
2923             }
2924             }
2925             # this tag post-processing is AFTER vertical margins and
2926             # popping of properties stack for this and nested tags
2927             # (currently none)
2928 0 0         if (@mytext == 2) { last; } # have used up all input text!
  0            
2929             # only default values and style element are left
2930 0           next; # next mytext element s/b one after batch just removed
2931            
2932             # end of handling end tags </tag>
2933             }
2934              
2935             # end of tag processing
2936              
2937             # ========================== text to output =================
2938             } else {
2939             # normally text is not empty '', but sometimes such may come
2940             # through. a blank text is still valid
2941 0 0         if ($mytext[$el]->{'text'} eq "\n") { next; } # EOL too
  0            
2942 0 0         if ($mytext[$el]->{'text'} eq '') { next; }
  0            
2943              
2944             # we should be at a new text entry ("phrase") INK HERE
2945             # we have text to output on the page, using properties at the
2946             # properties stack top. compare against current properties to
2947             # see if need to make any calls (font, color, etc.) to make.
2948              
2949             # drop down page by any pending vertical margin spacing
2950 0 0 0       if ($vmargin[0] != 0 || $vmargin[1] != 0) {
2951 0 0         if (!$topCol) {
2952 0           $start_y -= ($vmargin[0]+$vmargin[1]);
2953             }
2954 0           @vmargin = (0, 0); # reset counters
2955             }
2956 0           $topCol = 0; # for rest of column do not suppress vertical margin
2957              
2958             # after tags processed, and property list (properties[-1]) updated,
2959             # typically at start of a text string (phrase) we will call PDF
2960             # updates such as fillcolor, get_font, etc. and at the same time
2961             # update current_prop to match.
2962              
2963             # what properties have changed and need PDF calls to update?
2964             # TBD future: separate slant and italic, optical size
2965 0           $call_get_font = 0;
2966 0 0         if ($properties[-1]->{'font-family'} ne $current_prop->{'font-family'}) {
2967 0           $call_get_font = 1;
2968             # a font label known to FontManager
2969 0           $current_prop->{'font-family'} = $properties[-1]->{'font-family'};
2970             }
2971 0 0         if ($properties[-1]->{'font-style'} ne $current_prop->{'font-style'}) {
2972 0           $call_get_font = 1;
2973             # normal or italic (TBD separate slant)
2974 0           $current_prop->{'font-style'} = $properties[-1]->{'font-style'};
2975             }
2976 0 0         if ($properties[-1]->{'font-weight'} ne $current_prop->{'font-weight'}) {
2977 0           $call_get_font = 1;
2978             # normal or bold (TBD multiple steps, numeric and named)
2979 0           $current_prop->{'font-weight'} = $properties[-1]->{'font-weight'};
2980             }
2981             # font size
2982             # don't want to trigger font call unless numeric value changed
2983             # current_prop's s/b in points, newval will be in points. if
2984             # properties (latest request) is a relative size (e.g., %),
2985             # what it is relative to is NOT the last font size used
2986             # (current_prop), but carried-along current font size.
2987             my $newval = _size2pt($properties[-1]->{'font-size'},
2988 0           $properties[-1]->{'_parent-fs'}, 'usage'=>'font-size');
2989             # newval is the latest requested size (in points), while
2990             # current_prop is last one used for output (in points)
2991 0 0         if ($newval != $current_prop->{'font-size'}) {
2992 0           $call_get_font = 1;
2993 0           $current_prop->{'font-size'} = $newval;
2994             }
2995             # any size as a percentage of font-size will use the current fs
2996             # should be in points by now, might not equal current_prop{font-size}
2997 0           my $fs = $properties[-1]->{'font-size'};
2998              
2999             # uncommon to only change font size without also changing something
3000             # else, so make font selection call at the same time, besides,
3001             # there is very little involved in just returning current font.
3002 0 0         if ($call_get_font) {
3003             # TBD future additional options, expanded weight
3004             $text->font($pdf->get_font(
3005             'face' => $current_prop->{'font-family'},
3006             'italic' => ($current_prop->{'font-style'} eq 'normal')? 0: 1,
3007 0 0         'bold' => ($current_prop->{'font-weight'} eq 'normal')? 0: 1,
    0          
3008             ), $fs);
3009 0           $current_prop->{'font-size'} = $fs;
3010             }
3011             # font-size should be set in current_prop for use by margins, etc.
3012              
3013             # don't know if color will be used for text or for graphics draw,
3014             # so set both
3015 0 0         if ($properties[-1]->{'color'} ne $current_prop->{'color'}) {
3016 0           $current_prop->{'color'} = $properties[-1]->{'color'};
3017 0           $text->fillcolor($current_prop->{'color'});
3018 0           $text->strokecolor($current_prop->{'color'});
3019 0 0 0       if (defined $grfx && ref($grfx) =~ m/^PDF::Builder::Content/) {
3020 0           $grfx->fillcolor($current_prop->{'color'});
3021 0           $grfx->strokecolor($current_prop->{'color'});
3022             }
3023             }
3024              
3025             # these properties don't get a PDF::Builder call
3026             # update text-indent, etc. of current_prop, even if we don't
3027             # call a Builder routine to set them in PDF, so we can always use
3028             # current_prop instead of switching between the two. current_prop
3029             # property lengths should always be in pts (no labeled dimensions).
3030 0           $current_prop->{'text-indent'} = $properties[-1]->{'text-indent'}; # should already be pts
3031 0           $current_prop->{'text-decoration'} = $properties[-1]->{'text-decoration'};
3032 0           $current_prop->{'text-align'} = $properties[-1]->{'text-align'};
3033 0           $current_prop->{'margin-top'} = _size2pt($properties[-1]->{'margin-top'}, $fs, 'usage'=>'margin-top');
3034             # the incremental right margin, and the running total
3035 0           $current_prop->{'margin-right'} = _size2pt($properties[-1]->{'margin-right'}, $fs, 'usage'=>'margin-right');
3036 0           $properties[-1]->{'_right'} += $current_prop->{'margin-right'};
3037 0           $current_prop->{'margin-bottom'} = _size2pt($properties[-1]->{'margin-bottom'}, $fs, 'usage'=>'margin-bottom');
3038             # the incremental left margin, and the running total
3039 0           $current_prop->{'margin-left'} = _size2pt($properties[-1]->{'margin-left'}, $fs, 'usage'=>'margin-left');
3040 0           $properties[-1]->{'_left'} += $current_prop->{'margin-left'};
3041             # line-height is expected to be a multiplier to font-size, so
3042             # % or pts value would have to be converted back to ratio TBD
3043 0           $current_prop->{'line-height'} = $properties[-1]->{'line-height'}; # numeric ratio
3044 0           $current_prop->{'display'} = $properties[-1]->{'display'};
3045 0           $current_prop->{'list-style-type'} = $properties[-1]->{'list-style-type'};
3046             $current_prop->{'list-style-position'} = $properties[-1]->{'list-style-position'}
3047 0 0         if defined $properties[-1]->{'list-style-position'};
3048 0           $current_prop->{'_href'} = $properties[-1]->{'_href'};
3049             # current_prop should now be up to date with properties[-1], and
3050             # any Builder calls have been made
3051              
3052             # we're ready to roll, and output the actual text itself
3053             #
3054             # fill line from element $el at current x,y until will exceed endx
3055             # then get next baseline
3056             # if this phrase doesn't finish out the line, will start next
3057             # mytext element at the x,y it left off. otherwise, unused portion
3058             # of phrase (remainder) becomes the next element to process.
3059 0           $phrase = $mytext[$el]->{'text'}; # there should always be a text
3060             #
3061             # $list_marker was set in li tag processing
3062             # note that ol is bold, ul is Symbol (replace macros .disc, etc.).
3063             # content of li is with new left margin. first line ($list_marker
3064             # ne '') text_right of $list_marker at left margin of li text.
3065             # then set $list_marker to '' to cancel out until next li.
3066 0           $remainder = '';
3067              
3068             # for now, all whitespace convert to single blanks
3069             # TBD blank preserve for <code> or <pre> (CSS white-space)
3070 0           $phrase =~ s/\s+/ /g;
3071              
3072             # click areas ------------------------------------------------------
3073             # if 'annot' field (attribute) exists for a text, we want to define
3074             # a rectangle around it for an annotation click area (several
3075             # rectangles, even across multiple columns, are possible if the
3076             # phrase is long enough to split in the middle).
3077             # value = element number in state->xrefs array to update rect
3078             # with [ UL, LR ] values being assembled
3079             # at end (when LR done), push to state->xrefs->[elno]{click}
3080             # (could already have one or more subarrays)
3081 0           my $click_ele;
3082 0 0         if (defined $mytext[$el]->{'annot'}) {
3083 0           $click_ele = $mytext[$el]->{'annot'};
3084 0           $click_ele = $state->{'xrefs'}->[$click_ele]{'click'};
3085             # for every chunk of text the phrase gets split into, push
3086             # an element on the 'click' anonymous array, consisting of
3087             # the [sppn, [ULx,ULy, LRx,LRy]]
3088             }
3089              
3090             # output text itself -----------------------------------------------
3091             # a phrase may have multiple words. see if entire thing fits, and if
3092             # not, start trimming off right end (split into a new element)
3093            
3094 0           while ($phrase ne '') {
3095             # one of four things to handle:
3096             # 1. entire phrase fits at x -- just write it out
3097             # 2. none of phrase fits at x (all went into remainder) --
3098             # go to next line to check and write (not all may fit)
3099             # 3. phrase split into (shortened) phrase (that fits) and a
3100             # remainder -- write out phrase, remainder to next line to
3101             # check and write (not all may fit)
3102             # 4. phrase consists of just one word, AND it's too long to
3103             # fit on the full line. it must be split somewhere to fit
3104             # the line.
3105              
3106 0           my ($x_click, $y_click, $y_click_bot);
3107 0           my $full_line = 0;
3108             # this is to force start of a new line at start_y?
3109             # phrase still has content, and there may be remainder.
3110             # don't forget to set the new start_y when need_line=1
3111 0 0         if ($need_line) {
3112             # first, set font (current, or something specified)
3113 0 0         if ($topCol) { # at top of column, font undefined
3114 0           $text->font($pdf->get_font('face'=>'current'), $fs);
3115             }
3116              
3117             # extents above and below the baseline (so far)?
3118             ($asc, $desc, $desc_leading) =
3119             _get_fv_extents($pdf, $font_size,
3120 0           $properties[-1]->{'line-height'});
3121 0           $next_y = $start_y - $add_y - $asc + $desc_leading;
3122             # did we go too low? will return -1 (start_x) and
3123             # remainder of input
3124             # don't include leading when seeing if line dips too low
3125 0 0         if ($start_y - $add_y - $asc + $desc < $min_y) { last; }
  0            
3126             # start_y and next_y are vertical extent of this line
3127             # (so far)
3128             # y is the y value of the baseline (so far)
3129 0           $y = $start_y - $add_y - $asc;
3130              
3131             # how tall is the line? need to set baseline. add_y is
3132             # any paragraph top margin to drop further. note that this
3133             # is just the starting point -- the line could get taller
3134 0           ($start_x,$y, $width) = _get_baseline($y, @outline);
3135 0           $x = $start_x + $properties[-1]->{'_left'};
3136 0           $width -= $properties[-1]->{'_left'} + $properties[-1]->{'_right'};
3137 0           $endx = $x + $width;
3138             # at this point, we have established the next baseline
3139             # (x,y start and width/end x). fill this line.
3140 0           $x += $add_x; $add_x = 0; # indent
  0            
3141 0           $add_y = 0; # para top margin extra
3142 0           $need_line = 0;
3143 0           $full_line = 1;
3144              
3145             # was there already a "desired x" value, such as <_move>?
3146 0 0         if (defined $desired_x) {
3147 0           $x = $desired_x;
3148 0           $desired_x = undef;
3149             }
3150              
3151             # stuff to remember if need to shift line down due to
3152             # vertical extents increase
3153             # TBD: may need to change LR corner of last line of an
3154             # annotation click area if content further along line
3155             # moves baseline down
3156 0           @line_extents = ();
3157 0           push @line_extents, $start_x; # current baseline's start
3158 0           push @line_extents, $x; # current baseline
3159             # note that $x advances with each write
3160 0           push @line_extents, $y;
3161 0           push @line_extents, $width;
3162 0           push @line_extents, $endx;
3163 0           push @line_extents, $next_y;
3164 0           push @line_extents, $asc; # current vertical extents
3165 0           push @line_extents, $desc;
3166 0           push @line_extents, $desc_leading;
3167             # text and graphics contexts and
3168             # where the current line starts in the streams
3169 0           push @line_extents, $text;
3170 0           push @line_extents, length($text->{' stream'});
3171 0           push @line_extents, $grfx;
3172 0 0 0       if (defined $grfx && ref($grfx) =~ m/^PDF::Builder::Content/) {
3173 0           push @line_extents, length($grfx->{' stream'});
3174             } else {
3175 0           push @line_extents, 0;
3176             }
3177 0           push @line_extents, $start_y;
3178 0           push @line_extents, $min_y;
3179 0           push @line_extents, \@outline;
3180 0           push @line_extents, $properties[-1]->{'_left'};
3181             #push @line_extents, $properties[-1]->{'_left_nest'};
3182 0           push @line_extents, $properties[-1]->{'_right'};
3183              
3184             # if starting a line, make sure no leading whitespace
3185             # TBD if pre, don't remove whitespace
3186 0           $phrase =~ s/^\s+//;
3187             } else {
3188             # cancel desired_x if not used
3189 0           $desired_x = undef;
3190             }
3191            
3192             # have a phrase to attempt to add to output, and an
3193             # x,y to start it at (tentative if start of line)
3194             # x is current user-specified position to align at, and
3195             # if not LJ, will be adjusted so write is CJ or RJ there
3196 0           my $w = $text->advancewidth($phrase); # will use $w later
3197 0           my $align = $properties[-1]->{'text-align'};
3198 0 0 0       if ($align eq 'c' || $align eq 'center') {
    0 0        
3199 0           $x -= $w/2; # back up 1/2 phrase to real starting point
3200 0 0         if ($x+$x_adj < $start_x) {
3201 0           carp "Centered text of width $w: left edge ".($x+$x_adj)." is left of column start $start_x. Results unpredictable.\n";
3202             }
3203 0 0         if ($x+$x_adj+$w > $endx) {
3204 0           carp "Centered text of width $w: right edge ".($x+$x_adj+$w)." is right of column end $endx. Results unpredictable.\n";
3205             }
3206 0           $text->translate($x+$x_adj, $y+$y_adj);
3207             } elsif ($align eq 'r' || $align eq 'right') {
3208 0           $x -= $w; # back up by phrase to real starting point
3209 0 0         if ($x+$x_adj < $start_x) {
3210 0           carp "Right-aligned text of width $w: left edge ".($x+$x_adj)." is left of column start $start_x. Results unpredictable.\n";
3211             }
3212 0 0         if ($x+$x_adj+$w > $endx) {
3213 0           carp "Right-aligned text of width $w: right edge ".($x+$x_adj+$w)." is right of column end $endx. Results unpredictable.\n";
3214             }
3215 0           $text->translate($x+$x_adj, $y+$y_adj);
3216             } else { # align l/left
3217             # no x adjustment for phrase width
3218 0           $text->translate($x+$x_adj, $y+$y_adj);
3219             }
3220 0           $align = 'left'; # have set x,y to actual start point
3221              
3222             # $x,$y is where we will actually start writing the phrase
3223             # (adjusted per text-align setting)
3224 0 0         if ($x + $w <= $endx) {
3225 0           my $rc;
3226             # no worry, the entire phrase fits (case 1.)
3227             # y (and possibly x) might change if extents change
3228 0           my $w = $text->advancewidth($phrase);
3229 0 0         if ($current_prop->{'text-decoration'} ne 'none') {
3230             # output any requested line strokes, after baseline
3231             # positioned and before baseline adjusted
3232             # supported: underline, line-through, overline
3233             # may be a combination separated by spaces
3234             # inherit current color (strokecolor) setting
3235 0           my $font = $pdf->get_font('face'=>'current');
3236 0           my $upem = $font->upem();
3237 0   0       my $strokethickness = $font->underlinethickness() || 1;
3238 0           $strokethickness *= $fs/$upem;
3239 0   0       my $stroke_ydist = $font->underlineposition() || 1;
3240              
3241             # don't stroke through any trailing whitespace
3242 0           my $trail = 0; # width of WS
3243 0 0         if ($phrase =~ m/(\s+)$/) {
3244 0           $trail = $text->advancewidth($1);
3245             }
3246              
3247 0           $stroke_ydist *= $fs/$upem;
3248             # TBD consider whether to draw lines in graphics
3249             # context instead (could end up with text under line)
3250 0           $text->add('ET'); # go into graphics mode
3251 0           $text->add("$strokethickness w");
3252             # baseline is x,y to x+w,y, ydist is < 0
3253 0 0         if ($current_prop->{'text-decoration'} =~ m#underline#) {
3254             # use ydist as-is
3255 0           $text->add("$x ".($y+$stroke_ydist)." m");
3256 0           $text->add(($x+$w-$trail)." ".($y+$stroke_ydist)." l");
3257             }
3258 0 0         if ($current_prop->{'text-decoration'} =~ m#line-through#) {
3259             # use new ydist at .3fs
3260 0           $stroke_ydist = 0.3*$fs;
3261 0           $text->add("$x ".($y+$stroke_ydist)." m");
3262 0           $text->add(($x+$w-$trail)." ".($y+$stroke_ydist)." l");
3263             }
3264 0 0         if ($current_prop->{'text-decoration'} =~ m#overline#) {
3265             # use new ydist at 0.65fs
3266 0           $stroke_ydist = 0.70*$fs;
3267 0           $text->add("$x ".($y+$stroke_ydist)." m");
3268 0           $text->add(($x+$w-$trail)." ".($y+$stroke_ydist)." l");
3269             }
3270 0           $text->add('S'); # always stroke the line
3271 0           $text->add('BT'); # back into text mode
3272             # after BT, need to restore position
3273 0           $text->translate($x,$y);
3274             } # handle text-decoration
3275             # before writing a new phrase with possibly increased
3276             # extents, see if new baseline needed
3277             # extents above and below the baseline (so far)?
3278             my ($n_asc, $n_desc, $n_desc_leading) =
3279             _get_fv_extents($pdf, $current_prop->{'font-size'},
3280 0           $properties[-1]->{'line-height'});
3281 0           $line_extents[1] = $x; # current position
3282 0           ($rc, @line_extents) =
3283             _revise_baseline(@line_extents, $n_asc, $n_desc, $n_desc_leading, $w);
3284 0           ($start_x, $x, $y, $width, $endx, $next_y,
3285             $asc, $desc, $desc_leading)
3286             = @line_extents; # only parts which might have changed
3287             # if rc == 0, line successfully moved down page
3288             # if rc == 1, existing line moved down, but need to check if
3289             # still room for $phrase
3290             # if rc == 2, current written line doesn't fit narrower line
3291             # if rc == 3, revised line won't fit in column! (vertically)
3292             # TBD need to check $rc once column width can vary
3293             # if annotation click area, remember x and y
3294 0 0         if (defined $click_ele) {
3295             # UL corner, best guess for y value
3296 0           $x_click = int($x +0.5);
3297 0           $y_click = int($y + 0.8*$fs +0.5);
3298 0           $y_click_bot = int($y_click - $leading*$fs +0.5);
3299             }
3300 0           $text->text($phrase); # have already corrected start point
3301             # if adjusted x and/or y, undo it and zero out
3302 0 0 0       if ($x_adj || $y_adj) {
3303 0           $text->translate($x, $y);
3304 0           $x_adj = $y_adj = 0;
3305             }
3306              
3307 0 0         if ($current_prop->{'_href'} ne '') {
3308             # this text is a link, so need to make an annot. link
3309             # $x,$y is baseline left, $w is width
3310             # $asc, $desc are font ascenders/descenders
3311             # some extra margin to make it easier to select
3312 0           my $fs = 0.2*$current_prop->{'font-size'};
3313 0           my $rect = [ $x-$fs, $y-$desc-3*$fs,
3314             $x+$w+$fs, $y+$asc+$fs ];
3315             # TBD what if link wraps around? make two or more?
3316 0           my $annotation = $page->annotation();
3317 0           my $href = $current_prop->{'_href'};
3318             # TBD: href=pdf:docpath.pdf#p.x.y.z jump to another PDF
3319 0 0         if ($href =~ m/^#/) {
3320             # href starts with # so it's a jump within this doc
3321 0           my ($pageno, $xpos, $ypos, $zoom);
3322 0 0         if ($href =~ m/^#(\d+)$/) {
    0          
    0          
3323             # #p format (whole page)
3324 0           $pageno = $1;
3325 0           $xpos = $ypos = $zoom = undef;
3326             } elsif ($href =~ m/^#(\d+)-(\d+)-(\d+)$/) {
3327             # #p-x-y format (no zoom, at a specific spot)
3328 0           $pageno = $1;
3329 0           $xpos = $2;
3330 0           $ypos = $3;
3331 0           $zoom = undef;
3332             } elsif ($href =~ m/^#(\d+)-(\d+)-(\d+)-(.+)$/) {
3333             # #p-x-y-z format (zoom, at a specific spot)
3334 0           $pageno = $1; # integer > 0
3335 0           $xpos = $2; # number >= 0
3336 0           $ypos = $3; # number >= 0
3337 0           $zoom = $4; # number >= 0
3338 0 0         if ($zoom <= 0) {
3339 0           carp "Invalid zoom value $zoom. Using 1";
3340 0           $zoom = 1;
3341             }
3342             } else {
3343             # bad format
3344 0           carp "Invalid link format '$href'. Using page 1";
3345 0           $pageno = 1;
3346 0           $xpos = $ypos = $zoom = undef;
3347             }
3348 0 0         if ($pageno < 1) {
3349 0           carp "Invalid page number $pageno. Using page 1";
3350 0           $pageno = 1;
3351             }
3352 0 0 0       if (defined $xpos && $xpos < 0) {
3353 0           carp "Invalid page x coordinate $xpos. Using x=100";
3354 0           $xpos = 100;
3355             }
3356 0 0 0       if (defined $ypos && $ypos < 0) {
3357 0           carp "Invalid page y coordinate $ypos. Using y=300";
3358 0           $ypos = 300;
3359             }
3360              
3361 0           my $tgt_page = $pdf->open_page($pageno);
3362 0 0         if (!defined $tgt_page) {
3363 0           carp "Invalid page number $pageno. Using page 1";
3364 0           $pageno = 1;
3365 0           $tgt_page = $pdf->open_page($pageno);
3366             }
3367 0 0         if (!defined $xpos) {
3368             # page only
3369 0           $annotation->link($tgt_page,
3370             'rect'=>$rect, 'border'=>[0,0,0]);
3371             } else {
3372             # page at a location and zoom factor
3373 0           $annotation->link($tgt_page,
3374             'rect'=>$rect, 'border'=>[0,0,0],
3375             'xyz'=>[ $xpos,$ypos, $zoom ]);
3376             }
3377             } else {
3378             # webpage (usually HTML) link
3379 0           $annotation->uri($href,
3380             'rect'=>$rect, 'border'=>[0,0,0]);
3381             }
3382             } # deal with an href
3383             # need to move current x to right end of text just written
3384             # TBD: revise if RTL/bidirectional
3385 0           $x += $w;
3386              
3387             # whether or not the full phrase fit, we need to create the
3388             # annotation click area and the annotation for this line
3389 0 0         if (defined $click_ele) {
3390 0           my $ele = [$ppn, [$x_click,$y_click, $x,$y_click_bot]];
3391             # push this element 'ele' onto the list at click_ele
3392 0           my @click = @$click_ele; # initially empty
3393 0           push @click, $ele;
3394 0           $click_ele = \@click;
3395 0           $state->{'xrefs'}->[$mytext[$el]->{'annot'}]{'click'} = $click_ele;
3396             # TBD when last chunk of phrase has been output, if
3397             # 'other_pg' used, need to update that text element
3398             # (following </_ref>) as well as set flag that this
3399             # has changed (if true)
3400             }
3401              
3402 0           $full_line = 0;
3403 0           $need_line = 0;
3404             # change current property display to inline
3405 0           $current_prop->{'display'} = 'inline';
3406              
3407             # next element in mytext (try to fit on same line)
3408 0           $phrase = $remainder; # may be empty
3409 0           $remainder = '';
3410             # since will start a new line, trim leading w/s
3411 0           $phrase =~ s/^\s+//; # might now be empty
3412 0 0         if ($phrase ne '') {
3413             # phrase used up, but remainder for next line
3414 0           $need_line = 1;
3415 0           $start_y = $next_y;
3416             }
3417 0           next; # done with phrase loop if phrase empty
3418              
3419             # end of handling entire phrase fits
3420             } else {
3421             # existing line plus phrase is too long (overflows line)
3422             # entire phrase does NOT fit (case 2 or 3). start splitting
3423             # up phrase, beginning with stripping space(s) off end
3424              
3425 0 0         if ($phrase =~ s/(\s+)$//) {
3426             # remove whitespace at end (line will end somewhere
3427             # within phrase, anyway)
3428 0           $remainder = $1.$remainder;
3429             } else {
3430             # Is line too short to fit even the first word at the
3431             # beginning of the line? force split in word somewhere
3432             # so that it fits.
3433 0           my $word = $phrase;
3434 0           $word =~ s/^\s+//; # probably not necessary, but doesn't hurt
3435 0           $word =~ s/\s+$//;
3436 0 0 0       if ($full_line && index($word, ' ') == -1) {
3437 0           my ($wordLeft, $wordRight);
3438             # is a single word at the beginning of the line,
3439             # and didn't fit
3440 0           require PDF::Builder::Content::Hyphenate_basic;
3441 0           ($wordLeft,$wordRight) = PDF::Builder::Content::Hyphenate_basic::splitWord($text, $word, $endx-$x);
3442 0 0         if ($wordLeft eq '') {
3443             # failed to split. try desperation move of
3444             # splitting at Non Splitting SPace!
3445 0           ($wordLeft,$wordRight) = PDF::Builder::Content::Hyphenate_basic::splitWord($text, $word, $endx-$x, 'spRB'=>1);
3446 0 0         if ($wordLeft eq '') {
3447             # super-desperation move... split to fit
3448             # space! eventually with proper hyphenation
3449             # this probably will never be needed.
3450 0           ($wordLeft,$wordRight) = PDF::Builder::Content::Hyphenate_basic::splitWord($text, $word, $endx-$x, 'spFS'=>1);
3451             }
3452             }
3453 0           $phrase = $wordLeft;
3454 0           $remainder = "$wordRight $remainder";
3455 0           next; # re-try shortened phrase
3456             }
3457            
3458             # phrase should end with non-whitespace if here.
3459             # try moving last word to remainder
3460 0 0         if ($phrase =~ s/(\S+)$//) {
3461             # remove word at end
3462 0           $remainder = $1.$remainder;
3463             }
3464             }
3465             # at least part of text will end up on another line.
3466             # find current <p> and add cont=>1 to it to mark
3467             # continuation in case we end up at end of column
3468 0           for (my $ptag=$el-1; $ptag>1; $ptag--) {
3469 0 0         if ($mytext[$ptag]->{'text'} ne '') { next; }
  0            
3470 0 0         if ($mytext[$ptag]->{'tag'} ne 'p') { next; }
  0            
3471 0           $mytext[$ptag]->{'cont'} = 1;
3472 0           last;
3473             }
3474            
3475 0 0 0       if ($phrase eq '' && $remainder ne '') {
3476             # entire phrase goes to next line
3477 0           $need_line = 1;
3478 0           $start_y = $next_y;
3479 0           $add_x = $add_y = 0;
3480 0           $phrase = $remainder;
3481 0           $remainder = '';
3482             }
3483 0           next;
3484            
3485             } # phrase did not fit (else)
3486              
3487             # 'next' to here
3488             } # end of while phrase has content loop
3489             # remainder should be '' at this point, phrase may have content
3490             # either ran out of phrase, or ran out of column
3491              
3492 0 0         if ($phrase eq '') {
3493             # ran out of input text phrase, so process more elements
3494             # but first, remove this text from mytext array so won't be
3495             # accidentally repeated
3496 0           splice(@mytext, $el, 1);
3497 0           $el--;
3498 0           next;
3499             }
3500             # could get here if exited loop due to running out of column,
3501             # in which case, phrase has to be stuffed back into mytext
3502 0           $mytext[$el]->{'text'} = $phrase;
3503 0           last;
3504            
3505             } # text to output
3506              
3507             # =================== done with this element? ==========================
3508             # end of processing this element in mytext, UNLESS it was text (phrase)
3509             # and we ran out of column space!
3510              
3511 0 0         if ($phrase ne '') {
3512             # we left early, with incomplete text, because we ran out of
3513             # column space. can't process any more elements -- done with column.
3514             # mytext[el] already updated with remaining text
3515 0           last; # exit mytext loop
3516             } else {
3517             # more elements to go
3518 0           next;
3519             }
3520              
3521             # 'next' to here
3522             } # for $el loop through mytext array over multiple lines
3523              
3524             # if get to here, is it because we ran out of mytext (normal loop exit), or
3525             # because we ran out of space in the column (early exit, in middle of a
3526             # text element)?
3527             #
3528             # for whatever reason we're exiting, remove first array element (default
3529             # CSS entries). it is always re-created on entry to column(). leave next
3530             # element (consolidated <style> tags, if any).
3531 0           shift @mytext;
3532              
3533 0 0         if ($#mytext == 0) {
3534             # [0] = consolidated styles (default styles was just removed)
3535             # we ran out of input. return next start_y and empty list ref
3536            
3537             # first, handle restore = 0, 1, or 2
3538 0 0         if ($restore == 0) {
    0          
3539             # carry out pending font and color changes
3540             # what properties have changed and need PDF calls to update?
3541 0           my $call_get_font = 0;
3542 0 0         if ($properties[-1]->{'font-family'} ne $current_prop->{'font-family'}) {
3543 0           $call_get_font = 1;
3544             # a font label known to FontManager
3545 0           $current_prop->{'font-family'} = $properties[-1]->{'font-family'};
3546             }
3547 0 0         if ($properties[-1]->{'font-style'} ne $current_prop->{'font-style'}) {
3548 0           $call_get_font = 1;
3549             # normal or italic
3550 0           $current_prop->{'font-style'} = $properties[-1]->{'font-style'};
3551             }
3552 0 0         if ($properties[-1]->{'font-weight'} ne $current_prop->{'font-weight'}) {
3553 0           $call_get_font = 1;
3554             # normal or bold
3555 0           $current_prop->{'font-weight'} = $properties[-1]->{'font-weight'};
3556             }
3557             # font size
3558             # don't want to trigger font call unless numeric value changed
3559             # current_prop's s/b in points, newval will be in points. if
3560             # properties (latest request) is a relative size (e.g., %),
3561             # what it is relative to is NOT the last font size used
3562             # (current_prop), but carried-along current font size.
3563             my $newval = _size2pt($properties[-1]->{'font-size'},
3564 0           $properties[-1]->{'_parent-fs'}, 'usage'=>'font-size');
3565             # newval is the latest requested size (in points), while
3566             # current_prop is last one used for output (in points)
3567 0 0         if ($newval != $current_prop->{'font-size'}) {
3568 0           $call_get_font = 1;
3569 0           $current_prop->{'font-size'} = $newval;
3570             }
3571             # any size as a percentage of font-size will use the current fs
3572 0           my $fs = $current_prop->{'font-size'};
3573              
3574 0 0         if ($call_get_font) {
3575             $text->font($pdf->get_font(
3576             'face' => $current_prop->{'font-family'},
3577             'italic' => ($current_prop->{'font-style'} eq 'normal')? 0: 1,
3578 0 0         'bold' => ($current_prop->{'font-weight'} eq 'normal')? 0: 1,
    0          
3579             ), $fs);
3580             }
3581             # font-size should be set in current_prop for use by margins, etc.
3582              
3583             # don't know if color will be used for text or for graphics draw,
3584             # so set both
3585 0 0         if ($properties[-1]->{'color'} ne $current_prop->{'color'}) {
3586 0           $current_prop->{'color'} = $properties[-1]->{'color'};
3587 0           $text->fillcolor($current_prop->{'color'});
3588 0           $text->strokecolor($current_prop->{'color'});
3589 0 0 0       if (defined $grfx && ref($grfx) =~ m/^PDF::Builder::Content/ ) {
3590 0           $grfx->fillcolor($current_prop->{'color'});
3591 0           $grfx->strokecolor($current_prop->{'color'});
3592             }
3593             }
3594             } elsif ($restore == 1) {
3595             # do nothing, leave the font state/colors as-is
3596             } else { # 2
3597             # restore to entry with @entry_state
3598 0           return (2, $next_y - ($vmargin[0]+$vmargin[1]), []);
3599             }
3600              
3601 0           return (0, $next_y - ($vmargin[0]+$vmargin[1]), []);
3602             } else {
3603             # we ran out of vertical space in the column. return -1 and
3604             # remainder of mytext list (next_y would be inapplicable)
3605            
3606             # first, handle restore = 0, 1, or 2
3607 0 0 0       if ($restore == 0 || $restore == 1) {
3608             # do nothing, leave the font state/colors as-is
3609             } else { # 2
3610             # restore to entry with @entry_state
3611 0           return (3, -1, \@mytext);
3612             }
3613              
3614 0           return (1, -1, \@mytext);
3615             }
3616              
3617             } # end of _output_text()
3618              
3619             # initialize current property settings to values that will cause updates (PDF
3620             # calls) when the first real properties are determined, and thereafter whenever
3621             # these properties change
3622             sub _init_current_prop {
3623              
3624 0     0     my $cur_prop = {};
3625            
3626             # NOTE that all lengths must be in points (unitless), ratios are
3627             # pure numbers, named things are strings.
3628 0           $cur_prop->{'font-size'} = -1;
3629 0           $cur_prop->{'line-height'} = 0; # alias is text-height until release 3.030
3630 0           $cur_prop->{'text-indent'} = 0;
3631 0           $cur_prop->{'color'} = 'snork'; # PDF default is black
3632 0           $cur_prop->{'font-family'} = 'yoMama'; # force a change
3633 0           $cur_prop->{'font-weight'} = 'abnormal';
3634 0           $cur_prop->{'font-style'} = 'abnormal';
3635             #$cur_prop->{'font-variant'} = 'abnormal';
3636 0           $cur_prop->{'margin-top'} = '0';
3637 0           $cur_prop->{'margin-right'} = '0';
3638 0           $cur_prop->{'margin-bottom'} = '0';
3639 0           $cur_prop->{'margin-left'} = '0';
3640 0           $cur_prop->{'text-align'} = 'left';
3641             #$cur_prop->{'text-transform'} = 'none';
3642             #$cur_prop->{'border'} = 'none'; # NOT inherited
3643             #$cur_prop->{'border-style'} = 'none'; # NOT inherited
3644             #$cur_prop->{'border-width'} = '1pt'; # NOT inherited
3645             #$cur_prop->{'border-color'} = 'inherit'; # NOT inherited
3646 0           $cur_prop->{'text-decoration'} = 'none';
3647             #$cur_prop->{'text-decoration-skip-ink'}; for underline etc.
3648 0           $cur_prop->{'display'} = 'block'; # inline, TBD inline-block, none
3649 0           $cur_prop->{'height'} = '0'; # currently <hr> only, NOT inherited
3650 0           $cur_prop->{'width'} = '0'; # currently <hr> only, NOT inherited
3651 0           $cur_prop->{'list-style-type'} = '.u';
3652 0           $cur_prop->{'list-style-position'} = 'outside';
3653 0           $cur_prop->{'_marker-before'} = '';
3654 0           $cur_prop->{'_marker-after'} = '.';
3655 0           $cur_prop->{'_marker-color'} = '';
3656 0           $cur_prop->{'_marker-font'} = '';
3657 0           $cur_prop->{'_marker-size'} = '0';
3658 0           $cur_prop->{'_marker-style'} = '';
3659 0           $cur_prop->{'_marker-text'} = '';
3660 0           $cur_prop->{'_marker-weight'} = '';
3661 0           $cur_prop->{'_marker-align'} = 'right';
3662 0           $cur_prop->{'_href'} = '';
3663            
3664 0           return $cur_prop;
3665             } # end of _init_current_prop()
3666              
3667             # update a properties hash for a specific selector (all, if not given)
3668             # in all but a few cases, a higher level selector overrides a lower level by
3669             # simply replacing the old content, but in some, property values are
3670             # combined
3671             sub _update_properties {
3672 0     0     my ($target, $source, $selector) = @_;
3673              
3674 0           my $tag = '';
3675 0 0         if (defined $selector) {
3676 0 0         if ($selector =~ m#^tag:(.+)$#) {
3677 0           $tag = $1;
3678 0           $selector = undef;
3679             }
3680             }
3681              
3682 0 0         if (defined $selector) {
3683 0 0         if (defined $source->{$selector}) {
3684 0           foreach (keys %{$source->{$selector}}) {
  0            
3685             # $selector e.g., 'u' for underline
3686             # $_ is property name, e.g., 'text-decoration'
3687             # special treatment for text-decoration
3688 0 0         if ($_ eq 'text-decoration') {
3689             # 'none' is overwritten, but subsequent values appended
3690 0 0 0       if (defined $target->{$_} && $target->{$_} ne 'none') {
3691 0           $target->{$_} .= " $source->{$selector}->{$_}";
3692             } else {
3693 0           $target->{$_} = $source->{$selector}->{$_};
3694             }
3695             } else {
3696 0           $target->{$_} = $source->{$selector}->{$_};
3697             }
3698             }
3699             }
3700             } else { # selector not defined (use all)
3701 0           foreach my $tag_sel (keys %$source) { # top-level selectors
3702 0 0 0       if ($tag_sel eq 'text' || $tag_sel eq 'tag') { next; }
  0            
3703 0 0         if ($tag_sel eq 'cont') { next; } # paragraph continuation flag
  0            
3704 0 0         if ($tag_sel eq 'body') { next; } # do body selector last
  0            
3705 0 0         if (ref($source->{$tag_sel}) ne 'HASH') {
3706             # e.g., <a href="..."> the href element is a string, not a
3707             # hashref (ref != HASH), so we put it in directly
3708 0           $target->{$tag_sel} = $source->{$tag_sel};
3709             } else {
3710 0           foreach (keys %{$source->{$tag_sel}}) {
  0            
3711 0           $target->{$_} = $source->{$tag_sel}->{$_};
3712             }
3713             }
3714             }
3715             # do body selector last, after others
3716 0 0         if (defined $source->{'body'}) {
3717 0           foreach (keys %{$source->{'body'}}) {
  0            
3718 0           $target->{$_} = $source->{'body'}->{$_};
3719             }
3720             }
3721             }
3722              
3723 0           return;
3724             } # end of _update_properties()
3725              
3726             # according to Text::Layout#10, HarfBuzz::Shaper *may* now have per-glyph
3727             # extents. should check some day when HS is supported (but not mandatory)
3728             sub _get_fv_extents {
3729 0     0     my ($pdf, $font_size, $leading) = @_;
3730              
3731 0 0         $leading = 1.0 if $leading <= 0; # actually, a bad value
3732 0 0         $leading++ if $leading < 1.0; # might have been given as fractional
3733              
3734 0           my $font = $pdf->get_font('face' => 'current'); # font object realized
3735             # now it's loaded, if it wasn't already
3736 0           my $ascender = $font->ascender()/1000*$font_size; # positive
3737 0           my $descender = $font->descender()/1000*$font_size; # negative
3738              
3739             # ascender is positive, descender is negative (above/below baseline)
3740 0           return ($ascender, $descender, $descender-($leading-1.0)*$font_size);
3741             } # end of _get_fv_extents()
3742              
3743             # returns a list (array) of x,y coordinates outlining the column defined
3744             # by various options entries. currently only 'rect' is used, to define a
3745             # rectangular outline.
3746             # $grfx is graphics context, non-dummy if 'outline' option given (draw outline)
3747             #
3748             # TBD: what to do if any line too short to use?
3749              
3750             sub _get_column_outline {
3751 0     0     my ($grfx, $draw_outline, %opts) = @_;
3752              
3753 0           my @outline = ();
3754             # currently only 'rect' supported. TBD: path
3755 0 0         if (!defined $opts{'rect'}) {
3756 0           croak "column: no outline of column area defined";
3757             }
3758              
3759             # treat coordinates as absolute, unless 'relative' option given
3760 0           my $off_x = 0;
3761 0           my $off_y = 0;
3762 0           my $scale_x = 1;
3763 0           my $scale_y = 1;
3764 0 0         if (defined $opts{'relative'}) {
3765 0           my @relative = @{ $opts{'relative'} };
  0            
3766 0 0 0       croak "column: invalid number of elements in 'relative' list"
3767             if (@relative < 2 || @relative > 4);
3768              
3769 0           $off_x = $relative[0];
3770 0           $off_y = $relative[1];
3771             # @relative == 2 use default 1 1 scale factors
3772 0 0         if (@relative == 3) { # same scale for x and y
3773 0           $scale_x = $scale_y = $relative[2];
3774             }
3775 0 0         if (@relative == 4) { # different scales for x and y
3776 0           $scale_x = $relative[2];
3777 0           $scale_y = $relative[3];
3778             }
3779             }
3780              
3781 0           my @rect = @{$opts{'rect'}}; # if using 'rect' option
  0            
3782 0           push @outline, [$rect[0], $rect[1]]; # UL corner = x,y
3783             # TBD: check x,y reasonable, w,h reasonable
3784 0           push @outline, [$rect[0]+$rect[2], $rect[1]]; # UR corner + width
3785 0           push @outline, [$rect[0]+$rect[2], $rect[1]-$rect[3]]; # LR corner - height
3786 0           push @outline, [$rect[0], $rect[1]-$rect[3]]; # LL corner - width
3787 0           push @outline, [$rect[0], $rect[1]]; # back to UL corner
3788              
3789             # TBD: 'path' option
3790              
3791             # treat coordinates as absolute or relative
3792 0           for (my $i = 0; $i < scalar @outline; $i++) {
3793 0           $outline[$i][0] = $outline[$i][0]*$scale_x + $off_x;
3794 0           $outline[$i][1] = $outline[$i][1]*$scale_y + $off_y;
3795             }
3796              
3797             # TBD body level background-color fill in outline INK HERE
3798             # if $has_grfx can proceed
3799             # use _change_properties _fcolor background-color
3800              
3801             # requested to draw outline (color other than 'none')? INK HERE
3802 0 0 0       if ($draw_outline ne 'none' && defined $grfx && ref($grfx) =~ m/^PDF::Builder::Content/) {
      0        
3803 0           $grfx->strokecolor($draw_outline);
3804 0           $grfx->linewidth(0.5);
3805             # only rect currently supported
3806 0           my @flat = ();
3807 0           for (my $i = 0; $i < scalar @outline; $i++) {
3808 0           push @flat, $outline[$i][0];
3809 0           push @flat, $outline[$i][1];
3810             }
3811 0           $grfx->poly(@flat);
3812 0           $grfx->stroke();
3813             }
3814              
3815 0           return @outline;
3816             } # end of _get_column_outline()
3817              
3818             sub _get_col_extents {
3819 0     0     my (@outline) = @_;
3820 0           my ($minx, $miny, $maxx, $maxy);
3821              
3822             # for rect, all pairs are x,y. once introduce splines/arcs, need more
3823 0           for (my $i = 0; $i < scalar @outline; $i++) {
3824 0 0         if ($i == 0) {
3825 0           $minx = $maxx = $outline[$i][0];
3826 0           $miny = $maxy = $outline[$i][1];
3827             } else {
3828 0           $minx = min($minx, $outline[$i][0]);
3829 0           $miny = min($miny, $outline[$i][1]);
3830 0           $maxx = max($maxx, $outline[$i][0]);
3831 0           $maxy = max($maxy, $outline[$i][1]);
3832             }
3833             }
3834              
3835 0           return ($minx, $miny, $maxx, $maxy);
3836             } # end of _get_col_extents()
3837              
3838             # get the next baseline from column outline @outline
3839             # the first argument is the y value of the baseline
3840             # we've already checked that there is room in this column, so y is good
3841             # returns on-page x,y, and width of baseline
3842             # currently expect outline to be UL UR LR LL UL coordinates.
3843             # TBD: arbitrary shape with line at start_y clipped by outline (if multiple
3844             # lines result, pick longest or first one)
3845             sub _get_baseline {
3846 0     0     my ($start_y, @outline) = @_;
3847              
3848 0           my ($x,$y, $width);
3849 0           $x = $outline[0][0];
3850 0           $y = $start_y;
3851 0           $width = $outline[1][0] - $x;
3852              
3853             # note that this is the baseline, so it is possible that some
3854             # descenders may exceed the limit, in a non-rectangular outline!
3855              
3856 0           return ($x,$y, $width);
3857             } # end of _get_baseline()
3858              
3859             # returns array of hashes with prepared text. input could be
3860             # 'pre' markup: must be an array (list) of hashes, returned unchanged.
3861             # 'none' markup: empty lines separate paragraphs, array of texts permitted,
3862             # paragraphs may not span array elements.
3863             # 'md1' markup: empty lines separate paragraphs, array of texts permitted,
3864             # paragraphs may span array elements, content is converted to HTML
3865             # per Text::Markdown, one array element at a time.
3866             # 'md2' markup: similar to md1, but using Text::MultiMarkdown TBD
3867             # 'html' markup: single text string OR array of texts permitted (consolidated
3868             # into one text), containing HTML markup.
3869             #
3870             # each element is a hash containing the text and all attributes (HTML or MD
3871             # has been processed).
3872              
3873             sub _break_text {
3874 0     0     my ($text, $markup, %opts) = @_;
3875 0           my $page_numbers = 0;
3876 0 0         $page_numbers = $opts{'page_numbers'} if defined $opts{'page_numbers'};
3877              
3878 0           my @array = ();
3879              
3880 0 0         if ($markup eq 'pre') {
    0          
    0          
3881             # should already be in final format (such as continuing a column)
3882 0           return @$text;
3883              
3884             } elsif ($markup eq 'none') {
3885             # split up on blank lines into paragraphs and wrap with p and /p tags
3886 0 0         if (ref($text) eq '') {
    0          
3887             # is a single string (scalar)
3888 0           @array = _none_hash($text, %opts);
3889              
3890             } elsif (ref($text) eq 'ARRAY') {
3891             # array ref, elements should be text
3892 0           for (my $i = 0; $i < scalar(@$text); $i++) {
3893 0           @array = (@array, _none_hash($text->[$i], %opts));
3894             }
3895             }
3896              
3897             # dummy style element at array element [0]
3898 0           my $style;
3899 0           $style->{'tag'} = 'style';
3900 0           $style->{'text'} = '';
3901 0           unshift @array, $style;
3902              
3903             } elsif ($markup eq 'md1') {
3904             # process into HTML, then feed to HTML processing to make hash
3905             # note that blank-separated lines already turned into paragraphs
3906 0 0         if (ref($text) eq '') {
    0          
3907             # is a single string (scalar)
3908 0           @array = _md1_hash($text, %opts);
3909              
3910             } elsif (ref($text) eq 'ARRAY') {
3911             # array ref, elements should be text
3912 0           @array = _md1_hash(join("\n", @$text), %opts);
3913             }
3914              
3915             # ### no MultiMarkdown until br, code, pre tags supported
3916             # ### update Column.pl sample, README.md, Column_doc.pm
3917             # ### update TextMultiMarkdown min version in build routines
3918             # } elsif ($markup eq 'md2') {
3919             # # process into HTML, then feed to HTML processing to make hash
3920             # # note that blank-separated lines already turned into paragraphs
3921             # if (ref($text) eq '') {
3922             # # is a single string (scalar)
3923             # @array = _md2_hash($text, %opts);
3924             #
3925             # } elsif (ref($text) eq 'ARRAY') {
3926             # # array ref, elements should be text
3927             # @array = _md2_hash(join("\n", @$text), %opts);
3928             # }
3929              
3930             } else { # should be 'html'
3931 0 0         if (ref($text) eq '') {
    0          
3932             # is a single string (scalar)
3933 0           @array = _html_hash($page_numbers, $text, %opts);
3934            
3935             } elsif (ref($text) eq 'ARRAY') {
3936             # array ref, elements should be text
3937             # consolidate into one string.
3938 0           @array = _html_hash($page_numbers, join("\n", @$text), %opts);
3939             }
3940             }
3941              
3942 0           return @array;
3943             } # end of _break_text()
3944              
3945             # convert unformatted string to array of hashes, splitting up on blank lines.
3946             # return with only markup as paragraphs
3947             # note that you can NOT span a paragraph across array elements
3948             sub _none_hash {
3949 0     0     my ($text, %opts) = @_;
3950              
3951 0           my @array = ();
3952 0           my $in_para = 0;
3953 0           my $line = '';
3954 0           chomp($text); # don't want empty last element due to trailing \n
3955 0           foreach (split /\n/, $text) {
3956             # should be no \n's, but adjacent non-empty lines need to be joined
3957 0 0         if ($_ =~ /^\s*$/) {
3958             # empty/blank line. end paragraph if one in progress
3959 0 0         if ($in_para) {
3960 0           push @array, {'tag' => '', 'text' => $line};
3961 0           push @array, {'text' => "", 'tag' => '/p'};
3962 0           $in_para = 0;
3963 0           $line = '';
3964             }
3965             # not in a paragraph, just ignore this entry
3966              
3967             } else {
3968             # content in this line. start paragraph if necessary
3969 0 0         if ($in_para) {
3970             # accumulate content into line
3971 0           $line .= " $_";
3972             } else {
3973             # start paragraph, content starts with this text
3974 0           push @array, {'text' => "", 'tag' => 'p'};
3975 0           $in_para = 1;
3976 0           $line = $_;
3977             }
3978             }
3979              
3980             } # end of loop through line(s) in paragraph
3981            
3982             # out of input.
3983             # if still within a paragraph, need to properly close it
3984 0 0         if ($in_para) {
3985 0           push @array, {'tag' => '', 'text' => $line};
3986 0           push @array, {'text' => "", 'tag' => '/p'};
3987 0           $in_para = 0;
3988 0           $line = '';
3989             }
3990            
3991 0           return @array;
3992             } # end of _none_hash()
3993              
3994             # convert md1 string to html, returning array of hashes
3995             # TBD `content` wraps in <code> (OK), but fenced ``` wraps in <p><code> ?!
3996             # may need to preprocess ``` to wrap in <pre> or postprocess add <pre>
3997             # <p><code> -> <p><pre><code>
3998             sub _md1_hash {
3999 0     0     my ($text, %opts) = @_;
4000 0           my $page_numbers = 0;
4001 0 0         $page_numbers = $opts{'page_numbers'} if defined $opts{'page_numbers'};
4002              
4003 0           my @array;
4004 0           my ($html, $rc);
4005 0           $rc = eval {
4006 0           require Text::Markdown;
4007 0           1;
4008             };
4009 0 0         if (!defined $rc) { $rc = 0; } # else is 1
  0            
4010 0 0         if ($rc) {
4011             # installed, but not up to date?
4012 0 0         if (version->parse("v$Text::Markdown::VERSION")->numify() <
4013 0           version->parse("v$TextMarkdown")->numify()) { $rc = 0; }
4014             }
4015              
4016 0 0         if ($rc) {
4017             # MD converter appears to be installed, so use it
4018 0           $html = Text::Markdown::markdown($text);
4019             } else {
4020             # leave as MD, will cause a chain of problems
4021 0           warn "Text::Markdown not installed, can't process Markdown";
4022 0           $html = $text;
4023             }
4024              
4025             # need to fix something in Text::Markdown -- custom HTML tags are
4026             # disabled by changing < to &lt;. change them back!
4027 0           $html =~ s/&lt;_ref /<_ref /g;
4028 0           $html =~ s/&lt;_reft /<_reft /g;
4029 0           $html =~ s/&lt;_nameddest /<_nameddest /g;
4030 0           $html =~ s/&lt;_sl /<_sl /g;
4031 0           $html =~ s/&lt;_move /<_move /g;
4032 0           $html =~ s/&lt;_marker /<_marker /g;
4033             # probably could just do it with s/&lt;_/<_/ but the list is short
4034            
4035             # blank lines within a list tend to create paragraphs in list items
4036 0           $html =~ s/<li><p>/<li>/g;
4037 0           $html =~ s#</p></li>#</li>#g;
4038              
4039             # standard Markdown ~~ line-through (strike-out) not recognized
4040 0           my $did_one = 1;
4041 0           while ($did_one) {
4042 0           $did_one = 0;
4043 0 0         if ($html =~ s#~~([^~])#<del>$1#) {
4044             # just one at a time. replace ~~ by <del>
4045 0           $did_one = 1;
4046             }
4047             # should be another, replace ~~ by </del>
4048 0           $html =~ s#~~([^~])#</del>$1#;
4049             }
4050              
4051             # standard Markdown === by itself not recognized as a horizontal rule
4052 0           $html =~ s#<p>===</p>#<hr>#g;
4053              
4054             # dummy (or real) style element will be inserted at array element [0]
4055             # by _html_hash()
4056              
4057             # blank-line separated paragraphs already wrapped in <p> </p>
4058 0           @array = _html_hash($page_numbers, $html, %opts);
4059              
4060 0           return @array;
4061             } # end of _md1_hash()
4062              
4063             # convert md2 string to html, returning array of hashes
4064             #sub _md2_hash {
4065             # my ($text, %opts) = @_;
4066             # my $page_numbers = 0;
4067             # $page_numbers = $opts{'page_numbers'} if defined $opts{'page_numbers'};
4068             #
4069             # my @array;
4070             # my ($html, $rc);
4071             # $rc = eval {
4072             # require Text::MultiMarkdown;
4073             # 1;
4074             # };
4075             # if (!defined $rc) { $rc = 0; } # else is 1
4076             # if ($rc) {
4077             # # installed, but not up to date?
4078             # if (version->parse("v$Text::MultiMarkdown::VERSION")->numify() <
4079             # version->parse("v$TextMultiMarkdown")->numify()) { $rc = 0; }
4080             # }
4081             #
4082             # my $heading_ids = 0; # default no automatic id generation for hX
4083             # if (defined $opts{'heading_ids'}) { $heading_ids = $opts{'heading_ids'}; }
4084             #
4085             # if ($rc) {
4086             # # MD converter appears to be installed, so use it
4087             # $html = Text::MultiMarkdown->new(
4088             # 'heading_ids' => $heading_ids,
4089             # 'img_ids' => 0,
4090             # 'empty_element_suffix' => '>',
4091             # )->markdown($text);
4092             # } else {
4093             # # leave as MD, will cause a chain of problems
4094             # warn "Text::MultiMarkdown not installed, can't process Markdown";
4095             # $html = $text;
4096             # }
4097             #
4098             # # need to fix something in Text::Markdown -- custom HTML tags are
4099             # # disabled by changing < to &lt;. change them back!
4100             # $html =~ s/&lt;_ref /<_ref /g;
4101             # $html =~ s/&lt;_reft /<_reft /g;
4102             # $html =~ s/&lt;_nameddest /<_nameddest /g;
4103             # $html =~ s/&lt;_sl /<_sl /g;
4104             # $html =~ s/&lt;_move /<_move /g;
4105             # $html =~ s/&lt;_marker /<_marker /g;
4106             # # probably could just do it with s/&lt;_/<_/ but the list is short
4107             #
4108             # # blank lines within a list tend to create paragraphs in list items
4109             # $html =~ s/<li><p>/<li>/g;
4110             # $html =~ s#</p></li>#</li>#g;
4111             #
4112             # # standard Markdown ~~ line-through (strike-out) not recognized
4113             # my $did_one = 1;
4114             # while ($did_one) {
4115             # $did_one = 0;
4116             # if ($html =~ s#~~([^~])#<del>$1#) {
4117             # # just one at a time. replace ~~ by <del>
4118             # $did_one = 1;
4119             # }
4120             # # should be another, replace ~~ by </del>
4121             # $html =~ s#~~([^~])#</del>$1#;
4122             # }
4123             #
4124             # # standard Markdown === by itself not recognized as a horizontal rule
4125             # $html =~ s#<p>===</p>#<hr>#g;
4126             #
4127             # # dummy (or real) style element will be inserted at array element [0]
4128             # # by _html_hash()
4129             #
4130             # # blank-line separated paragraphs already wrapped in <p> </p>
4131             # @array = _html_hash($page_numbers, $html, %opts);
4132             #
4133             # return @array;
4134             #} # end of _md2_hash()
4135              
4136             # convert html string to array of hashes. this is for both 'html' markup and
4137             # the final step of 'md1' or 'md2' markup.
4138             # returns array (list) of tags and text, and as a side effect, element [0] is
4139             # consolidated <style> tags (may be empty hash)
4140             sub _html_hash {
4141 0     0     my ($page_numbers, $text, %opts) = @_;
4142              
4143 0           my $style = {}; # <style> hashref to return
4144 0           my @array; # array of body tags and text to return
4145             my ($rc);
4146              
4147             # process 'substitute' stuff here. %opts needs to be passed in!
4148 0 0         if (defined $opts{'substitute'}) {
4149             # array of substitutions to make
4150 0           foreach my $macro_list (@{$opts{'substitute'}}) {
  0            
4151             # 4 element array: macro name (including any delimiters, such as ||)
4152             # HTML code to insert before the macro
4153             # anything to replace the macro name (could be the
4154             # macro name itself if you want it unchanged)
4155             # HTML code to insert after the macro
4156             # $text is updated, perhaps multiple times
4157             # $macro_list is anonymous array [ macro, before, rep, after ]
4158 0           my $macro = $macro_list->[0];
4159 0           my $sub = $macro_list->[1].$macro_list->[2].$macro_list->[3];
4160 0           $text =~ s#$macro#$sub#g;
4161             }
4162             }
4163              
4164 0           $rc = eval {
4165 0           require HTML::TreeBuilder;
4166 0           1;
4167             };
4168 0 0         if (!defined $rc) { $rc = 0; } # else is 1
  0            
4169 0 0         if ($rc) {
4170             # installed, but not up to date?
4171 0 0         if (version->parse("v$HTML::TreeBuilder::VERSION")->numify() <
4172 0           version->parse("v$HTMLTreeBldr")->numify()) { $rc = 0; }
4173             }
4174              
4175 0 0         if ($rc) {
4176             # HTML converter appears to be installed, so use it
4177 0           $HTML::Tagset::isList{'_sl'} = 1; # add new list parent
4178 0           push @HTML::Tagset::p_closure_barriers, '_sl';
4179 0           $HTML::Tagset::emptyElement{'_reft'} = 1; # don't add closing tag
4180 0           $HTML::Tagset::emptyElement{'_nameddest'} = 1; # don't add closing tag
4181 0           $HTML::Tagset::isPhraseMarkup{'_ref'} = 1;
4182 0           $HTML::Tagset::isPhraseMarkup{'_reft'} = 1;
4183 0           $HTML::Tagset::isPhraseMarkup{'_nameddest'} = 1;
4184 0           my $tree = HTML::TreeBuilder->new();
4185 0           $tree->ignore_unknown(0); # don't discard non-HTML recognized tags
4186 0           $tree->no_space_compacting(1); # preserve spaces
4187 0           $tree->warn(1); # warn if syntax error found
4188 0           $tree->p_strict(1); # auto-close paragraph on new block element
4189 0           $tree->implicit_body_p_tag(1); # loose text gets wrapped in <p>
4190 0           $tree->parse_content($text);
4191            
4192             # see if there is a <head>, and if so, if any <style> tags within it
4193 0           my $head = $tree->{'_head'}; # a hash
4194 0 0 0       if (defined $head and defined $head->{'_content'}) {
4195 0           my @headList = @{ $head->{'_content'} }; # array of strings and tags
  0            
4196 0           @array = _walkTree(0, @headList);
4197             # pull out one or more style tags and build $styles hash
4198 0           for (my $el = 0; $el < @array; $el++) {
4199 0           my $style_text = $array[$el]->{'text'};
4200 0 0         if ($style_text ne '') {
4201             # possible style content. style tag immediately before?
4202 0 0 0       if (defined $array[$el-1]->{'tag'} &&
4203             $array[$el-1]->{'tag'} eq 'style') {
4204 0           $style = _process_style_tag($style, $style_text);
4205             }
4206             }
4207             }
4208             } # $style is either empty hash or has style content
4209            
4210             # there should always be a body of some sort
4211 0           my $body = $tree->{'_body'}; # a hash
4212 0           my @bodyList = @{ $body->{'_content'} }; # array of strings and tags
  0            
4213 0           @array = _walkTree(0, @bodyList);
4214             # pull out one or more style tags and add to $styles hash
4215 0           for (my $el = 0; $el < @array; $el++) {
4216 0           my $style_text = $array[$el]->{'text'};
4217 0 0         if ($style_text ne '') {
4218             # possible style content. style tag immediately before?
4219 0 0 0       if (defined $array[$el-1]->{'tag'} &&
4220             $array[$el-1]->{'tag'} eq 'style') {
4221 0           $style = _process_style_tag($style, $style_text);
4222             # remove <style> from body (array list)
4223 0           splice(@array, $el-1, 3);
4224             }
4225             }
4226             } # $style is either empty hash or has style content
4227             } else {
4228             # leave as original HTML, will cause a chain of problems
4229 0           warn "HTML::TreeBuilder not installed, can't process HTML";
4230 0           push @array, {'tag' => '', 'text' => $text};
4231             }
4232              
4233             # does call include a style initialization (opt in column() call)?
4234             # merge into any consolidated <style> tags for user styling in [1]
4235 0 0         if (defined $opts{'style'}) {
4236             # $style could be empty hash ptr at this point
4237 0           $style = _process_style_tag($style, $opts{'style'});
4238             }
4239              
4240             # always first element tag=style containing the hash, even if it's empty
4241             # array[0] is default CSS, array[1] is consolidated <style> tags
4242 0           $style->{'tag'} = 'style';
4243 0           $style->{'text'} = '';
4244 0           unshift @array, $style; # [0] default CSS added later
4245            
4246             # HTML::TreeBuilder does some undesirable things with custom tags
4247             # it doesn't understand. clean them up.
4248 0           @array = _HTB_cleanup($page_numbers, $opts{'debug'}, @array);
4249              
4250 0           return @array;
4251             } # end of _html_hash()
4252              
4253             # clean up some things HTML::TreeBuilder does when it sees unknown tag.
4254             # this is done at creation of the tag/content array, so no need to worry
4255             # about 'pre' input format and the like.
4256             sub _HTB_cleanup {
4257 0     0     my ($page_numbers, $debug, @mytext) = @_;
4258              
4259 0           my @current_list = ('empty');
4260              
4261             # loop through all elements, looking for specific patterns
4262             # start at [1], so defaults and styles skipped
4263 0           for (my $el=1; $el < @mytext; $el++) {
4264 0 0         if (ref($mytext[$el]) ne 'HASH') { next; }
  0            
4265 0 0         if ($mytext[$el]->{'tag'} eq '') { next; }
  0            
4266              
4267 0           my $tag = lc($mytext[$el]->{'tag'});
4268 0           $mytext[$el]->{'tag'} = $tag; # lc the tag
4269 0 0         if (!defined $tag) { next; }
  0            
4270             #if ($tag =~ m#^/#) { next; } # ignore end tags?
4271              
4272 0 0 0       if ($tag eq 'li') {
    0 0        
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
4273             # dealing with <_marker> is a special case, driven by need to
4274             # ensure that all <li> tags have a <_marker>[text]</_marker>
4275             # just before them, and is not a shortcoming of HTML::TreeBuilder
4276             #
4277             # if user did not explicitly give a <_marker> just before <li>,
4278             # insert one to "even up" with any in the source.
4279             # $el element ($tag) s/b at 'li' at this point
4280             # MUST check if HTML::TreeBuilder (or user) added their own
4281             # /_marker tag! and whether explicit text given!
4282             #
4283             # 1. <_marker><li> add text='' and </_marker>
4284             # 2. <_marker></_marker><li> add text='' in between
4285             # 3. <_marker>text</_marker><li> no change (text may be '')
4286             # use this user-provided marker text; do not replace
4287             # 4. <li> add <_marker>text=''</_marker>
4288             #
4289             # Note that HTML::TreeBuilder seems to already track that a list
4290             # (ul) or (ol) is the parent of a li
4291 0 0         if ($mytext[$el-1]->{'tag'} eq '/_marker') {
    0          
4292             # case 2 or 3, assume there is <_marker> tag
4293 0 0         if ($mytext[$el-2]->{'tag'} eq '') {
4294             # case 3, no change to make unless current parent is _sl
4295             # AND text is not ''
4296 0 0         if ($current_list[-1] eq 's') {
4297 0           $mytext[$el-2]->{'text'} = '';
4298             }
4299             } else {
4300             # case 2, add empty text tag between
4301 0           splice(@mytext, $el-1, 0, {'tag'=>'', 'text'=>''});
4302 0           $el++;
4303             }
4304             } elsif ($mytext[$el-1]->{'tag'} eq '_marker') {
4305             # case 1
4306 0           splice(@mytext, $el++, 0, {'tag'=>'', 'text'=>''});
4307 0           splice(@mytext, $el++, 0, {'tag'=>'/_marker', 'text'=>''});
4308             } else {
4309             # case 4
4310             # we haven't added or expanded a <_marker> here yet
4311 0           splice(@mytext, $el++, 0, {'tag'=>'_marker', 'text'=>''});
4312 0           splice(@mytext, $el++, 0, {'tag'=>'', 'text'=>''});
4313 0           splice(@mytext, $el++, 0, {'tag'=>'/_marker', 'text'=>''});
4314             }
4315             # $el should still point to <li> element, which should now have
4316             # three elements in front of it: <_marker>(empty)</_marker>
4317             # for ul, ol if user gives marker with explicit text, don't replace
4318             # for _sl, text should be '', and marker is mostly ignored
4319            
4320             # if user added a non-'' _marker text for _sl, need to remove
4321             } elsif ($tag eq 'ul') {
4322 0           push @current_list, 'u';
4323             } elsif ($tag eq 'ol') {
4324 0           push @current_list, 'o';
4325             } elsif ($tag eq '_sl') {
4326 0           push @current_list, 's';
4327             } elsif ($tag eq '/_sl' || $tag eq '/ol' || $tag eq '/ul') {
4328 0           pop @current_list;
4329              
4330             # already added _sl to list of allowed list parents
4331              
4332             } elsif ($tag eq '_ref') {
4333             # should be followed by empty text and then /_ref tag,
4334             # add if either missing. fill in text content with any title=
4335             # attribute in _ref
4336             # tgtid= is mandatory
4337 0 0         if (!defined $mytext[$el]->{'tgtid'}) {
4338 0           carp "Warning! No 'tgtid' defined for a <_ref> tag, no link.";
4339 0           $mytext[$el]->{'tgtid'} = '';
4340             }
4341             # if tgtid is '#', check if following content is ^\d+-?. if
4342             # not, remove # (is a regular id)
4343 0           my $tgtid = $mytext[$el]->{'tgtid'};
4344 0 0         if ($tgtid =~ m/^#[^#]/) {
4345             # starts with a single '#'
4346 0 0         if ($tgtid =~ m/^#\d+-?/) {
4347             # it's a physical page number link, leave it alone
4348             } else {
4349             # it's #id, so strip off leading #
4350 0           $mytext[$el]->{'tgtid'} = substr($tgtid, 1);
4351             }
4352             }
4353              
4354 0   0       my $text = $mytext[$el]->{'title'} // '[no title given]';
4355 0           $text =~ s/\n/ /sg; # any embedded line ends turn to spaces
4356             # most likely, the /_ref has been put AFTER the following text,
4357             # resulting in el=_ref, el+1=random text, >el+1=/_ref
4358             # >el+1 loose end tag will be deleted
4359 0 0 0       if ($mytext[$el+1]->{'tag'} eq '/_ref') {
    0 0        
    0          
4360             # <_ref></_ref> insert child text with title
4361 0           splice(@mytext, ++$el, 0, {'tag'=>'', 'text'=>$text});
4362 0           $el++;
4363             } elsif ($mytext[$el+1]->{'tag'} eq '' &&
4364             $mytext[$el+1]->{'text'} ne '') {
4365             # # <_ref><other text></_ref> insert text=$text and /_ref
4366             # # giving <_ref><title text></_ref><other text>
4367             # splice(@mytext, ++$el, 0, {'tag'=>'', 'text'=>$text});
4368             # splice(@mytext, ++$el, 0, {'tag'=>'/_ref', 'text'=>''});
4369             # # superfluous /_ref will be deleted
4370 0           $el+=2;
4371             } elsif ($mytext[$el+1]->{'tag'} eq '' &&
4372             $mytext[$el+1]->{'text'} eq '') {
4373             # <_ref><empty text></_ref> update text with title text
4374 0           $mytext[++$el]->{'text'} = $text;
4375             # is following /_ref missing?
4376 0 0         if ($mytext[++$el]->{'tag'} ne '/_ref') {
4377 0           splice(@mytext, $el, 0, {'tag'=>'/_ref', 'text'=>''});
4378             }
4379             } else {
4380             # just <_ref>. add text and end tag
4381 0           splice(@mytext, ++$el, 0, {'tag'=>'', 'text'=>$text});
4382 0           splice(@mytext, ++$el, 0, {'tag'=>'/_ref', 'text'=>''});
4383             }
4384             # $el should be pointing to /_ref tag
4385 0 0 0       if ($page_numbers != 0 &&
4386             $mytext[$el]->{'tgtid'} !~ /#[^#]/) {
4387             # insert a <text> after </_ref> to hold " on page $",
4388             # " on facing page", etc. TBD page&nbsp;$
4389             # do NOT insert for Named Destination (single # in tgtid)
4390 0           splice(@mytext, ++$el, 0, {'tag'=>'', 'text'=>" on page \$"});
4391             }
4392             } elsif ($tag eq '/_ref') {
4393             # TreeBuilder often puts end tag after wrong text
4394             #splice(@mytext, $el--, 1);
4395              
4396             } elsif ($tag eq '_reft') {
4397             # leave title in place for <_reft>, but delete any text and </_reft>
4398 0 0 0       if ($mytext[$el+1]->{'tag'} eq '' &&
    0          
4399             $mytext[$el+2]->{'tag'} eq '/_reft') {
4400 0           splice(@mytext, $el+1, 2);
4401             } elsif ($mytext[$el+1]->{'tag'} eq '/_reft') {
4402 0           splice(@mytext, $el+1, 1);
4403             }
4404             } elsif ($tag eq '/_reft') {
4405             # TreeBuilder often puts end tag after wrong text
4406 0           splice(@mytext, $el--, 1);
4407              
4408             } elsif ($tag eq '_nameddest') {
4409             # delete any text and </_nameddest>
4410 0 0 0       if ($mytext[$el+1]->{'tag'} eq '' &&
    0          
4411             $mytext[$el+2]->{'tag'} eq '/_nameddest') {
4412 0           splice(@mytext, $el+1, 2);
4413             } elsif ($mytext[$el+1]->{'tag'} eq '/_nameddest') {
4414 0           splice(@mytext, $el+1, 1);
4415             }
4416 0 0 0       if (defined $debug && $debug == 1) {
4417             # insert tags to write a blue | bar at beginning of text
4418             # $el should point to _nameddest tag itself
4419 0           splice(@mytext, $el++, 0, {'tag'=>'span', 'text'=>'',
4420             'style'=>'color: #0000FF; font-weight: bold;'});
4421 0           splice(@mytext, $el++, 0, {'tag'=>'', 'text'=>'|'});
4422 0           splice(@mytext, $el++, 0, {'tag'=>'/span', 'text'=>''});
4423             # still pointing at _nameddest tag
4424             }
4425             } elsif ($tag eq '/_nameddest') {
4426             # TreeBuilder often puts end tag after wrong text
4427 0           splice(@mytext, $el--, 1);
4428              
4429             } elsif ($tag eq '/_move') {
4430             # TreeBuilder often puts end tag after wrong text
4431 0           splice(@mytext, $el--, 1);
4432              
4433             } elsif ($tag eq 'a') {
4434             # if a URL, leave as-is. otherwise convert a /a to _ref /_ref
4435 0 0         if ($mytext[$el]->{'href'} =~ m#^[a-z0-9]+://#i) {
4436             # protocol:// likely a URL
4437             } else {
4438             # xref link: convert tag
4439             # 1. a tag convert to _ref
4440 0           $mytext[$el]->{'tag'} = '_ref';
4441             # 1a. need to check if <a href></a> need to insert text?
4442 0 0         if ($mytext[$el+1]->{'tag'} ne '') {
4443             # yep, missing child text
4444 0           splice(@mytext, $el+1, 0, { 'tag'=>'', 'text'=>'' });
4445             }
4446              
4447             # 2. /a tag convert to /_ref (next /a seen, does not nest)
4448 0           for (my $i=$el+1; $i<@mytext; $i++) {
4449 0 0         if ($mytext[$i]->{'tag'} eq '/a') {
4450 0           $mytext[$i]->{'tag'} = '/_ref';
4451 0           last;
4452             }
4453             }
4454              
4455             # 3. href -> tgtid attribute
4456 0           $mytext[$el]->{'tgtid'} = delete $mytext[$el]->{'href'};
4457              
4458             # 4. child text -> title, id, fit attributes
4459             # NOTE: any markup tags get removed, is plain text
4460 0           my $newtitle = _get_special_info(\@mytext, $el, '{^', '}');
4461 0           my $newfit = _get_special_info(\@mytext, $el, '{%', '}');
4462 0           my $newid = _get_special_info(\@mytext, $el, '{#', '}');
4463              
4464 0 0         if ($newtitle eq '') {
4465 0           $newtitle = _get_child_text(\@mytext, $el);
4466             }
4467 0 0         if (!defined $mytext[$el]->{'title'}) {
4468 0           $mytext[$el]->{'title'} = $newtitle;
4469             }
4470             # is child (title) text still empty after all this?
4471 0 0         if ($mytext[$el+1]->{'text'} eq '') {
4472 0           $mytext[$el+1]->{'text'} = $mytext[$el]->{'title'};
4473             }
4474              
4475             # 5. fit info -> fit attribute (if none exists)
4476 0 0         if (defined $mytext[$el]->{'fit'}) {
4477             # already exists, so only remove inline stuff
4478             } else {
4479 0 0         if ($newfit ne '') {
4480 0           $mytext[$el]->{'fit'} = $newfit;
4481             }
4482             }
4483              
4484             # 6. id info -> id attribute (if none exists)
4485 0 0         if (defined $mytext[$el]->{'id'}) {
4486             # already exists, so only remove inline stuff
4487             } else {
4488 0 0         if ($newid ne '') {
4489 0           $mytext[$el]->{'id'} = $newid;
4490             }
4491             }
4492              
4493             # 7. child text is empty? replace by title text
4494 0 0 0       if ($mytext[$el+1]->{'text'} eq '' &&
4495             defined $mytext[$el]->{'title'}) {
4496 0           $mytext[$el+1]->{'text'} = $mytext[$el]->{'title'};
4497             }
4498             }
4499             }
4500              
4501             # any child text (incl. link title) with {#id}? pull out into id=
4502             # this is needed for Markdown (may define, for headings only). not
4503             # necessarily supported by Text::Markdown, or yet by Builder.
4504             # child text in: hX, a, span, p, li, i/em, b/strong, del, sub/sup, mark,
4505             # blockquote, dd/dd, code, pre, img (alt text or title text), th,td
4506 0 0 0       if ($mytext[$el]->{'tag'} eq 'h1' ||
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
4507             $mytext[$el]->{'tag'} eq 'h2' ||
4508             $mytext[$el]->{'tag'} eq 'h3' ||
4509             $mytext[$el]->{'tag'} eq 'h4' ||
4510             $mytext[$el]->{'tag'} eq 'h5' ||
4511             $mytext[$el]->{'tag'} eq 'h6' ||
4512             $mytext[$el]->{'tag'} eq 'a' ||
4513             $mytext[$el]->{'tag'} eq 'span' ||
4514             $mytext[$el]->{'tag'} eq 'p' ||
4515             $mytext[$el]->{'tag'} eq 'li' ||
4516             $mytext[$el]->{'tag'} eq 'i' ||
4517             $mytext[$el]->{'tag'} eq 'em' ||
4518             $mytext[$el]->{'tag'} eq 'b' ||
4519             $mytext[$el]->{'tag'} eq 'strong' ||
4520             $mytext[$el]->{'tag'} eq 'del' ||
4521             $mytext[$el]->{'tag'} eq 'sub' ||
4522             $mytext[$el]->{'tag'} eq 'sup' ||
4523             $mytext[$el]->{'tag'} eq 'mark' ||
4524             $mytext[$el]->{'tag'} eq 'blockquote' ||
4525             $mytext[$el]->{'tag'} eq 'dt' ||
4526             $mytext[$el]->{'tag'} eq 'dd' ||
4527             $mytext[$el]->{'tag'} eq 'code' ||
4528             $mytext[$el]->{'tag'} eq 'pre' ||
4529             $mytext[$el]->{'tag'} eq 'img' ||
4530             $mytext[$el]->{'tag'} eq 'th' ||
4531             $mytext[$el]->{'tag'} eq 'td') {
4532 0           my $newid = _get_special_info(\@mytext, $el, '{#', '}');
4533 0 0 0       if ($newid ne '' && !defined $mytext[$el]->{'id'}) {
4534             # do not replace existing id=
4535 0           $mytext[$el]->{'id'} = $newid;
4536             }
4537             }
4538            
4539             # if _get_special_info() was used to extract an id {#id}, title
4540             # {^title}, or fit {%fit}; it should have NOT left a blank child
4541             # text string, though it may be empty
4542            
4543             # if a tag has id=, assume it's a link target
4544             # insert tags to write a red | bar at beginning of link text
4545             # $el should point to tag itself
4546 0 0 0       if (defined $mytext[$el]->{'id'} && defined $debug && $debug == 1) {
      0        
4547 0           splice(@mytext, ++$el, 0, {'tag'=>'span', 'text'=>'',
4548             'style'=>'color: #FF0000; font-weight: bold;'});
4549 0           splice(@mytext, ++$el, 0, {'tag'=>'', 'text'=>'|'});
4550 0           splice(@mytext, ++$el, 0, {'tag'=>'/span', 'text'=>''});
4551             # still pointing at original tag
4552             }
4553             # 'next' to here
4554             } # for loop through all tags
4555              
4556 0           return @mytext;
4557             } # end of _HTB_cleanup()
4558              
4559             # given the text between <style> and </style>, and an existing $style
4560             # hashref, update $style and return it
4561             sub _process_style_tag {
4562 0     0     my ($style, $text) = @_;
4563              
4564             # expect sets of selector { property: value; ... }
4565             # break up into selector => { property => value, ... }
4566             # replace or add to existing $style
4567             # note that a selector may be a tagName, a .className, or an #idName
4568              
4569 0           $text =~ s/\n/ /sg; # replace end-of-lines with spaces
4570 0           while ($text ne '') {
4571 0           my $selector;
4572              
4573 0 0         if ($text =~ s/^\s+//) { # remove leading whitespace
4574 0 0         if ($text eq '') { last; }
  0            
4575             }
4576 0 0         if ($text =~ s/([^\s]+)//) { # extract selector
4577 0           $selector = $1;
4578             }
4579 0 0         if ($text =~ s/^\s*{//) { # remove whitespace up through {
4580 0 0         if ($text eq '') { last; }
  0            
4581             }
4582             # one or more property-name: value; sets (; might be missing on last)
4583             # go into %prop_val. we don't expect to see any } within a property
4584             # value string.
4585 0 0         if ($text =~ s/([^}]+)//) {
4586 0           $style->{$selector} = _process_style_string({}, $1);
4587             }
4588 0 0         if ($text =~ s/^}\s*//) { # remove closing } and whitespace
4589 0 0         if ($text eq '') { last; }
  0            
4590             }
4591            
4592             # 'next' to here
4593             } # end while loop
4594              
4595 0           return $style;
4596             } # end of _process_style_tag()
4597              
4598             # decompose a style string into property-value pairs. used for both <style>
4599             # tags and style= attributes.
4600             sub _process_style_string {
4601 0     0     my ($style, $text) = @_;
4602              
4603             # split up at ;'s. don't expect to see any ; within value strings
4604 0           my @sets = split /;/, $text;
4605             # split up at :'s. don't expect to see any : within value strings
4606 0           foreach (@sets) {
4607 0           my ($property_name, $value) = split /:/, $_;
4608 0 0 0       if (!defined $property_name || !defined $value) { last; }
  0            
4609             # trim off leading and trailing whitespace from both
4610 0           $property_name =~ s/^\s+//;
4611 0           $property_name =~ s/\s+$//;
4612 0           $value =~ s/^\s+//;
4613 0           $value =~ s/\s+$//;
4614             # trim off any single or double quotes around value string
4615 0 0         if ($value =~ s/^['"]//) {
4616 0           $value =~ s/['"]$//;
4617             }
4618              
4619 0           $style->{$property_name} = $value;
4620             }
4621              
4622 0           return $style;
4623             } # end of _process_style_string()
4624              
4625             # given a list of tags and content and attributes, return a list of hashes.
4626             # new array element at start, at each tag, and each _content.
4627             sub _walkTree {
4628 0     0     my ($depth, @bodyList) = @_;
4629 0           my ($tag, $element, $no_content);
4630              
4631 0           my $bLSize = scalar(@bodyList);
4632             # $depth not really used here, but might come in handy at some point
4633 0           my @array = ();
4634              
4635 0           for (my $elIdx=0; $elIdx<$bLSize; $elIdx++) {
4636 0           $element = $bodyList[$elIdx];
4637             # an element may be a simple text string, but most are hashes that
4638             # contain a _tag and _content array and any tag attributes. _tag and
4639             # any attributes go into an entry with text='', while any _content
4640             # goes into one entry with text='string' and usually no attributes.
4641             # if the _tag takes an end tag , it gets its own dummy entry.
4642            
4643 0 0         if ($element =~ m/^HTML::Element=HASH/) {
4644             # $element should be anonymous hash
4645 0           $tag = $element->{'_tag'};
4646 0           push @array, {'tag' => $tag, 'text' => ''};
4647              
4648             # look for attributes for tag, also see if no child content
4649 0           $no_content = 0; # has content (children) until proven otherwise
4650 0           my @tag_attr = keys %$element;
4651             # VOID elements (br, hr, img, area, base, col, embed, input,
4652             # link, meta, source, track, wbr) should NOT have /> to mark
4653             # as "self-closing", but it's harmless and much HTML code will
4654             # have them marked as "self-closing" even though it really
4655             # isn't! So be prepared to handle such dummy attributes, as
4656             # per RT 143038.
4657 0 0 0       if ($tag eq 'br' || $tag eq 'hr' ||
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
4658             $tag eq 'img' || $tag eq 'area' || $tag eq 'base' ||
4659             $tag eq 'col' || $tag eq 'embed' || $tag eq 'input' ||
4660             $tag eq 'link' || $tag eq 'meta' || $tag eq 'source' ||
4661             $tag eq 'track' || $tag eq 'wbr' ||
4662             $tag eq 'defaults' || $tag eq 'style') {
4663             # self-closing or VOID with unnecessary /, there is no
4664             # child data/elements for this tag. and, we can ignore
4665             # this 'attribute' of /.
4666             # defaults and style are specially treated as a VOID tags
4667 0           $no_content = 1;
4668             }
4669 0           foreach my $key (@tag_attr) {
4670             # has an (unnecessary) self-closing / ?
4671 0 0         if ($element->{$key} eq '/') { next; }
  0            
4672            
4673             # 'key' is more of an attribute within a tag (element)
4674 0 0         if ($key =~ m/^_/) { next; } # built-in attribute
  0            
4675             # this tag has one or more attributes to add to it
4676             # add tag attribute (e.g., src= for <img>) to hash
4677 0           $array[-1]->{$key} = $element->{$key};
4678             }
4679              
4680 0 0 0       if (!$no_content && defined $element->{'_content'}) {
4681 0           my @content = @{ $element->{'_content'} };
  0            
4682             # content array elements are either text segments or
4683             # tag subelements
4684 0           foreach (@content) {
4685 0 0         if ($_ =~ m/^HTML::Element=HASH/) {
4686             # HASH child of this _content
4687             # recursively handle a tag within _content
4688 0           @array = (@array, _walkTree($depth+1, $_));
4689             } else {
4690             # _content text, shouldn't be any attributes
4691 0           push @array, {'tag' => '', 'text' => $_};
4692             }
4693             }
4694             } else {
4695             # no content for this tag
4696             }
4697             # at end of a tag ... if has content, output end tag
4698 0 0         if (!$no_content) {
4699 0           push @array, {'tag' => "/$tag", 'text' => ''};
4700             }
4701              
4702 0           $no_content = 0;
4703              
4704             } else {
4705             # SCALAR (string) element
4706 0           push @array, {'tag' => '', 'text' => $element};
4707             }
4708              
4709             # 'next' to here
4710             } # loop through _content at this level ($elIdx)
4711              
4712 0           return @array;
4713             } # end of _walkTree()
4714              
4715             # convert a size (length) or font size into points
4716             # TBD another parm to indicate how to treat 'no unit' case?
4717             # currently assume points (CSS considers only bare 0 to be valid)
4718             # length = string (or pure number) of length in CSS units
4719             # if number, is returned as points
4720             # font_size = current font size (points) for use with em, en, ex, % units
4721             # option parent_size = parent dimension (points) to use for % instead of font size
4722             # option usage = label for what is being converted to points
4723             sub _size2pt {
4724 0     0     my ($length, $font_size, %opts) = @_;
4725             # length is requested size (or font size), possibly with a unit
4726             # if undefined, use '0'
4727 0 0         $length = '0' if !defined $length;
4728 0           $length = ''.$length; # ensure is a string (may be unitless number of points)
4729             # font_size is current_prop font-size (pts),
4730             # in case relative to font size (such as %). must be number > 0
4731 0           my $parent_size = $font_size;
4732 0 0         if (defined $opts{'parent_size'}) {
4733             # must be a number (points). this way, font size still available
4734             # for em, en, ex, but parent container size used for other things
4735 0           $parent_size = $opts{'parent_size'};
4736             }
4737 0           my $usage = 'unknown';
4738 0 0         $usage = $opts{'usage'} if defined $opts{'usage'};
4739              
4740 0           my $number = 0;
4741 0           my $unit = '';
4742             # split into number and unit
4743 0 0         if ($length =~ m/^(-?\d+\.?\d*)(.*)$/) {
    0          
4744 0           $number = $1; # [-] nnn.nn, nnn., or nnn format
4745 0           $unit = $2; # may be empty
4746             } elsif ($length =~ m/^(-?\.\d+)(.*)$/) {
4747 0           $number = $1; # [-] .nnn format
4748 0           $unit = $2; # may be empty
4749             } else {
4750 0           carp "Unable to find number in '$length', _size2pt returning 0";
4751 0           return 0;
4752             }
4753              
4754             # font_size should be in points (bare number)
4755 0 0 0       if ($unit eq '') {
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
4756             # if is already a pure number, just return it
4757             # except for 0, that's not legal CSS, is an extension
4758 0           return $number;
4759             } elsif ($unit eq 'pt') {
4760             # if the unit is 'pt', strip off the unit and return the number
4761 0           return $number;
4762             } elsif ($unit eq '%') {
4763             # if the unit is '%', strip off, /100, multiply by current parent/font size
4764 0           return $number/100 * $parent_size;
4765             } elsif ($unit eq 'em') {
4766             # 1 em = 100% font size
4767 0           return $font_size;
4768             } elsif ($unit eq 'en' || $unit eq 'ex') {
4769             # 1 en = 1 ex = 50% font size
4770             # TBD get true ex size from font information
4771 0           return $font_size/2;
4772             } elsif ($unit eq 'in') {
4773             # 1 inch = 72pt
4774 0           return 72*$number;
4775             } elsif ($unit eq 'cm') {
4776             # 1 cm = 28.35pt
4777 0           return 28.35*$number;
4778             } elsif ($unit eq 'mm') {
4779             # 1 cm = 2.835pt
4780 0           return 2.835*$number;
4781             } elsif ($unit eq 'px') {
4782             # assume 78px to the inch TBD actual value available anywhere?
4783 0           return 72/78*$number;
4784             } else {
4785 0           carp "Unknown unit '$unit' in '$length', _size2pt() assumes 'pt'";
4786 0           return $number;
4787             }
4788              
4789 0           return 0; # should not get to here
4790             } # end of _size2pt()
4791              
4792             # create ordered or unordered list item marker
4793             # for ordered, returns $prefix.formatted_value.$suffix.blank
4794             # for unordered, returns string .disc, .circle, .square, or .box
4795             # (.box is nonstandard marker)
4796             #
4797             # TBD for ol, there are many other formats: cjk-decimal, decimal-leading-zero,
4798             # lower-greek, upper-greek?, lower-latin = lower-alpha, upper-latin =
4799             # upper-alpha, arabic-indic, -moz-arabic-indic, armenian, [-moz-]bengali,
4800             # cambodian (khmer), [-moz-]cjk-earthly-branch, [-moz-]cjk-heavenly-stem,
4801             # cjk-ideographic, [-moz-]devanagari, ethiopi-numeric, georgian,
4802             # [-moz-]gujarati, [-moz-]gurmukhi, hebrew, hiragana, hiragana-iroha,
4803             # japanese-formal, japanese-informal, [-moz-]kannada, katakana,
4804             # katakana-iroha, korean-hangul-formal, korean-hanja-formal,
4805             # korean-hanja-informal, [-moz-]lao, lower-armenian, upper-armenian,
4806             # [-moz-]malayalam, mongolian, [-moz-]myanmar, [-moz-]oriya,
4807             # [-moz-]persian, simp-chinese-formal, simp-chinese-informal, [-moz-]tamil,
4808             # [-moz-]telugu, [-moz-]thai, tibetan, trad-chinese-formal,
4809             # trad-chinese-informal, disclosure-open, disclosure-closed
4810             # TBD for ol, some browser-specific formats: -moz-ethiopic-halehame,
4811             # -moz-ethiopic-halehame-am, [-moz-]ethiopic-halehame-ti-et, [-moz-]hangul,
4812             # [-moz-]hangul-consonant, [-moz-]urdu
4813             # TBD for ul, ability to select images and possibly other characters
4814             sub _marker {
4815 0     0     my ($type, $depth_u, $depth_o, $depth_s, $value, $prefix, $suffix) = @_;
4816             # type = list-style-type,
4817             # depth_u = 1, 2,... ul nesting level,
4818             # depth_o = 1, 2,... ol nesting level,
4819             # depth_s = 1, 2,... _sl nesting level,
4820             # (following for ordered list only):
4821             # value = counter (start)
4822             # prefix = text before formatted value
4823             # default ''
4824             # suffix = text after formatted value
4825             # default '.'
4826 0 0         if (!defined $suffix) { $suffix = '.'; }
  0            
4827 0 0         if (!defined $prefix) { $prefix = ''; }
  0            
4828              
4829 0           my $output = '';
4830             # CAUTION: <ol type= and <li type = will be aAiI1, not CSS property values!
4831 0 0         if ($type eq 'a') {
    0          
    0          
    0          
    0          
4832 0           $type = 'lower-alpha';
4833             } elsif ($type eq 'A') {
4834 0           $type = 'upper-alpha';
4835             } elsif ($type eq 'i') {
4836 0           $type = 'lower-roman';
4837             } elsif ($type eq 'I') {
4838 0           $type = 'upper-roman';
4839             } elsif ($type eq '1') {
4840 0           $type = 'decimal';
4841             }
4842              
4843             # ordered lists
4844 0 0 0       if ($type eq 'decimal' || $type eq 'arabic') {
    0 0        
    0 0        
    0          
    0          
    0          
    0          
    0          
    0          
    0          
4845 0           $output = "$prefix$value$suffix";
4846             } elsif ($type eq 'upper-roman' || $type eq 'lower-roman') {
4847             # TBD support overbar (1000x) for Roman numerals. what is exact format?
4848 0           while ($value >= 1000) { $output .= 'M'; $value -= 1000; }
  0            
  0            
4849 0 0         if ($value >= 900) { $output .= 'CM'; $value -= 900; }
  0            
  0            
4850 0 0         if ($value >= 500) { $output .= 'D'; $value -= 500; }
  0            
  0            
4851 0 0         if ($value >= 400) { $output .= 'CD'; $value -= 500; }
  0            
  0            
4852 0           while ($value >= 100) { $output .= 'C'; $value -= 100; }
  0            
  0            
4853 0 0         if ($value >= 90) { $output .= 'XC'; $value -= 90; }
  0            
  0            
4854 0 0         if ($value >= 50) { $output .= 'L'; $value -= 50; }
  0            
  0            
4855 0 0         if ($value >= 40) { $output .= 'XL'; $value -= 40; }
  0            
  0            
4856 0           while ($value >= 10) { $output .= 'X'; $value -= 10; }
  0            
  0            
4857 0 0         if ($value == 9) { $output .= 'IX'; $value -= 9; }
  0            
  0            
4858 0 0         if ($value >= 5) { $output .= 'V'; $value -= 5; }
  0            
  0            
4859 0 0         if ($value == 4) { $output .= 'IV'; $value -= 4; }
  0            
  0            
4860 0           while ($value >= 1) { $output .= 'I'; $value -= 1; }
  0            
  0            
4861 0 0         if ($type eq 'lower-roman') { $output = lc($output); }
  0            
4862 0           $output = "$prefix$output$suffix";
4863             } elsif ($type eq 'upper-alpha' || $type eq 'lower-alpha') {
4864 0           my $n;
4865 0           while ($value) {
4866 0           $n = ($value - 1)%26; # least significant letter digit 0..25
4867 0           $output = chr(ord('A') + $n) . $output;
4868 0           $value -= ($n+1);
4869 0           $value /= 26;
4870             }
4871 0 0         if ($type eq 'lower-alpha') { $output = lc($output); }
  0            
4872 0           $output = "$prefix$output$suffix";
4873              
4874             # there are many more ordered list formats that could be supported here
4875              
4876             # unordered lists
4877             } elsif ($type eq 'disc') {
4878 0           $output = '.disc';
4879             } elsif ($type eq 'circle') {
4880 0           $output = '.circle';
4881             } elsif ($type eq 'square') {
4882 0           $output = '.square';
4883             } elsif ($type eq 'box') { # non-standard
4884 0           $output = '.box';
4885             } elsif ($type eq '.u') { # default for unordered list at this depth
4886             # unlikely to exceed depth of 4, but be ready for it...
4887             # TBD what is official policy on depth exceeding 3? is it all .square
4888             # or is it supposed to rotate?
4889 0           my $depth = $depth_u+$depth_o+$depth_s;
4890 0 0         if ($depth%4 == 1) {
    0          
    0          
    0          
4891 0           $output = '.disc';
4892             } elsif ($depth%4 == 2) {
4893 0           $output = '.circle';
4894             } elsif ($depth%4 == 3) {
4895 0           $output = '.square';
4896             } elsif ($depth%4 == 0) {
4897 0           $output = '.box';
4898             }
4899             } elsif ($type eq '.o') { # default for ordered list at this depth
4900 0           $output = "$prefix$value$suffix"; # decimal
4901              
4902             # potentially many other unordered list marker systems, but need to find
4903             # out if there's anything official
4904              
4905             } elsif ($type eq 'none') {
4906 0           $output = '.none';
4907             } else {
4908             # unknown. use disc
4909 0           $output = '.disc';
4910             }
4911              
4912 0           return $output;
4913             } # end of _marker()
4914              
4915             # stuff to remember if need to shift line down due to extent increase
4916             # @line_extents array:
4917             # $start_x # fixed start of current baseline
4918             # $x # current baseline offset to write at
4919             # note that $x changes with each write
4920             # $y
4921             # $width
4922             # $endx
4923             # $next_y # where next line will start (may move down)
4924             # $asc # current vertical extents
4925             # $desc
4926             # $desc_leading
4927             # $text # text context (won't change)
4928             # length($text->{' stream'}) # where the current line starts in the stream
4929             # # (won't change)
4930             # $grfx # graphis content, might be undef (won't change)
4931             # length($grfx->{' stream'}) # where the current line starts in the stream
4932             # # (won't change)
4933             # $start_y # very top of this line (won't change)
4934             # $min_y # lowest allowable inked value (won't change)
4935             # $outline # array ref to outline (won't change)
4936             # $left_margin to shorten line (won't change)
4937             # $left_margin_nest to shorten line on nested list (won't change)
4938             # $right_margin to shorten line (won't change)
4939             # we do the asc/desc externally, as how to get them depends on whether it's
4940             # a font change, an image or equation, or some other kind of inline object
4941             # $asc = new ascender (does it exceed the old one?)
4942             # $desc = new descender (does it exceed the old one?)
4943             # $desc_leading = new descender with leading (does it exceed the old one?)
4944             # $text_w = width of text ($phrase) to be written
4945             # returns $rc = 0: all OK, line fits with no change to available space
4946             # 1: OK, but available space reduced, so need to recheck
4947             # 2: problem -- existing line (already written) won't fit in
4948             # shorter line, much less space for new text
4949             # 3: problem -- line now runs off bottom of column
4950             # @line_extents, with some entries revised
4951             sub _revise_baseline {
4952 0     0     my ($o_start_x, $o_x, $o_y, $o_width, $o_endx, $o_next_y, $o_asc, $o_desc,
4953             $o_desc_leading, $text, $line_start_offset,
4954             $grfx, $line_start_offsetg, $start_y, $min_y,
4955             $outline, $margin_left,
4956             $margin_right, $asc, $desc, $desc_leading, $text_w) = @_;
4957              
4958 0           my $rc = 0; # everything OK so far
4959             # items which may change (remembering initial/old values)
4960 0           my $start_x = $o_start_x; # line's original starting x
4961 0           my $x = $o_x; # current x position
4962 0           my $y = $o_y;
4963 0           my $width = $o_width;
4964 0           my $endx = $o_endx;
4965 0           my $next_y = $o_next_y;
4966             # may change, but supplied separately
4967             # $asc = $o_asc;
4968             # $desc = $o_desc;
4969             # $desc_leading = $o_desc_leading;
4970              
4971 0           my $need_revise = 0;
4972             # determine whether we need to revise baseline due to extent increases
4973 0 0         if ($asc > $o_asc) {
4974 0           $need_revise = 1;
4975             } else {
4976 0           $asc = $o_asc;
4977             }
4978 0 0         if ($desc < $o_desc) { # desc and desc_leading are negative values
4979 0           $need_revise = 1;
4980             } else {
4981 0           $desc = $o_desc;
4982             }
4983 0 0         if ($desc_leading < $o_desc_leading) {
4984 0           $need_revise = 1;
4985             } else {
4986 0           $desc_leading = $o_desc_leading;
4987             }
4988              
4989 0 0         if ($need_revise) {
4990             # in middle of line, add_x and add_y are 0
4991             # start_y is unchanged, but asc, desc may have increased
4992 0           $next_y = $start_y - $asc + $desc_leading;
4993             # did we go too low? will return -1 (start_x) and
4994             # remainder of input
4995             # don't include leading when seeing if line dips too low
4996 0 0         if ($start_y - $asc + $desc < $min_y) {
4997 0           $rc = 3; # ran out of column (vertically) = we overflow column
4998             # off bottom if we go ahead and write any of new text
4999             # TBD instead just end line here (early),
5000             # go to next column for taller text we want to print
5001             # however, could then end up with a very short line!
5002             } else {
5003             # start_y and next_y are vertical extent of this line (revised)
5004             # y is the y value of the baseline (so far). lower it a bit.
5005 0           $y -= $asc - $o_asc;
5006             # start_x is baseline start (so far), x is current write position
5007              
5008             # how tall is the line? need to set baseline.
5009 0           ($start_x,$y, $width) = _get_baseline($y, @$outline);
5010             # $x should be unchanged at this point (might be beyond new end)
5011 0           $width -= $margin_left + $margin_right; # available on new line
5012 0           $endx = $start_x + $width;
5013              
5014             # we don't know the nature of the new material attempting to add,
5015             # so can't resolve insufficient space issues here
5016             # $x should already account for margin_left
5017 0 0         if ($x > $endx) {
    0          
5018             # if current (already written) line can't fit (due to much
5019             # shorter line), rc = 2
5020 0           $rc = 2;
5021             } elsif ($x + $text_w > $endx) {
5022             # if adding new text will overflow line, rc = 1
5023 0           $rc = 1;
5024             } else { # should have room to write new text
5025 0           $rc = 0;
5026            
5027             # revise (move in x,y) any existing text in this line (Tm cmd),
5028             # INCLUDING this text chunk's Tm if still in Tpending buffer.
5029 0           $text->_Tpending();
5030 0           my $i = $line_start_offset;
5031 0           my $delta_x = $start_x - $o_start_x;
5032 0           my $delta_y = $y - $o_y;
5033 0           while(1) {
5034 0           $i = index($text->{' stream'}, ' Tm', $i+3);
5035 0 0         if ($i == -1) { last; }
  0            
5036             # $i is the position of a Tm command in the stream. the two
5037             # words before it are x and y position to write at.
5038             # $j is $i back up by two spaces
5039 0           my $j = rindex($text->{' stream'}, ' ', $i-1);
5040 0           $j = rindex($text->{' stream'}, ' ', $j-1) + 1;
5041             # $j points to first char of x, $i to one after last y char
5042 0           my $str1 = substr($text->{' stream'}, 0, $j);
5043 0           my $str2 = substr($text->{' stream'}, $i);
5044 0           my $old_string = substr($text->{' stream'}, $j, $i-$j);
5045 0           $old_string =~ m/^([^ ]+) ([^ ]+)$/;
5046 0           my $old_x = $1;
5047 0           my $old_y = $2;
5048 0           $old_x += $delta_x;
5049 0           $old_y += $delta_y;
5050 0           $text->{' stream'} = $str1."$old_x $old_y".$str2;
5051             # no need to change line_start_offset, but $i has to be
5052             # adjusted to account for possible change in resulting
5053             # position of Tm
5054 0           $i += length("$old_x $old_y") - ($i - $j);
5055             } # end while(1) loop adjusting Tm's on this line
5056              
5057             # AFTER the Tm statement may come one or more strokes for
5058             # underline, strike-through, and/or overline
5059 0           $i = $line_start_offset;
5060             # $delta_x, $delta_y same as before
5061 0           while (1) {
5062 0           $i = index($text->{' stream'}, ' l S', $i+4);
5063 0 0         if ($i == -1) { last; }
  0            
5064             # $i is the position of a lS command in the stream. the five
5065             # words before it are x and y positions to write at.
5066             # (x y m x' y l S is full command to modify)
5067             # $j is $i back up by five spaces
5068 0           my $j = rindex($text->{' stream'}, ' ', $i-1);
5069 0           $j = rindex($text->{' stream'}, ' ', $j-1);
5070 0           $j = rindex($text->{' stream'}, ' ', $j-1);
5071 0           $j = rindex($text->{' stream'}, ' ', $j-1);
5072 0           $j = rindex($text->{' stream'}, ' ', $j-1);
5073             # $j points to first char of x, $i to one after last y char
5074 0           my $str1 = substr($text->{' stream'}, 0, $j);
5075 0           my $str2 = substr($text->{' stream'}, $i);
5076 0           my $old_string = substr($text->{' stream'}, $j, $i-$j);
5077 0           $old_string =~ m/^ ([^ ]+) ([^ ]+) m ([^ ]+) ([^ ]+)$/;
5078 0           my $old_x1 = $1;
5079 0           my $old_y1 = $2;
5080 0           my $old_x2 = $3;
5081 0           my $old_y2 = $4;
5082 0           $old_x1 += $delta_x;
5083 0           $old_y1 += $delta_y;
5084 0           $old_x2 += $delta_x;
5085 0           $old_y2 += $delta_y;
5086 0           $text->{' stream'} = $str1." $old_x1 $old_y1 m $old_x2 $old_y2".$str2;
5087             # no need to change line_start_offset, but $i has to be
5088             # adjusted to account for possible change in resulting
5089             # position of lS
5090 0           $i += length(" $old_x1 $old_y1 m $old_x2 $old_y2") - ($i - $j);
5091             } # end while(1) loop adjusting line stroke positions
5092             }
5093             }
5094             }
5095              
5096 0           return ($rc, $start_x, $x, $y, $width, $endx, $next_y,
5097             $asc, $desc, $desc_leading,
5098             $text, $line_start_offset, $grfx, $line_start_offsetg,
5099             $start_y, $min_y, $outline,
5100             $margin_left, $margin_right);
5101             } # end of _revise_baseline()
5102              
5103             # just something to pause during debugging
5104             sub _pause {
5105 0     0     print STDERR "====> Press Enter key to continue...";
5106 0           my $input = <>;
5107 0           return;
5108             }
5109              
5110             =head4 init_state()
5111              
5112             See L<PDF::Builder> for code and L<PDF::Builder::Content::Column_docs>
5113             for documentation.
5114              
5115             =cut
5116              
5117             =head4 pass_start_state()
5118              
5119             See L<PDF::Builder> for code and L<PDF::Builder::Content::Column_docs>
5120             for documentation.
5121              
5122             =head4 pass_end_state()
5123              
5124             See L<PDF::Builder::Content::Column_docs> for documentation.
5125              
5126             =cut
5127              
5128             sub pass_end_state {
5129 0     0 1   my ($self, $pass_count, $max_passes, $pdf, $state, %opts) = @_;
5130             # $state = ref to %state structure
5131              
5132 0           my $rc = scalar(keys %{$state->{'changed_target'}});
  0            
5133             # length of changed_target key list
5134              
5135             # are we either clear to finish, or at max number of passes? if so,
5136             # output all annotations. each page should have its complete text already,
5137             # as well as a record of the annotations in %state
5138              
5139 0 0 0       if (!$rc || $pass_count == $max_passes) {
5140             # where to put UL corner of target window relative to target text
5141 0           my $delta_x = 20; # 20pt to LEFT
5142 0           my $delta_y = 20;
5143 0 0 0       if (defined $opts{'deltas'} && ref($opts{'deltas'}) eq 'ARRAY') {
5144 0           my @deltas = @{ $opts{'deltas'} };
  0            
5145 0 0         if (@deltas == 2) {
5146 0           $delta_x = $deltas[0];
5147 0           $delta_y = $deltas[1];
5148             }
5149             }
5150 0           my @media_size = $pdf->mediabox(); # [0] min x, [3] max y
5151              
5152             # go through list of annotations to create at '_ref' tag links
5153 0           my $cur_src_page = 0; # minimize openings of source page. min valid 1
5154 0           my $cur_tgt_page = 0; # minimize openings of target page. min valid 1
5155 0           my ($src_page, $tgt_page); # opened page objects
5156 0           my $link_border;
5157 0 0 0       if (defined $opts{'debug'} && $opts{'debug'} == 1) {
5158             # debug: draw border around link text
5159 0           $link_border = [ 0, 0, 1 ];
5160             } else {
5161             # production: no border around link text
5162 0           $link_border = [ 0, 0, 0 ];
5163             }
5164              
5165 0           for (my $source=0; $source<@{$state->{'xrefs'}}; $source++) {
  0            
5166 0           my $sptr = $state->{'xrefs'}->[$source];
5167             # source filename of target link (final name and position!)
5168 0           my $tfn = $sptr->{'tfn'};
5169             # target's physical page number
5170 0           my $tppn = $sptr->{'tppn'};
5171             # source's physical page number
5172 0           my $sppn = $sptr->{'sppn'};
5173             # target's formatted page number is not of interest here (link
5174             # text already output, if includes fpn)
5175             #my $tfpn = $sptr->{'tfpn'};
5176             # target's tag that produced the entry is not of interest here
5177             #my $ttag = $sptr->{'tag'};
5178             # title is not of interest here (link text already output)
5179             #my $title = $sptr->{'title'};
5180             # other_pg is not of interest here (link text already output)
5181             #my $other_pg = $sptr->{'other_pg'};
5182             # target's x and y coordinates (for fit entry)
5183 0           my $tx = $sptr->{'tx'};
5184 0           my $ty = $sptr->{'ty'};
5185             # target id/ND/etc. information and fit
5186 0           my $tid = $sptr->{'id'};
5187 0           my $fit = $sptr->{'fit'};
5188             # if fit includes two % fields, replace by tx and ty
5189             # (for xyz fit: 'xyz,%x,%y,null')
5190 0           my $val = max(int($tx-$delta_x),$media_size[0]);
5191 0           $fit =~ s/%x/$val/;
5192 0           $val = min(int($ty+$delta_y),$media_size[3]);
5193 0           $fit =~ s/%y/$val/;
5194             # replace any 'undef' by 'null' in $fit
5195 0           $fit =~ s/undef/null/g;
5196              
5197             # list of pairs of source physical page number and annot rectangle
5198             # coordinates, to place link at. usually one per link, but
5199             # sometimes 2 or more due to wrapping
5200 0           my @links = @{ $sptr->{'click'} };
  0            
5201 0           for (my $click=0;
5202             $click<@links; # most often, 1
5203             $click++) {
5204             # usually only one click area to place an annotation in, but
5205             # could spread over two or more lines, and even into the
5206             # next column (or page). annotation click area to be placed
5207             # in page object $src_page at coordinates $rect
5208 0           my @next_click_area = @{ $links[$click] };
  0            
5209 0           my $sppn = $next_click_area[0];
5210 0 0         if ($sppn != $cur_src_page) {
5211 0           $src_page = $pdf->openpage($sppn);
5212 0           $cur_src_page = $sppn;
5213             }
5214             # click area corners [ULx,y, LRx,y]
5215 0           my $rect = $next_click_area[1]; # leave as pointer
5216 0           my $annot = $src_page->annotation();
5217              
5218             # three flavors of 'tid':
5219 0 0         if ($tid =~ /^#[^#]/) {
    0          
5220             # physical page number target, may be internal or external
5221             # reuse $tppn since explicitly giving
5222 0           $tppn = substr($tid, 1);
5223             # have target file (if ext) and physical page number
5224 0 0         $fit = 'fit' if $fit eq ''; # default show whole page
5225 0 0         if ($tfn eq '') {
5226             # internal link to page object at $tx,$ty fit
5227 0 0         if ($tppn != $cur_tgt_page) {
5228 0           $tgt_page = $pdf->openpage($tppn);
5229 0           $cur_tgt_page = $tppn;
5230             }
5231 0           $annot->goto($tgt_page,
5232             (split /,/, $fit),
5233             'rect'=>$rect, 'border'=>$link_border);
5234             } else {
5235             # external link to physical page
5236 0           $annot->pdf($tfn, $tppn,
5237             (split /,/, $fit),
5238             'rect'=>$rect, 'border'=>$link_border);
5239             }
5240              
5241             } elsif ($tid =~ /^##/) {
5242             # Named Destination given (ignore 'fit' if given)
5243             # external if filepath not ''
5244 0           my $nd = substr($tid, 1);
5245 0 0         if ($tfn eq '') {
5246             # internal link to named destination
5247 0           $annot->goto($nd,
5248             'rect'=>$rect, 'border'=>$link_border);
5249             } else {
5250             # external link to named destination
5251 0           $annot->pdf($tfn, $nd,
5252             'rect'=>$rect, 'border'=>$link_border);
5253             }
5254              
5255             } else {
5256             # id defined elsewhere, at $tgt_page from target
5257 0 0         if ($fit eq '') {
5258             # default fit is xyz x-$delta_x,y+$delta_y,undef
5259             # x,y from location of target on page
5260 0           $fit = "xyz,".max(int($tx)-$delta_x,$media_size[0]).",".
5261             min(int($ty)+$delta_y,$media_size[3]).",null";
5262             }
5263             # internal link to page object at $tx,$ty fit
5264             # skip if Named Destination instead of a phys page no
5265 0 0 0       if ($tppn =~ m/^\d+$/ && $tppn != $cur_tgt_page) {
5266 0           $tgt_page = $pdf->openpage($tppn);
5267 0           $cur_tgt_page = $tppn;
5268             }
5269 0           $annot->goto($tgt_page,
5270             (split /,/, $fit),
5271             'rect'=>$rect, 'border'=>$link_border);
5272             }
5273             } # have gone through one or more click areas to create for this
5274             # one link
5275             } # done looping through all the requested annotations in xrefs
5276            
5277             # output any named destinations defined
5278 0           my $ptr = $state->{'nameddest'};
5279 0           foreach my $name (keys %$ptr) {
5280 0           my $fit = $ptr->{$name}{'fit'};
5281 0           my $ppn = $ptr->{$name}{'ppn'};
5282 0           my $x = $ptr->{$name}{'x'};
5283 0           my $y = $ptr->{$name}{'y'};
5284              
5285             # if no fit given, set to xyz,x-$delta_x,y+$delta_y,undef
5286 0 0         if ($fit eq '') {
5287 0           $fit = "xyz,".max(int($x)-$delta_x,$media_size[0]).",".
5288             min(int($y)+$delta_y,$media_size[3]).",null";
5289             }
5290             # if $x and $y in fit, replace with integer values
5291 0           my $val = max(int($x)-$delta_x,$media_size[0]);
5292 0           $fit =~ s/\$x/$val/;
5293 0           $val = min(int($y)+$delta_y,$media_size[3]);
5294 0           $fit =~ s/\$y/$val/;
5295 0           my @fits = ();
5296 0           @fits = split /,/, $fit;
5297 0           for (my $i=0; $i<@fits; $i++) {
5298             # if the user specified a fit with 'undef' (string) parms
5299 0 0         if ($fits[$i] eq 'undef') { $fits[$i] = 'null'; }
  0            
5300             }
5301 0           my $dest = PDF::Builder::NamedDestination->new($pdf);
5302 0           my $page = $pdf->openpage($ppn);
5303 0           $dest->goto($page, @fits);
5304 0           $pdf->named_destination('Dests', $name, $dest);
5305             }
5306              
5307             } # end of outputting annotations and named destinations
5308              
5309 0           return $rc;
5310             }
5311              
5312             # list target ids in state holder that are still changing
5313             =head4 unstable_state()
5314              
5315             See L<PDF::Builder::Content::Column_docs> for documentation
5316              
5317             =cut
5318              
5319             sub unstable_state {
5320 0     0 0   my ($self, $state) = @_;
5321             # $state = ref to %state structure
5322              
5323 0           my @list = sort(keys %{$state->{'changed_target'}});
  0            
5324             # would prefer target ids to be returned in order encountered, but
5325             # since no idea what order hash keys will be in, might as well sort
5326             # in alphabetical order
5327 0           return @list; # hopefully empty at some point
5328             }
5329              
5330             # mytext array at element $el, extract full child text of this element
5331             # may be sub tags and their own child text, all to be returned
5332             #
5333             # actually, all tags have already been removed and the overall text will
5334             # now be a series of text and tags and their children (arbitrarily deep)
5335             # e.g. <h2 id=target>This is <i>italic</i> text</h2> would be
5336             # tag=>'h2'
5337             # id=>'target'
5338             # tag=>''
5339             # text=>'This is '
5340             # tag=>'i'
5341             # tag=>''
5342             # text=>'italic'
5343             # tag=>'/i'
5344             # tag=>''
5345             # text=>' text'
5346             # tag=>'/h2'
5347             # desired output: 'This is italic text'
5348             #
5349             # the big problem is to know what element to stop at (the end tag to
5350             # $el element, not necessarily the next /tag, in case there's another 'tag'
5351             # embedded within the child text)
5352             # TBD: consider also copying tags (markup) within child text, to appear
5353             # formatted in title (per _ref, and global, flag to flatten)
5354              
5355             sub _get_child_text {
5356 0     0     my ($mytext, $el) = @_;
5357              
5358 0           my $output = '';
5359 0           my @tags = ($mytext->[$el]->{'tag'});
5360 0           for (my $elx=$el+1; ; $elx++) {
5361             # found end of this tag we seek child text from?
5362 0 0 0       if ($mytext->[$elx]->{'tag'} eq "/$tags[0]" &&
5363 0           scalar(@tags)==1) { last; }
5364             # found some text in it? add to output
5365 0 0         if ($mytext->[$elx]->{'tag'} eq '') {
5366 0           $output .= $mytext->[$elx]->{'text'};
5367 0           next;
5368             }
5369             # an end tag? pop stack (assume properly nested!)
5370 0 0         if ($mytext->[$elx]->{'tag'} =~ /^\//) {
5371 0           pop @tags;
5372 0           next;
5373             }
5374             # must be another tag. push it on tag stack
5375 0           push @tags, $mytext->[$elx]->{'tag'};
5376             }
5377              
5378             # also convert line ends to blanks
5379 0           $output =~ s/\s+/ /sg;
5380 0           return $output;
5381             } # end _get_child_text()
5382              
5383             # similar to _get_child_text(), but goes through looking for special section
5384             # AND trims out removed text from where it was found
5385             #
5386             # open text in a paragraph shouldn't have any special text, but
5387             # we need to look at tag attributes (title= ), heading text, link
5388             # child text, etc.
5389             sub _get_special_info {
5390 0     0     my ($mytext, $el, $pattern, $endchar) = @_;
5391              
5392 0           my $newtext = '';
5393 0           my ($start, $end);
5394              
5395 0           my @tags = ($mytext->[$el]->{'tag'});
5396 0           for (my $elx=$el+1; $elx<@$mytext; $elx++) {
5397             # found end of this tag we seek child text from?
5398 0 0 0       if (@tags == 1 && $mytext->[$elx]->{'tag'} eq "/$tags[0]") { last; }
  0            
5399             # found some desired text in it? extract to output
5400 0 0         if ($mytext->[$elx]->{'tag'} eq '') {
5401             # assume no tags within text
5402 0           my $text = $mytext->[$elx]->{'text'};
5403 0           $start = index($text, $pattern);
5404 0 0         if ($start > -1) {
5405             # starting pattern found within text string
5406 0           $end = index($text, $endchar, $start+length($pattern));
5407 0 0         if ($end > -1) {
5408             # ending pattern found within text string, after starting
5409 0           $newtext = substr($text, $start+length($pattern),
5410             $end-$start-length($pattern));
5411              
5412             # now remove entire thing plus up to one space
5413 0           $end += length($endchar)-1;
5414 0           my $space_before = 0;
5415 0 0 0       if ($start>0 && substr($text, $start-1, 1) eq ' ') {
5416 0           $space_before = 1;
5417             }
5418 0           my $space_after = 0;
5419 0 0 0       if ($end < length($text)-1 &&
5420             substr($text, $end+1, 1) eq ' ') {
5421 0           $space_after = 1;
5422             }
5423              
5424 0 0 0       if ($start == 0) {
    0          
    0          
5425             # at far left
5426 0 0         if ($space_after) { $end++; }
  0            
5427 0           $text = substr($text, $end+1);
5428             } elsif ($end == length($text)-1) {
5429             # at far right
5430 0 0         if ($space_before) { $start--; }
  0            
5431 0           $text = substr($text, 0, $start);
5432             } elsif ($space_before && $space_after) {
5433             # in middle with one space to delete at either end
5434 0           $text = substr($text, 0, --$start) .
5435             substr($text, $end+1);
5436             } else {
5437             # in middle with no space after or no space after,
5438             # so preserve adjoining space
5439 0           $text = substr($text, 0, $start) .
5440             substr($text, $end+1);
5441             }
5442             }
5443             }
5444 0           $mytext->[$elx]->{'text'} = $text; # may be now empty
5445 0           next; # should be only occurence, but still need to clean up
5446             }
5447             # an end tag? pop stack (assume properly nested!)
5448 0 0         if ($mytext->[$elx]->{'tag'} =~ /^\//) {
5449 0           pop @tags;
5450 0           next;
5451             }
5452             # must be another tag. push it on tag stack
5453 0           push @tags, $mytext->[$elx]->{'tag'};
5454             }
5455              
5456             # trim enclosure and leading and trailing whitespace off it
5457 0           $newtext =~ s/^$pattern\s+//;
5458 0           $newtext =~ s/\s+$endchar$//;
5459 0           return $newtext;
5460             } # end _get_special_info()
5461              
5462             # --------------------- end of column() section -----------------------------
5463             1;