File Coverage

blib/lib/Text/KnuthPlass.pm
Criterion Covered Total %
statement 143 204 70.1
branch 22 50 44.0
condition 8 30 26.6
subroutine 29 44 65.9
pod 9 9 100.0
total 211 337 62.6


line stmt bran cond sub pod time code
1             package Text::KnuthPlass;
2             require XSLoader;
3 3     3   124237 use constant DEBUG => 0;
  3         15  
  3         334  
4 3     3   18 use constant purePerl => 0; # 1: do NOT load XS routines
  3         4  
  3         111  
5 3     3   14 use warnings;
  3         5  
  3         71  
6 3     3   14 use strict;
  3         4  
  3         74  
7 3     3   14 use List::Util qw/min/;
  3         5  
  3         351  
8              
9             our $VERSION = '1.07'; # VERSION
10             our $LAST_UPDATE = '1.07'; # manually update whenever file is edited
11              
12 3     3   1742 use Data::Dumper;
  3         23263  
  3         349  
13              
14             # disable XS usage for debug, etc.
15             if (!purePerl) {
16             eval { XSLoader::load("Text::KnuthPlass", $VERSION); } or die $@;
17             # Or else there's a Perl version to fall back on
18             # does camelCase in Perl get automatically changed to camel_case?
19             # _computeCost() in Perl vs _compute_cost() in XS
20             # _computeSum() in Perl vs _compute_sum() in XS
21             # _init_nodelist()
22             # _cleanup()
23             # _active_to_breaks()
24             # _mainloop()
25             }
26              
27             package Text::KnuthPlass::Element;
28 3     3   25 use base 'Class::Accessor';
  3         6  
  3         1704  
29             __PACKAGE__->mk_accessors("width");
30             sub new {
31 319     319   465 my $self = shift;
32 319         1286 return bless { 'width' => 0, @_ }, $self;
33             }
34             sub is_penalty {
35 65     65   1886 return shift->isa("Text::KnuthPlass::Penalty");
36             }
37             sub is_glue {
38 0     0   0 return shift->isa("Text::KnuthPlass::Glue");
39             }
40             sub is_box {
41 0     0   0 return shift->isa("Text::KnuthPlass::Box");
42             }
43              
44             package Text::KnuthPlass::Box;
45 3     3   5552 use base 'Text::KnuthPlass::Element';
  3         8  
  3         1171  
46             __PACKAGE__->mk_accessors("value");
47              
48             sub _txt { # different from other _txt() defs
49 0     0   0 return "[".$_[0]->value()."/".$_[0]->width()."]";
50             }
51              
52             package Text::KnuthPlass::Glue;
53 3     3   23 use base 'Text::KnuthPlass::Element';
  3         6  
  3         922  
54             __PACKAGE__->mk_accessors("stretch", "shrink");
55              
56             sub new {
57 122     122   182 my $self = shift;
58 122         186 return $self->SUPER::new('stretch' => 0, 'shrink' => 0, @_);
59             }
60             sub _txt { # different from other _txt() defs
61 0     0   0 return sprintf "<%.2f+%.2f-%.2f>", $_[0]->width(), $_[0]->stretch(), $_[0]->shrink();
62             }
63              
64             package Text::KnuthPlass::Penalty;
65 3     3   36 use base 'Text::KnuthPlass::Element';
  3         7  
  3         907  
66             __PACKAGE__->mk_accessors("penalty", "flagged", "shrink");
67             sub new {
68 40     40   307 my $self = shift;
69 40         78 return $self->SUPER::new('flagged' => 0, 'shrink' => 0, @_);
70             }
71             sub _txt { # different from other _txt() defs
72 0   0 0   0 return "(".$_[0]->penalty().($_[0]->flagged() &&"!").")";
73             }
74              
75             package Text::KnuthPlass::Breakpoint;
76 3     3   29 use base 'Text::KnuthPlass::Element';
  3         6  
  3         763  
77             __PACKAGE__->mk_accessors(qw/position demerits ratio line fitnessClass totals previous/);
78              
79             package Text::KnuthPlass::DummyHyphenator;
80 3     3   20 use base 'Class::Accessor';
  3         6  
  3         304  
81             sub hyphenate {
82 58     58   436 return $_[1];
83             }
84              
85             package Text::KnuthPlass;
86 3     3   18 use base 'Class::Accessor';
  3         25  
  3         215  
87 3     3   19 use Carp qw/croak/;
  3         5  
  3         9156  
88              
89             # these settings are settable via new(%opts)
90             my %defaults = (
91             'infinity' => 10000,
92             'tolerance' => 30, # maximum allowable ratio (way out of reasonable!)
93             'hyphenpenalty' => 50,
94             'demerits' => { 'line' => 10, 'flagged' => 100, 'fitness' => 3000 },
95             'space' => { 'width' => 3, 'stretch' => 6, 'shrink' => 9 },
96             'linelengths' => [ 78 ], # character count (fixed pitch)
97             'measure' => sub { length $_[0] },
98             'hyphenator' =>
99             # TBD min_suffix 3 for English and many, but not all, languages. %opt
100             eval { require Text::Hyphen }? Text::Hyphen->new('min_suffix' => 3):
101             Text::KnuthPlass::DummyHyphenator->new(),
102             'purePerl' => 0, # 1: use pure Perl code, not XS CURRENTLY UNUSED
103             'const' => 0, # width (char or points) to reduce line length to allow
104             # for word-split hyphen without overflow into margin
105             # CURRENTLY UNUSED
106             'indent' => 0, # global paragraph indentation width
107             );
108             __PACKAGE__->mk_accessors(keys %defaults);
109             sub new {
110 7     7 1 285128 my $self = shift;
111             # hash elements in new() override whatever's in %default
112              
113             # tack on any new() overrides of defaults
114 7         2482 return bless {%defaults, @_}, $self;
115             }
116              
117             =head1 NAME
118              
119             Text::KnuthPlass - Breaks paragraphs into lines using the TeX (Knuth-Plass) algorithm
120              
121             =head1 SYNOPSIS
122              
123             To use with plain text, indentation of 2. NOTE that you should also
124             set the shrinkability of spaces to 0 in the new() call:
125              
126             use Text::KnuthPlass;
127             my $typesetter = Text::KnuthPlass->new(
128             'indent' => 2, # two characters,
129             # set space shrinkability to 0
130             'space' => { 'width' => 3, 'stretch' => 6, 'shrink' -> 0 },
131             # can let 'measure' default to character count
132             # default line lengths to 78 characters
133             );
134             my @lines = $typesetter->typeset($paragraph);
135             ...
136              
137             for my $line (@lines) {
138             for my $node (@{$line->{'nodes'}}) {
139             if ($node->isa("Text::KnuthPlass::Box")) {
140             # a Box is a word or word fragment (no hyphen on fragment)
141             print $node->value();
142             } elsif ($node->isa("Text::KnuthPlass::Glue")) {
143             # a Glue is (at least) a single space, but you can look at
144             # the line's 'ratio' to insert additional spaces to
145             # justify the line. we also are glossing over the skipping
146             # of any final glue at the end of the line
147             print " ";
148             }
149             # ignoring Penalty (word split point) within line
150             }
151             if ($line->{'nodes'}[-1]->is_penalty()) { print "-"; }
152             print "\n";
153             }
154              
155             To use with PDF::Builder: (also PDF::API2)
156              
157             my $text = $page->text();
158             $text->font($font, 12);
159             $text->leading(13.5);
160              
161             my $t = Text::KnuthPlass->new(
162             'indent' => 2*$text->text_width('M'), # 2 ems
163             'measure' => sub { $text->text_width(shift) },
164             'linelengths' => [235] # points
165             );
166             my @lines = $t->typeset($paragraph);
167              
168             my $y = 500; # PDF decreases y down the page
169             for my $line (@lines) {
170             $x = 50; # left margin
171             for my $node (@{$line->{'nodes'}}) {
172             $text->translate($x,$y);
173             if ($node->isa("Text::KnuthPlass::Box")) {
174             # a Box is a word or word fragment (no hyphen on fragment)
175             $text->text($node->value());
176             $x += $node->width();
177             } elsif ($node->isa("Text::KnuthPlass::Glue")) {
178             # a Glue is a variable-width space
179             $x += $node->width() + $line->{'ratio'} *
180             ($line->{'ratio'} < 0 ? $node->shrink(): $node->stretch());
181             # we also are glossing over the skipping
182             # of any final glue at the end of the line
183             }
184             # ignoring Penalty (word split point) within line
185             }
186             # explicitly add a hyphen at a line-ending split word
187             if ($line->{'nodes'}[-1]->is_penalty()) { $text->text("-"); }
188             $y -= $text->leading(); # go to next line down
189             }
190              
191             =head1 METHODS
192              
193             =head2 $t = Text::KnuthPlass->new(%opts)
194              
195             The constructor takes a number of options. The most important ones are:
196              
197             =over
198              
199             =item measure
200              
201             A subroutine reference to determine the width of a piece of text. This
202             defaults to C, which is what you want if you're
203             typesetting plain monospaced text. You will need to change this to plug
204             into your font metrics if you're doing something graphical. For PDF::Builder
205             (also PDF::API2), this would be the C method (alias
206             C), which returns the width of a string (in the present font
207             and size) in points.
208              
209             'measure' => sub { length(shift) }, # default, for character output
210             'measure' => sub { $text->advancewidth(shift) }, # PDF::Builder/API2
211              
212             =item linelengths
213              
214             This is an array of line lengths. For instance, C< [30,40,50] > will
215             typeset a triangle-shaped piece of text with three lines. What if the
216             text spills over to more than three lines? In that case, the final value
217             in the array is used for all further lines. So to typeset an ordinary
218             block-shaped column of text, you only need specify an array with one
219             value: the default is C< [78] >. Note that this default would be the
220             character count, rather than points (as needed by PDF::Builder or PDF::API2).
221              
222             'linelengths' => [$lw, $lw, $lw-6, $lw-6, $lw],
223              
224             This would set the first two lines in the paragraph to C<$lw> length, the next
225             two to 6 less (such as for a float inset), and finally back to full length.
226             At each line, the first element is consumed, but the last element is never
227             removed. Any paragraph indentation set will result in a shorter-appearing
228             first line, which actually has blank space at its beginning. Start output of
229             the first line at the same C value as you do the other lines.
230              
231             Setting C in the C (constructor) call resets the internal
232             line length list to the new elements, overwriting anything that was already
233             there (such as any remaining line lengths left over from a previous C call). Subsequent C calls will continue to consume the existing
234             line length list, until the last element is reached. You can either reset the
235             list for the next paragraph with the C call, or call the
236             C method to get or set the list.
237              
238             =item indent
239              
240             This sets the global (default) paragraph indentation, unless overridden
241             on a per-paragraph basis by
242             an C entry in a C call. The units are the same as for
243             C and C. A "Box" of value C<''> and width of C is
244             inserted before the first node of the paragraph. Your rendering code should
245             know how to handle this by starting at the same C coordinate as other lines,
246             and then moving right (or left) by the indicated amount.
247              
248             'indent' => 2, # 2 character indentation
249             'indent' => 2*$text->text_width('M'), # 2 ems indentation
250             'indent' => -3, # 3 character OUTdent
251              
252             If the value is negative, a negative-width space Box is added. The overall line
253             will be longer than other lines, by that amount. Again, your rendering code
254             should handle this in a similar manner as with a positive indentation, but
255             move I by the indicated amount. Be careful to have your starting C
256             value far enough to the right that text will not end up being written off-page.
257              
258             =item tolerance
259              
260             How much leeway we have in leaving wider spaces than the algorithm
261             would prefer. The C is the maximum C glue expansion value to
262             I in a possible solution, before discarding this solution as so
263             infeasible as to be a waste of time to pursue further. Most of the time, the
264             C is going to have a value in the 1 to 3 range. One approach is to
265             try with C 1>, and if no successful layout is found, try
266             again with 2, and then 3 and perhaps even 4.
267              
268             =item hyphenator
269              
270             An object which hyphenates words. If you have the C product
271             installed (which is highly recommended), then a C object is
272             instantiated by default; if not, an object of the class
273             C is instantiated - this simply finds
274             no hyphenation points at all. So to turn hyphenation off, set
275              
276             'hyphenator' => Text::KnuthPlass::DummyHyphenator->new()
277              
278             To typeset non-English text, pass in a C-like object which
279             responds to the C method, returning a list of hyphen positions for
280             that particular language (native C defaults to American English
281             hyphenation rules). (See C for the interface.)
282              
283             =item space
284              
285             Fine tune space (glue) width, stretchability, and shrinkability.
286              
287             'space' => { 'width' => 3, 'stretch' => 6, 'shrink' => 9 },
288              
289             For typesetting
290             constant width text or output to a text file (characters), we suggest setting
291             the C value to 0. This prevents the glue spaces from being shrunk to
292             less than one character wide, which could result in either no spaces between
293             words, or overflow into the right margin.
294              
295             'space' => { 'width' => 3, 'stretch' => 6, 'shrink' => 0 },
296              
297             =item infinity
298              
299             The default value for I is, as is customary in TeX, 10000. While this
300             is a far cry from the real infinity, so long as it is substantially larger than
301             any other demerit or penalty, it should take precedence in calculations. Both
302             positive and negative C are used in the code for various purposes,
303             including a C<+inf> penalty for something absolutely forbidden, and C<-inf> for
304             something absolutely required (such as a line break at the end of a paragraph).
305              
306             'infinity' => 10000,
307              
308             =item hyphenpenalty
309              
310             Set the penalty for an end-of-line hyphen at 50. You may want to try a somewhat
311             higher value, such as 100+, if you see too much hyphenation on output. Remember
312             that excessively short lines are prone to splitting words and being hyphenated,
313             no matter what the penalty is.
314              
315             'hyphenpenalty' => 50,
316              
317             There does not appear to be anything in the code to find and prevent multiple
318             contiguous (adjacent) hyphenated lines, nor to prevent the penultimate
319             (next-to-last) line from being hyphenated, nor to prevent the hyphenation of
320             a line where you anticipate the paragraph to be split between columns.
321             Something may be done in the future about these three special cases, which
322             are considered to not be good typesetting.
323              
324             =item demerits
325              
326             Various demerits used in calculating penalties, including I, which is
327             used when line tightness (C) changes by more than one class between two
328             lines.
329              
330             'demerits' => { 'line' => 10, 'flagged' => 100, 'fitness' => 3000 },
331              
332             =back
333              
334             There may be other options for fine-tuning the output. If you know your way
335             around TeX, dig into the source to find out what they are. At some point,
336             this package will support additional tuning by allowing the setting of more
337             parameters which are currently hard-coded. Please let us know if you found any
338             more parameters that would be useful to allow additional tuning!
339              
340             =cut
341              
342             # more options, not currently implemented
343             # 'purePerl' => 0, # 1: use pure Perl code, not XS. currently is hard-coded
344             # at top, as new() appears to be too late to call xload()
345             # 'const' => 0, # width (char or points) to reduce line length to allow
346             # hyphenated word's hyphen not to overhang into right
347             # margin (constant width or character output), or result
348             # in slight tightening that may end up too much (ratio too
349             # negative). Still looking at it.
350             # TBD
351             # 'hangingp' => 0, # use hanging punctuation (last character in a line is
352             # punctuation, including split-word hyphen) to write
353             # that punctuation over into the right margin. Some "very
354             # fine" typesetting overhangs a per-character (and font)
355             # percentage on left and right, and even letters too.
356             # 'dropcap' => { 'lines' => 3, 'scale' => 2.5, .... },
357             # indent first 'lines' lines of the paragraph to provide
358             # space for an oversized letter with some movement up and
359             # left. Letter is taken from $paragraph text. If paragraph
360             # doesn't have enough lines, pad with blank lines so that
361             # no need to indent following paragraph! Usually just for
362             # first paragraph in a section (as with SC), so need a way
363             # to cancel for subsequent paragraphs (if on by default).
364             # TBD but this one might better belong in PDF::Builder
365             # 'smallcap' => { 'words' => 1, ... },
366             # small caps on first line text. 'words' is 1 to SC first
367             # word (or remainder after DropCap does its thing), 0 is
368             # no SC, -1 is entire line, >0 is that many words (up to
369             # end of first line). Usually just for first paragraph in
370             # a section (as with DC), so need a way to cancel for
371             # subsequent paragraphs (if on by default).
372             # Note that your rendering code should take care of any additional top margin
373             # (interparagraph space). Settings may be added for other things to fine-tune
374             # the output.
375              
376             =head2 $t->typeset($paragraph_string, %opts)
377              
378             This is the main interface to the algorithm, made up of the constituent
379             parts below. It takes a paragraph of text and returns a list of lines (array
380             of hashes) if suitable breakpoints could be found.
381              
382             The typesetter currently allows several options:
383              
384             =over
385              
386             =item indent
387              
388             Override the global paragraph indentation value B
389             This can be useful for
390             instances such as I indenting the first paragraph in a section.
391              
392             'indent' => 0, # default set in new() is 2ems
393              
394             =item linelengths
395              
396             The array of line lengths may be set here, in C. As with C, it
397             will override whatever existing line lengths array is left over from
398             earlier operations.
399              
400             =back
401              
402             Possibly (in the future) many other global settings set in C may be
403             overridden on a per-paragraph basis in C.
404              
405             The returned list has the following structure:
406              
407             (
408             { 'nodes' => \@nodes, 'ratio' => $ratio },
409             { 'nodes' => \@nodes, 'ratio' => $ratio },
410             ...
411             )
412              
413             The node list in each element will be a list of objects. Each object
414             will be either C, C
415             or C. See below for more on these.
416              
417             The C is the amount of stretch or shrink which should be applied to
418             each glue element in this line. The corrected width of each glue node
419             should be:
420              
421             $node->width() + $line->{'ratio'} *
422             ($line->{'ratio'} < 0 ? $node->shrink() : $node->stretch());
423              
424             Each box, glue or penalty node has a C attribute. Boxes have
425             Cs, which are the text which went into them (including a wide null
426             blank for paragraph indentation, a special case); glue has C
427             and C to determine how much it should vary in width. That should
428             be all you need for basic typesetting; for more, see the source, and see
429             the original Knuth-Plass paper in "Digital Typography".
430              
431             Why I rather than something like I? Per
432             L, this code is ported from the Javascript product
433             B.
434              
435             This method is a thin wrapper around the three methods below.
436              
437             =cut
438              
439             # indent entry in options applies only to this paragraph.
440             # linelengths OK to change global value.
441             sub typeset {
442 3     3 1 34 my ($t, $paragraph, %opts) = @_;
443              
444             # if give linelengths, need to set (replace) global value
445 3 50       17 if (defined $opts{'linelengths'}) {
446 0         0 $t->{'linelengths'} = $opts{'linelengths'};
447             }
448              
449             # break up the text into a collection (list) of box, glue, penalty nodes
450 3         15 my @nodes = $t->break_text_into_nodes($paragraph, %opts);
451              
452             # if indenting first line of paragraph, add a Box for that blank
453 3         10 my $indent = $t->{'indent'}; # global indent
454 3 50       11 $indent = $opts{'indent'} if defined $opts{'indent'}; # local override
455 3 100       12 if ($indent) { # non-zero amount? could be + or -
456 1         5 unshift @nodes, Text::KnuthPlass::Box->new(
457             'width' => $indent,
458             'value' => ''
459             );
460             }
461              
462             # figure best set of breakpoints (lowest cost)
463 3         16 my @breakpoints = $t->break(\@nodes);
464              
465             # quit if nothing found (need to increase tolerance)
466 3 50       19 return unless @breakpoints;
467              
468             # group nodes into lines according to breakpoints
469 3         13 my @lines = $t->breakpoints_to_lines(\@breakpoints, \@nodes);
470              
471             # Remove final penalty and glue from last line in paragraph
472 3 50       10 if (@lines) {
473 3         4 pop @{ $lines[-1]->{'nodes'} } ;
  3         8  
474 3         11 pop @{ $lines[-1]->{'nodes'} } ;
  3         8  
475             }
476              
477             # trim off one linelengths element per line output, but keep last one
478 3         4 my @temp = @{ $t->{'linelengths'} };
  3         11  
479 3         19 splice(@temp, 0, min(scalar(@lines), scalar(@temp)-1));
480 3         10 $t->{'linelengths'} = \@temp;
481              
482 3         92 return @lines;
483             }
484              
485             =head2 $t->line_lengths()
486              
487             =over
488              
489             =item @list = $t->line_lengths() # Get
490              
491             =item $t->line_lengths(@list) # Set
492              
493             Get or set the C list of allowed line lengths. This permits you to
494             do more elaborate operations on this array than simply replacing (resetting) it,
495             as done in the C and C methods. For example, at the bottom of
496             a page, you might cancel any further inset for a float, by deleting all but the
497             last element of the list.
498              
499             my @temp_LL = $t->line_lengths();
500             # cancel remaining line shortening
501             splice(@temp_LL, 0, scalar(@temp_LL)-1);
502             $t->line_lengths(@temp_LL);
503              
504             On a "Set" request, you must have at least one length element in the list. If
505             the list is empty, it is assumed to be a "Get" request.
506              
507             =back
508              
509             =cut
510              
511             sub line_lengths {
512 0     0 1 0 my $self = shift;
513              
514 0 0       0 if (@_) { # Set
515 0         0 $self->{'linelengths'} = \@_;
516 0         0 return;
517              
518             } else { # Get
519 0         0 return @{ $self->{'linelengths'} };
  0         0  
520             }
521             }
522              
523             =head2 $t->break_text_into_nodes($paragraph_string, %opts)
524              
525             This turns a paragraph into a list of box/glue/penalty nodes. It's
526             fairly basic, and designed to be overloaded. It should also support
527             multiple justification styles (centering, ragged right, etc.) but this
528             will come in a future release; right now, it just does full
529             justification.
530              
531             =head3 'style' => "string_name"
532              
533             =over
534              
535             =item "justify"
536              
537             Fully justify the text (flush left I right). This is the B,
538             and currently I
539              
540             =item "left"
541              
542             Not yet implemented. This will be flush left, ragged right (reversed for
543             RTL scripts).
544              
545             =item "right"
546              
547             Not yet implemented. This will be flush right, ragged left (reversed for
548             RTL scripts).
549              
550             =item "center"
551              
552             Implemented, but not yet fully tested.
553             This is centered text within the indicated line width.
554              
555             =back
556              
557             If you are doing clever typography or using non-Western languages you
558             may find that you will want to break text into nodes yourself, and pass
559             the list of nodes to the methods below, instead of using this method.
560              
561             =cut
562              
563             sub _add_word {
564 122     122   175 my ($self, $word, $nodes_r) = @_;
565 122         197 my @elems = $self->hyphenator()->hyphenate($word);
566 122         6645 for (0..$#elems) {
567 156         171 push @{$nodes_r}, Text::KnuthPlass::Box->new(
  156         276  
568             'width' => $self->measure()->($elems[$_]),
569             'value' => $elems[$_]
570             );
571 156 100       300 if ($_ != $#elems) {
572 34         38 push @{$nodes_r}, Text::KnuthPlass::Penalty->new(
  34         67  
573             'flagged' => 1, 'penalty' => $self->hyphenpenalty());
574             }
575             }
576 122         177 return;
577             }
578              
579             sub break_text_into_nodes {
580 6     6 1 559 my ($self, $text, %opts) = @_;
581 6         10 my @nodes;
582 6         123 my @words = split /\s+/, $text;
583              
584 6         12 my $style;
585 6 50       24 $style = $opts{'style'} if defined $opts{'style'};
586 6   50     32 $style ||= "justify"; # default
587              
588 6         26 $self->{'emwidth'} = $self->measure()->("M");
589 6         47 $self->{'spacewidth'} = $self->measure()->(" ");
590 6         38 $self->{'spacestretch'} = $self->{'spacewidth'} * $self->space()->{'width'} / $self->space()->{'stretch'};
591             # shrink of 0 desired in constant width or text output
592 6 100       101 if ($self->space()->{'shrink'} == 0) {
593 1         11 $self->{'spaceshrink'} = 0;
594             } else {
595 5         50 $self->{'spaceshrink'} = $self->{'spacewidth'} * $self->space()->{'width'} / $self->space()->{'shrink'};
596             }
597              
598 6         81 my $spacing_type = "_add_space_$style";
599 6         15 my $start = "_start_$style";
600 6         26 $self->$start(\@nodes);
601              
602 6         19 for (0..$#words) { my $word = $words[$_];
  122         156  
603 122         222 $self->_add_word($word, \@nodes);
604 122         230 $self->$spacing_type(\@nodes,$_ == $#words);
605             }
606 6         44 return @nodes;
607             }
608              
609             # fully justified (flush left and right)
610             sub _start_justify {
611 6     6   10 return;
612             }
613             sub _add_space_justify {
614 122     122   175 my ($self, $nodes_r, $final) = @_;
615 122 100       167 if ($final) {
616             # last line of paragraph, ends with required break (-inf)
617 6         9 push @{$nodes_r},
  6         13  
618             $self->glueclass()->new(
619             'width' => 0,
620             'stretch' => $self->infinity(),
621             'shrink' => 0
622             ),
623             $self->penaltyclass()->new(
624             'width' => 0,
625             'penalty' => -$self->infinity(),
626             'flagged' => 1
627             );
628             } else {
629             # NOT last line of paragraph
630 116         173 push @{$nodes_r}, $self->glueclass()->new(
631             'width' => $self->{'spacewidth'},
632             'stretch' => $self->{'spacestretch'},
633 116         116 'shrink' => $self->{'spaceshrink'}
634             );
635             }
636 122         189 return;
637             }
638              
639             # centered within line (NOT TESTED)
640             sub _start_center {
641 0     0   0 my ($self, $nodes_r) = @_;
642 0         0 push @{$nodes_r},
643             Text::KnuthPlass::Box->new('value' => ""),
644             Text::KnuthPlass::Glue->new(
645             'width' => 0,
646 0         0 'stretch' => 2*$self->{'emwidth'},
647             'shrink' => 0
648             );
649 0         0 return;
650             }
651              
652             sub _add_space_center {
653 0     0   0 my ($self, $nodes_r, $final) = @_;
654 0 0       0 if ($final) {
655             # last line of paragraph, ends with required break (-inf)
656 0         0 push @{$nodes_r}, Text::KnuthPlass::Glue->new(
657             'width' => 0,
658 0         0 'stretch' => 2*$self->{'emwidth'},
659             'shrink' => 0
660             ),
661             Text::KnuthPlass::Penalty->new(
662             'width' => 0,
663             'penalty' => -$self->infinity(),
664             'flagged' => 0
665             );
666             } else {
667             # NOT last line of paragraph
668 0         0 push @{$nodes_r}, Text::KnuthPlass::Glue->new(
669             'width' => 0,
670             'stretch' => 2*$self->{'emwidth'},
671             'shrink' => 0
672             ),
673             Text::KnuthPlass::Penalty->new(
674             'width' => 0,
675             'penalty' => 0,
676             'flagged' => 0
677             ),
678             Text::KnuthPlass::Glue->new(
679             'width' => $self->{'spacewidth'},
680             'stretch' => -4*$self->{'emwidth'},
681             'shrink' => 0
682             ),
683             Text::KnuthPlass::Box->new('value' => ""),
684             Text::KnuthPlass::Penalty->new(
685             'width' => 0,
686             'penalty' => $self->infinity(),
687             'flagged' => 0
688             ),
689             Text::KnuthPlass::Glue->new(
690             'width' => 0,
691 0         0 'stretch' => 2*$self->{'emwidth'},
692             'shrink' => 0
693             ),
694             }
695 0         0 return;
696             }
697              
698             # left justified (ragged right) not yet implemented, just handle as 'justified'
699             sub _start_left {
700             #my ($self, $nodes_r) = @_;
701             #return;
702 0     0   0 return _start_justify(@_);
703             }
704              
705             sub _add_space_left {
706             #my ($self, $nodes_r, $final) = @_;
707             #return;
708 0     0   0 return _add_space_justify(@_);
709             }
710              
711             # right justified (ragged left) not yet implemented, just handle as 'justified'
712             sub _start_right {
713             #my ($self, $nodes_r) = @_;
714             #return;
715 0     0   0 return _start_justify(@_);
716             }
717              
718             sub _add_space_right {
719             #my ($self, $nodes_r, $final) = @_;
720             #return;
721 0     0   0 return _add_space_justify(@_);
722             }
723              
724             =head2 break
725              
726             This implements the main body of the algorithm; it turns a list of nodes
727             (produced from the above method) into a list of breakpoint objects.
728              
729             =cut
730              
731             sub break {
732 4     4 1 1749 my ($self, $nodes) = @_;
733 4         25 $self->{'sum'} = {'width' => 0, 'stretch' => 0, 'shrink' => 0 };
734 4         39 $self->_init_nodelist();
735             # shouldn't ever happen, but just in case...
736 4 50 33     31 if (!$self->{'linelengths'} || ref $self->{'linelengths'} ne "ARRAY") {
737 0         0 croak "No linelengths set";
738             }
739              
740 4         26 for (0..$#$nodes) {
741 291         1964 my $node = $nodes->[$_];
742 291 100 33     671 if ($node->isa("Text::KnuthPlass::Box")) {
    100          
    50          
743 144         241 $self->{'sum'}{'width'} += $node->width();
744             } elsif ($node->isa("Text::KnuthPlass::Glue")) {
745 115 50 33     361 if ($_ > 0 and $nodes->[$_-1]->isa("Text::KnuthPlass::Box")) {
746 115         7603 $self->_mainloop($node, $_, $nodes);
747             }
748 115         288 $self->{'sum'}{'width'} += $node->width();
749 115         970 $self->{'sum'}{'stretch'} += $node->stretch();
750 115         889 $self->{'sum'}{'shrink'} += $node->shrink();
751             } elsif ($node->is_penalty() and $node->penalty() != $self->infinity()) {
752 32         2840 $self->_mainloop($node, $_, $nodes);
753             }
754             }
755              
756 4         36 my @retval = reverse $self->_active_to_breaks();
757 4         21 $self->_cleanup();
758 4         13 return @retval;
759             }
760              
761             sub _computeCost { # _compute_cost() in XS
762 0     0   0 my ($self, $start, $end, $active, $currentLine, $nodes) = @_;
763 0         0 warn "Computing cost from $start to $end\n" if DEBUG;
764 0         0 warn sprintf "Sum width: %f\n", $self->{'sum'}{'width'} if DEBUG;
765 0         0 warn sprintf "Total width: %f\n", $self->{'totals'}{'width'} if DEBUG;
766 0         0 my $width = $self->{'sum'}{'width'} - $active->totals()->{'width'};
767 0         0 my $stretch = 0; my $shrink = 0;
  0         0  
768 0         0 my $linelength = $currentLine <= @{$self->linelengths()}?
769             $self->{'linelengths'}[$currentLine-1]:
770 0 0       0 $self->{'linelengths'}[-1];
771             #$linelength -= $self->{'const'}; # allow space for split word hyphen
772             # allow for in renderer
773              
774 0 0 0     0 warn "Adding penalty width" if($nodes->[$end]->is_penalty()) and DEBUG;
775 0         0 warn sprintf "Width %f, linelength %f\n", $width, $linelength if DEBUG;
776              
777 0 0       0 if ($width < $linelength) {
    0          
778 0         0 $stretch = $self->{'sum'}{'stretch'} - $active->totals()->{'stretch'};
779 0         0 warn sprintf "Stretch %f\n", $stretch if DEBUG;
780 0 0       0 if ($stretch > 0) {
781 0         0 return ($linelength - $width) / $stretch;
782 0         0 } else { return $self->infinity(); }
783             } elsif ($width > $linelength) {
784 0         0 $shrink = $self->{'sum'}{'shrink'} - $active->totals()->{'shrink'};
785 0         0 warn sprintf "Shrink %f\n", $shrink if DEBUG;
786 0 0       0 if ($shrink > 0) {
787 0         0 return ($linelength - $width) / $shrink;
788 0         0 } else { return $self->infinity(); }
789 0         0 } else { return 0; }
790             }
791              
792             sub _computeSum { # _compute_sum() in XS
793 0     0   0 my ($self, $index, $nodes) = @_;
794             my $result = {
795             'width' => $self->{'sum'}{'width'},
796             'stretch' => $self->{'sum'}{'stretch'},
797 0         0 'shrink' => $self->{'sum'}{'shrink'}
798             };
799 0         0 for ($index..$#$nodes) {
800 0 0 0     0 if ($nodes->[$_]->isa("Text::KnuthPlass::Glue")) {
    0 0        
      0        
801 0         0 $result->{'width'} += $nodes->[$_]->width();
802 0         0 $result->{'stretch'} += $nodes->[$_]->stretch();
803 0         0 $result->{'shrink'} += $nodes->[$_]->shrink();
804             } elsif ($nodes->[$_]->isa("Text::KnuthPlass::Box") or
805             ($nodes->[$_]->is_penalty() and $nodes->[$_]->penalty() ==
806             -$self->infinity() and $_ > $index)) {
807 0         0 last;
808             }
809             }
810 0         0 return $result;
811             }
812              
813             sub _init_nodelist { # Overridden by XS, same name in XS
814             my $self = shift;
815             $self->{'activeNodes'} = [
816             Text::KnuthPlass::Breakpoint->new(
817             'position' => 0,
818             'demerits' => 0,
819             'ratio' => 0,
820             'line' => 0,
821             'fitnessClass' => 0,
822             'totals' => { 'width' => 0, 'stretch' => 0, 'shrink' => 0}
823             )
824             ];
825             return;
826             }
827              
828             # same name in XS, but has quite a bit of code
829             sub _cleanup { return; }
830              
831             sub _active_to_breaks { # Overridden by XS, same name in XS
832             my $self = shift;
833             return unless @{$self->{'activeNodes'}};
834             my @breaks;
835             my $best = Text::KnuthPlass::Breakpoint->new('demerits' => ~0);
836             for (@{$self->{'activeNodes'}}) {
837             $best = $_ if $_->demerits() < $best->demerits();
838             }
839             while ($best) {
840             push @breaks, { 'position' => $best->position(),
841             'ratio' => $best->ratio()
842             };
843             $best = $best->previous();
844             }
845             return @breaks;
846             }
847              
848             sub _mainloop { # same name in XS
849             my ($self, $node, $index, $nodes) = @_;
850             my $next; my $ratio = 0; my $demerits = 0; my @candidates;
851             my $badness; my $currentLine = 0; my $tmpSum; my $currentClass = 0;
852             my $active = $self->{'activeNodes'}[0];
853             my $ptr = 0;
854             while ($active) {
855             my @candidates = ( # four fitness classes?
856             # (tight, normal, loose, very loose)
857             {'demerits' => ~0},
858             {'demerits' => ~0},
859             {'demerits' => ~0},
860             {'demerits' => ~0}
861             );
862             warn "Outer\n" if DEBUG;
863             while ($active) {
864             my $next = $self->{'activeNodes'}[++$ptr];
865             warn "Inner loop\n" if DEBUG;
866             $currentLine = $active->line()+1;
867             $ratio = $self->_computeCost($active->position(),
868             $index,
869             $active,
870             $currentLine,
871             $nodes);
872             warn "Got a ratio of $ratio, node is ".$node->_txt()."\n" if DEBUG;
873             if ($ratio < -1 or
874             ($node->is_penalty() and
875             $node->penalty() == -$self->infinity())) {
876             warn "Dropping a node\n" if DEBUG;
877             $self->{'activeNodes'} = [ grep {$_ != $active} @{$self->{'activeNodes'}} ];
878             $ptr--;
879             }
880             if (-1 <= $ratio and $ratio <= $self->tolerance()) {
881             $badness = 100 * $ratio**3;
882             warn "Badness is $badness\n" if DEBUG;
883             if ($node->is_penalty() and $node->penalty() > 0) {
884             $demerits = $self->demerits()->{'line'} + $badness +
885             $node->penalty();
886             } elsif ($node->is_penalty() and $node->penalty() != -$self->infinity()) {
887             $demerits = $self->demerits()->{'line'} + $badness -
888             $node->penalty();
889             } else {
890             $demerits = $self->demerits()->{'line'} + $badness;
891             }
892             $demerits *= $demerits; # demerits**2
893              
894             if ($node->is_penalty() and $nodes->[$active->position()]->is_penalty()) {
895             $demerits += $self->demerits()->{'flagged'} *
896             $node->flagged() *
897             $nodes->[$active->position()]->flagged();
898             }
899              
900             if ($ratio < -0.5) { $currentClass = 0; } # tight
901             elsif ($ratio <= 0.5) { $currentClass = 1; } # normal
902             elsif ($ratio <= 1 ) { $currentClass = 2; } # loose
903             else { $currentClass = 3; } # very loose
904              
905             # bad fitness if changes by more than 1 class
906             $demerits += $self->demerits()->{'fitness'}
907             if abs($currentClass - $active->fitnessClass()) > 1;
908              
909             $demerits += $active->demerits();
910             if ($demerits < $candidates[$currentClass]->{'demerits'}) {
911             warn "Setting c $currentClass\n" if DEBUG;
912             $candidates[$currentClass] = {
913             'active' => $active,
914             'demerits' => $demerits,
915             'ratio' => $ratio
916             };
917             }
918             }
919             $active = $next;
920             #warn "Active is now $active" if DEBUG;
921             last if !$active ||
922             $active->line() >= $currentLine;
923             }
924             warn "Post inner loop\n" if DEBUG;
925              
926             $tmpSum = $self->_computeSum($index, $nodes);
927             for (0..3) {
928             my $c = $candidates[$_];
929             if ($c->{'demerits'} < ~0) {
930             my $newnode = Text::KnuthPlass::Breakpoint->new(
931             'position' => $index,
932             'demerits' => $c->{'demerits'},
933             'ratio' => $c->{'ratio'},
934             'line' => $c->{'active'}->line() + 1,
935             'fitnessClass' => $_,
936             'totals' => $tmpSum,
937             'previous' => $c->{'active'}
938             );
939             if ($active) {
940             warn "Before\n" if DEBUG;
941             my @newlist;
942             for (@{$self->{'activeNodes'}}) {
943             if ($_ == $active) { push @newlist, $newnode; }
944             push @newlist, $_;
945             }
946             $ptr++;
947             $self->{'activeNodes'} = [ @newlist ];
948             # grep {;
949             # ($_ == $active) ? ($newnode, $active) : ($_)
950             #} @{$self->{'activeNodes'}}
951             # ];
952             } else {
953             warn "After\n" if DEBUG;
954             push @{$self->{'activeNodes'}}, $newnode;
955             }
956             #warn @{$self->{'activeNodes'}} if DEBUG;
957             } # demerits check
958             } # fitness class 0..3 loop
959             } # while $active loop
960             return;
961             }
962              
963             =head2 @lines = $t->breakpoints_to_lines(\@breakpoints, \@nodes)
964              
965             And this takes the breakpoints and the nodes, and assembles them into
966             lines.
967              
968             =cut
969              
970             sub breakpoints_to_lines {
971 3     3 1 7 my ($self, $breakpoints, $nodes) = @_;
972 3         6 my @lines;
973 3         5 my $linestart = 0;
974 3         9 for my $x (1 .. $#$breakpoints) { $_ = $breakpoints->[$x];
  18         22  
975 18         21 my $position = $_->{'position'};
976 18         22 my $r = $_->{'ratio'};
977 18         36 for ($linestart..$#$nodes) {
978 33 100 66     143 if ($nodes->[$_]->isa("Text::KnuthPlass::Box") or
      66        
979             ($nodes->[$_]->is_penalty() and
980             $nodes->[$_]->penalty() ==-$self->infinity())) {
981 18         21 $linestart = $_;
982 18         23 last;
983             }
984             }
985             push @lines, {
986             'ratio' => $r,
987             'position' => $_->{'position'},
988 18         36 'nodes' => [ @{$nodes}[$linestart..$position] ]
  18         53  
989             };
990 18         36 $linestart = $_->{'position'};
991             }
992             #if ($linestart < $#$nodes) {
993             # push @lines, { 'ratio' => 1, 'position' => $#$nodes,
994             # 'nodes' => [ @{$nodes}[$linestart+1..$#$nodes] ]};
995             #}
996 3         9 return @lines;
997             }
998              
999             =head2 boxclass()
1000              
1001             =head2 glueclass()
1002              
1003             =head2 penaltyclass()
1004              
1005             For subclassers.
1006              
1007             =cut
1008              
1009             sub boxclass {
1010 0     0 1 0 return "Text::KnuthPlass::Box";
1011             }
1012             sub glueclass {
1013 122     122 1 246 return "Text::KnuthPlass::Glue";
1014             }
1015             sub penaltyclass {
1016 6     6 1 22 return "Text::KnuthPlass::Penalty";
1017             }
1018              
1019             =head1 AUTHOR
1020              
1021             originally written by Simon Cozens, C<< >>
1022              
1023             since 2020, maintained by Phil Perry
1024              
1025             =head1 ACKNOWLEDGEMENTS
1026              
1027             This module is a Perl translation (originally by Simon Cozens) of Bram Stein's
1028             "Typeset" Javascript Knuth-Plass implementation.
1029              
1030             =head1 BUGS
1031              
1032             Please report any bugs or feature requests to the I section of
1033             C.
1034              
1035             Do NOT under ANY circumstances open a PR (Pull Request) to report a bug. It is
1036             a waste of both your and our time and effort. Open a regular ticket (issue),
1037             and attach a Perl (.pl) program illustrating the problem, if possible. If you
1038             believe that you have a program patch, and offer to share it as a PR, we may
1039             give the go-ahead. Unsolicited PRs may be closed without further action.
1040              
1041             =head1 COPYRIGHT & LICENSE
1042              
1043             Copyright (c) 2011 Simon Cozens.
1044              
1045             Copyright (c) 2020-2022 Phil M Perry.
1046              
1047             This program is released under the following license: Perl, GPL
1048              
1049             =cut
1050              
1051             1; # End of Text::KnuthPlass