line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Reflow; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
require 5.005_62; |
4
|
1
|
|
|
1
|
|
23681
|
use strict; |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
78
|
|
5
|
1
|
|
|
1
|
|
14
|
use warnings; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
88
|
|
6
|
1
|
|
|
1
|
|
1660
|
use integer; |
|
1
|
|
|
|
|
36
|
|
|
1
|
|
|
|
|
11
|
|
7
|
1
|
|
|
1
|
|
83
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1537
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
require Exporter; |
10
|
|
|
|
|
|
|
require DynaLoader; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our @ISA = qw(Exporter DynaLoader); |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Original script written by Michael Larsen, larsen@edu.upenn.math |
15
|
|
|
|
|
|
|
# Modified by Martin Ward, martin@gkc.org.uk |
16
|
|
|
|
|
|
|
# Copyright 1994 Michael Larsen and Martin Ward |
17
|
|
|
|
|
|
|
# Email: martin@gkc.org.uk |
18
|
|
|
|
|
|
|
# |
19
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or modify |
20
|
|
|
|
|
|
|
# it under the terms of either the Artistic License or |
21
|
|
|
|
|
|
|
# the GNU General Public License as published by |
22
|
|
|
|
|
|
|
# the Free Software Foundation; either version 3 of the License, or |
23
|
|
|
|
|
|
|
# (at your option) any later version. |
24
|
|
|
|
|
|
|
# |
25
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful, |
26
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
27
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
28
|
|
|
|
|
|
|
# GNU General Public License for more details. |
29
|
|
|
|
|
|
|
# |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# Items to export into callers namespace by default. Note: do not export |
32
|
|
|
|
|
|
|
# names by default without a very good reason. Use EXPORT_OK instead. |
33
|
|
|
|
|
|
|
# Do not simply export all your public functions/methods/constants. |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# This allows declaration use Text::Reflow ':all'; |
36
|
|
|
|
|
|
|
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK |
37
|
|
|
|
|
|
|
# will save memory. |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( 'all' => [ qw( |
40
|
|
|
|
|
|
|
reflow_file |
41
|
|
|
|
|
|
|
reflow_string |
42
|
|
|
|
|
|
|
reflow_array |
43
|
|
|
|
|
|
|
) ] ); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
our @EXPORT = qw( |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
$Text::Reflow::VERSION = "1.16"; |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
bootstrap Text::Reflow $Text::Reflow::VERSION; |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# Preloaded methods go here. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# This is the perl version of the C function reflow_trial |
58
|
|
|
|
|
|
|
# If the C XSUB doesn't work, comment out the line |
59
|
|
|
|
|
|
|
# bootstrap Text::Reflow $VERSION; |
60
|
|
|
|
|
|
|
# above, and take the _ from the front of this perl version: |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub _reflow_trial($$$$$$$$$$) { |
63
|
0
|
|
|
0
|
|
0
|
my ($optimum, $maximum, $wordcount, |
64
|
|
|
|
|
|
|
$penaltylimit, $semantic, $shortlast, |
65
|
|
|
|
|
|
|
$word_len, $space_len, $extra, $result) = @_; |
66
|
0
|
|
|
|
|
0
|
my ($lastbreak, @linkbreak); |
67
|
0
|
|
|
|
|
0
|
my ($j, $k, $interval, $penalty, @totalpenalty, $bestsofar); |
68
|
0
|
|
|
|
|
0
|
my (@best_linkbreak, $best_lastbreak, $opt); |
69
|
0
|
|
|
|
|
0
|
my @optimum = unpack("N*", pack("H*", $optimum)); |
70
|
0
|
|
|
|
|
0
|
my @word_len = unpack("N*", pack("H*", $word_len)); |
71
|
0
|
|
|
|
|
0
|
my @space_len = unpack("N*", pack("H*", $space_len)); |
72
|
0
|
|
|
|
|
0
|
my @extra = unpack("N*", pack("H*", $extra)); |
73
|
0
|
|
|
|
|
0
|
my $best = $penaltylimit * 21; |
74
|
0
|
|
|
|
|
0
|
foreach $opt (@optimum) { |
75
|
0
|
|
|
|
|
0
|
@linkbreak = (); |
76
|
0
|
|
|
|
|
0
|
for ($j = 0; $j < $wordcount; $j++) { # Optimize preceding break |
77
|
0
|
|
|
|
|
0
|
$interval = 0; |
78
|
0
|
|
|
|
|
0
|
$totalpenalty[$j] = $penaltylimit * 2; |
79
|
0
|
|
|
|
|
0
|
for ($k = $j; $k >= 0; $k--) { |
80
|
0
|
|
|
|
|
0
|
$interval += $word_len[$k]; |
81
|
0
|
0
|
0
|
|
|
0
|
last if (($k < $j) && (($interval > $opt + 10) |
|
|
|
0
|
|
|
|
|
82
|
|
|
|
|
|
|
|| ($interval >= $maximum))); |
83
|
0
|
|
|
|
|
0
|
$penalty = ($interval - $opt) * ($interval - $opt); |
84
|
0
|
|
|
|
|
0
|
$interval += $space_len[$k]; |
85
|
0
|
0
|
|
|
|
0
|
$penalty += $totalpenalty[$k-1] if ($k > 0); |
86
|
0
|
|
|
|
|
0
|
$penalty -= ($extra[$j] * $semantic)/2; |
87
|
0
|
0
|
|
|
|
0
|
if ($penalty < $totalpenalty[$j]) { |
88
|
0
|
|
|
|
|
0
|
$totalpenalty[$j] = $penalty; |
89
|
0
|
|
|
|
|
0
|
$linkbreak[$j] = $k-1; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
} |
93
|
0
|
|
|
|
|
0
|
$interval = 0; |
94
|
0
|
|
|
|
|
0
|
$bestsofar = $penaltylimit * 20; |
95
|
0
|
|
|
|
|
0
|
$lastbreak = $wordcount-2; |
96
|
|
|
|
|
|
|
# Pick a break for the last line which gives |
97
|
|
|
|
|
|
|
# the least penalties for previous lines: |
98
|
0
|
|
|
|
|
0
|
for ($k = $wordcount-2; $k >= -1; $k--) { # Break after k? |
99
|
0
|
|
|
|
|
0
|
$interval += $word_len[$k+1]; |
100
|
0
|
0
|
0
|
|
|
0
|
last if (($interval > $opt + 10) || ($interval > $maximum)); |
101
|
0
|
0
|
|
|
|
0
|
if ($interval > $opt) { # Don't make last line too long |
102
|
0
|
|
|
|
|
0
|
$penalty = ($interval - $opt) * ($interval - $opt); |
103
|
|
|
|
|
|
|
} else { |
104
|
0
|
|
|
|
|
0
|
$penalty = 0; |
105
|
|
|
|
|
|
|
} |
106
|
0
|
|
|
|
|
0
|
$interval += $space_len[$k+1]; |
107
|
0
|
0
|
|
|
|
0
|
$penalty += $totalpenalty[$k] if ($k >= 0); |
108
|
0
|
0
|
|
|
|
0
|
$penalty += $shortlast * $semantic if ($wordcount - $k - 1 <= 2); |
109
|
0
|
0
|
|
|
|
0
|
if ($penalty <= $bestsofar) { |
110
|
0
|
|
|
|
|
0
|
$bestsofar = $penalty; |
111
|
0
|
|
|
|
|
0
|
$lastbreak = $k; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
# Save these breaks if they are an improvement: |
115
|
0
|
0
|
|
|
|
0
|
if ($bestsofar < $best) { |
116
|
0
|
|
|
|
|
0
|
$best_lastbreak = $lastbreak; |
117
|
0
|
|
|
|
|
0
|
@best_linkbreak = @linkbreak; |
118
|
0
|
|
|
|
|
0
|
$best = $bestsofar; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
} # Next $opt |
121
|
|
|
|
|
|
|
# Return the best breaks: |
122
|
0
|
|
|
|
|
0
|
$result = unpack("H*", pack("N*", ($best_lastbreak, @best_linkbreak))); |
123
|
0
|
|
|
|
|
0
|
return($result); |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
|
127
|
1
|
|
|
|
|
1950
|
use vars qw( |
128
|
|
|
|
|
|
|
$IO_Files $lastbreak $poetryindent %abbrev @output |
129
|
|
|
|
|
|
|
$connpenalty $maximum $quote %connectives @save_opts |
130
|
|
|
|
|
|
|
$dependent $namebreak $semantic %keys @space_len |
131
|
|
|
|
|
|
|
$frenchspacing $noreflow $sentence @extra @tmp |
132
|
|
|
|
|
|
|
$indent $oneparagraph $shortlast @from @to |
133
|
|
|
|
|
|
|
$indent1 $optimum $skipindented @linewords @word_len |
134
|
|
|
|
|
|
|
$indent2 $penaltylimit $skipto @linkbreak @words |
135
|
|
|
|
|
|
|
$independent $pin $wordcount @optimum |
136
|
1
|
|
|
1
|
|
16
|
); |
|
1
|
|
|
|
|
3
|
|
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# The following parameters can be twiddled to taste: |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
%keys = (optimum => '.*', maximum => '\d+', |
141
|
|
|
|
|
|
|
indent => '.*', indent1 => '.*', indent2 => '.*', |
142
|
|
|
|
|
|
|
quote => '.*', |
143
|
|
|
|
|
|
|
skipto => '.*', skipindented => '[012]', oneparagraph => '[yYnN]', |
144
|
|
|
|
|
|
|
frenchspacing => '[yYnN]', |
145
|
|
|
|
|
|
|
noreflow => '.*', |
146
|
|
|
|
|
|
|
semantic => '\d+', namebreak => '\d+', |
147
|
|
|
|
|
|
|
sentence => '\d+', independent => '\d+', dependent => '\d+', |
148
|
|
|
|
|
|
|
shortlast => '\d+', connpenalty => '\d+', |
149
|
|
|
|
|
|
|
poetryindent => '\d+'); |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
$optimum = [65]; # Best line length 65. Also try [60..70] |
153
|
|
|
|
|
|
|
$maximum = 75; # Maximum possible line length 80 |
154
|
|
|
|
|
|
|
$indent1 = ""; # Indentation for first line |
155
|
|
|
|
|
|
|
$indent2 = ""; # Indentation for each line after the first |
156
|
|
|
|
|
|
|
$quote = ""; # Quote characters to remove from the front of each line |
157
|
|
|
|
|
|
|
$skipto = ""; # Pattern to skip to before starting to reflow |
158
|
|
|
|
|
|
|
$skipindented = 2; # Number of sequential indented lines required |
159
|
|
|
|
|
|
|
# before the group of lines will be skipped |
160
|
|
|
|
|
|
|
$noreflow = ""; # A regexp to indicate lines which should not be reflowed |
161
|
|
|
|
|
|
|
# eg table of contents: '\.\s*\.\s*\.\s*\.\s*\.' |
162
|
|
|
|
|
|
|
$frenchspacing = "n"; # If "y" then don't put two spaces at end of sentence/clause |
163
|
|
|
|
|
|
|
$oneparagraph = "n"; # If "Y" then put all the input into a single paragraph |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
$semantic = 30; # Extent to which semantic factors matter 20 |
166
|
|
|
|
|
|
|
$namebreak = 20; # Penalty for splitting up name 10 |
167
|
|
|
|
|
|
|
$sentence = 20; # Penalty for sentence widows and orphans 6 |
168
|
|
|
|
|
|
|
$independent = 10; # Penalty for independent clause w's & o's |
169
|
|
|
|
|
|
|
$dependent = 6; # Penalty for dependent clause w's & o's |
170
|
|
|
|
|
|
|
$shortlast = 5; # Penalty for a short last line (1 or 2 words) in a paragraph |
171
|
|
|
|
|
|
|
$connpenalty = 1; # Multiplier to avoid penalties at end of line |
172
|
|
|
|
|
|
|
$poetryindent = 1; # Treat $skipindented consecutive lines indented by |
173
|
|
|
|
|
|
|
# at least this much |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
$penaltylimit = 0x2000000; |
176
|
|
|
|
|
|
|
@save_opts = (); # Saved original values of options |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
$pin = " " x $poetryindent; |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# NB By default there must be two consecutive indented lines for it to count |
181
|
|
|
|
|
|
|
# as poetry, so the program will not mistake a paragraph indentation |
182
|
|
|
|
|
|
|
# for a line of poetry. |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
# Abbreviations from a half dozen novels, |
186
|
|
|
|
|
|
|
# Titles and other abbreviations which should discourage |
187
|
|
|
|
|
|
|
# a break have the value 1: |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
%abbrev = ( |
190
|
|
|
|
|
|
|
Jan => 1, Feb => 1, Mar => 1, Apr => 1, Jun => 1, Jul => 1, |
191
|
|
|
|
|
|
|
Aug => 1, Sep => 1, Sept => 1, Oct => 1, Nov => 1, Dec => 1, |
192
|
|
|
|
|
|
|
Pvt => 1, Cpl => 1, Sgt => 1, Ens => 1, Lieut => 1, Capt => 1, Cmdr => 1, |
193
|
|
|
|
|
|
|
Maj => 1, Col => 1, Gen => 1, Adm => 1, |
194
|
|
|
|
|
|
|
Dr => 1, Hon => 1, Mlle => 1, Mme => 1, Mr => 1, Mrs => 1, Miss => 1, |
195
|
|
|
|
|
|
|
Prof => 1, Rev => 1, |
196
|
|
|
|
|
|
|
Bart => 2, Esq => 2, etc => 2, |
197
|
|
|
|
|
|
|
No => 1, St => 1, |
198
|
|
|
|
|
|
|
Ave => 2, Rd => 2, Blvd => 2, Ct => 2, Cir => 2, |
199
|
|
|
|
|
|
|
A => 1, B => 1, C => 1, D => 1, E => 1, F => 1, G => 1, H => 1, |
200
|
|
|
|
|
|
|
I => 1, J => 1, K => 1, L => 1, M => 1, N => 1, O => 1, P => 1, Q => 1, |
201
|
|
|
|
|
|
|
R => 1, S => 1, T => 1, U => 1, V => 1, W => 1, X => 1, Y => 1, Z => 1); |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
# The value is the rlative effort to avoid breaking |
204
|
|
|
|
|
|
|
# a line after this connective |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
%connectives = ( # Extracted from /usr/dict/connectives |
207
|
|
|
|
|
|
|
the => 4, he => 4, |
208
|
|
|
|
|
|
|
of => 2, and => 2, to => 2, a => 2, |
209
|
|
|
|
|
|
|
in => 2, that => 2, is => 1, was => 1, |
210
|
|
|
|
|
|
|
for => 2, with => 2, as => 2, his => 1, |
211
|
|
|
|
|
|
|
on => 1, be => 1, at => 1, by => 2, |
212
|
|
|
|
|
|
|
had => 1, not => 1, are => 1, but => 2, from => 1, |
213
|
|
|
|
|
|
|
or => 2, have => 1, an => 2, which => 2, |
214
|
|
|
|
|
|
|
one => 1, were => 1, her => 1, all => 1, their => 1, |
215
|
|
|
|
|
|
|
when => 2, who => 2, will => 1, more => 1, no => 1, |
216
|
|
|
|
|
|
|
if => 2, out => 1, so => 2, what => 2, its => 1, |
217
|
|
|
|
|
|
|
about => 1, into => 1, than => 1, |
218
|
|
|
|
|
|
|
only => 1, other => 1, new => 1, some => 1, |
219
|
|
|
|
|
|
|
these => 2, two => 1, may => 1, |
220
|
|
|
|
|
|
|
do => 1, first => 1, any => 1, my => 1, now => 1, |
221
|
|
|
|
|
|
|
such => 1, like => 2, our => 1, over => 1, |
222
|
|
|
|
|
|
|
even => 1, most => 1, after => 1, also => 2, |
223
|
|
|
|
|
|
|
many => 1, before => 1, through => 1, where => 2, your => 1, |
224
|
|
|
|
|
|
|
well => 1, down => 1, should => 1, because => 2, |
225
|
|
|
|
|
|
|
each => 1, just => 1, those => 2, how => 2, too => 1, |
226
|
|
|
|
|
|
|
good => 1, very => 2, here => 1, between => 1, both => 1, |
227
|
|
|
|
|
|
|
under => 1, never => 1, same => 1, another => 1, |
228
|
|
|
|
|
|
|
while => 2, last => 1, might => 1, great => 1, |
229
|
|
|
|
|
|
|
since => 2, against => 1, right => 1, three => 2, next => 2); |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
sub reflow_file($$@) { |
233
|
4
|
|
|
4
|
0
|
3197
|
my ($from, $to, @opts) = @_; |
234
|
4
|
|
|
|
|
25
|
local $IO_Files = 1; # We are reading/writing files |
235
|
4
|
50
|
|
|
|
19
|
$from = \*STDIN if ($from eq ""); |
236
|
4
|
50
|
|
|
|
14
|
$to = \*STDOUT if ($to eq ""); |
237
|
4
|
50
|
0
|
|
|
20
|
my $from_a_handle = (ref($from) |
238
|
|
|
|
|
|
|
? (ref($from) eq 'GLOB' |
239
|
|
|
|
|
|
|
|| UNIVERSAL::isa($from, 'GLOB') |
240
|
|
|
|
|
|
|
|| UNIVERSAL::isa($from, 'IO::Handle')) |
241
|
|
|
|
|
|
|
: (ref(\$from) eq 'GLOB')); |
242
|
4
|
50
|
0
|
|
|
28
|
my $to_a_handle = (ref($to) |
243
|
|
|
|
|
|
|
? (ref($to) eq 'GLOB' |
244
|
|
|
|
|
|
|
|| UNIVERSAL::isa($to, 'GLOB') |
245
|
|
|
|
|
|
|
|| UNIVERSAL::isa($to, 'IO::Handle')) |
246
|
|
|
|
|
|
|
: (ref(\$to) eq 'GLOB')); |
247
|
4
|
|
|
|
|
9
|
my $closefrom = 0; |
248
|
4
|
|
|
|
|
8
|
my $closeto = 0; |
249
|
4
|
|
|
|
|
57
|
local(*FROM, *TO); |
250
|
|
|
|
|
|
|
|
251
|
4
|
50
|
|
|
|
13
|
if ($from_a_handle) { |
252
|
|
|
|
|
|
|
{ |
253
|
1
|
|
|
1
|
|
15
|
no warnings; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
423
|
|
|
0
|
|
|
|
|
0
|
|
254
|
0
|
|
|
|
|
0
|
*FROM = *$from{FILEHANDLE}; |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
} else { |
257
|
4
|
50
|
|
|
|
25
|
$from = "./$from" if $from =~ /^\s/s; |
258
|
4
|
50
|
|
|
|
158
|
open(FROM, "< $from\0") or croak "Cannot read `$from': $!"; |
259
|
4
|
50
|
|
|
|
22
|
binmode FROM or die "($!,$^E)"; |
260
|
4
|
|
|
|
|
8
|
$closefrom = 1; |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
|
263
|
4
|
50
|
|
|
|
10
|
if ($to_a_handle) { |
264
|
|
|
|
|
|
|
{ |
265
|
1
|
|
|
1
|
|
14
|
no warnings; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
1300
|
|
|
0
|
|
|
|
|
0
|
|
266
|
0
|
|
|
|
|
0
|
*TO = *$to{FILEHANDLE}; |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
} else { |
269
|
4
|
50
|
|
|
|
17
|
$to = "./$to" if $to =~ /^\s/s; |
270
|
4
|
50
|
|
|
|
293
|
open(TO,"> $to\0") or croak "Cannot write to `$to': $!"; |
271
|
4
|
50
|
|
|
|
19
|
binmode TO or die "($!,$^E)"; |
272
|
4
|
|
|
|
|
9
|
$closeto = 1; |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
|
275
|
4
|
|
|
|
|
16
|
process_opts(@opts); |
276
|
4
|
|
|
|
|
24
|
reflow(); |
277
|
4
|
|
|
|
|
16
|
restore_opts(); |
278
|
|
|
|
|
|
|
|
279
|
4
|
50
|
50
|
|
|
547
|
close(TO) || croak("Cannot close `$to': $!") if ($closeto); |
280
|
4
|
50
|
50
|
|
|
98
|
close(FROM) || croak("Cannot close `$from': $!") if ($closefrom); |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
sub reflow_string($@) { |
285
|
11
|
|
|
11
|
0
|
9019
|
my ($input, @opts) = @_; |
286
|
11
|
|
|
|
|
25
|
local $IO_Files = 0; # We are reading/writing arrays |
287
|
|
|
|
|
|
|
# Create the array from the string, keep trailing empty lines. |
288
|
|
|
|
|
|
|
# We split on newlines and then restore them, being careful |
289
|
|
|
|
|
|
|
# not to add an extra newline at the end: |
290
|
11
|
|
|
|
|
257
|
local @from = split(/\n/, $input, -1); |
291
|
11
|
50
|
|
|
|
103
|
pop(@from) if ($from[$#from] eq ""); |
292
|
11
|
|
|
|
|
47
|
@from = map { "$_\n" } @from; |
|
356
|
|
|
|
|
923
|
|
293
|
11
|
|
|
|
|
61
|
local @to = (); |
294
|
11
|
|
|
|
|
47
|
process_opts(@opts); |
295
|
11
|
|
|
|
|
34
|
reflow(); |
296
|
11
|
|
|
|
|
40
|
restore_opts(); |
297
|
11
|
|
|
|
|
255
|
return(join("", @to)); |
298
|
|
|
|
|
|
|
} |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
sub reflow_array($@) { |
302
|
4
|
|
|
4
|
0
|
3451
|
my ($input, @opts) = @_; |
303
|
4
|
|
|
|
|
11
|
local $IO_Files = 0; # We are reading/writing arrays |
304
|
4
|
|
|
|
|
38
|
local @from = @$input; |
305
|
4
|
|
|
|
|
10
|
local @to = (); |
306
|
4
|
|
|
|
|
17
|
process_opts(@opts); |
307
|
4
|
|
|
|
|
10
|
reflow(); |
308
|
4
|
|
|
|
|
14
|
restore_opts(); |
309
|
4
|
|
|
|
|
26
|
return(\@to); |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
# Process the keyword options, set module global variables as required, |
315
|
|
|
|
|
|
|
# save the old values on the @save_opts stack: |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
sub process_opts(@) { |
318
|
19
|
|
|
19
|
0
|
144
|
my @opts = @_; |
319
|
19
|
|
|
|
|
30
|
my ($key, $value); |
320
|
1
|
|
|
1
|
|
15
|
no strict 'refs'; |
|
1
|
|
|
|
|
15
|
|
|
1
|
|
|
|
|
975
|
|
321
|
|
|
|
|
|
|
# Fix an externally-set $optimum value: |
322
|
19
|
50
|
|
|
|
162
|
$optimum = [$optimum] if ($optimum =~ /^\d+$/); |
323
|
19
|
|
|
|
|
69
|
while (@opts) { |
324
|
60
|
|
|
|
|
100
|
$key = shift(@opts); |
325
|
60
|
50
|
|
|
|
191
|
croak "No value for option key `$key'" unless (@opts); |
326
|
60
|
|
|
|
|
83
|
$value = shift(@opts); |
327
|
60
|
50
|
|
|
|
218
|
croak "`$key' is not a valid option" unless ($keys{$key}); |
328
|
60
|
50
|
|
|
|
1391
|
croak "`$value' is not a suitable value for `$key'" |
329
|
|
|
|
|
|
|
unless ($value =~ /^$keys{$key}$/); |
330
|
|
|
|
|
|
|
# keyword "indent" is short for setting both indent1 and indent2: |
331
|
60
|
100
|
|
|
|
275
|
if ($key eq "indent") { |
|
|
100
|
|
|
|
|
|
332
|
1
|
|
|
|
|
4
|
$key = "indent1"; |
333
|
1
|
|
|
|
|
5
|
unshift(@opts, "indent2", $value); |
334
|
|
|
|
|
|
|
} elsif ($key eq "optimum") { |
335
|
14
|
50
|
|
|
|
113
|
if ($value =~ /^\d+$/) { |
|
|
50
|
|
|
|
|
|
336
|
0
|
|
|
|
|
0
|
$value = [$value]; |
337
|
|
|
|
|
|
|
} elsif (ref($value) ne 'ARRAY') { |
338
|
0
|
|
|
|
|
0
|
croak "`$value' is not a suitable value for `$key'"; |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
# Save old value. Save a copy of the array if the value is a reference: |
342
|
60
|
100
|
|
|
|
105
|
if (ref(${$key}) eq "ARRAY") { |
|
60
|
|
|
|
|
232
|
|
343
|
14
|
|
|
|
|
25
|
push(@save_opts, $key, [@${$key}]); |
|
14
|
|
|
|
|
54
|
|
344
|
|
|
|
|
|
|
} else { |
345
|
46
|
|
|
|
|
107
|
push(@save_opts, $key, ${$key}); |
|
46
|
|
|
|
|
113
|
|
346
|
|
|
|
|
|
|
} |
347
|
60
|
|
|
|
|
87
|
${$key} = $value; |
|
60
|
|
|
|
|
286
|
|
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
# Adjust $optimum and $maximum by $indent2 length: |
350
|
19
|
100
|
|
|
|
103
|
if ($indent2 ne "") { |
351
|
3
|
|
|
|
|
14
|
push(@save_opts, "optimum", $optimum, "maximum", $maximum); |
352
|
3
|
|
|
|
|
7
|
$maximum -= length($indent2); |
353
|
3
|
|
|
|
|
10
|
$optimum = [map { $_ - length($indent2) } @$optimum]; |
|
7
|
|
|
|
|
20
|
|
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
} |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
sub restore_opts() { |
359
|
19
|
|
|
19
|
0
|
44
|
my ($key, $value); |
360
|
1
|
|
|
1
|
|
15
|
no strict 'refs'; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
6165
|
|
361
|
19
|
|
|
|
|
83
|
while (@save_opts) { |
362
|
66
|
|
|
|
|
100
|
$value = pop(@save_opts); |
363
|
66
|
|
|
|
|
103
|
$key = pop(@save_opts); |
364
|
66
|
|
|
|
|
109
|
${$key} = $value; |
|
66
|
|
|
|
|
275
|
|
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
sub get_line() { |
370
|
655
|
|
|
655
|
0
|
790
|
my $line; |
371
|
655
|
100
|
|
|
|
1190
|
if ($IO_Files) { |
372
|
144
|
|
|
|
|
561
|
$line = ; |
373
|
|
|
|
|
|
|
} else { |
374
|
511
|
|
|
|
|
806
|
$line = shift(@from); |
375
|
|
|
|
|
|
|
} |
376
|
655
|
100
|
|
|
|
1782
|
return($line) unless defined($line); |
377
|
636
|
|
|
|
|
1125
|
$line =~ tr/\015\032//d; |
378
|
636
|
|
|
|
|
3062
|
$line =~ s/^$quote//; |
379
|
|
|
|
|
|
|
# Check for eg $quote = "> " and $line = ">": |
380
|
636
|
|
|
|
|
1061
|
my $quote_ns = $quote; |
381
|
636
|
100
|
|
|
|
1620
|
if ($quote_ns =~ s/\s+$//) { |
382
|
39
|
100
|
|
|
|
194
|
$line = "" if ($line =~ /^$quote_ns$/); |
383
|
|
|
|
|
|
|
} |
384
|
636
|
|
|
|
|
2074
|
return($line); |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
# Trim EOL spaces and print the lines: |
389
|
|
|
|
|
|
|
sub print_lines(@) { |
390
|
289
|
|
|
289
|
0
|
610
|
my @lines = @_; |
391
|
289
|
|
|
|
|
440
|
map { s/[ \t]+\n/\n/gs } @lines; |
|
656
|
|
|
|
|
2850
|
|
392
|
289
|
100
|
|
|
|
574
|
if ($IO_Files) { |
393
|
69
|
|
|
|
|
324
|
print TO @lines; |
394
|
|
|
|
|
|
|
} else { |
395
|
220
|
|
|
|
|
912
|
push(@to, @lines) |
396
|
|
|
|
|
|
|
} |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
sub reflow() { |
401
|
19
|
|
|
19
|
0
|
42
|
my ($line, $last); |
402
|
19
|
100
|
|
|
|
60
|
if ($skipto ne "") { |
403
|
4
|
|
|
|
|
11
|
while (defined($line = get_line())) { |
404
|
58
|
|
|
|
|
91
|
print_lines($line); |
405
|
58
|
100
|
|
|
|
279
|
last if ($line =~ /^$skipto/); |
406
|
|
|
|
|
|
|
} |
407
|
4
|
50
|
|
|
|
10
|
croak "Skipto pattern `$skipto' not found!" unless (defined($line)); |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
|
410
|
19
|
100
|
|
|
|
87
|
if ($oneparagraph =~ /[Yy]/) { |
|
|
100
|
|
|
|
|
|
411
|
|
|
|
|
|
|
# put all the lines into one paragraph |
412
|
1
|
|
|
|
|
6
|
while (defined($line = get_line())) { |
413
|
35
|
|
|
|
|
61
|
process($line); |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
} elsif ($skipindented < 2) { |
417
|
3
|
|
|
|
|
8
|
while (defined($line = get_line())) { |
418
|
105
|
100
|
66
|
|
|
1131
|
if (($skipindented && ($line =~ /^($pin|\t).*\S/)) |
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
419
|
|
|
|
|
|
|
|| (($noreflow ne "") && ($line =~ /$noreflow/))) { |
420
|
|
|
|
|
|
|
# current line is indented, or a paragraph break: |
421
|
21
|
|
|
|
|
42
|
reflow_para(); |
422
|
21
|
|
|
|
|
54
|
print_lines($indent1 . $line); |
423
|
|
|
|
|
|
|
} else { |
424
|
|
|
|
|
|
|
# Add line to current paragraph in @words: |
425
|
84
|
|
|
|
|
179
|
process($line); |
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
} else { |
430
|
|
|
|
|
|
|
|
431
|
15
|
|
|
|
|
34
|
while (defined($line = get_line())) { |
432
|
369
|
50
|
33
|
|
|
2583
|
if (($noreflow ne "") && ($line =~ /$noreflow/)) { |
|
|
100
|
|
|
|
|
|
433
|
|
|
|
|
|
|
# current line is a paragraph break: |
434
|
0
|
|
|
|
|
0
|
reflow_para(); |
435
|
0
|
|
|
|
|
0
|
print_lines($indent1 . $line); |
436
|
0
|
|
|
|
|
0
|
next; |
437
|
|
|
|
|
|
|
} elsif ($line =~ /^($pin|\t).*\S/) { |
438
|
|
|
|
|
|
|
# current line may be poetry, check next line: |
439
|
39
|
|
|
|
|
49
|
$last = $line; |
440
|
39
|
|
|
|
|
73
|
$line = get_line(); |
441
|
39
|
50
|
|
|
|
90
|
if (!defined($line)) { |
442
|
0
|
|
|
|
|
0
|
process($last); |
443
|
0
|
|
|
|
|
0
|
last; |
444
|
|
|
|
|
|
|
} |
445
|
39
|
100
|
|
|
|
194
|
if ($line =~ /^($pin|\t).*\S/) { |
446
|
|
|
|
|
|
|
# found some poetry, skip indented lines until end of input |
447
|
|
|
|
|
|
|
# or a non-indented line found: |
448
|
10
|
|
|
|
|
18
|
reflow_para(); |
449
|
10
|
|
|
|
|
29
|
print_lines($indent1 . $last); |
450
|
10
|
|
|
|
|
45
|
print_lines($indent1 . $line); |
451
|
10
|
|
|
|
|
31
|
while (defined($line = get_line())) { |
452
|
30
|
50
|
33
|
|
|
365
|
last unless (($line =~ /^($pin|\t).*\S/) |
|
|
|
66
|
|
|
|
|
453
|
|
|
|
|
|
|
|| ($noreflow ne "" && $line =~ /$noreflow/)); |
454
|
20
|
|
|
|
|
57
|
print_lines($indent1 . $line); |
455
|
|
|
|
|
|
|
} |
456
|
10
|
50
|
|
|
|
29
|
last unless (defined($line)); # poetry at end of document |
457
|
|
|
|
|
|
|
# $line is a non-poetic line |
458
|
|
|
|
|
|
|
} else { |
459
|
|
|
|
|
|
|
# $last had a poetry indent, but current line doesn't. |
460
|
|
|
|
|
|
|
# Process last line: |
461
|
29
|
|
|
|
|
76
|
process($last); |
462
|
|
|
|
|
|
|
} |
463
|
|
|
|
|
|
|
} # end of first poetry test |
464
|
|
|
|
|
|
|
# current line is non-poetic, so process it: |
465
|
369
|
|
|
|
|
715
|
process($line); |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
# reflow any remaining @words: |
469
|
19
|
|
|
|
|
54
|
reflow_para(); |
470
|
|
|
|
|
|
|
} |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
# Process a non-poetry line by pushing the words onto @words |
474
|
|
|
|
|
|
|
# If the line is blank, then reflow the paragraph of @words: |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
sub process($) { |
477
|
517
|
|
|
517
|
0
|
915
|
my ($line) = @_; |
478
|
|
|
|
|
|
|
# current line is non-poetry |
479
|
|
|
|
|
|
|
# remove spaces around dashes: |
480
|
517
|
|
|
|
|
1605
|
$line =~ s/([^-])[ \t]*--[ \t]*([^-])/$1--$2/g; |
481
|
|
|
|
|
|
|
# protect ". . ." ellipses: |
482
|
517
|
|
|
|
|
743
|
$line =~ s/ \. \. \./\x9F\.\x9F\.\x9F\./g; |
483
|
517
|
|
|
|
|
683
|
$line =~ s/\. \. \./\.\x9F\.\x9F\./g; |
484
|
517
|
|
|
|
|
4756
|
@linewords = split(/\s+/, $line); |
485
|
517
|
100
|
100
|
|
|
2811
|
shift(@linewords) if (@linewords && ($linewords[0] eq "")); |
486
|
|
|
|
|
|
|
# If last word of previous line ends in a single hyphen, |
487
|
|
|
|
|
|
|
# then append first word of this line: |
488
|
517
|
50
|
100
|
|
|
3387
|
if (@linewords && @words && ($words[$#words] =~ /[a-zA-Z0-9]-$/)) { |
|
|
|
66
|
|
|
|
|
489
|
0
|
|
|
|
|
0
|
$words[$#words] .= shift(@linewords); |
490
|
|
|
|
|
|
|
} |
491
|
517
|
100
|
|
|
|
1203
|
if ($#linewords == -1) { |
492
|
|
|
|
|
|
|
# No words on this line |
493
|
109
|
100
|
|
|
|
317
|
if ($oneparagraph !~ /[Yy]/) { |
494
|
|
|
|
|
|
|
# end of paragraph |
495
|
102
|
|
|
|
|
231
|
reflow_para(); |
496
|
102
|
|
|
|
|
312
|
print_lines("$indent1\n"); |
497
|
|
|
|
|
|
|
} |
498
|
|
|
|
|
|
|
} else { |
499
|
|
|
|
|
|
|
# add @linewords to @words, |
500
|
|
|
|
|
|
|
# split on em dashes, ie word--word |
501
|
|
|
|
|
|
|
# Move "--" from beginning of current word to end of last word: |
502
|
408
|
50
|
66
|
|
|
1917
|
if (($#words >= 0) && ($linewords[0] =~ s/^--[^a-zA-Z0-9]*//)) { |
503
|
0
|
|
|
|
|
0
|
$words[$#words] .= $&; |
504
|
0
|
0
|
|
|
|
0
|
shift(@linewords) if ($linewords[0] eq ""); |
505
|
|
|
|
|
|
|
} |
506
|
408
|
|
|
|
|
512
|
my $word; |
507
|
408
|
|
|
|
|
826
|
foreach $word (@linewords) { |
508
|
4545
|
100
|
|
|
|
8042
|
if ($word =~ /[^-]--[a-zA-Z0-9]/) { |
509
|
65
|
|
|
|
|
210
|
@tmp = split(/--/, $word); |
510
|
|
|
|
|
|
|
# restore the hyphens: |
511
|
65
|
|
|
|
|
382
|
grep(s/$/--/, @tmp); |
512
|
|
|
|
|
|
|
# remove an extra one at the end: |
513
|
65
|
|
|
|
|
248
|
$tmp[$#tmp] =~ s/--$//; |
514
|
|
|
|
|
|
|
# append @tmp to @words: |
515
|
65
|
|
|
|
|
247
|
push (@words, @tmp); |
516
|
|
|
|
|
|
|
} else { |
517
|
|
|
|
|
|
|
# append $word to @words: |
518
|
4480
|
|
|
|
|
8371
|
push (@words, $word); |
519
|
|
|
|
|
|
|
} |
520
|
|
|
|
|
|
|
} |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
sub reflow_para { |
526
|
152
|
100
|
|
152
|
0
|
460
|
return() unless (@words); |
527
|
68
|
|
|
|
|
156
|
reflow_penalties(); |
528
|
68
|
|
|
|
|
90
|
$lastbreak = 0; |
529
|
68
|
|
|
|
|
135
|
$linkbreak[$wordcount] = 0; |
530
|
|
|
|
|
|
|
# Create space for the result: |
531
|
68
|
|
|
|
|
342
|
my $result = " " x (($wordcount + 2) * 8); |
532
|
68
|
|
|
|
|
11402
|
$result = reflow_trial(unpack("H*", pack("N*", @$optimum)), |
533
|
|
|
|
|
|
|
$maximum, $wordcount, |
534
|
|
|
|
|
|
|
$penaltylimit, $semantic, $shortlast, |
535
|
|
|
|
|
|
|
unpack("H*", pack("N*", @word_len)), |
536
|
|
|
|
|
|
|
unpack("H*", pack("N*", @space_len)), |
537
|
|
|
|
|
|
|
unpack("H*", pack("N*", @extra)), |
538
|
|
|
|
|
|
|
$result); |
539
|
68
|
|
|
|
|
1267
|
@linkbreak = unpack("N*", pack("H*", $result)); |
540
|
|
|
|
|
|
|
# Convert @linkbreak from unsigned to signed: |
541
|
68
|
100
|
|
|
|
267
|
@linkbreak = map { $_ > 0xF0000000 ? -((0xFFFFFFFF - $_) + 1) : $_ + 0 } @linkbreak; |
|
4678
|
|
|
|
|
9272
|
|
542
|
68
|
|
|
|
|
360
|
$lastbreak = shift(@linkbreak); |
543
|
68
|
|
|
|
|
166
|
compute_output(); |
544
|
68
|
|
|
|
|
373
|
grep (s/\x9F/ /g, @output); |
545
|
68
|
|
|
|
|
244
|
print_lines(@output); |
546
|
68
|
|
|
|
|
605
|
@words = (); |
547
|
|
|
|
|
|
|
} |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
# Add spaces to ends of sentences and calculate @extra array of penalties |
551
|
|
|
|
|
|
|
sub reflow_penalties { |
552
|
68
|
|
|
68
|
0
|
78
|
my $j; |
553
|
68
|
|
|
|
|
162
|
$wordcount = $#words + 1; |
554
|
|
|
|
|
|
|
# Add paragraph indentation to first word: |
555
|
68
|
50
|
|
|
|
233
|
$words[0] = $indent1 . $words[0] if ($wordcount); |
556
|
68
|
|
|
|
|
274
|
for ($j = 0; $j < $wordcount+1; $j++) { |
557
|
4678
|
|
|
|
|
10433
|
$extra[$j] = 0; |
558
|
|
|
|
|
|
|
} |
559
|
68
|
|
|
|
|
159
|
for ($j = 0; $j < $wordcount; $j++) { |
560
|
4610
|
100
|
|
|
|
17728
|
if ($words[$j] =~ /^([A-Za-z0-9-]+)["')]*([\.\:])["')]*$/) { # Period or colon |
561
|
187
|
50
|
33
|
|
|
946
|
if (!defined($abbrev{$1}) || ($2 eq ":")) { # End of sentence |
562
|
187
|
|
|
|
|
279
|
$extra[$j] += $sentence / 2; |
563
|
187
|
50
|
|
|
|
491
|
$extra[$j-1] -= $sentence if ($j > 0); |
564
|
187
|
|
|
|
|
276
|
$extra[$j+1] -= $sentence; |
565
|
187
|
100
|
|
|
|
899
|
$words[$j] = $words[$j] . " " unless ($frenchspacing =~ /[Yy]/); |
566
|
|
|
|
|
|
|
} else{ |
567
|
|
|
|
|
|
|
# Don't break "Mr. X" |
568
|
0
|
0
|
|
|
|
0
|
$extra[$j] -= $namebreak if ($abbrev{$1} == 1); |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
} |
571
|
4610
|
0
|
0
|
|
|
11297
|
if (($words[$j] =~ /[\?\!]["')]*$/) # !? after word |
|
|
|
33
|
|
|
|
|
572
|
|
|
|
|
|
|
&& (($j >= $#words) || ($words[$j+1] =~ /^[^a-zA-Z]*[A-Z]/))) { |
573
|
0
|
|
|
|
|
0
|
$extra[$j] += $sentence / 2; |
574
|
0
|
0
|
|
|
|
0
|
$extra[$j-1] -= $sentence if ($j > 0); |
575
|
0
|
|
|
|
|
0
|
$extra[$j+1] -= $sentence; |
576
|
0
|
0
|
|
|
|
0
|
$words[$j] = $words[$j] . " " unless ($frenchspacing =~ /[Yy]/); |
577
|
|
|
|
|
|
|
} |
578
|
4610
|
100
|
|
|
|
10018
|
if ($words[$j] =~ /\,$/) { # Comma after word |
579
|
246
|
|
|
|
|
416
|
$extra[$j] += $dependent / 2; |
580
|
246
|
50
|
|
|
|
664
|
$extra[$j-1] -= $dependent if ($j > 0); |
581
|
246
|
|
|
|
|
335
|
$extra[$j+1] -= $dependent; |
582
|
|
|
|
|
|
|
} |
583
|
4610
|
100
|
|
|
|
15990
|
if ($words[$j] =~ /[\;\"\'\)]$|--$/) { # Punctuation after word |
584
|
178
|
|
|
|
|
278
|
$extra[$j] += $independent / 2; |
585
|
178
|
50
|
|
|
|
542
|
$extra[$j-1] -= $independent if ($j > 0); |
586
|
178
|
|
|
|
|
275
|
$extra[$j+1] -= $independent; |
587
|
|
|
|
|
|
|
} |
588
|
4610
|
100
|
100
|
|
|
23535
|
if (($j < $#words) |
589
|
|
|
|
|
|
|
&& ($words[$j+1] =~ /^\(/)) { # Next word has opening parenthesis |
590
|
1
|
|
|
|
|
3
|
$extra[$j] += $independent / 2; |
591
|
1
|
50
|
|
|
|
7
|
$extra[$j-1] -= $independent if ($j > 0); |
592
|
1
|
|
|
|
|
3
|
$extra[$j+1] -= $independent; |
593
|
|
|
|
|
|
|
} |
594
|
4610
|
100
|
100
|
|
|
23791
|
if (($j < $#words) |
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
595
|
|
|
|
|
|
|
&& ($words[$j] =~ /[A-Z]/ && $words[$j] !~ /\./ |
596
|
|
|
|
|
|
|
&& $words[$j+1] =~ /[A-Z]/)) { |
597
|
78
|
|
|
|
|
118
|
$extra[$j] -= $namebreak; # Don't break "United States" |
598
|
|
|
|
|
|
|
} |
599
|
|
|
|
|
|
|
$extra[$j] -= $connectives{$words[$j]} * $connpenalty |
600
|
4610
|
100
|
|
|
|
18563
|
if (defined($connectives{$words[$j]})); |
601
|
|
|
|
|
|
|
} |
602
|
|
|
|
|
|
|
|
603
|
68
|
|
|
|
|
318
|
@word_len = (); # Length of each word (excluding spaces) |
604
|
68
|
|
|
|
|
260
|
@space_len = (); # Length the space after this word |
605
|
68
|
|
|
|
|
238
|
for ($j = 0; $j < $wordcount; $j++) { |
606
|
4610
|
100
|
|
|
|
12150
|
if ($words[$j] =~ /--$/) { |
|
|
100
|
|
|
|
|
|
607
|
66
|
|
|
|
|
105
|
$word_len[$j] = length($words[$j]); |
608
|
66
|
|
|
|
|
177
|
$space_len[$j] = 0; |
609
|
|
|
|
|
|
|
} elsif ($words[$j] =~ / $/) { |
610
|
176
|
|
|
|
|
262
|
$word_len[$j] = length($words[$j]) - 1; |
611
|
176
|
|
|
|
|
475
|
$space_len[$j] = 2; |
612
|
|
|
|
|
|
|
} else { |
613
|
4368
|
|
|
|
|
5736
|
$word_len[$j] = length($words[$j]); |
614
|
4368
|
|
|
|
|
10652
|
$space_len[$j] = 1; |
615
|
|
|
|
|
|
|
} |
616
|
|
|
|
|
|
|
} |
617
|
|
|
|
|
|
|
# First word already has $indent1 added and will not be indented further: |
618
|
68
|
50
|
|
|
|
249
|
$word_len[0] -= length($indent2) if ($wordcount); |
619
|
|
|
|
|
|
|
} |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
# compute @output from $wordcount, @words, $lastbreak and @linkbreak |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
sub compute_output { |
625
|
68
|
|
|
68
|
0
|
83
|
my ($j, $terminus); |
626
|
68
|
|
|
|
|
233
|
@output = (); |
627
|
68
|
|
|
|
|
89
|
$terminus = $wordcount-1; |
628
|
68
|
|
|
|
|
231
|
for ($j = 0; $terminus >= 0; $j++) { |
629
|
435
|
|
|
|
|
1978
|
$output[$j] = join(' ', @words[$lastbreak+1..$terminus])."\n"; |
630
|
|
|
|
|
|
|
#print "j = $j, lastbreak = $lastbreak:\noutput = $output[$j]\n"; |
631
|
435
|
|
|
|
|
725
|
$terminus = $lastbreak; |
632
|
435
|
|
|
|
|
1152
|
$lastbreak = $linkbreak[$lastbreak]; |
633
|
|
|
|
|
|
|
} |
634
|
68
|
|
|
|
|
120
|
@output = reverse(@output); |
635
|
|
|
|
|
|
|
# trim spaces after hyphens: |
636
|
68
|
|
|
|
|
140
|
map { s/([^-])[ \t]*--[ \t*]([^-])/$1--$2/g } @output; |
|
435
|
|
|
|
|
1628
|
|
637
|
|
|
|
|
|
|
# Add the indent to all but the first line: |
638
|
68
|
|
|
|
|
205
|
map { $_ = $indent2 . $_ } @output[1..$#output]; |
|
367
|
|
|
|
|
779
|
|
639
|
|
|
|
|
|
|
} |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
1; |
646
|
|
|
|
|
|
|
__END__ |