...
|
122
|
|
|
|
|
|
|
# we grab all its <td> inner text (and/or parsed html), rearrange it into a |
|
123
|
|
|
|
|
|
|
# single string of formatted text, and put a token into its first <td> |
|
124
|
|
|
|
|
|
|
# once we have processed the html with _parse(), we replace the tokens with the |
|
125
|
|
|
|
|
|
|
# corresponding formatted text |
|
126
|
|
|
|
|
|
|
|
|
127
|
6
|
|
|
|
|
74
|
my @tables = $tree->look_down(_tag=>'table'); |
|
128
|
6
|
|
|
|
|
640
|
my $table_count = 0; |
|
129
|
6
|
|
|
|
|
12
|
for my $table (@tables) { |
|
130
|
6
|
|
|
|
|
14
|
$formatted_tables->[$table_count] = []; |
|
131
|
6
|
|
|
|
|
21
|
my @trs = $table->look_down(_tag=>'tr'); |
|
132
|
6
|
|
|
|
|
358
|
my @max_col_width; # max column widths by index |
|
133
|
|
|
|
|
|
|
my @max_col_heights; # max column heights (for multi-line text) by index |
|
134
|
0
|
|
|
|
|
0
|
my @col_lines; # a stack for our redesigned rows of column ( | ) text |
|
135
|
|
|
|
|
|
|
FIRST_PASS: { |
|
136
|
6
|
|
|
|
|
10
|
my $row_count = 0; # obviously a counter... |
|
|
6
|
|
|
|
|
13
|
|
|
137
|
6
|
|
|
|
|
13
|
for my $tr (@trs) { # *** 1st pass over rows |
|
138
|
9
|
|
|
|
|
25
|
$max_col_heights[$row_count] = 0; |
|
139
|
9
|
|
|
|
|
12
|
$col_lines[$row_count] = []; |
|
140
|
9
|
|
|
|
|
51
|
my @cols = $tr->look_down(_tag=>qr/^(td|th)$/); # no support for | . sorry.
|
|
141
|
9
|
|
|
|
|
499
|
for (my $i = 0; $i < scalar @cols; $i++) { |
|
142
|
12
|
|
|
|
|
44
|
my $td = $cols[$i]->clone; |
|
143
|
12
|
|
|
|
|
453
|
my $new_tree = HTML::TreeBuilder->new; |
|
144
|
12
|
|
|
|
|
1578
|
$new_tree->{_content} = [ $td ]; |
|
145
|
|
|
|
|
|
|
# parse the contents of the td into text |
|
146
|
|
|
|
|
|
|
# this doesn't work well with nested tables... |
|
147
|
12
|
|
|
|
|
50
|
my $text = __PACKAGE__->new->_parse($new_tree); |
|
148
|
|
|
|
|
|
|
# we don't want leading or trailing whitespace |
|
149
|
12
|
|
|
|
|
46398
|
$text =~ s/\xA0+/ /s; #   -> space |
|
150
|
12
|
|
|
|
|
47
|
$text =~ s/^\s+//s; |
|
151
|
12
|
|
|
|
|
35
|
$text =~ s/\s+\z//s; |
|
152
|
|
|
|
|
|
|
# now we figure out the maximum widths and heights needed for each column |
|
153
|
12
|
|
|
|
|
84
|
my $max_line_width = 0; |
|
154
|
12
|
|
|
|
|
38
|
my @lines = split "\n", $text; # take the parsed text and break it into virtual rows |
|
155
|
12
|
100
|
|
|
|
38
|
$max_col_heights[$row_count] = scalar @lines if scalar @lines > $max_col_heights[$row_count]; |
|
156
|
12
|
|
|
|
|
20
|
for my $line (@lines) { |
|
157
|
25
|
|
|
|
|
20
|
my $line_width = length $line; |
|
158
|
25
|
100
|
|
|
|
38
|
$max_line_width = $line_width if $line_width > $max_line_width; |
|
159
|
|
|
|
|
|
|
} |
|
160
|
12
|
|
|
|
|
29
|
$cols[$i]->{_content} = [ $text ]; |
|
161
|
12
|
|
100
|
|
|
58
|
$max_col_width[$i] ||= 0; |
|
162
|
12
|
100
|
|
|
|
27
|
$max_col_width[$i] = $max_line_width if $max_line_width > $max_col_width[$i]; |
|
163
|
|
|
|
|
|
|
# now put the accumulated lines onto our stack |
|
164
|
12
|
|
|
|
|
167
|
$col_lines[$row_count]->[$i] = \@lines; |
|
165
|
|
|
|
|
|
|
} |
|
166
|
9
|
|
|
|
|
14
|
$tr->{_content} = \@cols; |
|
167
|
9
|
|
|
|
|
19
|
$row_count++; |
|
168
|
|
|
|
|
|
|
} |
|
169
|
|
|
|
|
|
|
} |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
SECOND_PASS: { |
|
172
|
6
|
|
|
|
|
8
|
my $row_count = 0; # obviously, another counter... |
|
|
6
|
|
|
|
|
9
|
|
|
173
|
6
|
|
|
|
|
11
|
for my $tr (@trs) { # *** 2nd pass over rows |
|
174
|
9
|
|
|
|
|
49
|
my @cols = $tr->look_down(_tag=>qr/^(td|th)$/); # no support for | . sorry.
|
|
175
|
|
|
|
|
|
|
|
|
176
|
9
|
|
|
|
|
423
|
my $row_text; # the final string representing each row of reformatted text |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
my @col_rows; # a stack for each virtual $new_line spliced together from a group of | 's |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# iterate over each column of the maximum rows of parsed multiline text per | |
|
181
|
|
|
|
|
|
|
# for each virtual row of each virtual column, concat the text with alignment spacings |
|
182
|
|
|
|
|
|
|
# the final concatenated string value will be placed in column 0 |
|
183
|
9
|
|
|
|
|
35
|
for (my $j = 0; $j < $max_col_heights[$row_count]; $j++) { |
|
184
|
14
|
|
|
|
|
12
|
my $new_line; |
|
185
|
14
|
|
|
|
|
26
|
for (my $i = 0; $i < scalar @cols; $i++) { # here are the actual | elements we're iterating over... |
|
186
|
26
|
|
|
|
|
26
|
my $width = $max_col_width[$i] + $cellpadding; # how wide is this column of text |
|
187
|
26
|
|
|
|
|
23
|
my $line = $col_lines[$row_count]->[$i]->[$j]; # get the text to fit into it |
|
188
|
26
|
100
|
|
|
|
33
|
$line = defined $line ? $line : ''; |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
# strip the whitespace from beginning and end of each line |
|
191
|
26
|
|
|
|
|
44
|
$line =~ s/^\s+//gs; |
|
192
|
26
|
|
|
|
|
31
|
$line =~ s/\s+\z//gs; |
|
193
|
26
|
|
|
|
|
22
|
my $n_space = $width - length $line; # the difference between the column and text widths |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# we are creating virtual rows of text within a single | |
|
196
|
|
|
|
|
|
|
# so we need to add an indent to all but the first row to |
|
197
|
|
|
|
|
|
|
# match the indent added by _parse() for presenting table contents |
|
198
|
26
|
100
|
100
|
|
|
68
|
$line = ((' ')x$parser_indent). $line if $j != 0 and $i == 0; |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
# here we adjust the text alignment by wrapping the text in occulted whitespace |
|
201
|
26
|
100
|
100
|
|
|
49
|
my $justify = $cols[$i]->tag eq 'td' ? ( $cols[$i]->attr('align') || 'left' ) : 'center'; |
|
202
|
26
|
100
|
|
|
|
284
|
if ($justify eq 'center') { |
|
|
|
100
|
|
|
|
|
|
|
203
|
1
|
|
|
|
|
4
|
my $pre = int( ($n_space + $cellpadding) / 2 ); # divide remaining space in half |
|
204
|
1
|
|
|
|
|
1
|
my $post = $n_space - $pre; # assign any uneven remainder to the end |
|
205
|
1
|
|
|
|
|
6
|
$new_line .= ((' ')x$pre). $line .((' ')x$post); # wrap the text in spaces |
|
206
|
|
|
|
|
|
|
} elsif ($justify eq 'left') { |
|
207
|
15
|
|
|
|
|
49
|
$new_line .= ((' ')x$cellpadding). $line .((' ')x$n_space); |
|
208
|
|
|
|
|
|
|
} else { |
|
209
|
10
|
|
|
|
|
24
|
$new_line .= ((' ')x$n_space). $line .((' ')x$cellpadding); |
|
210
|
|
|
|
|
|
|
} |
|
211
|
|
|
|
|
|
|
} |
|
212
|
14
|
100
|
|
|
|
32
|
$new_line .= "\n" if $j != $max_col_heights[$row_count] - 1; # add a newline to all but the last text row |
|
213
|
14
|
|
|
|
|
26
|
$col_rows[$j] = $new_line; # put the line into the stack for this row |
|
214
|
|
|
|
|
|
|
} |
|
215
|
9
|
|
|
|
|
26
|
$row_text .= $_ for @col_rows; |
|
216
|
9
|
|
|
|
|
33
|
for (my $i = 1; $i < scalar @cols; $i++) { |
|
217
|
4
|
|
|
|
|
7
|
$cols[$i]->delete; # get rid of unneeded | 's |
|
218
|
|
|
|
|
|
|
} |
|
219
|
|
|
|
|
|
|
# put the fully formatted text into our accumulator |
|
220
|
9
|
|
|
|
|
98
|
$formatted_tables->[$table_count]->[$row_count] = $row_text; |
|
221
|
9
|
100
|
|
|
|
20
|
if (scalar @cols) { |
|
222
|
8
|
|
|
|
|
45
|
$cols[0]->content->[0] = "__TOKEN__${table_count}__${row_count}__"; # place a token into the row at col 0 |
|
223
|
|
|
|
|
|
|
} |
|
224
|
9
|
|
|
|
|
38
|
$row_count++; |
|
225
|
|
|
|
|
|
|
} |
|
226
|
|
|
|
|
|
|
} |
|
227
|
6
|
|
|
|
|
19
|
$table_count++; |
|
228
|
|
|
|
|
|
|
} |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# now replace our tokens |
|
231
|
6
|
|
|
|
|
24
|
my $text = $self->_parse( $tree ); |
|
232
|
6
|
|
|
|
|
15071
|
for (my $i = 0; $i < scalar @$formatted_tables; $i++) { |
|
233
|
6
|
|
|
|
|
13
|
for (my $j = 0; $j < scalar @{ $$formatted_tables[$i] }; $j++) { |
|
|
15
|
|
|
|
|
51
|
|
|
234
|
9
|
|
|
|
|
24
|
my $token = "__TOKEN__${i}__${j}__"; |
|
235
|
9
|
50
|
|
|
|
21
|
$token .= "\n?" if $no_rowspacing; |
|
236
|
9
|
|
|
|
|
14
|
my $new_text = $$formatted_tables[$i][$j]; |
|
237
|
9
|
100
|
|
|
|
20
|
if (defined $new_text) { |
|
238
|
6
|
|
|
|
|
80
|
$text =~ s/$token/$new_text/; |
|
239
|
|
|
|
|
|
|
} |
|
240
|
|
|
|
|
|
|
else { |
|
241
|
3
|
|
|
|
|
37
|
$text =~ s/$token//; |
|
242
|
|
|
|
|
|
|
} |
|
243
|
|
|
|
|
|
|
} |
|
244
|
|
|
|
|
|
|
} |
|
245
|
|
|
|
|
|
|
|
|
246
|
6
|
|
|
|
|
85
|
return $text; |
|
247
|
|
|
|
|
|
|
} |
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
1; |
|
250
|
|
|
|
|
|
|
__END__ |