...
122
|
|
|
|
|
|
|
# we grab all its <td> inner text (and/or parsed html), rearrange it into a |
123
|
|
|
|
|
|
|
# single string of formatted text, and put a token into its first <td> |
124
|
|
|
|
|
|
|
# once we have processed the html with _parse(), we replace the tokens with the |
125
|
|
|
|
|
|
|
# corresponding formatted text |
126
|
|
|
|
|
|
|
|
127
|
6
|
|
|
|
|
74
|
my @tables = $tree->look_down(_tag=>'table'); |
128
|
6
|
|
|
|
|
640
|
my $table_count = 0; |
129
|
6
|
|
|
|
|
12
|
for my $table (@tables) { |
130
|
6
|
|
|
|
|
14
|
$formatted_tables->[$table_count] = []; |
131
|
6
|
|
|
|
|
21
|
my @trs = $table->look_down(_tag=>'tr'); |
132
|
6
|
|
|
|
|
358
|
my @max_col_width; # max column widths by index |
133
|
|
|
|
|
|
|
my @max_col_heights; # max column heights (for multi-line text) by index |
134
|
0
|
|
|
|
|
0
|
my @col_lines; # a stack for our redesigned rows of column ( | ) text |
135
|
|
|
|
|
|
|
FIRST_PASS: { |
136
|
6
|
|
|
|
|
10
|
my $row_count = 0; # obviously a counter... |
|
6
|
|
|
|
|
13
|
|
137
|
6
|
|
|
|
|
13
|
for my $tr (@trs) { # *** 1st pass over rows |
138
|
9
|
|
|
|
|
25
|
$max_col_heights[$row_count] = 0; |
139
|
9
|
|
|
|
|
12
|
$col_lines[$row_count] = []; |
140
|
9
|
|
|
|
|
51
|
my @cols = $tr->look_down(_tag=>qr/^(td|th)$/); # no support for | . sorry.
|
141
|
9
|
|
|
|
|
499
|
for (my $i = 0; $i < scalar @cols; $i++) { |
142
|
12
|
|
|
|
|
44
|
my $td = $cols[$i]->clone; |
143
|
12
|
|
|
|
|
453
|
my $new_tree = HTML::TreeBuilder->new; |
144
|
12
|
|
|
|
|
1578
|
$new_tree->{_content} = [ $td ]; |
145
|
|
|
|
|
|
|
# parse the contents of the td into text |
146
|
|
|
|
|
|
|
# this doesn't work well with nested tables... |
147
|
12
|
|
|
|
|
50
|
my $text = __PACKAGE__->new->_parse($new_tree); |
148
|
|
|
|
|
|
|
# we don't want leading or trailing whitespace |
149
|
12
|
|
|
|
|
46398
|
$text =~ s/\xA0+/ /s; #   -> space |
150
|
12
|
|
|
|
|
47
|
$text =~ s/^\s+//s; |
151
|
12
|
|
|
|
|
35
|
$text =~ s/\s+\z//s; |
152
|
|
|
|
|
|
|
# now we figure out the maximum widths and heights needed for each column |
153
|
12
|
|
|
|
|
84
|
my $max_line_width = 0; |
154
|
12
|
|
|
|
|
38
|
my @lines = split "\n", $text; # take the parsed text and break it into virtual rows |
155
|
12
|
100
|
|
|
|
38
|
$max_col_heights[$row_count] = scalar @lines if scalar @lines > $max_col_heights[$row_count]; |
156
|
12
|
|
|
|
|
20
|
for my $line (@lines) { |
157
|
25
|
|
|
|
|
20
|
my $line_width = length $line; |
158
|
25
|
100
|
|
|
|
38
|
$max_line_width = $line_width if $line_width > $max_line_width; |
159
|
|
|
|
|
|
|
} |
160
|
12
|
|
|
|
|
29
|
$cols[$i]->{_content} = [ $text ]; |
161
|
12
|
|
100
|
|
|
58
|
$max_col_width[$i] ||= 0; |
162
|
12
|
100
|
|
|
|
27
|
$max_col_width[$i] = $max_line_width if $max_line_width > $max_col_width[$i]; |
163
|
|
|
|
|
|
|
# now put the accumulated lines onto our stack |
164
|
12
|
|
|
|
|
167
|
$col_lines[$row_count]->[$i] = \@lines; |
165
|
|
|
|
|
|
|
} |
166
|
9
|
|
|
|
|
14
|
$tr->{_content} = \@cols; |
167
|
9
|
|
|
|
|
19
|
$row_count++; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
SECOND_PASS: { |
172
|
6
|
|
|
|
|
8
|
my $row_count = 0; # obviously, another counter... |
|
6
|
|
|
|
|
9
|
|
173
|
6
|
|
|
|
|
11
|
for my $tr (@trs) { # *** 2nd pass over rows |
174
|
9
|
|
|
|
|
49
|
my @cols = $tr->look_down(_tag=>qr/^(td|th)$/); # no support for | . sorry.
|
175
|
|
|
|
|
|
|
|
176
|
9
|
|
|
|
|
423
|
my $row_text; # the final string representing each row of reformatted text |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
my @col_rows; # a stack for each virtual $new_line spliced together from a group of | 's |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# iterate over each column of the maximum rows of parsed multiline text per <td> |
181
|
|
|
|
|
|
|
# for each virtual row of each virtual column, concat the text with alignment spacings |
182
|
|
|
|
|
|
|
# the final concatenated string value will be placed in column 0 |
183
|
9
|
|
|
|
|
35
|
for (my $j = 0; $j < $max_col_heights[$row_count]; $j++) { |
184
|
14
|
|
|
|
|
12
|
my $new_line; |
185
|
14
|
|
|
|
|
26
|
for (my $i = 0; $i < scalar @cols; $i++) { # here are the actual | elements we're iterating over... |
186
|
26
|
|
|
|
|
26
|
my $width = $max_col_width[$i] + $cellpadding; # how wide is this column of text |
187
|
26
|
|
|
|
|
23
|
my $line = $col_lines[$row_count]->[$i]->[$j]; # get the text to fit into it |
188
|
26
|
100
|
|
|
|
33
|
$line = defined $line ? $line : ''; |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
# strip the whitespace from beginning and end of each line |
191
|
26
|
|
|
|
|
44
|
$line =~ s/^\s+//gs; |
192
|
26
|
|
|
|
|
31
|
$line =~ s/\s+\z//gs; |
193
|
26
|
|
|
|
|
22
|
my $n_space = $width - length $line; # the difference between the column and text widths |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# we are creating virtual rows of text within a single <td> |
196
|
|
|
|
|
|
|
# so we need to add an indent to all but the first row to |
197
|
|
|
|
|
|
|
# match the indent added by _parse() for presenting table contents |
198
|
26
|
100
|
100
|
|
|
68
|
$line = ((' ')x$parser_indent). $line if $j != 0 and $i == 0; |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
# here we adjust the text alignment by wrapping the text in occulted whitespace |
201
|
26
|
100
|
100
|
|
|
49
|
my $justify = $cols[$i]->tag eq 'td' ? ( $cols[$i]->attr('align') || 'left' ) : 'center'; |
202
|
26
|
100
|
|
|
|
284
|
if ($justify eq 'center') { |
|
|
100
|
|
|
|
|
|
203
|
1
|
|
|
|
|
4
|
my $pre = int( ($n_space + $cellpadding) / 2 ); # divide remaining space in half |
204
|
1
|
|
|
|
|
1
|
my $post = $n_space - $pre; # assign any uneven remainder to the end |
205
|
1
|
|
|
|
|
6
|
$new_line .= ((' ')x$pre). $line .((' ')x$post); # wrap the text in spaces |
206
|
|
|
|
|
|
|
} elsif ($justify eq 'left') { |
207
|
15
|
|
|
|
|
49
|
$new_line .= ((' ')x$cellpadding). $line .((' ')x$n_space); |
208
|
|
|
|
|
|
|
} else { |
209
|
10
|
|
|
|
|
24
|
$new_line .= ((' ')x$n_space). $line .((' ')x$cellpadding); |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
} |
212
|
14
|
100
|
|
|
|
32
|
$new_line .= "\n" if $j != $max_col_heights[$row_count] - 1; # add a newline to all but the last text row |
213
|
14
|
|
|
|
|
26
|
$col_rows[$j] = $new_line; # put the line into the stack for this row |
214
|
|
|
|
|
|
|
} |
215
|
9
|
|
|
|
|
26
|
$row_text .= $_ for @col_rows; |
216
|
9
|
|
|
|
|
33
|
for (my $i = 1; $i < scalar @cols; $i++) { |
217
|
4
|
|
|
|
|
7
|
$cols[$i]->delete; # get rid of unneeded | 's |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
# put the fully formatted text into our accumulator |
220
|
9
|
|
|
|
|
98
|
$formatted_tables->[$table_count]->[$row_count] = $row_text; |
221
|
9
|
100
|
|
|
|
20
|
if (scalar @cols) { |
222
|
8
|
|
|
|
|
45
|
$cols[0]->content->[0] = "__TOKEN__${table_count}__${row_count}__"; # place a token into the row at col 0 |
223
|
|
|
|
|
|
|
} |
224
|
9
|
|
|
|
|
38
|
$row_count++; |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
} |
227
|
6
|
|
|
|
|
19
|
$table_count++; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# now replace our tokens |
231
|
6
|
|
|
|
|
24
|
my $text = $self->_parse( $tree ); |
232
|
6
|
|
|
|
|
15071
|
for (my $i = 0; $i < scalar @$formatted_tables; $i++) { |
233
|
6
|
|
|
|
|
13
|
for (my $j = 0; $j < scalar @{ $$formatted_tables[$i] }; $j++) { |
|
15
|
|
|
|
|
51
|
|
234
|
9
|
|
|
|
|
24
|
my $token = "__TOKEN__${i}__${j}__"; |
235
|
9
|
50
|
|
|
|
21
|
$token .= "\n?" if $no_rowspacing; |
236
|
9
|
|
|
|
|
14
|
my $new_text = $$formatted_tables[$i][$j]; |
237
|
9
|
100
|
|
|
|
20
|
if (defined $new_text) { |
238
|
6
|
|
|
|
|
80
|
$text =~ s/$token/$new_text/; |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
else { |
241
|
3
|
|
|
|
|
37
|
$text =~ s/$token//; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
6
|
|
|
|
|
85
|
return $text; |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
1; |
250
|
|
|
|
|
|
|
__END__ |