File Coverage

blib/lib/Combine/FromHTML.pm
Criterion Covered Total %
statement 24 149 16.1
branch 0 76 0.0
condition 0 25 0.0
subroutine 8 9 88.8
pod 0 1 0.0
total 32 260 12.3


line stmt bran cond sub pod time code
1             # Copyright (c) 1996-1998 LUB NetLab
2             #
3             # This program is free software; you can redistribute it and/or modify
4             # it under the terms of the GNU General Public License as published by
5             # the Free Software Foundation; either version 1, or (at your option)
6             # any later version.
7             #
8             # This program is distributed in the hope that it will be useful,
9             # but WITHOUT ANY WARRANTY; without even the implied warranty of
10             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11             # GNU General Public License for more details.
12             #
13             # You should have received a copy of the GNU General Public License
14             # along with this program; if not, write to the Free Software
15             # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16             #
17             #
18             # NO WARRANTY
19             #
20             # BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
21             # FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
22             # OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
23             # PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
24             # OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
25             # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
26             # TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
27             # PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
28             # REPAIR OR CORRECTION.
29             #
30             # IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
31             # WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
32             # REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
33             # INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
34             # OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
35             # TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
36             # YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
37             # PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
38             # POSSIBILITY OF SUCH DAMAGES.
39             #
40             # Copyright (c) 1996-1998 LUB NetLab
41              
42             # $Id: FromHTML.pm 292 2008-11-08 08:54:11Z it-aar $
43              
44             package Combine::FromHTML;
45              
46 1     1   824 use strict;
  1         2  
  1         48  
47 1     1   6 use Combine::Config;
  1         2  
  1         22  
48 1     1   5 use HTTP::Date;
  1         2  
  1         49  
49 1     1   6 use URI;
  1         2  
  1         38  
50 1     1   6 use URI::Escape;
  1         2  
  1         61  
51 1     1   7 use HTML::Entities;
  1         1  
  1         71  
52 1     1   1083 use Encode;
  1         13290  
  1         1576  
53              
54             # Character entities to char mapping. We do NOT convert those
55             # entities with a structural meaning, because most likely
56             # the output of this module will go through postprocessing.
57             #
58             my %Ent2CharMap=(
59              
60             # amp => '&',
61             # gt => '>',
62             # lt => '<',
63             # quot => '"',
64             # apos => "'",
65              
66             AElig => 'Æ',
67             Aacute => 'Á',
68             Acirc => 'Â',
69             Agrave => 'À',
70             Aring => 'Å',
71             Atilde => 'Ã',
72             Auml => 'Ä',
73             Ccedil => 'Ç',
74             ETH => 'Ð',
75             Eacute => 'É',
76             Ecirc => 'Ê',
77             Egrave => 'È',
78             Euml => 'Ë',
79             Iacute => 'Í',
80             Icirc => 'Î',
81             Igrave => 'Ì',
82             Iuml => 'Ï',
83             Ntilde => 'Ñ',
84             Oacute => 'Ó',
85             Ocirc => 'Ô',
86             Ograve => 'Ò',
87             Oslash => 'Ø',
88             Otilde => 'Õ',
89             Ouml => 'Ö',
90             THORN => 'Þ',
91             Uacute => 'Ú',
92             Ucirc => 'Û',
93             Ugrave => 'Ù',
94             Uuml => 'Ü',
95             Yacute => 'Ý',
96             aacute => 'á',
97             acirc => 'â',
98             aelig => 'æ',
99             agrave => 'à',
100             aring => 'å',
101             atilde => 'ã',
102             auml => 'ä',
103             ccedil => 'ç',
104             eacute => 'é',
105             ecirc => 'ê',
106             egrave => 'è',
107             eth => 'ð',
108             euml => 'ë',
109             iacute => 'í',
110             icirc => 'î',
111             igrave => 'ì',
112             iuml => 'ï',
113             ntilde => 'ñ',
114             oacute => 'ó',
115             ocirc => 'ô',
116             ograve => 'ò',
117             oslash => 'ø',
118             otilde => 'õ',
119             ouml => 'ö',
120             szlig => 'ß',
121             thorn => 'þ',
122             uacute => 'ú',
123             ucirc => 'û',
124             ugrave => 'ù',
125             uuml => 'ü',
126             yacute => 'ý',
127             yuml => 'ÿ',
128              
129             copy => '©',
130             reg => '®',
131             # nbsp => "\240",
132             nbsp => ' ',
133              
134             iexcl => '¡',
135             cent => '¢',
136             pound => '£',
137             curren => '¤',
138             yen => '¥',
139             brvbar => '¦',
140             sect => '§',
141             uml => '¨',
142             ordf => 'ª',
143             laquo => '«',
144             not => '¬',
145             shy => '­',
146             macr => '¯',
147             deg => '°',
148             plusmn => '±',
149             sup1 => '¹',
150             sup2 => '²',
151             sup3 => '³',
152             acute => '´',
153             micro => 'µ',
154             para => '¶',
155             middot => '·',
156             cedil => '¸',
157             ordm => 'º',
158             raquo => '»',
159             frac14 => '¼',
160             frac12 => '½',
161             frac34 => '¾',
162             iquest => '¿',
163             times => '×',
164             divide => '÷',
165              
166             );
167              
168             my $log;
169              
170             sub trans {
171 0     0 0   my ($html, $xwi, $opt) = @_;
172 0 0         return undef unless ref $xwi;
173             #$opt can be 'HTML', 'TEXT', 'GuessHTML', 'GuessText'
174 0           $xwi->url_rewind; # (BR)
175 0   0       my $url = $xwi->url_get || return undef; # $xwi object must have url field
176 0 0         if ( !defined($log) ) {
177 0           $log = Combine::Config::Get('LogHandle');
178             }
179 0 0         if ($$html eq '') {
180 0           $html = $xwi->content;
181             }
182 0 0         if ( length($$html) < 10 ) {
183 0           $log->say('FromHTML: short or empty file');
184 0           return $xwi;
185             }
186 0 0         if ( length($$html) > 1024 ) { # should we check shorter files as well ?
187 0           my $teststring = substr($$html,0,1024);
188 0           my $start_len = 1024;
189 0           $teststring =~ s/[^\s\x20-\xfe]+//g;
190 0           my $len = length($teststring);
191 0 0         if ( $len > ( 0.9 * $start_len ) ) { # this is some kind of text
192 0           my @rows = split(/\n/,$teststring);
193 0           shift(@rows);
194 0           my ($i,$uu,$b64,$r);
195 0           $uu=0; $b64=0;
  0            
196 0 0         my $n = $#rows>10 ? 10 : $#rows;
197 0           for ($i=0;$i<$n;$i++) {
198 0           $r = shift(@rows);
199 0 0 0       $uu++ if (length($r)==61) and (substr($r,0,1) eq "M");
200 0 0 0       $b64++ if (length($r)==72) and ($r!~/\s/);
201 0 0 0       if ( ( $uu == 10 ) or ( $b64 == 10 ) ) {
202             # this is probably uuencoded or base64 encoded
203 0           $log->say('FromHTML: probably uuencoded or base64 encoded');
204 0           return $xwi;
205             }
206             }
207             } else {
208             # this is most likely a binary file => don't parse it
209             # DISABLED since it creates problems with certain charactersets
210             # $log->say('FromHTML: most likely a binary file');
211             # return $xwi;
212             }
213             }
214              
215 0           $html = $$html;
216 0 0         if ($xwi->truncated()) {
217 0           my $last_blank = rindex($html, ' ');
218 0 0         if ($last_blank > 0) {
219 0           $html = substr($html, 0, $last_blank);
220             } else {
221             # What ! No blanks ! This is some weird text => don't parse it
222 0           $log->say('FromHTML: No blanks - Not processing');
223 0           return $xwi;
224             }
225             }
226              
227 0 0         if ( $opt =~ /^Guess/ ) {
228 0 0 0       if ( ($url =~ /\..?html?$|\/$/i) ||
229             ($html =~ /<\s*html\s*|<\s*head\s*|<\s*body\s*/i) ) {
230 0           $opt = 'HTML';
231             } else {
232 0           $opt = 'Text';
233             }
234             }
235              
236 0 0         if ($opt =~ /Text/i) {
237 0           $html =~ s/[\s\240\n]+/ /sg; # compress whitespace??
238 0           $xwi->text(\$html);
239 0           return $xwi;
240             }
241              
242             #clean character entities #1..#255 to utf-8/latin1
243 0           my $html_utf8;
244 0           if (1) {
245 0           my $c;
246 0           $html_utf8=HTML::Entities::decode_entities($html);
247             }
248              
249 0           my $rtext;
250             ##Plugin for extracting only relevant text and discarding base templates
251 0           my $relTextPlugin = Combine::Config::Get('relTextPlugin');
252 0 0 0       if (defined($relTextPlugin) && $relTextPlugin ne '') {
253 0           eval "require $relTextPlugin";
254 0           $rtext = $relTextPlugin->extrText($html_utf8);
255 0 0         if (defined($rtext)) {
256 0           $xwi->text(\$rtext);
257             }
258             }
259             ##
260              
261             #Only do for HTML files
262             # General modifications to the HTML code before extracting our information
263              
264 0 0         if ( Combine::Config::Get('useTidy') ) {
265              
266              
267             # print "Doing Tidy\n";
268 0           require HTML::Tidy;
269 0           my $tidy = new HTML::Tidy ( {config_file => Combine::Config::Get('baseConfigDir') . '/tidy.cfg'} );
270             # $tidy->ignore( type => TIDY_WARNING );
271             # if (!eval{$html = $tidy->clean( $html . "\n" )}) { print "TIDY ERR in eval\n"; }
272 0           my $thtml;
273 0 0         if (!eval{$thtml = $tidy->clean( $html_utf8 . "\n" )}) {
  0            
274 0           print "TIDY ERR in eval\n";
275             }
276             # for my $message ( $tidy->messages ) {
277             # print $message->as_string; #LOG!
278             # }
279 0           $html = Encode::decode('UTF-8', $thtml); # convert to Perl internal representation
280             } else {
281 0           $html_utf8 =~ s/<\!\-\-.*?\-\->/ /sgo; # replace all comments (including multiline) with whitespace
282 0           $html = $html_utf8;
283             }
284 0 0         if ( ! Encode::is_utf8($html) ) {
285 0           $log->say('WARN HTML content not in UTF-8');
286             } ##
287              
288 0           $html =~ s/<script.*?<\/script>/ /sigo; # remove all the scripts (including multiline)
289 0           $html =~ s// /sigo; # remove all the scripts (including multiline)
290 0           $html =~ s/<style.*?<\/style>/ /sigo; # remove all the style scripts (including multiline)
291             ## $html =~ s/[\s\240]+/ /g; # compress whitespace
292              
293 0           my $xwicontent=$html;
294 0           $xwi->content(\$xwicontent);
295              
296             # #Split into HEAD and BODY
297             # my $head='';
298             ## if ($html =~ s|^(.*?)<\s*body\s*|
299             ## where the frameset is outside the see http://poseidon.csd.auth.gr/EN/
300             # if ( $html =~ s|^(.*?<\s*\/head[^>]*>)||i ) { ???
301             # $head=$1;
302             # }
303              
304             #Parsing and extraction of data
305 0 0         if ($html =~ /([^<]+)<\/title>/i) { # extract title </td> </tr> <tr> <td class="h" > <a name="306">306</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $tmp = $1; </td> </tr> <tr> <td class="h" > <a name="307">307</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # $tmp =~ s/\s+/ /g; #needed AA0? </td> </tr> <tr> <td class="h" > <a name="308">308</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # $tmp = HTML::Entities::decode_entities($tmp); </td> </tr> <tr> <td class="h" > <a name="309">309</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $xwi->title($tmp); </td> </tr> <tr> <td class="h" > <a name="310">310</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="311">311</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="312">312</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #Extract META tags </td> </tr> <tr> <td class="h" > <a name="313">313</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> while ( $html =~ m/<meta\s*(.*?)>/sgi ) { </td> </tr> <tr> <td class="h" > <a name="314">314</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $tag = $1; </td> </tr> <tr> <td class="h" > <a name="315">315</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $key=''; </td> </tr> <tr> <td class="h" > <a name="316">316</a> </td> <td 
class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $val=''; </td> </tr> <tr> <td class="h" > <a name="317">317</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $tag =~ s/[\n\r]/ /g; </td> </tr> <tr> <td class="h" > <a name="318">318</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> foreach my $attr ('name','content') { </td> </tr> <tr> <td class="h" > <a name="319">319</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $str=''; </td> </tr> <tr> <td class="h" > <a name="320">320</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="321">321</a> </td> <td class="c0" > 0 </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#321-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> if ($tag =~ /$attr\s*=\s*[\"]/i) { </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#-2"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="322">322</a> </td> <td class="c0" > 0 </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#322-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> if ($tag =~ s/$attr\s*=\s*\"([^\"]+?)\"//i) { </td> </tr> <tr> <td class="h" > <a name="323">323</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $str = $1; </td> </tr> <tr> <td class="h" > <a name="324">324</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td 
class="s"> } </td> </tr> <tr> <td class="h" > <a name="325">325</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } elsif ($tag =~ /$attr\s*=\s*[\']/i) { </td> </tr> <tr> <td class="h" > <a name="326">326</a> </td> <td class="c0" > 0 </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#326-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> if ($tag =~ s/$attr\s*=\s*\'([^\']+?)\'//i) { </td> </tr> <tr> <td class="h" > <a name="327">327</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $str = $1; </td> </tr> <tr> <td class="h" > <a name="328">328</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="329">329</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } else { </td> </tr> <tr> <td class="h" > <a name="330">330</a> </td> <td class="c0" > 0 </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#330-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> if ($tag =~ s/$attr\s*=\s*([^\s]+?)\s//i) { </td> </tr> <tr> <td class="h" > <a name="331">331</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $str = $1; </td> </tr> <tr> <td class="h" > <a name="332">332</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="333">333</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="334">334</a> </td> <td class="c0" > 0 </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#334-1"> 0 </a> </td> <td >   
</td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> next if($str =~ /^$/); </td> </tr> <tr> <td class="h" > <a name="335">335</a> </td> <td class="c0" > 0 </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#335-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> if ($attr =~ /name/i) { </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#-2"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="336">336</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $key=lc($str); </td> </tr> <tr> <td class="h" > <a name="337">337</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } elsif ($attr =~ /content/i) { </td> </tr> <tr> <td class="h" > <a name="338">338</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $val=$str; </td> </tr> <tr> <td class="h" > <a name="339">339</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="340">340</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="341">341</a> </td> <td class="c0" > 0 </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#341-1"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--condition.html#341-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> next if(($key =~ /^$/) || ($val =~ /^$/)); </td> </tr> <tr> <td class="h" > <a name="342">342</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # 
$xwi->meta_add($key,HTML::Entities::decode_entities($val)); </td> </tr> <tr> <td class="h" > <a name="343">343</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $xwi->meta_add($key,$val); </td> </tr> <tr> <td class="h" > <a name="344">344</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="345">345</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #END extract META tags </td> </tr> <tr> <td class="h" > <a name="346">346</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="347">347</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> =begin comment </td> </tr> <tr> <td class="h" > <a name="348">348</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> This feature is temporarily disabled </td> </tr> <tr> <td class="h" > <a name="349">349</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="350">350</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $summary = ""; </td> </tr> <tr> <td class="h" > <a name="351">351</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $xwi->meta_rewind; </td> </tr> <tr> <td class="h" > <a name="352">352</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my ($name,$content); </td> </tr> <tr> <td class="h" > <a name="353">353</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> 
while(1) { </td> </tr> <tr> <td class="h" > <a name="354">354</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ($name,$content) = $xwi->meta_get; </td> </tr> <tr> <td class="h" > <a name="355">355</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> if (!defined($name)) { last; } </td> </tr> <tr> <td class="h" > <a name="356">356</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="357">357</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #If abstract, description or DC.Description is not a list of keywords: add it to summary </td> </tr> <tr> <td class="h" > <a name="358">358</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> if ( $name eq 'description' || $name eq 'dc.description' || $name eq 'abstract' ) { </td> </tr> <tr> <td class="h" > <a name="359">359</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my @kom = split(', ',$content); </td> </tr> <tr> <td class="h" > <a name="360">360</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my @dot = split(' ',$content); </td> </tr> <tr> <td class="h" > <a name="361">361</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> if ( $#kom < $#dot ) { #If several meta-fields check if they overlap or are the same## </td> </tr> <tr> <td class="h" > <a name="362">362</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $summary .= $content . 
' '; </td> </tr> <tr> <td class="h" > <a name="363">363</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="364">364</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="365">365</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="366">366</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="367">367</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #Generate Summary </td> </tr> <tr> <td class="h" > <a name="368">368</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $sumlength = Combine::Config::Get('SummaryLength'); </td> </tr> <tr> <td class="h" > <a name="369">369</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # print "SUM1: $summary\nHTML: $html\n"; </td> </tr> <tr> <td class="h" > <a name="370">370</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> if ( $sumlength > 0 ) { </td> </tr> <tr> <td class="h" > <a name="371">371</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> if ( ($sumlength - length($summary)) > 0 ) { </td> </tr> <tr> <td class="h" > <a name="372">372</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> require HTML::Summary; </td> </tr> <tr> <td class="h" > <a name="373">373</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> require 
HTML::TreeBuilder; </td> </tr> <tr> <td class="h" > <a name="374">374</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $html_summarizer = new HTML::Summary( LENGTH => $sumlength - length($summary), USE_META => 0 ); </td> </tr> <tr> <td class="h" > <a name="375">375</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $tree = new HTML::TreeBuilder; </td> </tr> <tr> <td class="h" > <a name="376">376</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $tree->parse( Encode::encode('latin1',$html) ); </td> </tr> <tr> <td class="h" > <a name="377">377</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # $tree->parse( $html ); </td> </tr> <tr> <td class="h" > <a name="378">378</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $tree->eof(); </td> </tr> <tr> <td class="h" > <a name="379">379</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ## $summary .= $html_summarizer->generate ( $tree ); </td> </tr> <tr> <td class="h" > <a name="380">380</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $t .= Encode::decode('latin1',$html_summarizer->generate ( $tree )); </td> </tr> <tr> <td class="h" > <a name="381">381</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $tree = $tree->delete; </td> </tr> <tr> <td class="h" > <a name="382">382</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $summary .= $t; </td> </tr> <tr> <td class="h" > <a name="383">383</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td 
class="s"> } </td> </tr> <tr> <td class="h" > <a name="384">384</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> if (length($summary)>2) { </td> </tr> <tr> <td class="h" > <a name="385">385</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # $summary =~ s/[^\w\s,\.\!\?:;\'\"]//gs; </td> </tr> <tr> <td class="h" > <a name="386">386</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $summary =~ s/[^\p{IsAlnum}\s,\.\!\?:;\'\"]//gs; </td> </tr> <tr> <td class="h" > <a name="387">387</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $summary =~ s/[\s\240]+/ /g; </td> </tr> <tr> <td class="h" > <a name="388">388</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $xwi->meta_add("Rsummary",$summary); </td> </tr> <tr> <td class="h" > <a name="389">389</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="390">390</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="391">391</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="392">392</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> =end comment </td> </tr> <tr> <td class="h" > <a name="393">393</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="394">394</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> =cut </td> </tr> <tr> 
<td class="h" > <a name="395">395</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="396">396</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # extract links </td> </tr> <tr> <td class="h" > <a name="397">397</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Combine-FromHTML-pm--subroutine.html#397-1"> 1 </a> </td> <td >   </td> <td > 734 </td> <td class="s"> use Combine::HTMLExtractor; </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 4 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 608 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="398">398</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my ($alt, $linktext, $linkurl, $base); </td> </tr> <tr> <td class="h" > <a name="399">399</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $base = $xwi->base; #Set by UA.pm </td> </tr> <tr> <td class="h" > <a name="400">400</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $lx = new Combine::HTMLExtractor(undef,undef,1); </td> </tr> <tr> <td class="h" > <a name="401">401</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # print "INPUT: $html\n"; </td> </tr> <tr> <td class="h" > <a name="402">402</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # $html = HTML::Entities::decode_entities( Encode::encode('latin1',$html) ); </td> </tr> 
<tr> <td class="h" > <a name="403">403</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $html = HTML::Entities::decode_entities( $html ); </td> </tr> <tr> <td class="h" > <a name="404">404</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $lx->parse(\$html); </td> </tr> <tr> <td class="h" > <a name="405">405</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="406">406</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my %Tags = ( a => 1, area => 1, frame => 1, img => 1, headings => 1, text => 1 ); </td> </tr> <tr> <td class="h" > <a name="407">407</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> for my $link ( @{$lx->links} ) { </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="408">408</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # print "GOTLINK: $$link{tag} = $$link{_TEXT}\n"; </td> </tr> <tr> <td class="h" > <a name="409">409</a> </td> <td class="c0" > 0 </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#409-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> next unless exists($Tags{$$link{tag}}); </td> </tr> <tr> <td class="h" > <a name="410">410</a> </td> <td class="c0" > 0 </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#410-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $linktext = $$link{_TEXT} ? 
$$link{_TEXT} : ''; </td> </tr> <tr> <td class="h" > <a name="411">411</a> </td> <td class="c0" > 0 </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#411-1"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--condition.html#411-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> if ( ($$link{tag} eq 'headings') ) { </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#-2"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#-3"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="412">412</a> </td> <td class="c0" > 0 </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#412-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> if ( $linktext !~ /^\s*$/ ) { </td> </tr> <tr> <td class="h" > <a name="413">413</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $linktext =~ s/^[\s;]+//; </td> </tr> <tr> <td class="h" > <a name="414">414</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $linktext =~ s/[\s;]+$//; </td> </tr> <tr> <td class="h" > <a name="415">415</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # $xwi->heading_add(Encode::decode('latin1',$linktext)); </td> </tr> <tr> <td class="h" > <a name="416">416</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $xwi->heading_add($linktext); </td> </tr> <tr> <td class="h" > <a name="417">417</a> </td> <td >   </td> <td >   
</td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="418">418</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> next; </td> </tr> <tr> <td class="h" > <a name="419">419</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } elsif ( ($$link{tag} eq 'text') ) { </td> </tr> <tr> <td class="h" > <a name="420">420</a> </td> <td class="c0" > 0 </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#420-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> if (!defined($rtext)) { </td> </tr> <tr> <td class="h" > <a name="421">421</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # $linktext = Encode::decode('latin1',$linktext); </td> </tr> <tr> <td class="h" > <a name="422">422</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $linktext =~ s/[\s\240]+/ /g; # compress whitespace?? 
</td> </tr> <tr> <td class="h" > <a name="423">423</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $xwi->text(\$linktext); </td> </tr> <tr> <td class="h" > <a name="424">424</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #print "HT=$linktext\n"; </td> </tr> <tr> <td class="h" > <a name="425">425</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="426">426</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> next; </td> </tr> <tr> <td class="h" > <a name="427">427</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } elsif ( ($$link{tag} eq 'frame') || ($$link{tag} eq 'img') ) { </td> </tr> <tr> <td class="h" > <a name="428">428</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $linkurl = $$link{src}; </td> </tr> <tr> <td class="h" > <a name="429">429</a> </td> <td class="c0" > 0 </td> <td >   </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--condition.html#429-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $linktext .= $$link{alt} || ''; </td> </tr> <tr> <td class="h" > <a name="430">430</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } else { </td> </tr> <tr> <td class="h" > <a name="431">431</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $linkurl = $$link{href}; </td> </tr> <tr> <td class="h" > <a name="432">432</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a 
name="433">433</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $linktext =~ s/\[IMG\]//g; </td> </tr> <tr> <td class="h" > <a name="434">434</a> </td> <td class="c0" > 0 </td> <td class="c0" > <a href="blib-lib-Combine-FromHTML-pm--branch.html#434-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> if ( $linkurl !~ /^#/ ) { # Throw away links within a document </td> </tr> <tr> <td class="h" > <a name="435">435</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $linkurl =~ s/\?\s+/?/; #to be handled in normalize?? </td> </tr> <tr> <td class="h" > <a name="436">436</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $urlstr = URI->new_abs($linkurl, $base)->canonical->as_string; </td> </tr> <tr> <td class="h" > <a name="437">437</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # $xwi->link_add($urlstr, 0, 0, Encode::decode('latin1',$linktext), $$link{tag}); </td> </tr> <tr> <td class="h" > <a name="438">438</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $xwi->link_add($urlstr, 0, 0, $linktext, $$link{tag}); </td> </tr> <tr> <td class="h" > <a name="439">439</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> # print "ADD: $$link{tag}; $urlstr; |$linktext|\n"; </td> </tr> <tr> <td class="h" > <a name="440">440</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="441">441</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="442">442</a> </td> 
<td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="443">443</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> return $xwi; </td> </tr> <tr> <td class="h" > <a name="444">444</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="445">445</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="446">446</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> 1; </td> </tr> <tr> <td class="h" > <a name="447">447</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="448">448</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> __END__ </td> </tr> </table> </body> </html>