line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# MySql replacement for hdb |
2
|
|
|
|
|
|
|
# AA0 2002-09-30 |
3
|
|
|
|
|
|
|
# Modified Open to return DBI connection and HDB table name |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
package Combine::MySQLhdb; |
6
|
|
|
|
|
|
|
|
7
|
2
|
|
|
2
|
|
722
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
64
|
|
8
|
2
|
|
|
2
|
|
10
|
use Combine::XWI; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
35
|
|
9
|
2
|
|
|
2
|
|
1804
|
use HTTP::Date; |
|
2
|
|
|
|
|
8929
|
|
|
2
|
|
|
|
|
120
|
|
10
|
2
|
|
|
2
|
|
2116
|
use Encode; |
|
2
|
|
|
|
|
28962
|
|
|
2
|
|
|
|
|
272
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
my $sv; # holds the mysql connection |
13
|
|
|
|
|
|
|
my $table = ''; # holds the hdb table name |
14
|
|
|
|
|
|
|
my $savehtml; |
15
|
|
|
|
|
|
|
my $doOAI; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
sub Open { #needed?? |
18
|
2
|
|
|
2
|
|
587
|
use Combine::Config; |
|
2
|
|
|
|
|
7
|
|
|
2
|
|
|
|
|
1943
|
|
19
|
1
|
|
|
1
|
0
|
10
|
$sv = Combine::Config::Get('MySQLhandle'); |
20
|
0
|
|
|
|
|
0
|
$savehtml = Combine::Config::Get('saveHTML'); |
21
|
0
|
|
|
|
|
0
|
$doOAI = Combine::Config::Get('doOAI'); |
22
|
0
|
|
|
|
|
0
|
my $hdbd = 'hdb'; |
23
|
0
|
|
|
|
|
0
|
return ($sv,$hdbd); |
24
|
|
|
|
|
|
|
} |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub Close { |
27
|
|
|
|
|
|
|
# print "MySQLhdb::Close\n"; |
28
|
0
|
|
|
0
|
0
|
0
|
$sv->disconnect ; |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub DESTROY { |
32
|
0
|
|
|
0
|
|
0
|
print STDERR "MySQLhdb::DESTROY\n"; |
33
|
0
|
|
|
|
|
0
|
$sv->disconnect ; |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
sub Write { |
37
|
0
|
|
|
0
|
0
|
0
|
my ($xwi) = @_; |
38
|
0
|
0
|
|
|
|
0
|
return undef unless $xwi; |
39
|
0
|
0
|
|
|
|
0
|
if (!defined($sv)) { Open(); } #Init $sv CHANGE? |
|
0
|
|
|
|
|
0
|
|
40
|
0
|
|
|
|
|
0
|
my $md5 = $xwi->md5; |
41
|
0
|
|
|
|
|
0
|
my $recordid = $xwi->recordid; #Set by DataBase.pm |
42
|
|
|
|
|
|
|
#OAI |
43
|
0
|
0
|
|
|
|
0
|
if ($doOAI) { |
44
|
0
|
|
|
|
|
0
|
$sv->prepare("REPLACE INTO oai SET status='created', recordid=?, md5=?")->execute($recordid, $md5); |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
#OAI |
47
|
|
|
|
|
|
|
# $xwi->url_rewind; MORE THAN one URL?? |
48
|
|
|
|
|
|
|
# my $url = $xwi->url_get; |
49
|
0
|
|
|
|
|
0
|
my $urlid = $xwi->urlid; |
50
|
0
|
|
|
|
|
0
|
my $my_netlocid = $xwi->netlocid; |
51
|
0
|
|
|
|
|
0
|
my $type = $xwi->type; |
52
|
0
|
|
|
|
|
0
|
my $title = $xwi->title; |
53
|
|
|
|
|
|
|
#checkedDate is inserted/updated in DataBase.pm and harvpars.pl |
54
|
0
|
|
|
|
|
0
|
my $modifiedDate = $xwi->modifiedDate; |
55
|
0
|
0
|
|
|
|
0
|
if ( ! $modifiedDate) { $modifiedDate = $xwi->checkedDate; } |
|
0
|
|
|
|
|
0
|
|
56
|
0
|
|
|
|
|
0
|
my $expiryDate = $xwi->expiryDate; |
57
|
|
|
|
|
|
|
# if ($expiryDate) { $expiryDate = str2time($expiryDate) ; } |
58
|
|
|
|
|
|
|
# else { $expiryDate = 'NULL'; } |
59
|
0
|
|
|
|
|
0
|
my $length = $xwi->length; |
60
|
0
|
|
|
|
|
0
|
my $server = $xwi->server; |
61
|
0
|
|
|
|
|
0
|
my $etag = $xwi->etag; |
62
|
0
|
|
|
|
|
0
|
my $nheadings = $xwi->heading_count; |
63
|
0
|
|
|
|
|
0
|
my $headings=''; |
64
|
|
|
|
|
|
|
# headings |
65
|
0
|
|
|
|
|
0
|
$xwi->heading_rewind; |
66
|
0
|
|
|
|
|
0
|
while (1) { |
67
|
0
|
0
|
|
|
|
0
|
my $this = $xwi->heading_get or last; |
68
|
0
|
|
|
|
|
0
|
$headings .= $this . '; '; |
69
|
|
|
|
|
|
|
} |
70
|
0
|
|
|
|
|
0
|
my $nlinks = $xwi->link_count; |
71
|
0
|
|
|
|
|
0
|
my $this = $xwi->text; |
72
|
0
|
|
|
|
|
0
|
my $ip; |
73
|
0
|
0
|
|
|
|
0
|
if ($this) { |
74
|
0
|
|
|
|
|
0
|
$this = $$this; |
75
|
0
|
0
|
|
|
|
0
|
if ($xwi->truncated()) { |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# IMPORTANT! This document was truncated. Therefore: |
78
|
|
|
|
|
|
|
# |
79
|
|
|
|
|
|
|
# 1) Discard it if no space characters in it, because then it |
80
|
|
|
|
|
|
|
# could be binary. |
81
|
|
|
|
|
|
|
# |
82
|
|
|
|
|
|
|
# 2) If a space is found, then truncate after the last space, |
83
|
|
|
|
|
|
|
# so as to avoid erroneous indexing (since the truncation |
84
|
|
|
|
|
|
|
# most likely cut a word). |
85
|
|
|
|
|
|
|
|
86
|
0
|
|
|
|
|
0
|
my $last_blank = rindex($this,' '); |
87
|
0
|
0
|
|
|
|
0
|
if ($last_blank > 0) { |
88
|
0
|
|
|
|
|
0
|
$ip = substr($this, 0, $last_blank) ; |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
else { |
92
|
0
|
|
|
|
|
0
|
$ip = $this ; |
93
|
|
|
|
|
|
|
} |
94
|
0
|
|
|
|
|
0
|
} else { my $t=''; $xwi->text(\$t); } #make sure xwi->text is defined |
|
0
|
|
|
|
|
0
|
|
95
|
|
|
|
|
|
|
#?? if (length($ip)>250000) {$ip = substr($ip, 0, 250000);} |
96
|
|
|
|
|
|
|
|
97
|
0
|
|
|
|
|
0
|
$sv->prepare("REPLACE INTO hdb VALUES (?, ?, ?, FROM_UNIXTIME( ? ), FROM_UNIXTIME( ? ), ?, ?, ?, ?, ?, ?, COMPRESS(?))")->execute( |
98
|
|
|
|
|
|
|
$recordid, $type, Encode::encode('utf8',$title), $modifiedDate, $expiryDate, $length, $server, $etag, $nheadings, $nlinks, Encode::encode('utf8',$headings), Encode::encode('utf8',$ip)); |
99
|
|
|
|
|
|
|
|
100
|
0
|
0
|
|
|
|
0
|
if ( $savehtml == 1 ) { |
101
|
0
|
|
|
|
|
0
|
my $html = $xwi->content; |
102
|
0
|
|
|
|
|
0
|
$sv->prepare("REPLACE INTO html SET html=COMPRESS(?), recordid=?")->execute(Encode::encode('utf8',$$html),$recordid); |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
0
|
|
|
|
|
0
|
my $res; |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
#save links |
108
|
0
|
|
|
|
|
0
|
my ( $urlstr, $anchor, $ltype); |
109
|
0
|
|
|
|
|
0
|
$xwi->link_rewind; |
110
|
0
|
|
|
|
|
0
|
my $link_count = 1; |
111
|
0
|
|
|
|
|
0
|
my $netlocid; |
112
|
0
|
|
|
|
|
0
|
$res = $sv->do(qq{DELETE FROM links WHERE recordid='$recordid';}); #needed? |
113
|
0
|
|
|
|
|
0
|
while(1) { #links |
114
|
0
|
|
|
|
|
0
|
($urlstr, $netlocid, $urlid, $anchor, $ltype) = $xwi->link_get; |
115
|
0
|
0
|
|
|
|
0
|
if (defined($urlstr)) { |
116
|
|
|
|
|
|
|
#Convert urlstr to urlid,netlocid if needed |
117
|
0
|
0
|
0
|
|
|
0
|
if ( ($netlocid <= 0) || ($urlid <= 0) ) { |
118
|
0
|
0
|
|
|
|
0
|
if ( $urlstr eq '') { print STDERR "ERR MySQLhdb, save links, no info\n"; } ## sanity check -> log error |
|
0
|
|
|
|
|
0
|
|
119
|
2
|
|
|
2
|
|
1566
|
use Combine::selurl; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
3968
|
|
120
|
0
|
|
|
|
|
0
|
my $u; |
121
|
0
|
0
|
|
|
|
0
|
if ( $u = new Combine::selurl($urlstr) ) { |
122
|
0
|
|
|
|
|
0
|
$urlstr = $u->normalise(); |
123
|
0
|
|
|
|
|
0
|
my $netlocstr = $u->authority; |
124
|
0
|
|
|
|
|
0
|
my $path_query = $u->path_query; |
125
|
0
|
|
|
|
|
0
|
my $lsth = $sv->prepare(qq{SELECT netlocid,urlid FROM urls WHERE urlstr=?;}); |
126
|
0
|
|
|
|
|
0
|
$lsth->execute($urlstr); |
127
|
0
|
|
|
|
|
0
|
($netlocid,$urlid) = $lsth->fetchrow_array; |
128
|
0
|
0
|
|
|
|
0
|
if ( !defined($urlid) ) { |
129
|
0
|
|
|
|
|
0
|
$sv->prepare(qq{INSERT IGNORE INTO netlocs SET netlocstr=?;})->execute($netlocstr); |
130
|
|
|
|
|
|
|
# ($netlocid) = $sv->selectrow_array(qq{SELECT netlocid FROM netlocs WHERE netlocstr='$netlocstr';}); |
131
|
0
|
|
|
|
|
0
|
my $nlsth = $sv->prepare(qq{SELECT netlocid FROM netlocs WHERE netlocstr=?;}); |
132
|
0
|
|
|
|
|
0
|
$nlsth->execute($netlocstr); |
133
|
0
|
|
|
|
|
0
|
($netlocid) = $nlsth->fetchrow_array(); |
134
|
0
|
|
|
|
|
0
|
$sv->prepare(qq{INSERT IGNORE INTO urls SET urlstr=?, netlocid=?, path=?;})->execute($urlstr,$netlocid,$path_query); |
135
|
0
|
|
|
|
|
0
|
$lsth->execute($urlstr); |
136
|
0
|
|
|
|
|
0
|
($netlocid,$urlid) = $lsth->fetchrow_array; |
137
|
|
|
|
|
|
|
} |
138
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO links (recordid,mynetlocid,urlid,netlocid,anchor,linktype) VALUES (?, ?, ?, ?, ?, ?)")->execute($recordid,$my_netlocid,$urlid,$netlocid,Encode::encode('utf8',$anchor),$ltype); |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
} else { |
141
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO links (recordid,mynetlocid,urlid,netlocid,anchor,linktype) VALUES (?, ?, ?, ?, ?, ?)")->execute($recordid,$my_netlocid,$urlid,$netlocid,Encode::encode('utf8',$anchor),$ltype); |
142
|
|
|
|
|
|
|
} |
143
|
0
|
|
|
|
|
0
|
} else { last; } |
144
|
0
|
0
|
|
|
|
0
|
last if ($link_count++ >= 500); # limit on number of links |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
#save metadata |
148
|
0
|
|
|
|
|
0
|
$xwi->meta_rewind; |
149
|
0
|
|
|
|
|
0
|
$res = $sv->do(qq{DELETE FROM meta WHERE recordid='$recordid';}); #needed? |
150
|
0
|
|
|
|
|
0
|
my ($name,$content); |
151
|
0
|
|
|
|
|
0
|
while (1) { |
152
|
0
|
|
|
|
|
0
|
($name,$content) = $xwi->meta_get; |
153
|
0
|
0
|
|
|
|
0
|
last unless $name; |
154
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO meta VALUES (?, ?, ?)")->execute($recordid, Encode::encode('utf8',$name), Encode::encode('utf8',$content)); |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
#OLD |
158
|
|
|
|
|
|
|
#save URLs |
159
|
|
|
|
|
|
|
# $xwi->url_rewind; |
160
|
|
|
|
|
|
|
# $res = $sv->do(qq{DELETE FROM urls WHERE recordid='$recordid';}); |
161
|
|
|
|
|
|
|
# while (1) { |
162
|
|
|
|
|
|
|
# $this = $xwi->url_get or last; |
163
|
|
|
|
|
|
|
## $res = $sv->do(qq{INSERT INTO urls VALUES ('$recordid','$this');}); |
164
|
|
|
|
|
|
|
# my $machine = $this; |
165
|
|
|
|
|
|
|
# $machine =~ s|http://([^:/]+)[:/]?.*|$1|; |
166
|
|
|
|
|
|
|
# $sv->prepare("INSERT INTO urls VALUES (?, ?, ?)")->execute($recordid, $this, $machine); |
167
|
|
|
|
|
|
|
# } |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
#save robot data in analys table (uses that URL is stored) |
170
|
0
|
|
|
|
|
0
|
$xwi->robot_rewind; |
171
|
0
|
|
|
|
|
0
|
$res = $sv->do(qq{DELETE FROM analys WHERE recordid='$recordid';}); #needed? |
172
|
0
|
|
|
|
|
0
|
while (1) { |
173
|
0
|
|
|
|
|
0
|
($name,$content) = $xwi->robot_get; |
174
|
0
|
0
|
|
|
|
0
|
last unless $name; |
175
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO analys VALUES (?, ?, ?)")->execute($recordid, $name, Encode::encode('utf8',$content)); |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
## my $alinks = calclinks($recordid,$machine); #? |
178
|
|
|
|
|
|
|
#What if link-stats are inserted double after a Get and following write? |
179
|
0
|
|
|
|
|
0
|
my $sth = $sv->prepare(qq{SELECT COUNT(DISTINCT(links.recordid)), COUNT(DISTINCT(mynetlocid)) FROM links,recordurl WHERE recordurl.recordid= ? AND |
180
|
|
|
|
|
|
|
links.urlid = recordurl.urlid AND mynetlocid<>links.netlocid;}); |
181
|
0
|
|
|
|
|
0
|
$sth->execute($recordid); |
182
|
0
|
|
|
|
|
0
|
my ($inlinks,$hostinlinks)=$sth->fetchrow_array; |
183
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO analys VALUES (?, ?, ?)")->execute($recordid, 'inlinks', $inlinks); |
184
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO analys VALUES (?, ?, ?)")->execute($recordid, 'hostinlinks', $hostinlinks); |
185
|
0
|
|
|
|
|
0
|
$sth = $sv->prepare(qq{SELECT count(distinct(netlocid)) FROM links WHERE recordid=?;}); |
186
|
0
|
|
|
|
|
0
|
$sth->execute($recordid); |
187
|
0
|
|
|
|
|
0
|
my ($outlinks)=$sth->fetchrow_array; |
188
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO analys VALUES (?, ?, ?)")->execute($recordid, 'outlinks', $outlinks); |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
#save topic, ie result of autoclassification |
191
|
0
|
|
|
|
|
0
|
$xwi->topic_rewind; |
192
|
0
|
|
|
|
|
0
|
$res = $sv->do(qq{DELETE FROM topic WHERE recordid='$recordid';}); #needed? |
193
|
0
|
|
|
|
|
0
|
my ($cls,$absscore, $relscore, $terms, $alg); |
194
|
0
|
|
|
|
|
0
|
while (1) { |
195
|
0
|
|
|
|
|
0
|
($cls,$absscore, $relscore,$terms, $alg) = $xwi->topic_get; |
196
|
0
|
0
|
|
|
|
0
|
last unless $cls; |
197
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO topic VALUES (?, ?, ?, ?, ?, ?)")->execute($recordid, Encode::encode('utf8',$cls), $absscore, $relscore, Encode::encode('utf8',$terms), $alg); |
198
|
|
|
|
|
|
|
} |
199
|
0
|
0
|
|
|
|
0
|
if (my $zh = Combine::Config::Get('ZebraHost')) { |
200
|
0
|
|
|
|
|
0
|
require Combine::Zebra; |
201
|
0
|
|
|
|
|
0
|
Combine::Zebra::update($zh,$xwi); |
202
|
|
|
|
|
|
|
} |
203
|
0
|
0
|
|
|
|
0
|
if (Combine::Config::Get('MySQLfulltext')) { |
204
|
0
|
|
|
|
|
0
|
$sv->prepare("REPLACE INTO search VALUES (?, ?)")->execute($recordid, Encode::encode('utf8',$title .' '. $ip)); |
205
|
|
|
|
|
|
|
} |
206
|
0
|
0
|
|
|
|
0
|
if (my $sh = Combine::Config::Get('SolrHost')) { |
207
|
0
|
|
|
|
|
0
|
require Combine::Solr; |
208
|
0
|
|
|
|
|
0
|
Combine::Solr::update($sh,$xwi); |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
sub Delete { #Used?? |
213
|
0
|
|
|
0
|
0
|
0
|
my ($xwi) = @_; |
214
|
0
|
0
|
|
|
|
0
|
return undef unless $xwi; |
215
|
|
|
|
|
|
|
|
216
|
0
|
|
|
|
|
0
|
my $recordid = $xwi->recordid; |
217
|
|
|
|
|
|
|
#print "MySQLhdb::DeleteMD5 $recordid\n"; |
218
|
0
|
|
|
|
|
0
|
DeleteKey($recordid, $xwi->md5); |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub DeleteKey { |
222
|
1
|
|
|
1
|
0
|
18
|
my ($key, $md5) = @_; |
223
|
1
|
50
|
|
|
|
19
|
if (!defined($sv)) { Open(); } #Init $sv CHANGE? |
|
1
|
|
|
|
|
15
|
|
224
|
|
|
|
|
|
|
#OAI |
225
|
0
|
0
|
|
|
|
|
if ($doOAI) { |
226
|
|
|
|
|
|
|
# $sv->prepare("REPLACE INTO oai SET status='deleted', recordid=?, md5=?")->execute($key,$md5); |
227
|
|
|
|
|
|
|
##FEL recurdurl updaterad i Database.pm FIX! |
228
|
0
|
|
|
|
|
|
$sv->prepare("REPLACE INTO oai SELECT recordid,md5,NOW(),'deleted' FROM recordurl WHERE recordid=?")->execute($key); |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
#OAI |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
#Zebra |
233
|
0
|
0
|
|
|
|
|
if (my $zh = Combine::Config::Get('ZebraHost')) { |
234
|
0
|
|
|
|
|
|
require Combine::Zebra; |
235
|
|
|
|
|
|
|
#Not needed: if ($md5 eq '') { ($md5)=$sv->selectrow_array('SELECT md5 FROM recordurl WHERE recordid=$key'); } |
236
|
0
|
|
|
|
|
|
Combine::Zebra::delete($zh, $md5, $key); |
237
|
|
|
|
|
|
|
} |
238
|
0
|
0
|
|
|
|
|
if (my $sh = Combine::Config::Get('SolrHost')) { |
239
|
0
|
|
|
|
|
|
require Combine::Solr; |
240
|
0
|
|
|
|
|
|
Combine::Solr::delete($sh, $md5, $key); |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
#print "MySQLhdb::DeleteKey $key\n"; |
244
|
0
|
|
|
|
|
|
my $res = $sv->do(qq{DELETE FROM hdb WHERE recordid=$key;}); |
245
|
0
|
|
|
|
|
|
$res = $sv->do(qq{DELETE FROM html WHERE recordid=$key;}); |
246
|
0
|
|
|
|
|
|
$res = $sv->do(qq{DELETE FROM search WHERE recordid=$key;}); |
247
|
0
|
|
|
|
|
|
$res = $sv->do(qq{DELETE FROM meta WHERE recordid=$key;}); |
248
|
0
|
|
|
|
|
|
$res = $sv->do(qq{DELETE FROM analys WHERE recordid=$key}); |
249
|
0
|
|
|
|
|
|
$res = $sv->do(qq{DELETE FROM links WHERE recordid=$key;}); |
250
|
0
|
|
|
|
|
|
$res = $sv->do(qq{DELETE FROM topic WHERE recordid=$key;}); |
251
|
0
|
|
|
|
|
|
$res = $sv->do(qq{DELETE FROM recordurl WHERE recordid=$key;}); |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
sub Get { |
255
|
0
|
|
|
0
|
0
|
|
my ($key) = @_; |
256
|
|
|
|
|
|
|
#should return an initalized xwi-object |
257
|
0
|
0
|
|
|
|
|
if (!defined($sv)) { Open(); } #Init $sv CHANGE? |
|
0
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
|
259
|
0
|
|
|
|
|
|
my ($type, $title, $modifiedDate, $expiryDate, $length, $server, $etag, $nheadings, $nlinks, $headings, $ip) = |
260
|
|
|
|
|
|
|
$sv->selectrow_array(qq{SELECT type,title, |
261
|
|
|
|
|
|
|
UNIX_TIMESTAMP(mdate),IF(expiredate,UNIX_TIMESTAMP(expiredate),0), |
262
|
|
|
|
|
|
|
length,server,etag,nheadings,nlinks,headings,UNCOMPRESS(ip) |
263
|
|
|
|
|
|
|
FROM hdb WHERE recordid='$key';}); |
264
|
|
|
|
|
|
|
|
265
|
0
|
|
|
|
|
|
my $xwi = new Combine::XWI ; |
266
|
0
|
|
|
|
|
|
$xwi->recordid($key); |
267
|
|
|
|
|
|
|
#url Relies on that all urls are in table urls |
268
|
0
|
|
|
|
|
|
$xwi->type($type); |
269
|
0
|
|
|
|
|
|
$xwi->title(Encode::decode('utf8',$title)); |
270
|
0
|
|
|
|
|
|
$xwi->modifiedDate($modifiedDate); |
271
|
0
|
0
|
|
|
|
|
if ($expiryDate>0) {$xwi->expiryDate($expiryDate)}; |
|
0
|
|
|
|
|
|
|
272
|
0
|
|
|
|
|
|
$xwi->length($length); |
273
|
0
|
|
|
|
|
|
$xwi->server($server); |
274
|
0
|
|
|
|
|
|
$xwi->etag($etag); |
275
|
0
|
|
|
|
|
|
$xwi->nheadings($nheadings); |
276
|
0
|
|
|
|
|
|
$xwi->nlinks($nlinks); |
277
|
0
|
|
|
|
|
|
$headings =~ s/; $//; |
278
|
0
|
|
|
|
|
|
$xwi->heading_add(Encode::decode('utf8',$headings)) ; |
279
|
0
|
|
|
|
|
|
my $ip1=Encode::decode('utf8',$ip); |
280
|
0
|
|
|
|
|
|
$xwi->text(\$ip1); |
281
|
0
|
|
|
|
|
|
my ($html1) = $sv->selectrow_array(qq{SELECT UNCOMPRESS(html) FROM html WHERE recordid='$key';}); |
282
|
0
|
|
|
|
|
|
my $html = Encode::decode('utf8',$html1); |
283
|
0
|
|
|
|
|
|
$xwi->content(\$html); |
284
|
|
|
|
|
|
|
|
285
|
0
|
|
|
|
|
|
my ($urlpath) = $sv->selectrow_array(qq{SELECT path FROM urls,recordurl WHERE recordid='$key' AND recordurl.urlid=urls.urlid;}); |
286
|
0
|
|
|
|
|
|
$xwi->urlpath($urlpath); |
287
|
|
|
|
|
|
|
|
288
|
0
|
|
|
|
|
|
my ($url,$anchor,$lty,$name,$value,$heading); |
289
|
|
|
|
|
|
|
#links |
290
|
0
|
|
|
|
|
|
my $sth = $sv->prepare(qq{SELECT urlid,netlocid,anchor,linktype from links WHERE recordid='$key';}); |
291
|
0
|
|
|
|
|
|
$sth->execute; |
292
|
0
|
|
|
|
|
|
my ($urlid,$netlocid,$checkedDate,$md5,$fingerprint,$cls,$absscore,$relscore,$terms,$alg); |
293
|
0
|
|
|
|
|
|
while (($urlid,$netlocid,$anchor,$lty)=$sth->fetchrow_array) { |
294
|
0
|
|
|
|
|
|
$xwi->link_add('', $netlocid, $urlid, Encode::decode('utf8',$anchor), $lty) ; #no URLstr add? |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
#meta |
298
|
0
|
|
|
|
|
|
$sth = $sv->prepare(qq{SELECT name,value from meta WHERE recordid='$key';}); |
299
|
0
|
|
|
|
|
|
$sth->execute; |
300
|
0
|
|
|
|
|
|
while (($name,$value)=$sth->fetchrow_array) { |
301
|
0
|
|
|
|
|
|
$xwi->meta_add(Encode::decode('utf8',$name),Encode::decode('utf8',$value)) ; |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
# analys -> robot |
305
|
0
|
|
|
|
|
|
$sth = $sv->prepare(qq{SELECT name,value FROM analys WHERE recordid='$key';}); |
306
|
0
|
|
|
|
|
|
$sth->execute; |
307
|
0
|
|
|
|
|
|
while (($name,$value)=$sth->fetchrow_array) { |
308
|
0
|
|
|
|
|
|
$xwi->robot_add($name,Encode::decode('utf8',$value)) ; |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
# topic |
312
|
0
|
|
|
|
|
|
$sth = $sv->prepare(qq{SELECT notation,abscore,relscore,terms,algorithm FROM topic WHERE recordid='$key';}); |
313
|
0
|
|
|
|
|
|
$sth->execute; |
314
|
0
|
|
|
|
|
|
while (($cls,$absscore,$relscore,$terms,$alg)=$sth->fetchrow_array) { |
315
|
0
|
|
|
|
|
|
$xwi->topic_add(Encode::decode('utf8',$cls),$absscore,$relscore,Encode::decode('utf8',$terms),$alg) ; |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
#recordurl |
319
|
0
|
|
|
|
|
|
$sth = $sv->prepare(qq{SELECT urlid,UNIX_TIMESTAMP(lastchecked),md5,fingerprint FROM recordurl WHERE recordid='$key';}); |
320
|
0
|
|
|
|
|
|
$sth->execute; |
321
|
0
|
|
|
|
|
|
while (($urlid,$checkedDate,$md5,$fingerprint)=$sth->fetchrow_array) { |
322
|
0
|
|
|
|
|
|
$xwi->urlid($urlid); |
323
|
0
|
|
|
|
|
|
$xwi->checkedDate($checkedDate); |
324
|
0
|
|
|
|
|
|
$xwi->md5($md5); |
325
|
0
|
|
|
|
|
|
$xwi->fingerprint($fingerprint); |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
|
328
|
0
|
|
|
|
|
|
return $xwi; |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
1; |