line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package SeeAlso::Source::BeaconAggregator::Maintenance; |
2
|
10
|
|
|
10
|
|
431688
|
use strict; |
|
10
|
|
|
|
|
12
|
|
|
10
|
|
|
|
|
224
|
|
3
|
10
|
|
|
10
|
|
30
|
use warnings; |
|
10
|
|
|
|
|
10
|
|
|
10
|
|
|
|
|
212
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
BEGIN { |
6
|
10
|
|
|
10
|
|
27
|
use Exporter (); |
|
10
|
|
|
|
|
10
|
|
|
10
|
|
|
|
|
245
|
|
7
|
10
|
|
|
10
|
|
28
|
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); |
|
10
|
|
|
|
|
9
|
|
|
10
|
|
|
|
|
826
|
|
8
|
10
|
|
|
10
|
|
18
|
$VERSION = '0.2_92'; |
9
|
10
|
|
|
|
|
108
|
@ISA = qw(Exporter); |
10
|
|
|
|
|
|
|
#Give a hoot don't pollute, do not export more than needed by default |
11
|
10
|
|
|
|
|
23
|
@EXPORT = qw(); |
12
|
10
|
|
|
|
|
16
|
@EXPORT_OK = qw(); |
13
|
10
|
|
|
|
|
209
|
%EXPORT_TAGS = (); |
14
|
|
|
|
|
|
|
} |
15
|
|
|
|
|
|
|
|
16
|
10
|
|
|
10
|
|
33
|
use base ("SeeAlso::Source::BeaconAggregator"); |
|
10
|
|
|
|
|
10
|
|
|
10
|
|
|
|
|
3802
|
|
17
|
10
|
|
|
10
|
|
48
|
use Carp; |
|
10
|
|
|
|
|
11
|
|
|
10
|
|
|
|
|
424
|
|
18
|
10
|
|
|
10
|
|
36
|
use HTTP::Date; # not perfect, but the module is commonly installed... |
|
10
|
|
|
|
|
12
|
|
|
10
|
|
|
|
|
372
|
|
19
|
10
|
|
|
10
|
|
3263
|
use HTTP::Request; |
|
10
|
|
|
|
|
103705
|
|
|
10
|
|
|
|
|
226
|
|
20
|
10
|
|
|
10
|
|
4865
|
use LWP::UserAgent; |
|
10
|
|
|
|
|
100967
|
|
|
10
|
|
|
|
|
251
|
|
21
|
10
|
|
|
10
|
|
5446
|
use File::Temp; |
|
10
|
|
|
|
|
119697
|
|
|
10
|
|
|
|
|
78709
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 NAME |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
sasbactrl.pl - command line interface to SeeAlso::Source::BeaconAggregator and |
26
|
|
|
|
|
|
|
auxiliary classes |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 SYNOPSIS |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 DESCRIPTION |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
This Module allows a collection of BEACON files (cf. http://de.wikipedia.org/wiki/Wikipedia:BEACON) |
34
|
|
|
|
|
|
|
to be used as SeeAlso::Source (probably in the context of an SeeAlso::Server application). |
35
|
|
|
|
|
|
|
Therefore it implements the four methods documented in SeeAlso::Source |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
The BEACON files (lists of non-local identifiers of a certain type documenting the coverage of a given |
38
|
|
|
|
|
|
|
online database plus means for access) are imported by the methods provided by |
39
|
|
|
|
|
|
|
SeeAlso::Source::BeaconAggregator::Maintenance.pm, usually by employing the script sasbactrl.pl |
40
|
|
|
|
|
|
|
as command line client. |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Serving other formats than SeeAlso or providing a BEACON file with respect to this |
43
|
|
|
|
|
|
|
SeeAlso service is achieved by using SeeAlso::Source::BeaconAggregator::Publisher. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head1 USAGE |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
Use the C method inherited from C to |
49
|
|
|
|
|
|
|
access an existing database or create a new one. |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head2 Database Methods |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head3 init( [ %options] ) |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
Sets up and initializes the database structure for the object. |
57
|
|
|
|
|
|
|
This has to be done once after creating a new database and after |
58
|
|
|
|
|
|
|
upgrading this module. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
Valid options include: |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=over 8 |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=item verbose |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=item prepareRedirs |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=item identifierClass |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=back |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
The I table contains as columns all valid beacon fields plus |
74
|
|
|
|
|
|
|
the following administrative fields which have to be prefixed with |
75
|
|
|
|
|
|
|
"_" in the interface: |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=over 8 |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=item seqno |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Sequence number: Is incremented on any successfull load |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=item alias |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Unique key: On update older seqences with the same alias are |
86
|
|
|
|
|
|
|
automatically discarded. Most methods take an alias as |
87
|
|
|
|
|
|
|
argument thus obliterating the need to determine the sequence |
88
|
|
|
|
|
|
|
number. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=item sort |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
optional sort key |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=item uri |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
Overrides the #FEED header for updates |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=item ruri |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Real uri from which the last instance was loaded |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=item ftime |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Fetch time: Timestamp as to when this instance was loaded |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Clear this or mtime to force automatic reload. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=item fstat |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Short statistics line of last successful reload on update. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=item mtime |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Modification time: Timestamp of the file / HTTP object from which this instance was loaded. |
118
|
|
|
|
|
|
|
Identical to ftime if no timestamp is provided |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Clear this or ftime to force automatic reload on update. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=item utime |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Timestamp of last update attempt |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=item ustat |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
Short status line of last update attempt. |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=item counti |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
Identifier count |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item countu |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Unique identifier count |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=item admin |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Just to store some remarks. |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=back |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
The I table stores the individual beacon entries from the input files. |
148
|
|
|
|
|
|
|
Its columns are: |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=over 8 |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=item hash |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
Identifier. If a (subclass of) C instance is provided, |
155
|
|
|
|
|
|
|
this will be transformed by the C method. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=item seqno |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Sequence number of the beacon file in the database |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=item altid |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
optional identifier from an alternative identifier system for use |
164
|
|
|
|
|
|
|
with ALTTARGET templates. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=item hits |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
optional number of hits for this identifier in the given resource |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=item info |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
optional information text |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=item link |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
optional explicit URL |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=back |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
The I table contains C, C pairs for various metadata |
182
|
|
|
|
|
|
|
concerning the collection as such, notably the values needed for |
183
|
|
|
|
|
|
|
the Open Search Description and the Header fields needed in case |
184
|
|
|
|
|
|
|
of publishing a beacon file for this collection. |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
The I table stores (unique) C, C pairs for |
187
|
|
|
|
|
|
|
general persistent data. Currently the following keys are defined: |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=over 8 |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=item DATA_VERSION |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
Integer version number to migrate database layout. |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=item IDENTIFIER_CLASS |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
Name of the Identifier class to be used. |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=item REDIRECTION_INDEX |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
Control creation of an additional index for the I column |
202
|
|
|
|
|
|
|
(facialiates reverse lookups as needed for clustering). |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=back |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=cut |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
sub init { |
210
|
9
|
|
|
9
|
1
|
5076
|
my ($self, %options) = @_; |
211
|
9
|
50
|
|
|
|
40
|
$options{'verbose'} = $self->{'verbose'} unless exists $options{'verbose'}; |
212
|
|
|
|
|
|
|
|
213
|
9
|
|
|
|
|
53
|
my @fieldlist = SeeAlso::Source::BeaconAggregator->beaconfields(); |
214
|
9
|
|
|
|
|
25
|
my @bf = map{ join(" ", @{[SeeAlso::Source::BeaconAggregator->beaconfields($_)]}[0..1]) } @fieldlist; |
|
189
|
|
|
|
|
145
|
|
|
189
|
|
|
|
|
273
|
|
215
|
9
|
50
|
|
|
|
33
|
my $hdl = $self->{dbh} or croak("no database handle?"); |
216
|
|
|
|
|
|
|
|
217
|
9
|
|
|
|
|
18
|
local($") = ",\n"; |
218
|
9
|
50
|
|
|
|
102
|
$hdl->do(<<"XxX" |
219
|
|
|
|
|
|
|
CREATE TABLE IF NOT EXISTS repos ( |
220
|
|
|
|
|
|
|
seqno INTEGER PRIMARY KEY AUTOINCREMENT, |
221
|
|
|
|
|
|
|
alias TEXT, |
222
|
|
|
|
|
|
|
sort TEXT, |
223
|
|
|
|
|
|
|
uri VARCHAR(512), |
224
|
|
|
|
|
|
|
ruri VARCHAR(512), |
225
|
|
|
|
|
|
|
mtime INTEGER, |
226
|
|
|
|
|
|
|
utime INTEGER, |
227
|
|
|
|
|
|
|
ftime INTEGER, |
228
|
|
|
|
|
|
|
counti INTEGER DEFAULT 0, |
229
|
|
|
|
|
|
|
countu INTEGER DEFAULT 0, |
230
|
|
|
|
|
|
|
fstat TEXT, |
231
|
|
|
|
|
|
|
ustat TEXT, |
232
|
|
|
|
|
|
|
admin VARCHAR(512), |
233
|
|
|
|
|
|
|
@bf |
234
|
|
|
|
|
|
|
); |
235
|
|
|
|
|
|
|
XxX |
236
|
|
|
|
|
|
|
) or croak("Setup error: ".$hdl->errstr); |
237
|
|
|
|
|
|
|
|
238
|
9
|
50
|
|
|
|
52167
|
$hdl->do("CREATE UNIQUE INDEX IF NOT EXISTS seqnos ON repos(seqno);") or croak("Setup error: ".$hdl->errstr); |
239
|
9
|
50
|
|
|
|
29641
|
$hdl->do("CREATE INDEX IF NOT EXISTS aliases ON repos(alias);") or croak("Setup error: ".$hdl->errstr); |
240
|
|
|
|
|
|
|
|
241
|
9
|
50
|
|
|
|
28997
|
$hdl->do(<<"XxX" |
242
|
|
|
|
|
|
|
CREATE TABLE IF NOT EXISTS beacons ( |
243
|
|
|
|
|
|
|
hash CHARACTER(64) NOT NULL, |
244
|
|
|
|
|
|
|
seqno INTEGER REFERENCES repos(seqno) ON DELETE CASCADE, |
245
|
|
|
|
|
|
|
altid TEXT, |
246
|
|
|
|
|
|
|
hits INTEGER, |
247
|
|
|
|
|
|
|
info VARCHAR(255), |
248
|
|
|
|
|
|
|
link VARCHAR(1024) |
249
|
|
|
|
|
|
|
); |
250
|
|
|
|
|
|
|
XxX |
251
|
|
|
|
|
|
|
) or croak("Setup error: ".$hdl->errstr); |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
# Faciliate lookups |
255
|
9
|
50
|
|
|
|
18354
|
$hdl->do("CREATE INDEX IF NOT EXISTS lookup ON beacons(hash);") or croak("Setup error: ".$hdl->errstr); |
256
|
|
|
|
|
|
|
# maintenance and enforce constraints |
257
|
|
|
|
|
|
|
# (Problem: Dupes w/o altid but differing in link *and* info fields should be legitimate, too) |
258
|
9
|
50
|
|
|
|
16722
|
$hdl->do("CREATE UNIQUE INDEX IF NOT EXISTS mntnce ON beacons(seqno, hash, altid);") or croak("Setup error: ".$hdl->errstr); |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# foreign key on cascade does not work? |
261
|
|
|
|
|
|
|
|
262
|
9
|
50
|
|
|
|
22416
|
$hdl->do(<<"XxX" |
263
|
|
|
|
|
|
|
CREATE TRIGGER IF NOT EXISTS on_delete_seqno BEFORE DELETE ON repos FOR EACH ROW |
264
|
|
|
|
|
|
|
BEGIN |
265
|
|
|
|
|
|
|
DELETE FROM beacons WHERE seqno=OLD.seqno; |
266
|
|
|
|
|
|
|
END; |
267
|
|
|
|
|
|
|
XxX |
268
|
|
|
|
|
|
|
) or croak("Setup error: ".$hdl->errstr); |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
# OpenSearchDescription |
271
|
9
|
50
|
|
|
|
19410
|
$hdl->do(<<"XxX" |
272
|
|
|
|
|
|
|
CREATE TABLE IF NOT EXISTS osd ( |
273
|
|
|
|
|
|
|
key CHAR(20) NOT NULL, |
274
|
|
|
|
|
|
|
val VARCHAR(1024) |
275
|
|
|
|
|
|
|
); |
276
|
|
|
|
|
|
|
XxX |
277
|
|
|
|
|
|
|
) or croak("Setup error: ".$hdl->errstr); |
278
|
9
|
50
|
|
|
|
9072
|
$hdl->do("CREATE INDEX IF NOT EXISTS OSDKeys ON osd(key);") or croak("Setup error: ".$hdl->errstr); |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
# Admin Stuff |
281
|
9
|
50
|
|
|
|
8124
|
$hdl->do(<<"XxX" |
282
|
|
|
|
|
|
|
CREATE TABLE IF NOT EXISTS admin ( |
283
|
|
|
|
|
|
|
key CHAR(20) PRIMARY KEY NOT NULL, |
284
|
|
|
|
|
|
|
val VARCHAR(1024) |
285
|
|
|
|
|
|
|
); |
286
|
|
|
|
|
|
|
XxX |
287
|
|
|
|
|
|
|
) or croak("Setup error: ".$hdl->errstr); |
288
|
|
|
|
|
|
|
|
289
|
9
|
50
|
|
|
|
13068
|
$hdl->do("CREATE UNIQUE INDEX IF NOT EXISTS ADMKeys ON admin(key);") or croak("Setup error: ".$hdl->errstr); |
290
|
|
|
|
|
|
|
|
291
|
9
|
|
|
|
|
23470
|
my $admref = $self->admhash(); |
292
|
|
|
|
|
|
|
|
293
|
9
|
|
|
|
|
13
|
my $verkey = "DATA_VERSION"; |
294
|
9
|
|
|
|
|
23
|
my $goalver = $SeeAlso::Source::BeaconAggregator::DATA_VERSION; |
295
|
9
|
|
100
|
|
|
26
|
my $dbver = $admref->{$verkey} || 0; |
296
|
9
|
100
|
|
|
|
46
|
if ( $dbver != $goalver ) { |
|
|
50
|
|
|
|
|
|
297
|
1
|
|
|
|
|
36
|
print "NOTICE: Database version $dbver: Upgrading to $goalver\n"; |
298
|
|
|
|
|
|
|
# alter tables here |
299
|
1
|
50
|
|
|
|
5
|
if ( $dbver < 2 ) { |
300
|
|
|
|
|
|
|
# my ($at, $type) = SeeAlso::Source::BeaconAggregator->beaconfields("COUNT"); |
301
|
|
|
|
|
|
|
# $hdl->do("ALTER TABLE repos ADD COLUMN $at $type;"); |
302
|
|
|
|
|
|
|
# ($at, $type) = SeeAlso::Source::BeaconAggregator->beaconfields("REMARK"); |
303
|
|
|
|
|
|
|
# $hdl->do("ALTER TABLE repos ADD COLUMN $at $type;"); |
304
|
|
|
|
|
|
|
}; |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
elsif ( $options{'verbose'} ) { |
307
|
0
|
|
|
|
|
0
|
print "INFO: Database version $dbver is current\n"}; |
308
|
|
|
|
|
|
|
|
309
|
9
|
100
|
|
|
|
28
|
unless ( $dbver == $goalver) { |
310
|
1
|
|
|
|
|
6
|
my $verh = $self->stmtHdl("INSERT OR REPLACE INTO admin VALUES (?, ?);", "update version statement"); |
311
|
1
|
50
|
|
|
|
5069
|
$verh->execute($verkey, $goalver) |
312
|
|
|
|
|
|
|
or croak("Could not execute update version statement: ".$verh->errstr); |
313
|
|
|
|
|
|
|
}; |
314
|
|
|
|
|
|
|
|
315
|
9
|
100
|
|
|
|
28
|
unless ( exists $options{'identifierClass'} ) { |
316
|
3
|
100
|
|
|
|
10
|
$options{'identifierClass'} = $self->{'identifierClass'} if exists $self->{'identifierClass'}; |
317
|
|
|
|
|
|
|
}; |
318
|
|
|
|
|
|
|
|
319
|
9
|
|
|
|
|
18
|
my $ickey = "IDENTIFIER_CLASS"; |
320
|
9
|
100
|
100
|
|
|
114
|
if ( (exists $options{identifierClass}) and (my $wanttype = ref($options{'identifierClass'})) ) { |
|
|
100
|
66
|
|
|
|
|
321
|
5
|
100
|
100
|
|
|
62
|
if ( (exists $self->{identifierClass}) && (ref($self->{identifierClass}) ne $wanttype) ) { |
322
|
1
|
|
|
|
|
219
|
croak("Cannot override identifierClass set on new()")}; |
323
|
4
|
100
|
|
|
|
14
|
if ( my $oldtype = $admref->{$ickey} ) { |
324
|
2
|
50
|
|
|
|
8
|
croak ("Identifier mismatch: Cannot set to $wanttype since database already branded to $oldtype") |
325
|
|
|
|
|
|
|
unless($oldtype eq $wanttype); |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
else { |
328
|
2
|
50
|
|
|
|
6
|
print "fixing identifierClass as $wanttype\n" if $options{'verbose'}; |
329
|
2
|
|
|
|
|
7
|
my $ichdl = $self->stmtHdl("INSERT INTO admin VALUES (?, ?);", "fix identifier class statement"); |
330
|
2
|
50
|
|
|
|
24558
|
$ichdl->execute($ickey, $wanttype) |
331
|
|
|
|
|
|
|
or croak("Could not execute fix identifier class statement: ".$ichdl->errstr); |
332
|
2
|
|
|
|
|
19
|
$self->{identifierClass} = $options{identifierClass}; |
333
|
|
|
|
|
|
|
}; |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
elsif ( (exists $options{identifierClass}) and (not $options{identifierClass}) ) { |
336
|
2
|
50
|
|
|
|
7
|
print "removing fixed identifierClass from admin table\n" if $options{'verbose'}; |
337
|
2
|
|
|
|
|
7
|
my $ichdl = $self->stmtHdl("DELETE FROM admin WHERE key=?;", "identifier class statement"); |
338
|
2
|
50
|
|
|
|
68931
|
$ichdl->execute($ickey) |
339
|
|
|
|
|
|
|
or croak("Could not execute remove identifier class statement: ".$ichdl->errstr); |
340
|
2
|
|
|
|
|
34
|
delete $self->{identifierClass}; |
341
|
|
|
|
|
|
|
}; |
342
|
|
|
|
|
|
|
|
343
|
8
|
|
|
|
|
22
|
my $rikey = "REDIRECTION_INDEX"; |
344
|
8
|
50
|
33
|
|
|
61
|
if ( exists $options{prepareRedirs} or exists $admref->{$rikey} ) { |
345
|
0
|
|
|
|
|
0
|
my $rihdl = $self->stmtHdl("INSERT OR REPLACE INTO admin VALUES (?, ?);", "fix redirection index statement"); |
346
|
0
|
0
|
0
|
|
|
0
|
if ( $options{prepareRedirs} or ( $admref->{$rikey} and not exists $options{prepareRedirs} ) ) { |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
347
|
0
|
0
|
0
|
|
|
0
|
print "creating redirection index\n" if $options{prepareRedirs} and $options{'verbose'}; |
348
|
0
|
0
|
|
|
|
0
|
$hdl->do("CREATE INDEX IF NOT EXISTS redir ON beacons(altid,seqno);") or croak("Setup error: ".$hdl->errstr); |
349
|
0
|
0
|
|
|
|
0
|
$rihdl->execute($rikey, 1) |
350
|
|
|
|
|
|
|
or croak("Could not execute fix redirection index: ".$rihdl->errstr); |
351
|
|
|
|
|
|
|
} |
352
|
|
|
|
|
|
|
elsif ( not( $admref->{$rikey} and ($options{prepareRedirs} or (not exists $options{prepareRedirs})) ) ) { |
353
|
0
|
0
|
|
|
|
0
|
print "dropping redirection index\n" if $options{'verbose'}; |
354
|
0
|
0
|
|
|
|
0
|
$hdl->do("DROP INDEX IF EXISTS redir;") or croak("Setup error: ".$hdl->errstr); |
355
|
0
|
0
|
|
|
|
0
|
$rihdl->execute($rikey, 0) |
356
|
|
|
|
|
|
|
or croak("Could not execute fix redirection index: ".$rihdl->errstr); |
357
|
|
|
|
|
|
|
}; |
358
|
|
|
|
|
|
|
# $admref = $self->admhash(); |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
|
361
|
8
|
50
|
|
|
|
27
|
print "[ANALYZE ..." if $options{'verbose'}; |
362
|
8
|
|
|
|
|
96
|
$hdl->do("ANALYZE;"); |
363
|
8
|
50
|
|
|
|
243467
|
print "]\n" if $options{'verbose'}; |
364
|
8
|
|
|
|
|
182
|
return 1; # o.k. |
365
|
|
|
|
|
|
|
}; |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=head3 deflate() |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
Maintenance action: performs VACCUUM, REINDEX and ANALYZE on the database |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=cut |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
sub deflate { |
375
|
1
|
|
|
1
|
1
|
1548
|
my ($self) = @_; |
376
|
1
|
50
|
|
|
|
4
|
my $hdl = $self->{dbh} or croak("no handle?"); |
377
|
1
|
|
|
|
|
13
|
local $hdl->{AutoCommit} = 1; |
378
|
1
|
|
|
|
|
29
|
print "VACUUM\n"; |
379
|
1
|
50
|
|
|
|
5
|
$hdl->do("VACUUM") or carp("could not VACUUM: Skipping..."); |
380
|
1
|
|
|
|
|
36787
|
print "REINDEX\n"; |
381
|
1
|
50
|
|
|
|
14
|
$hdl->do("REINDEX") or croak("could not REINDEX: Abort"); |
382
|
1
|
|
|
|
|
11040
|
print "ANALYZE\n"; |
383
|
1
|
50
|
|
|
|
12
|
$hdl->do("ANALYZE;") or croak("could not ANALYZE: Abort"); |
384
|
1
|
|
|
|
|
13792
|
return 1; |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=head2 Handling of beacon files |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=head3 loadFile ( $file, $fields, %options ) |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
Reads a physical beacon file and stores it with a new Sequence number in the |
393
|
|
|
|
|
|
|
database. |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Returns a triple: |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
my ($seqno, $rec_ok, $message) = loadFile ( $file, $fields, %options ) |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
$seqno is undef on error |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
$seqno and $rec_ok are zero with $message containing an explanation in case |
402
|
|
|
|
|
|
|
of no action taken. |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
$seqno is an positive integer if something was loaded: The L |
405
|
|
|
|
|
|
|
(internal unique identifier) for the representation of the beacon file in |
406
|
|
|
|
|
|
|
the database. |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
=over 8 |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=item $file |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
File to read: Must be a beacon file |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=item $fields |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
Hashref with additional meta and admin fields to store |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
=item Supported options: |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
verbose => (0|1) |
421
|
|
|
|
|
|
|
force => (0|1) process unconditionally without timestamp comparison |
422
|
|
|
|
|
|
|
nostat => (0|1) don't refresh global identifier counters |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
=back |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
If the file does not contain a minimal correct header (eg. is an empty file |
427
|
|
|
|
|
|
|
or an HTML error page accidentaly caught) no action is performed. |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
Otherwise, a fresh SeqNo (sequence number) is generated and meta and |
430
|
|
|
|
|
|
|
BEACON-Lines are stored in the appropriate tables in the database. |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
If the _alias field is provided, existing database entries for this |
433
|
|
|
|
|
|
|
Alias are updated, identifiers not accounted for any more are |
434
|
|
|
|
|
|
|
eventually discarded. |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=cut |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
sub loadFile { |
439
|
9
|
|
|
9
|
1
|
1910
|
my ($self, $file, $fields, %options) = @_; |
440
|
9
|
100
|
|
|
|
31
|
$options{'verbose'} = $self->{'verbose'} unless exists $options{'verbose'}; |
441
|
9
|
|
100
|
|
|
52
|
$options{'verbose'} ||= 0; |
442
|
|
|
|
|
|
|
|
443
|
9
|
50
|
|
|
|
163
|
if ( ! $file ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
444
|
0
|
|
|
|
|
0
|
croak("Missing file argument")} |
445
|
|
|
|
|
|
|
elsif ( ! -e $file ) { |
446
|
0
|
|
0
|
|
|
0
|
print "ERROR: no such file $file\n" && return undef} |
447
|
|
|
|
|
|
|
elsif ( ! -r _ ) { |
448
|
0
|
|
0
|
|
|
0
|
print "ERROR: no read permissions for $file\n" && return undef} |
449
|
|
|
|
|
|
|
elsif ( -z _ ) { |
450
|
0
|
|
|
|
|
0
|
print "WARNING: empty file $file\n"; |
451
|
0
|
|
|
|
|
0
|
return (0,0, "empty file: Will not process"); |
452
|
|
|
|
|
|
|
} |
453
|
9
|
|
|
|
|
28
|
my $mtime = (stat(_))[9]; |
454
|
9
|
50
|
|
|
|
241
|
open(BKN, "<:utf8", $file) or (print "ERROR: cannot read $file\n", return undef); |
455
|
9
|
|
|
|
|
50
|
local($.) = 0; |
456
|
|
|
|
|
|
|
|
457
|
9
|
50
|
|
|
|
31
|
unless ( defined $self->{identifierClass} ) { |
458
|
9
|
|
|
|
|
60
|
my $package = $self->autoIdentifier(); |
459
|
9
|
50
|
66
|
|
|
47
|
$options{'verbose'} && ref($package) && print "Assuming identifiers of type ".ref($package)."\n"; |
460
|
|
|
|
|
|
|
}; |
461
|
|
|
|
|
|
|
|
462
|
9
|
50
|
|
|
|
17
|
$fields = {} unless $fields; |
463
|
9
|
|
33
|
|
|
54
|
$fields->{'_ftime'} ||= time(); |
464
|
9
|
|
66
|
|
|
25
|
$fields->{'_mtime'} ||= $mtime; |
465
|
9
|
100
|
|
|
|
26
|
delete $fields->{_uri} unless $fields->{_uri}; |
466
|
9
|
50
|
|
|
|
45
|
delete $fields->{_alias} unless $fields->{_alias}; |
467
|
9
|
|
50
|
|
|
23
|
my $autopurge = $fields->{_alias} || ""; |
468
|
9
|
|
0
|
|
|
25
|
my $showme = $fields->{_alias} || $fields->{_uri} || $file; |
469
|
|
|
|
|
|
|
|
470
|
9
|
100
|
|
|
|
19
|
if ( $options{'verbose'} ) { |
471
|
4
|
|
50
|
|
|
16
|
printf("* Loading %s from URI %s\n", $fields->{_alias} || "", $fields->{_uri} || ""); |
|
|
|
50
|
|
|
|
|
472
|
4
|
|
|
|
|
123
|
printf("* local input %s (%s)\n", $file, SeeAlso::Source::BeaconAggregator::tToISO($mtime)); |
473
|
|
|
|
|
|
|
}; |
474
|
|
|
|
|
|
|
|
475
|
9
|
|
|
|
|
13
|
my ($collno, $inserthandle, $replacehandle, $err, $format); |
476
|
9
|
|
|
|
|
14
|
my ($linecount, $headerseen, $oseq) = (0, 0, 0); |
477
|
9
|
|
|
|
|
21
|
my ($reccount, $recill, $recign, $recnil, $recupd, $recnew, $recdupl, $recdel) = (0, 0, 0, 0, 0, 0, 0, 0); |
478
|
9
|
|
|
|
|
10
|
local($_); |
479
|
|
|
|
|
|
|
lines: |
480
|
9
|
|
|
|
|
115
|
while ( ) { |
481
|
120
|
|
|
|
|
595
|
s/[ \x0d\x0a]+$//; |
482
|
120
|
100
|
|
|
|
243
|
unless ( $linecount++ ) { |
483
|
9
|
50
|
|
|
|
37
|
if ( s/^\x{FEFF}// ) { # BOM-Character |
|
|
50
|
|
|
|
|
|
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
elsif ( s/^\xef\xbb\xbf// ) { # BOM-Bytes |
486
|
0
|
|
|
|
|
0
|
print "ERROR: cannot cope with doubly UTF-8 encoded $file\n"; |
487
|
0
|
|
|
|
|
0
|
return (undef, undef, "encoding trouble")}; |
488
|
9
|
50
|
|
|
|
37
|
if ( /^\s*$/ ) { |
489
|
0
|
|
|
|
|
0
|
print "WARNING: Discarding blank line before beacon header [$showme l.$.]\n"; |
490
|
0
|
|
|
|
|
0
|
next; |
491
|
|
|
|
|
|
|
}; |
492
|
|
|
|
|
|
|
}; |
493
|
120
|
100
|
|
|
|
166
|
if ( not defined $collno ) { # $collno used as flag: "still in header" |
494
|
65
|
100
|
|
|
|
296
|
if ( /^#\s*([A-Z][\w-]*):\s*(.*)$/ ) { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
495
|
55
|
|
|
|
|
38
|
$headerseen++; |
496
|
55
|
|
|
|
|
94
|
my ($field, $data) = ($1, $2); |
497
|
55
|
50
|
|
|
|
100
|
$field =~ s/^DATE$/TIMESTAMP/ && print "WARNING: corrected DATE to TIMESTAMP in Beacon-Header [$showme l.$.]\n"; |
498
|
55
|
|
|
|
|
90
|
$data =~ s/\s+$//; |
499
|
55
|
50
|
|
|
|
105
|
next if $data =~ /^\s*$/; |
500
|
55
|
100
|
|
|
|
178
|
if ( SeeAlso::Source::BeaconAggregator->beaconfields($field) ) { |
501
|
52
|
50
|
|
|
|
69
|
if ( $fields->{$field} ) { |
502
|
0
|
|
|
|
|
0
|
print "WARNING: Skipping already set $field [$showme l.$.]\n"} |
503
|
|
|
|
|
|
|
else { |
504
|
52
|
|
|
|
|
297
|
$fields->{$field} = $data} |
505
|
|
|
|
|
|
|
} |
506
|
|
|
|
|
|
|
else { |
507
|
3
|
|
|
|
|
43
|
print "WARNING: Ignoring unknown $field [$data] [$showme l.$.]\n"; |
508
|
|
|
|
|
|
|
}; |
509
|
|
|
|
|
|
|
} |
510
|
|
|
|
|
|
|
elsif ( /^(#[^:\s]+)/ ) { |
511
|
0
|
|
|
|
|
0
|
print "WARNING: Discarding unparseable line >$1...< in beacon header context [$showme l.$.]\n"} |
512
|
|
|
|
|
|
|
elsif ( /^\s*$/ ) { |
513
|
1
|
50
|
|
|
|
20
|
print "NOTICE: Discarding blank line in beacon header context [$showme l.$.]\n" if $options{'verbose'}} |
514
|
|
|
|
|
|
|
elsif ( ! $headerseen ) { |
515
|
0
|
|
|
|
|
0
|
print "ERROR: no header fields [$showme l.$.]\n"; |
516
|
0
|
|
|
|
|
0
|
return (0, 0, "no header fields: Will not proceed"); |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
else { |
519
|
9
|
|
|
|
|
40
|
($collno, $err, $format, $inserthandle, $replacehandle, $oseq) = $self->processbeaconheader($fields, %options); |
520
|
9
|
50
|
|
|
|
32
|
unless ( $collno ) { |
521
|
0
|
|
|
|
|
0
|
print "ERROR: metadata error [$showme l.$.]\n"; |
522
|
0
|
|
|
|
|
0
|
return (0, 0, "metadata error: $err"); |
523
|
|
|
|
|
|
|
}; |
524
|
9
|
|
|
|
|
99
|
$self->{dbh}->{AutoCommit} = 0; |
525
|
9
|
|
|
|
|
19
|
$linecount --; |
526
|
9
|
|
|
|
|
24
|
redo lines; |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
} |
529
|
|
|
|
|
|
|
else { |
530
|
55
|
|
|
|
|
116
|
s/^\s+//; s/\s+$//; |
|
55
|
|
|
|
|
122
|
|
531
|
55
|
|
|
|
|
42
|
my ($id, $altid, @rest); |
532
|
55
|
|
|
|
|
369
|
($id, @rest) = split(/\s*\|\s*/, $_, 4); |
533
|
55
|
50
|
|
|
|
190
|
($id, $altid) = split(/\s*=\s*/, $id, 2) if $id; |
534
|
55
|
50
|
|
|
|
89
|
$id || ($recnil++, next); |
535
|
|
|
|
|
|
|
|
536
|
55
|
100
|
|
|
|
106
|
if ( $options{'filter'} ) { |
537
|
14
|
|
|
|
|
17
|
($id, $altid) = &{$options{'filter'}}($id, $altid, @rest); |
|
14
|
|
|
|
|
37
|
|
538
|
14
|
100
|
|
|
|
112
|
unless ( $id ) { |
539
|
7
|
|
|
|
|
6
|
$recign ++; |
540
|
7
|
50
|
|
|
|
17
|
unless ( ++$reccount % 10000 ) { |
541
|
0
|
|
|
|
|
0
|
$self->{dbh}->{AutoCommit} = 1; |
542
|
0
|
0
|
|
|
|
0
|
print "$reccount\n" if $options{'verbose'}; |
543
|
0
|
|
|
|
|
0
|
$self->{dbh}->{AutoCommit} = 0; |
544
|
|
|
|
|
|
|
}; |
545
|
7
|
|
|
|
|
26
|
next lines; |
546
|
|
|
|
|
|
|
}; |
547
|
|
|
|
|
|
|
}; |
548
|
48
|
|
100
|
|
|
129
|
$altid ||= ""; |
549
|
|
|
|
|
|
|
|
550
|
48
|
|
|
|
|
36
|
my($hits, $info, $link); |
551
|
48
|
100
|
100
|
|
|
273
|
if ( @rest && ($rest[$#rest] =~ m!^\S+://\S+$!) ) { |
|
|
50
|
|
|
|
|
|
552
|
18
|
|
|
|
|
26
|
$link = pop @rest} |
553
|
|
|
|
|
|
|
elsif ( defined $rest[2] ) { |
554
|
0
|
|
|
|
|
0
|
print "WARNING: unparseable link content >$rest[2]< [$showme l.$.]"}; |
555
|
|
|
|
|
|
|
|
556
|
48
|
100
|
100
|
|
|
347
|
if ( @rest && ($rest[0] =~ /^\d*$/) ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
557
|
9
|
|
|
|
|
14
|
$hits = shift @rest; |
558
|
|
|
|
|
|
|
# really disregard hits with explicit 0? |
559
|
9
|
100
|
66
|
|
|
58
|
if ( (!$altid) and (!$link) and ($format =~ /\baltTARGET\b/) ) { |
|
|
|
100
|
|
|
|
|
560
|
1
|
|
50
|
|
|
6
|
$altid = shift @rest || ""; |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
else { |
563
|
8
|
|
100
|
|
|
20
|
$info = shift @rest || ""; |
564
|
|
|
|
|
|
|
}; |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
elsif ( defined $rest[1] ) { |
567
|
1
|
|
|
|
|
2
|
$hits = ""; |
568
|
1
|
50
|
33
|
|
|
13
|
if ( (!$altid) and (!$link) and ($format =~ /\baltTARGET\b/) ) { |
|
|
|
33
|
|
|
|
|
569
|
1
|
|
|
|
|
3
|
$info = shift @rest; |
570
|
1
|
|
|
|
|
2
|
$altid = shift @rest; |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
else { |
573
|
0
|
|
|
|
|
0
|
shift @rest; |
574
|
0
|
|
|
|
|
0
|
$info = shift @rest; |
575
|
|
|
|
|
|
|
}; |
576
|
|
|
|
|
|
|
} |
577
|
|
|
|
|
|
|
elsif ( defined $rest[0] ) { |
578
|
30
|
|
|
|
|
30
|
$hits = ""; |
579
|
30
|
|
|
|
|
36
|
$info = shift @rest; |
580
|
|
|
|
|
|
|
}; |
581
|
48
|
50
|
|
|
|
80
|
if ( @rest ) { |
582
|
0
|
|
|
|
|
0
|
print "WARNING: unparseable content >$_< [$showme l.$.]"}; |
583
|
|
|
|
|
|
|
|
584
|
48
|
100
|
|
|
|
65
|
unless ( $link ) { |
585
|
30
|
100
|
|
|
|
118
|
if ( ($format =~ /\bhasTARGET\b/) ) { # ok |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
586
|
|
|
|
|
|
|
} |
587
|
|
|
|
|
|
|
elsif ( $format =~ /\baltTARGET\b/ ) { |
588
|
2
|
50
|
|
|
|
4
|
if ( $altid ) { # also ok |
|
|
0
|
|
|
|
|
|
589
|
|
|
|
|
|
|
} |
590
|
|
|
|
|
|
|
elsif ( $info ) { # too strange |
591
|
0
|
0
|
|
|
|
0
|
print "WARNING: discarding >$id<".(defined $hits ? " ($hits)" : "")." for $info without link [$showme l.$.] (assertion failed)\n"; |
592
|
0
|
|
|
|
|
0
|
$recill++; |
593
|
0
|
|
|
|
|
0
|
next lines; |
594
|
|
|
|
|
|
|
} |
595
|
|
|
|
|
|
|
else { # feed id as altid |
596
|
0
|
|
|
|
|
0
|
$altid = $id} |
597
|
|
|
|
|
|
|
} |
598
|
|
|
|
|
|
|
elsif ( $format =~ /\bnoTARGET\b/ ) { |
599
|
0
|
0
|
|
|
|
0
|
print "NOTICE: discarding >$id<".(defined $hits ? " ($hits)" : "")." without link [$showme l.$.]\n" if $options{'verbose'} > 1; |
|
|
0
|
|
|
|
|
|
600
|
0
|
|
|
|
|
0
|
$recill++; |
601
|
0
|
|
|
|
|
0
|
next lines; |
602
|
|
|
|
|
|
|
} |
603
|
|
|
|
|
|
|
else { |
604
|
0
|
0
|
|
|
|
0
|
print "WARNING: discarding >$id<".(defined $hits ? " ($hits)" : "")." without link [$showme l.$.] (assertion failed)\n"; |
605
|
0
|
|
|
|
|
0
|
$recill++; |
606
|
0
|
|
|
|
|
0
|
next lines; |
607
|
|
|
|
|
|
|
} |
608
|
|
|
|
|
|
|
}; |
609
|
|
|
|
|
|
|
|
610
|
48
|
100
|
|
|
|
136
|
if ( $format !~ /\baltTARGET\b/ ) { # Allow certain duplicates (force disambiguisation) |
611
|
4
|
|
33
|
|
|
20
|
$altid ||= $info || $link} |
|
|
|
33
|
|
|
|
|
612
|
|
|
|
|
|
|
|
613
|
48
|
100
|
|
|
|
81
|
$hits = "" unless defined $hits; |
614
|
48
|
50
|
|
|
|
73
|
($hits =~ /^0+/) && ($recnil++, next); # Explizit "0" => raus |
615
|
48
|
100
|
|
|
|
88
|
$hits = 0 if $hits eq ""; |
616
|
48
|
|
100
|
|
|
104
|
$altid ||= ""; |
617
|
48
|
|
|
|
|
38
|
my $hash; |
618
|
48
|
50
|
|
|
|
79
|
if ( defined $self->{identifierClass} ) { |
619
|
0
|
|
|
|
|
0
|
$self->{identifierClass}->value($id); |
620
|
0
|
0
|
|
|
|
0
|
unless ( $self->{identifierClass}->valid ) { |
621
|
0
|
0
|
|
|
|
0
|
print "NOTICE: invalid identifier >$id< ($hits) [$showme l.$.]\n" if $options{'verbose'}; |
622
|
0
|
|
|
|
|
0
|
$recill++; |
623
|
0
|
|
|
|
|
0
|
next lines; |
624
|
|
|
|
|
|
|
}; |
625
|
0
|
|
|
|
|
0
|
$hash = $self->{identifierClass}->hash(); |
626
|
|
|
|
|
|
|
} |
627
|
|
|
|
|
|
|
else { |
628
|
48
|
|
|
|
|
45
|
$hash = $id}; |
629
|
48
|
|
|
|
|
35
|
my $did; |
630
|
48
|
100
|
66
|
|
|
2309
|
if ( $replacehandle && ($did = $replacehandle->execute($hits, $info, $link, $hash, $altid)) ) { # UPDATE OR FAIL old record |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
631
|
24
|
50
|
|
|
|
98
|
if ( $replacehandle->err ) { |
|
|
100
|
|
|
|
|
|
632
|
0
|
|
|
|
|
0
|
carp("update in trouble: $replacehandle->errstring [$showme l.$.]"); |
633
|
0
|
|
|
|
|
0
|
$recdupl++; |
634
|
|
|
|
|
|
|
} |
635
|
|
|
|
|
|
|
elsif ( $did eq "0E0" ) { # not found, try insert |
636
|
19
|
|
|
|
|
586
|
$did = $inserthandle->execute($hash, $altid, $hits, $info, $link); |
637
|
19
|
100
|
|
|
|
40
|
if ( $did eq "0E0" ) { |
638
|
6
|
|
|
|
|
7
|
$recdupl++; |
639
|
6
|
50
|
|
|
|
9
|
if ( $altid ) { |
640
|
0
|
0
|
|
|
|
0
|
print "INFO: did not insert duplicate Id >$id< = >$altid< ($hits) [$showme l.$.]\n" if $options{'verbose'}} |
641
|
|
|
|
|
|
|
else { |
642
|
6
|
50
|
|
|
|
14
|
print "INFO: did not insert duplicate Id >$id< ($hits) [$showme l.$.]\n" if $options{'verbose'} > 1}; |
643
|
|
|
|
|
|
|
} |
644
|
|
|
|
|
|
|
else { |
645
|
13
|
|
|
|
|
13
|
$recnew++}; |
646
|
|
|
|
|
|
|
} |
647
|
|
|
|
|
|
|
else { |
648
|
5
|
|
|
|
|
6
|
$recupd++}; |
649
|
|
|
|
|
|
|
} |
650
|
|
|
|
|
|
|
elsif ( $did = $inserthandle->execute($hash, $altid, $hits, $info, $link) ) { # INSERT OR IGNORE new record |
651
|
24
|
100
|
|
|
|
48
|
if ( $did eq "0E0" ) { |
652
|
7
|
|
|
|
|
6
|
$recdupl++; |
653
|
7
|
50
|
|
|
|
17
|
print "INFO: did not insert duplicate Id $id ($hits) [$showme l.$.]\n" if $options{'verbose'} > 1; |
654
|
|
|
|
|
|
|
} |
655
|
|
|
|
|
|
|
else { |
656
|
17
|
|
|
|
|
21
|
$recnew++}; |
657
|
|
|
|
|
|
|
} |
658
|
|
|
|
|
|
|
elsif ( $inserthandle->errstr =~ /constraint/ ) { |
659
|
0
|
|
|
|
|
0
|
$recdupl++; |
660
|
0
|
0
|
|
|
|
0
|
print "INFO: duplicate Id $id ($hits): not inserting [$showme l.$.]\n" if $options{'verbose'} > 1; |
661
|
|
|
|
|
|
|
} |
662
|
|
|
|
|
|
|
else { |
663
|
0
|
|
|
|
|
0
|
croak("Could not insert: ($id, $hits, $info, $link): ".$inserthandle->errstr)}; |
664
|
|
|
|
|
|
|
|
665
|
48
|
50
|
|
|
|
324
|
unless ( ++$reccount % 10000 ) { |
666
|
0
|
|
|
|
|
0
|
$self->{dbh}->{AutoCommit} = 1; |
667
|
0
|
0
|
|
|
|
0
|
print "$reccount\n" if $options{'verbose'}; |
668
|
0
|
|
|
|
|
0
|
$self->{dbh}->{AutoCommit} = 0; |
669
|
|
|
|
|
|
|
}; |
670
|
|
|
|
|
|
|
} |
671
|
|
|
|
|
|
|
}; |
672
|
9
|
50
|
|
|
|
62
|
if ( not defined $collno ) { |
673
|
0
|
0
|
|
|
|
0
|
if ( $headerseen ) { |
|
|
0
|
|
|
|
|
|
674
|
0
|
|
|
|
|
0
|
($collno, $err, $format, $inserthandle, $replacehandle, $oseq) = $self->processbeaconheader($fields, %options); |
675
|
0
|
0
|
|
|
|
0
|
if ( $collno ) { |
676
|
0
|
|
|
|
|
0
|
print "WARNING: no idn content in file [$showme l.$.]\n"} |
677
|
|
|
|
|
|
|
else { |
678
|
0
|
|
|
|
|
0
|
print "ERROR: metadata error [$showme l.$.]\n"; |
679
|
0
|
|
|
|
|
0
|
return (0,0, "metadata error: $err"); |
680
|
|
|
|
|
|
|
}; |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
elsif ( $. ) { |
683
|
0
|
|
|
|
|
0
|
print "ERROR: no header fields [$showme l.$.]\n"; |
684
|
0
|
|
|
|
|
0
|
return (0, 0, "no header fields: Will not proceed"); |
685
|
|
|
|
|
|
|
} |
686
|
|
|
|
|
|
|
else { |
687
|
0
|
|
|
|
|
0
|
print "WARNING: empty file [$showme]\n"; |
688
|
0
|
|
|
|
|
0
|
return (0,0, "empty file"); |
689
|
|
|
|
|
|
|
}; |
690
|
|
|
|
|
|
|
} |
691
|
9
|
|
|
|
|
85672
|
$self->{dbh}->{AutoCommit} = 1; |
692
|
|
|
|
|
|
|
|
693
|
9
|
50
|
|
|
|
80
|
if ( $autopurge ) { |
694
|
9
|
|
|
|
|
166
|
$self->{dbh}->{AutoCommit} = 0; |
695
|
9
|
100
|
|
|
|
34
|
if ( $oseq ) { |
696
|
4
|
|
|
|
|
30
|
my ($bcdelh, $bcdelexpl) = $self->stmtHdl("DELETE FROM beacons WHERE seqno==?"); |
697
|
4
|
50
|
|
|
|
15
|
$self->stmtExplain($bcdelexpl, $oseq) if $ENV{'DBI_PROFILE'}; |
698
|
4
|
50
|
|
|
|
441
|
my $rows = $bcdelh->execute($oseq) or croak("Could not execute >".$bcdelh->{Statement}."<: ".$bcdelh->errstr); |
699
|
4
|
|
|
|
|
4414
|
$self->{dbh}->{AutoCommit} = 1; |
700
|
4
|
50
|
|
|
|
20
|
printf("INFO: Purged %s surplus identifiers from old sequence %u\n", $rows, $oseq) if $options{'verbose'}; |
701
|
4
|
100
|
|
|
|
16
|
$rows = "0" if $rows eq "0E0"; |
702
|
4
|
|
|
|
|
14
|
$recdel += $rows; |
703
|
|
|
|
|
|
|
}; |
704
|
|
|
|
|
|
|
|
705
|
9
|
|
|
|
|
41
|
$self->{dbh}->{AutoCommit} = 0; |
706
|
9
|
|
|
|
|
62
|
my ($rpdelh, $rpdelexpl) = $self->stmtHdl("DELETE FROM repos WHERE (alias=?) AND (seqno);"); |
707
|
9
|
50
|
|
|
|
34
|
$self->stmtExplain($rpdelexpl, $autopurge, $collno) if $ENV{'DBI_PROFILE'}; |
708
|
9
|
50
|
|
|
|
1308
|
my $rows = $rpdelh->execute($autopurge, $collno) or croak("Could not execute >".$rpdelh->{Statement}."<: ".$rpdelh->errstr); |
709
|
9
|
|
|
|
|
26032
|
$self->{dbh}->{AutoCommit} = 1; |
710
|
9
|
100
|
|
|
|
60
|
$rows = "0" if $rows eq "0E0"; |
711
|
9
|
100
|
|
|
|
175
|
printf("INFO: %u old sequences discarded\n", $rows) if $options{'verbose'}; |
712
|
|
|
|
|
|
|
} |
713
|
|
|
|
|
|
|
|
714
|
9
|
|
50
|
|
|
490
|
printf "NOTICE: New sequence %u for %s: processed %u Records from %u lines\n", |
715
|
|
|
|
|
|
|
$collno, $autopurge || "???", $reccount, $linecount; |
716
|
9
|
|
|
|
|
60
|
my $statline = sprintf "%u replaced, %u new, %u deleted, %u duplicate, %u nil, %u invalid, %u ignored", |
717
|
|
|
|
|
|
|
$recupd, $recnew, $recdel, $recdupl, $recnil, $recill, $recign; |
718
|
9
|
|
|
|
|
157
|
print " ($statline)\n"; |
719
|
|
|
|
|
|
|
|
720
|
9
|
|
|
|
|
21
|
my $recok = $recupd + $recnew; |
721
|
9
|
100
|
66
|
|
|
55
|
my $numchg = ($recnew or $recdel) ? 1 : 0; |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
# my $ct1hdl = $self->stmtHdl("SELECT COUNT(*) FROM beacons WHERE seqno==? LIMIT 1;"); |
724
|
|
|
|
|
|
|
# $ct1hdl->execute($collno) or croak("could not execute live count: ".$ct1hdl->errstr); |
725
|
|
|
|
|
|
|
# my $ct1ref = $ct1hdl->fetchrow_arrayref(); |
726
|
|
|
|
|
|
|
# my $counti = $ct1ref->[0] || 0; |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
# my $ct2hdl = $self->stmtHdl("SELECT COUNT(DISTINCT hash) FROM beacons WHERE seqno==?"); |
729
|
|
|
|
|
|
|
# using subquery to trick SQLite into using indices |
730
|
|
|
|
|
|
|
# my $ct2hdl = $self->stmtHdl("SELECT COUNT(*) FROM (SELECT DISTINCT hash FROM beacons WHERE seqno==?) LIMIT 1;"); |
731
|
|
|
|
|
|
|
# $ct2hdl->execute($collno) or croak("could not execute live count: ".$ct2hdl->errstr); |
732
|
|
|
|
|
|
|
# my $ct2ref = $ct2hdl->fetchrow_arrayref(); |
733
|
|
|
|
|
|
|
# my $countu = $ct2ref->[0] || 0; |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
# combined query turned out as not as efficient |
736
|
|
|
|
|
|
|
# my $ct0hdl = $self->stmtHdl("SELECT COUNT(*), COUNT(DISTINCT hash) FROM beacons WHERE seqno==? LIMIT 1;"); |
737
|
|
|
|
|
|
|
# $ct0hdl->execute($collno) or croak("could not execute live count: ".$ct0hdl->errstr); |
738
|
|
|
|
|
|
|
# my $ct0ref = $ct0hdl->fetchrow_arrayref(); |
739
|
|
|
|
|
|
|
# my ($counti, $countu) = ($ct0ref->[0] || 0, $ct0ref->[1] || 0); |
740
|
|
|
|
|
|
|
|
741
|
9
|
|
|
|
|
48
|
my ($updh, $updexpl) = $self->stmtHdl(<<"XxX"); |
742
|
|
|
|
|
|
|
UPDATE OR FAIL repos SET counti=?,countu=?,fstat=?,utime=?,ustat=?,sort=? WHERE seqno==?; |
743
|
|
|
|
|
|
|
XxX |
744
|
|
|
|
|
|
|
|
745
|
9
|
|
50
|
|
|
57
|
my $counti = $self->idStat($collno, 'distinct' => 0) || 0; |
746
|
9
|
50
|
|
|
|
30
|
printf("WARNING: expected %u valid records, counted %u\n", $recok, $counti) if $recok != $counti; |
747
|
9
|
100
|
|
|
|
26
|
unless ( $numchg ) { |
748
|
1
|
|
50
|
|
|
7
|
$fields->{'_counti'} ||= 0; |
749
|
1
|
50
|
|
|
|
5
|
printf("WARNING: expected unchanged number %u valid records, counted %u\n", $fields->{'_counti'}, $counti) if $fields->{'_counti'} != $counti; |
750
|
|
|
|
|
|
|
}; |
751
|
|
|
|
|
|
|
|
752
|
9
|
|
50
|
|
|
49
|
my $sort = $fields->{'_sort'} || ""; |
753
|
|
|
|
|
|
|
my $countu = $numchg ? ( $self->idStat($collno, 'distinct' => 1) || 0 ) |
754
|
9
|
100
|
50
|
|
|
39
|
: ( $fields->{'_countu'} || $self->idStat($collno, 'distinct' => 1) || 0 ); |
|
|
|
50
|
|
|
|
|
755
|
9
|
50
|
|
|
|
36
|
$self->stmtExplain($updexpl, $counti, $countu, $statline, time(), "successfully loaded", $sort, $collno) if $ENV{'DBI_PROFILE'}; |
756
|
|
|
|
|
|
|
$updh->execute($counti, $countu, $statline, time(), "successfully loaded", $sort, $collno) |
757
|
9
|
50
|
|
|
|
84305
|
or croak("Could not execute >".$updh->{Statement}."<: ".$updh->errstr); |
758
|
9
|
|
|
|
|
124
|
close(BKN); |
759
|
|
|
|
|
|
|
|
760
|
9
|
50
|
66
|
|
|
46
|
if ( $numchg or $options{'force'} ) { |
761
|
|
|
|
|
|
|
# if ( $options{'force'} ) { |
762
|
|
|
|
|
|
|
# print "[ANALYZE ..." if $options{'verbose'}; |
763
|
|
|
|
|
|
|
# $self->{dbh}->do("ANALYZE;"); |
764
|
|
|
|
|
|
|
# print "]\n" if $options{'verbose'}; |
765
|
|
|
|
|
|
|
# }; |
766
|
|
|
|
|
|
|
|
767
|
9
|
50
|
|
|
|
27
|
if ( $options{'nostat'} ) { # invalidate since they might have changed |
768
|
0
|
|
|
|
|
0
|
$self->admin('gcounti', undef); |
769
|
0
|
|
|
|
|
0
|
$self->admin('gcountu', undef); |
770
|
|
|
|
|
|
|
} |
771
|
|
|
|
|
|
|
else { |
772
|
9
|
|
50
|
|
|
57
|
$self->admin('gcounti', $self->idStat(undef, 'distinct' => 0) || 0); |
773
|
9
|
|
50
|
|
|
91
|
$self->admin('gcountu', $self->idStat(undef, 'distinct' => 1) || 0); |
774
|
|
|
|
|
|
|
} |
775
|
|
|
|
|
|
|
}; |
776
|
|
|
|
|
|
|
|
777
|
9
|
|
|
|
|
158
|
return ($collno, $recok, undef); |
778
|
|
|
|
|
|
|
} |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
=head4 processbeaconheader($self, $fieldref, [ %options] ) |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
Internal subroutine used by C. |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
=over 8 |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
=item $fieldref |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
Hash with raw fields. |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
=item Supported options: |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
verbose => (0|1) |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
Show seqnos of old instances which are met by the alias |
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
=back |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
=cut |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
sub processbeaconheader { |
803
|
9
|
|
|
9
|
1
|
25
|
my ($self, $fieldref, %options) = @_; |
804
|
9
|
|
|
|
|
10
|
my $osq = 0; |
805
|
9
|
|
|
|
|
9
|
my @carp; |
806
|
|
|
|
|
|
|
|
807
|
9
|
50
|
|
|
|
27
|
if ( my $alias = $fieldref->{_alias} ) { |
808
|
9
|
|
|
|
|
25
|
my $stampfield = SeeAlso::Source::BeaconAggregator->beaconfields("TIMESTAMP"); |
809
|
9
|
|
|
|
|
33
|
my ($listh, $listexpl) = $self->stmtHdl("SELECT seqno, $stampfield, mtime, counti, countu FROM repos WHERE alias=?;"); |
810
|
9
|
50
|
|
|
|
22
|
$self->stmtExplain($listexpl, $alias) if $ENV{'DBI_PROFILE'}; |
811
|
9
|
50
|
|
|
|
465
|
$listh->execute($alias) or croak("Could not execute >".$listh->{Statement}."<: ".$listh->errstr); |
812
|
9
|
|
|
|
|
18
|
my ($rowcnt, $ocounti, $ocountu); |
813
|
9
|
|
|
|
|
82
|
while ( my($row) = $listh->fetchrow_arrayref ) { |
814
|
13
|
100
|
|
|
|
31
|
last unless defined $row; |
815
|
4
|
|
|
|
|
6
|
$rowcnt ++; |
816
|
4
|
|
|
|
|
14
|
($ocounti, $ocountu) = ($row->[3], $row->[4]); |
817
|
4
|
50
|
|
|
|
11
|
if ( $options{'verbose'} ) { |
818
|
0
|
0
|
|
|
|
0
|
print "* Old Instances for $alias:\n" unless $osq; |
819
|
0
|
|
|
|
|
0
|
$osq = $row->[0]; |
820
|
0
|
|
0
|
|
|
0
|
print "+\t#$osq ", SeeAlso::Source::BeaconAggregator::tToISO($row->[1] || $row->[2]), " (", $row->[3] || "???", ")\n"; |
|
|
|
0
|
|
|
|
|
821
|
|
|
|
|
|
|
} |
822
|
|
|
|
|
|
|
else { |
823
|
4
|
|
|
|
|
18
|
$osq = $row->[0]}; |
824
|
|
|
|
|
|
|
} |
825
|
9
|
100
|
66
|
|
|
40
|
if ( $rowcnt && ($rowcnt == 1) ) { |
826
|
4
|
100
|
33
|
|
|
17
|
$fieldref->{_counti} ||= $ocounti if $ocounti; |
827
|
4
|
100
|
33
|
|
|
16
|
$fieldref->{_countu} ||= $ocountu if $ocountu; |
828
|
|
|
|
|
|
|
} |
829
|
|
|
|
|
|
|
}; |
830
|
|
|
|
|
|
|
|
831
|
9
|
|
|
|
|
18
|
my $format = ""; |
832
|
9
|
50
|
66
|
|
|
69
|
if ( $fieldref->{'FORMAT'} && $self->{accept}->{'FORMAT'} ) { |
|
|
100
|
33
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
833
|
0
|
0
|
|
|
|
0
|
if ( $fieldref->{'FORMAT'} =~ $self->{accept}->{'FORMAT'} ) { |
834
|
0
|
|
|
|
|
0
|
$format = $fieldref->{'FORMAT'}} |
835
|
|
|
|
|
|
|
else { |
836
|
0
|
|
|
|
|
0
|
push(@carp, "ERROR: only FORMAT '".$self->{accept}->{'FORMAT'}."' are supported, this is ".$fieldref->{'FORMAT'})} |
837
|
|
|
|
|
|
|
} |
838
|
|
|
|
|
|
|
elsif ( $fieldref->{'FORMAT'} ) { |
839
|
8
|
|
|
|
|
17
|
$format = $fieldref->{'FORMAT'}} |
840
|
|
|
|
|
|
|
elsif ( $fieldref->{'VERSION'} or $fieldref->{'TARGET'} or $fieldref->{'PREFIX'} or $fieldref->{'MESSAGE'} ) { |
841
|
1
|
|
|
|
|
3
|
push(@carp, "WARNING: header line #FORMAT: BEACON should be supplied")} |
842
|
|
|
|
|
|
|
elsif ( $self->{accept}->{'FORMAT'} ) { |
843
|
0
|
|
|
|
|
0
|
push(@carp, "ERROR: header line #FORMAT is missing")} |
844
|
|
|
|
|
|
|
else { |
845
|
0
|
|
|
|
|
0
|
push(@carp, "WARNING: header line #FORMAT: BEACON should be supplied")}; |
846
|
|
|
|
|
|
|
|
847
|
9
|
50
|
66
|
|
|
66
|
if ( $fieldref->{'FORMAT'} && ($fieldref->{'FORMAT'} =~ /v(?:ersion)?\s*(\d+(?:\.\d*)?)/i) ) { |
848
|
0
|
|
0
|
|
|
0
|
$fieldref->{'VERSION'} ||= $1}; |
849
|
9
|
100
|
|
|
|
26
|
unless ( $fieldref->{'VERSION'} ) { |
850
|
2
|
100
|
|
|
|
5
|
$fieldref->{'VERSION'} = $fieldref->{'FORMAT'} ? "0.1" : "1.0"; |
851
|
2
|
|
|
|
|
7
|
push(@carp, "NOTICE: added header field #VERSION as '".$fieldref->{'VERSION'}."'"); |
852
|
|
|
|
|
|
|
}; |
853
|
9
|
50
|
|
|
|
24
|
if ( $self->{accept}->{'VERSION'} ) { |
854
|
|
|
|
|
|
|
($fieldref->{'VERSION'} =~ $self->{accept}->{'VERSION'}) |
855
|
0
|
0
|
|
|
|
0
|
|| push(@carp, "ERROR: only VERSION '".$self->{accept}->{'VERSION'}."' is supported, this is ".$fieldref->{'VERSION'}); |
856
|
|
|
|
|
|
|
}; |
857
|
|
|
|
|
|
|
|
858
|
9
|
100
|
|
|
|
21
|
if ( $fieldref->{'ALTTARGET'} ) { |
859
|
7
|
50
|
|
|
|
22
|
$fieldref->{'ALTTARGET'} = "" unless defined $fieldref->{'ALTTARGET'}; |
860
|
7
|
|
|
|
|
28
|
my $parsed = hDecode($fieldref, 'ALTTARGET'); |
861
|
7
|
50
|
33
|
|
|
108
|
if ( $parsed && ($parsed =~ /(^|[^%])(%.)*%\d\$s/) ) { |
|
|
0
|
|
|
|
|
|
862
|
7
|
|
|
|
|
19
|
$fieldref->{'ALTTARGET'} = $parsed; |
863
|
7
|
|
|
|
|
9
|
$format =~ s/\s*-altTARGET//; |
864
|
7
|
|
|
|
|
18
|
$format .= " -altTARGET"; |
865
|
7
|
50
|
|
|
|
83
|
($parsed =~ /(^|[^%])(%.)*%2\$s/) or |
866
|
|
|
|
|
|
|
push(@carp, "WARNING: header field #ALTTARGET should contain placeholder {ALTID}"); |
867
|
|
|
|
|
|
|
} |
868
|
|
|
|
|
|
|
elsif ( $parsed ) { |
869
|
0
|
|
|
|
|
0
|
push(@carp, "ERROR: header field #ALTTARGET must contain placeholder {ALTID} (or {ID})"); |
870
|
0
|
|
|
|
|
0
|
delete $fieldref->{'ALTTARGET'}; |
871
|
|
|
|
|
|
|
} |
872
|
|
|
|
|
|
|
else { |
873
|
0
|
|
|
|
|
0
|
push(@carp, "ERROR: could not parse header field #ALTTARGET: '".$fieldref->{'ALTTARGET'}."'"); |
874
|
0
|
|
|
|
|
0
|
delete $fieldref->{'ALTTARGET'}; |
875
|
|
|
|
|
|
|
} |
876
|
|
|
|
|
|
|
}; |
877
|
|
|
|
|
|
|
|
878
|
9
|
50
|
|
|
|
27
|
if ( $fieldref->{'IMGTARGET'} ) { |
879
|
0
|
0
|
|
|
|
0
|
$fieldref->{'IMGTARGET'} = "" unless defined $fieldref->{'IMGTARGET'}; |
880
|
0
|
|
|
|
|
0
|
my $parsed = hDecode($fieldref, 'IMGTARGET'); |
881
|
0
|
0
|
0
|
|
|
0
|
if ( $parsed && ($parsed =~ /(^|[^%])(%.)*%\d\$s/) ) { |
|
|
0
|
|
|
|
|
|
882
|
0
|
|
|
|
|
0
|
$fieldref->{'IMGTARGET'} = $parsed; |
883
|
0
|
|
|
|
|
0
|
$format =~ s/\s*-imgTARGET//; |
884
|
0
|
|
|
|
|
0
|
$format .= " -imgTARGET"; |
885
|
|
|
|
|
|
|
} |
886
|
|
|
|
|
|
|
elsif ( $parsed ) { |
887
|
0
|
|
|
|
|
0
|
push(@carp, "WARNING: header field #IMGTARGET should contain placeholders {ID} or {ALTID}")} |
888
|
|
|
|
|
|
|
else { |
889
|
0
|
|
|
|
|
0
|
push(@carp, "ERROR: could not parse header field #IMGTARGET: '".$fieldref->{'IMGTARGET'}."'"); |
890
|
0
|
|
|
|
|
0
|
delete $fieldref->{'IMGTARGET'}; |
891
|
|
|
|
|
|
|
} |
892
|
|
|
|
|
|
|
}; |
893
|
|
|
|
|
|
|
|
894
|
9
|
50
|
|
|
|
21
|
if ( exists $fieldref->{'TARGET'} ) { |
|
|
0
|
|
|
|
|
|
895
|
9
|
50
|
|
|
|
20
|
$fieldref->{'TARGET'} = "" unless defined $fieldref->{'TARGET'}; |
896
|
9
|
|
|
|
|
22
|
my $parsed = hDecode($fieldref, 'TARGET'); |
897
|
9
|
100
|
66
|
|
|
180
|
if ( $parsed && ($parsed =~ /(^|[^%])(%.)*%1\$s/) && ($parsed !~ /(^|[^%])(%.)*%[2-9]\$s/) ) { |
|
|
50
|
66
|
|
|
|
|
898
|
8
|
|
|
|
|
19
|
$fieldref->{'TARGET'} = $parsed; |
899
|
8
|
|
|
|
|
16
|
$format .= " -hasTARGET"; |
900
|
|
|
|
|
|
|
} |
901
|
|
|
|
|
|
|
elsif ( $parsed ) { |
902
|
1
|
50
|
|
|
|
4
|
if ( exists $fieldref->{'ALTTARGET'} ) { |
903
|
0
|
|
|
|
|
0
|
push(@carp, "ERROR: header field #TARGET must contain placeholder {ID} only"); |
904
|
|
|
|
|
|
|
} |
905
|
|
|
|
|
|
|
else { |
906
|
1
|
|
|
|
|
3
|
push(@carp, "WARNING: Adding implicit {ID} to #TARGET as #ALTTARGET"); |
907
|
1
|
|
|
|
|
5
|
$fieldref->{'ALTTARGET'} = $parsed."%2\$s"; |
908
|
1
|
|
|
|
|
2
|
$format .= " -altTARGET"; |
909
|
|
|
|
|
|
|
} |
910
|
1
|
|
|
|
|
4
|
delete $fieldref->{'TARGET'}; |
911
|
|
|
|
|
|
|
} |
912
|
|
|
|
|
|
|
else { |
913
|
0
|
|
|
|
|
0
|
push(@carp, "ERROR: could not parse header field #TARGET: '".$fieldref->{'TARGET'}."'"); |
914
|
0
|
|
|
|
|
0
|
delete $fieldref->{'TARGET'}; |
915
|
|
|
|
|
|
|
} |
916
|
|
|
|
|
|
|
} |
917
|
|
|
|
|
|
|
elsif ( $format =~ /^BEACON/ ) { |
918
|
0
|
|
|
|
|
0
|
push(@carp, "WARNING: header field #TARGET not set: ALL beacon lines will have to provide their link by other means!"); |
919
|
0
|
|
|
|
|
0
|
$format =~ s/\s*-noTARGET//; |
920
|
0
|
|
|
|
|
0
|
$format .= " -noTARGET"; |
921
|
|
|
|
|
|
|
} |
922
|
|
|
|
|
|
|
else { |
923
|
0
|
|
|
|
|
0
|
push(@carp, "ERROR: header field #TARGET is mandatory")}; |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
|
926
|
9
|
50
|
|
|
|
24
|
$fieldref->{'MESSAGE'} = hDecode($fieldref, 'MESSAGE') if $fieldref->{'MESSAGE'}; |
927
|
|
|
|
|
|
|
|
928
|
9
|
100
|
|
|
|
18
|
if ( $fieldref->{'TIMESTAMP'} ) { |
929
|
1
|
50
|
|
|
|
4
|
if ( my $parsed = hDecode($fieldref, 'TIMESTAMP') ) { |
930
|
1
|
50
|
|
|
|
6
|
printf("* %-30s %s\n", "Beacon Timestamp:", hEncode($parsed, 'TIMESTAMP')) if $options{'verbose'}; |
931
|
1
|
|
|
|
|
5
|
$fieldref->{'TIMESTAMP'} = $parsed; |
932
|
|
|
|
|
|
|
} |
933
|
|
|
|
|
|
|
else { # unparseable => use current |
934
|
0
|
|
|
|
|
0
|
push(@carp, "WARNING: cannot parse TIMESTAMP '".$fieldref->{'TIMESTAMP'}."', using current time"); |
935
|
0
|
|
|
|
|
0
|
$fieldref->{'TIMESTAMP'} = $^T; |
936
|
|
|
|
|
|
|
}; |
937
|
|
|
|
|
|
|
} |
938
|
|
|
|
|
|
|
else { |
939
|
|
|
|
|
|
|
# $fieldref->{'TIMESTAMP'} = $fieldref->{'_mtime'} || $^T; |
940
|
8
|
|
|
|
|
14
|
push(@carp, "NOTICE: no header field #TIMESTAMP detected"); |
941
|
|
|
|
|
|
|
}; |
942
|
|
|
|
|
|
|
|
943
|
9
|
50
|
|
|
|
22
|
if ( $fieldref->{'REVISIT'} ) { |
944
|
0
|
0
|
|
|
|
0
|
if ( my $parsed = hDecode($fieldref, 'REVISIT') ) { |
945
|
0
|
0
|
|
|
|
0
|
if ( $parsed < $^T ) { |
946
|
0
|
|
|
|
|
0
|
printf("* %-30s %s [%s]\n", "STALE Revisit hint parsed as", hEncode($parsed, 'REVISIT'), $fieldref->{'REVISIT'})} # if $options{'verbose'} |
947
|
|
|
|
|
|
|
else { |
948
|
0
|
0
|
|
|
|
0
|
printf("* %-30s %s\n", "Revisit hint parsed as", hEncode($parsed, 'REVISIT')) if $options{'verbose'}}; |
949
|
0
|
|
|
|
|
0
|
$fieldref->{'REVISIT'} = $parsed; |
950
|
|
|
|
|
|
|
} |
951
|
|
|
|
|
|
|
else { # unparseable => discard |
952
|
0
|
|
|
|
|
0
|
push(@carp, "WARNING: cannot parse #REVISIT '".$fieldref->{'REVISIT'}."', discarding"); |
953
|
0
|
|
|
|
|
0
|
delete $fieldref->{'REVISIT'}; |
954
|
|
|
|
|
|
|
}; |
955
|
|
|
|
|
|
|
} |
956
|
|
|
|
|
|
|
else { |
957
|
9
|
|
|
|
|
15
|
push(@carp, "INFO: no header field #REVISIT detected"); |
958
|
|
|
|
|
|
|
}; |
959
|
|
|
|
|
|
|
|
960
|
9
|
|
|
|
|
13
|
my $cancontinue = 1; |
961
|
9
|
|
|
|
|
9
|
my $err = ""; |
962
|
9
|
|
|
|
|
24
|
foreach ( @carp ) { |
963
|
21
|
|
|
|
|
567
|
print "$_\n"; |
964
|
21
|
50
|
|
|
|
70
|
if ( s/^ERROR: // ) { |
965
|
0
|
|
|
|
|
0
|
$cancontinue = 0; |
966
|
0
|
0
|
|
|
|
0
|
$err .= " | " if $err; |
967
|
0
|
|
|
|
|
0
|
$err .= $_; |
968
|
|
|
|
|
|
|
}; |
969
|
|
|
|
|
|
|
} |
970
|
9
|
0
|
33
|
|
|
25
|
unless ( $cancontinue or $options{'ignore-header-errors'} ) { |
971
|
0
|
|
|
|
|
0
|
print "CRITICAL: Aborting because of Header Errors\n"; |
972
|
0
|
|
|
|
|
0
|
return (undef, $err, $format); |
973
|
|
|
|
|
|
|
}; |
974
|
|
|
|
|
|
|
|
975
|
9
|
|
66
|
|
|
29
|
$fieldref->{'_uri'} ||= $fieldref->{'FEED'}; |
976
|
9
|
100
|
|
|
|
38
|
delete $fieldref->{'_uri'} unless $fieldref->{'_uri'}; |
977
|
|
|
|
|
|
|
|
978
|
9
|
|
0
|
|
|
41
|
$fieldref->{'_alias'} ||= $fieldref->{'FEED'} || $fieldref->{'TARGET'}; |
|
|
|
33
|
|
|
|
|
979
|
|
|
|
|
|
|
|
980
|
9
|
|
|
|
|
13
|
my (@fn, @fd); |
981
|
9
|
|
|
|
|
49
|
while ( my ($key, $val) = each %$fieldref ) { |
982
|
109
|
100
|
|
|
|
148
|
next unless defined $val; |
983
|
105
|
|
|
|
|
68
|
my $dbkey = ""; |
984
|
105
|
100
|
|
|
|
182
|
if ( $dbkey = SeeAlso::Source::BeaconAggregator->beaconfields($key) ) { |
|
|
50
|
|
|
|
|
|
985
|
54
|
|
|
|
|
55
|
push(@fn, $dbkey)} |
986
|
|
|
|
|
|
|
elsif ( $key =~ /_(\w+)$/ ) { |
987
|
51
|
|
|
|
|
91
|
push(@fn, $1)} |
988
|
|
|
|
|
|
|
else { |
989
|
0
|
|
|
|
|
0
|
next}; |
990
|
105
|
|
|
|
|
96
|
my $myval = $val; |
991
|
105
|
100
|
|
|
|
279
|
unless ( $myval =~ /^\d+$/ ) { |
992
|
82
|
|
|
|
|
163
|
$myval =~ s/'/''/g; |
993
|
82
|
|
|
|
|
150
|
$myval = "'".$myval."'"; |
994
|
|
|
|
|
|
|
}; |
995
|
105
|
|
|
|
|
300
|
push(@fd, $myval); |
996
|
|
|
|
|
|
|
}; |
997
|
9
|
|
|
|
|
16
|
local($") = ",\n"; |
998
|
9
|
|
|
|
|
86
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
999
|
|
|
|
|
|
|
INSERT INTO repos ( seqno, @fn ) VALUES ( NULL, @fd ); |
1000
|
|
|
|
|
|
|
XxX |
1001
|
9
|
50
|
|
|
|
28
|
$self->stmtExplain($sthexpl) if $ENV{'DBI_PROFILE'}; |
1002
|
9
|
50
|
|
|
|
122228
|
$sth->execute() or croak("Could not execute >".$sth->{Statement}."<:".$sth->errstr); |
1003
|
9
|
|
|
|
|
154
|
my $collno = $self->{dbh}->last_insert_id("", "", "", ""); |
1004
|
|
|
|
|
|
|
|
1005
|
9
|
|
|
|
|
13
|
my $rhandle; |
1006
|
9
|
100
|
|
|
|
37
|
if ( $osq ) { |
1007
|
4
|
|
|
|
|
67
|
$rhandle = $self->stmtHdl(<<"XxX"); |
1008
|
|
|
|
|
|
|
UPDATE OR FAIL beacons SET seqno=$collno, hits=?, info=?, link=? WHERE hash=? AND seqno==$osq AND altid=?; |
1009
|
|
|
|
|
|
|
XxX |
1010
|
|
|
|
|
|
|
}; |
1011
|
9
|
|
|
|
|
95
|
my $ihandle = $self->stmtHdl(<<"XxX"); |
1012
|
|
|
|
|
|
|
INSERT OR IGNORE INTO beacons ( hash, seqno, altid, hits, info, link ) VALUES (?, $collno, ?, ?, ?, ?); |
1013
|
|
|
|
|
|
|
XxX |
1014
|
9
|
|
|
|
|
123
|
return ($collno, "", $format, $ihandle, $rhandle, $osq); |
1015
|
|
|
|
|
|
|
} |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
my ($lwpcarp817, $lwpcarp827); |
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
=head3 update ($sq_or_alias, $params, %options) |
1023
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
Loads a beacon file into the database, possibly replacing a previous instance. |
1025
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
Some magic is employed to autoconvert ISO-8859-1 or doubly UTF-8 encoded files |
1027
|
|
|
|
|
|
|
back to UTF-8. |
1028
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
Returns undef, if something goes wrong, or the file was not modified since, |
1030
|
|
|
|
|
|
|
otherwise returns a pair (new seqence number, number of lines imported). |
1031
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
=over 8 |
1034
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
=item $sq_or_alias |
1037
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
Sequence number or alias: Used to determine an existing instance. |
1039
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
=item $params |
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
Hashref, containing |
1044
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
agent => LWP::UserAgent to use |
1046
|
|
|
|
|
|
|
_uri => Feed URL to load from |
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
=item %options |
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
Hash, propagated to C |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
verbose => (0|1) |
1053
|
|
|
|
|
|
|
force => (0|1) process unconditionally without timestamp comparison |
1054
|
|
|
|
|
|
|
nostat => (0|1) don't refresh global identifier counters |
1055
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
=back |
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
Incorporates a new beacon source from a URI in the database or updates an existing one. |
1059
|
|
|
|
|
|
|
For HTTP URIs care is taken not to reload an unmodified BEACON feed (unless the 'force' |
1060
|
|
|
|
|
|
|
option is provided). |
1061
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
If the feed appears to be newer than the previously loaded version it is fetched, |
1063
|
|
|
|
|
|
|
some UTF-8 adjustments are performed if necessary, then it is stored to a temporary file |
1064
|
|
|
|
|
|
|
and from there finally processed by the C method above. |
1065
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
The URI to load is determined by the following order of precedence: |
1067
|
|
|
|
|
|
|
|
1068
|
|
|
|
|
|
|
=over 8 |
1069
|
|
|
|
|
|
|
|
1070
|
|
|
|
|
|
|
=item 1 |
1071
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
_uri Option |
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
=item 2 |
1075
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
admin field uri stored in the database |
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
=item 3 |
1079
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
meta field #FEED taken from the database |
1081
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
=back |
1083
|
|
|
|
|
|
|
|
1084
|
|
|
|
|
|
|
Typical use is with an alias, not with a sequence number: |
1085
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
$db->update('whatever'); |
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
Can be used to initially load beacon files from URIs: |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
$db->update("new_alias", {_uri => $file_uri} ); |
1091
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
=cut |
1093
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
sub update { |
1095
|
9
|
|
|
9
|
1
|
61348
|
my ($self, $sq_or_alias, $params, %options) = @_; |
1096
|
9
|
100
|
|
|
|
37
|
$params = {} unless $params; |
1097
|
9
|
100
|
|
|
|
37
|
$options{'verbose'} = $self->{'verbose'} unless exists $options{'verbose'}; |
1098
|
|
|
|
|
|
|
|
1099
|
9
|
|
|
|
|
13
|
my $ua = $params->{'agent'}; |
1100
|
9
|
50
|
|
|
|
25
|
unless ( $ua ) { |
1101
|
9
|
|
|
|
|
70
|
require LWP::UserAgent; |
1102
|
9
|
50
|
|
|
|
85
|
$ua = LWP::UserAgent->new(agent => "SA-S-BeaconAggregator ", # end with space to get default agent appended |
1103
|
|
|
|
|
|
|
env_proxy => 1, |
1104
|
|
|
|
|
|
|
timeout => 300, |
1105
|
|
|
|
|
|
|
) or croak("cannot create UserAgent"); |
1106
|
|
|
|
|
|
|
}; |
1107
|
|
|
|
|
|
|
|
1108
|
9
|
|
|
|
|
14585
|
my ($cond, @cval) = SeeAlso::Source::BeaconAggregator::mkConstraint($sq_or_alias); |
1109
|
9
|
100
|
|
|
|
40
|
my $alias = ($sq_or_alias =~ /^\d+$/) ? "" : $sq_or_alias; |
1110
|
9
|
|
|
|
|
61
|
my $feedname = SeeAlso::Source::BeaconAggregator->beaconfields("FEED"); |
1111
|
9
|
|
|
|
|
64
|
my ($ssth, $ssthexpl) = $self->stmtHdl(<<"XxX"); |
1112
|
|
|
|
|
|
|
SELECT seqno, uri, alias, $feedname, ftime, mtime, sort FROM repos $cond; |
1113
|
|
|
|
|
|
|
XxX |
1114
|
9
|
50
|
|
|
|
33
|
$self->stmtExplain($ssthexpl, @cval) if $ENV{'DBI_PROFILE'}; |
1115
|
9
|
50
|
|
|
|
807
|
$ssth->execute(@cval) or croak("Could not execute >".$ssth->{Statement}."<: ".$ssth->errstr); |
1116
|
9
|
50
|
|
|
|
55
|
croak("Select old instance error: ".$ssth->errstr) if $ssth->err; |
1117
|
9
|
|
|
|
|
96
|
my $aryref = $ssth->fetchrow_arrayref; |
1118
|
9
|
100
|
|
|
|
35
|
my ($osq, $ouri, $oalias, $feed, $fetchtime, $modtime, $osort) = $aryref ? @$aryref : (); |
1119
|
|
|
|
|
|
|
|
1120
|
9
|
|
33
|
|
|
58
|
my $uri = $params->{'_uri'} || $ouri || $feed; |
1121
|
9
|
50
|
|
|
|
49
|
croak("Cannot update $sq_or_alias: URI not given nor determinable from previous content") unless $uri; |
1122
|
9
|
|
|
|
|
57
|
$uri =~ s/\s$//; |
1123
|
9
|
|
50
|
|
|
42
|
$alias ||= $oalias || ""; |
|
|
|
66
|
|
|
|
|
1124
|
|
|
|
|
|
|
|
1125
|
9
|
100
|
|
|
|
29
|
print "Requesting $uri\n" if $options{'verbose'}; |
1126
|
9
|
50
|
|
|
|
186
|
my $rq = HTTP::Request->new('GET', $uri, ['Accept' => 'text/*']) or croak("could not construct request from $uri"); |
1127
|
9
|
100
|
100
|
|
|
5284
|
if ( $fetchtime && $modtime && !$options{'force'} ) { # allow force-reload by deleting _ftime or _mtime |
|
|
|
100
|
|
|
|
|
1128
|
1
|
50
|
|
|
|
5
|
printf(" %-30s %s\n", "Old instance stamped", scalar localtime($modtime)) if $options{'verbose'}; |
1129
|
1
|
|
|
|
|
6
|
$rq->header('If-Modified-Since', HTTP::Date::time2str($modtime)); |
1130
|
|
|
|
|
|
|
}; |
1131
|
9
|
50
|
|
|
|
105
|
if ( $rq->can("accept_decodable") ) { # LWP 5.817 and newer |
1132
|
9
|
|
|
|
|
26
|
$rq->accept_decodable} |
1133
|
|
|
|
|
|
|
else { |
1134
|
0
|
0
|
0
|
|
|
0
|
carp("please upgrade to LWP >= 5.817 for compression negotiation") if $options{'verbose'} && (!$lwpcarp817++)}; |
1135
|
|
|
|
|
|
|
|
1136
|
9
|
|
|
|
|
66435
|
my $response = $ua->request($rq); # Well, we hoggishly slurp everything into memory, |
1137
|
|
|
|
|
|
|
# however explicit decompression of an already dumped result would be PITA |
1138
|
9
|
|
|
|
|
21374
|
my $nuri = ($response->request)->uri; |
1139
|
9
|
50
|
|
|
|
108
|
print "NOTICE: Differing result URI: $nuri\n" if $uri ne $nuri; |
1140
|
9
|
100
|
|
|
|
76
|
if ( $response->is_success ) { |
|
|
50
|
|
|
|
|
|
1141
|
8
|
100
|
|
|
|
390
|
print $osq ? "INFO: refreshing $alias sq $osq from $uri\n" |
1142
|
|
|
|
|
|
|
: "INFO: importing previously unseen $alias from $uri\n"; |
1143
|
8
|
|
|
|
|
102
|
my $charset; |
1144
|
8
|
50
|
|
|
|
42
|
if ( $response->can("content_charset") ) { # LWP 5.827 and above |
1145
|
8
|
|
|
|
|
27
|
$charset = $response->content_charset; |
1146
|
8
|
100
|
33
|
|
|
9263
|
print "DEBUG: Content charset is $charset\n" if $charset && $options{'verbose'}; |
1147
|
|
|
|
|
|
|
} |
1148
|
|
|
|
|
|
|
else { |
1149
|
0
|
0
|
0
|
|
|
0
|
carp("please upgrade to LWP >= 5.827 for better detection of content_charset") if $options{'verbose'} && (!$lwpcarp827++)}; |
1150
|
8
|
|
50
|
|
|
24
|
$charset ||= "UTF-8"; |
1151
|
|
|
|
|
|
|
|
1152
|
8
|
|
|
|
|
35
|
my $lm = $response->last_modified; |
1153
|
8
|
100
|
33
|
|
|
944
|
printf(" %-30s %s\n", "Last modified", scalar localtime($lm)) if $lm && $options{'verbose'}; |
1154
|
8
|
|
33
|
|
|
24
|
$lm ||= $^T; |
1155
|
|
|
|
|
|
|
|
1156
|
8
|
|
|
|
|
43
|
my $vt = $response->fresh_until(h_min => 1800, h_max => 30 * 86400); |
1157
|
8
|
100
|
33
|
|
|
2820
|
printf(" %-30s %s\n", "Should be valid until", scalar localtime($vt)) if $vt && $options{'verbose'}; |
1158
|
8
|
|
50
|
|
|
35
|
$vt ||= 0; |
1159
|
|
|
|
|
|
|
|
1160
|
|
|
|
|
|
|
# temporary file for dumped contents |
1161
|
8
|
50
|
|
|
|
45
|
my ($tmpfh, $tmpfile) = File::Temp::tempfile("BeaconAggregator-XXXXXXXX", SUFFIX => ".txt", TMPDIR => 1) or croak("Could not acquire temporary file for storage"); |
1162
|
8
|
|
|
|
|
3004
|
my $contref; # reference to content buffer |
1163
|
8
|
50
|
|
|
|
35
|
if ( ! $response->content_is_text ) { |
1164
|
0
|
|
|
|
|
0
|
my $ct = $response->content_type; |
1165
|
0
|
|
|
|
|
0
|
print "WARNING: Response content is $ct, not text/*\n"; |
1166
|
0
|
0
|
|
|
|
0
|
if ( my $ce = $response->content_encoding ) { |
1167
|
0
|
|
|
|
|
0
|
print "NOTICE: Response is also Content-encoded: $ce\n"} |
1168
|
0
|
|
|
|
|
0
|
my $ctt = join("|", $response->decodable()); |
1169
|
0
|
0
|
|
|
|
0
|
if ( $ct =~ s!^(.+\/)?($ctt)$!$2! ) { |
1170
|
|
|
|
|
|
|
# yes: decode anyway since it could be a gzip-encoded .txt.gz file! |
1171
|
0
|
|
|
|
|
0
|
my $cr = $response->decoded_content( raise_error => 1, ref => 1); # method exists since LWP 5.802 (2004-11-30) |
1172
|
0
|
|
|
|
|
0
|
$response->remove_content_headers; |
1173
|
0
|
|
|
|
|
0
|
my $newresp = HTTP::Response->new($response->code, $response->message, $response->headers); |
1174
|
0
|
|
|
|
|
0
|
$newresp->content_type("text/plain; charset: $charset"); |
1175
|
0
|
|
|
|
|
0
|
$newresp->content_encoding($ct); |
1176
|
0
|
|
|
|
|
0
|
$newresp->content_ref($cr); |
1177
|
0
|
|
|
|
|
0
|
$response = $newresp; |
1178
|
|
|
|
|
|
|
} |
1179
|
|
|
|
|
|
|
}; |
1180
|
8
|
|
|
|
|
219
|
$contref = $response->decoded_content( raise_error => 1, ref => 1); # method exists since LWP 5.802 (2004-11-30) |
1181
|
|
|
|
|
|
|
|
1182
|
8
|
50
|
|
|
|
9138
|
if ( $$contref =~ /^\x{FFEF}/ ) { # properly encoded BOM => put Characters to file |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1183
|
0
|
|
|
|
|
0
|
binmode($tmpfh, ":utf8"); |
1184
|
0
|
|
|
|
|
0
|
print "INFO: properly encoded BOM detected: Groked UTF8\n"; # if $options{'verbose'}; |
1185
|
|
|
|
|
|
|
} |
1186
|
|
|
|
|
|
|
elsif ( $$contref =~ s/^\xef\xbb\xbf// ) { # BOM Bytes => put Bytes to file, re-read as UTF-8 |
1187
|
0
|
|
|
|
|
0
|
print "INFO: Byte coded BOM detected: trying to restitute character semantics\n"; # if $options{'verbose'}; |
1188
|
0
|
0
|
|
|
|
0
|
print "INFO: Length is ", length($$contref), " ", (utf8::is_utf8($$contref) ? "characters" : "bytes"), "\n"; |
1189
|
0
|
|
|
|
|
0
|
binmode($tmpfh, ":bytes"); |
1190
|
0
|
0
|
|
|
|
0
|
utf_deduplicate($contref) && binmode($tmpfh, ":utf8"); |
1191
|
|
|
|
|
|
|
} |
1192
|
|
|
|
|
|
|
elsif ( utf8::is_utf8($$contref) ) { # already Upgraded strings should be written as utf-8 |
1193
|
7
|
100
|
|
|
|
88
|
print "INFO: UTF8-ness already established\n" if $options{'verbose'}; |
1194
|
7
|
|
|
|
|
44
|
binmode($tmpfh, ":utf8"); |
1195
|
7
|
|
|
|
|
41
|
utf_deduplicate($contref); # but don't trust it (older LWP with file URLs, ...) |
1196
|
|
|
|
|
|
|
} |
1197
|
|
|
|
|
|
|
elsif ( utf8::decode($$contref) ) { # everything in character semantics now |
1198
|
1
|
50
|
|
|
|
26
|
print "INFO: Could decode bytes to UTF8-characters\n" if $options{'verbose'}; |
1199
|
1
|
|
|
|
|
6
|
binmode($tmpfh, ":utf8"); |
1200
|
|
|
|
|
|
|
} |
1201
|
|
|
|
|
|
|
else { # leave it alone |
1202
|
0
|
|
|
|
|
0
|
print "WARNING: No clue about character encoding: Assume ISO 8859-1\n"; # if $options{'verbose'}; |
1203
|
0
|
|
|
|
|
0
|
binmode($tmpfh, ":utf8"); |
1204
|
|
|
|
|
|
|
}; |
1205
|
8
|
|
|
|
|
74
|
print $tmpfh $$contref; |
1206
|
8
|
|
|
|
|
320
|
close($tmpfh); |
1207
|
|
|
|
|
|
|
# early cleanup since everything might be huge.... |
1208
|
8
|
|
|
|
|
15
|
$contref = $response = undef; |
1209
|
|
|
|
|
|
|
|
1210
|
8
|
|
|
|
|
142
|
my ($collno, $count, $statref) = $self->loadFile($tmpfile, {_alias => $alias, _uri => $uri, _ruri => $nuri, _mtime => $lm, _sort => $osort}, %options); |
1211
|
8
|
50
|
33
|
|
|
106
|
if ( ! $collno && $osq ) { |
1212
|
0
|
|
|
|
|
0
|
my ($usth, $usthexpl) = $self->stmtHdl(<<"XxX"); |
1213
|
|
|
|
|
|
|
UPDATE OR FAIL repos SET utime=?,ustat=? WHERE seqno==?; |
1214
|
|
|
|
|
|
|
XxX |
1215
|
0
|
0
|
|
|
|
0
|
$self->stmtExplain($usthexpl, time(), ($statref ? "load error: $statref" : "internal error"), $osq) if $ENV{'DBI_PROFILE'}; |
|
|
0
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
$usth->execute(time(), ($statref ? "load error: $statref" : "internal error"), $osq) |
1217
|
0
|
0
|
|
|
|
0
|
or croak("Could not execute >".$usth->{Statement}."<: ".$usth->errstr); |
|
|
0
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
}; |
1219
|
|
|
|
|
|
|
|
1220
|
8
|
50
|
|
|
|
606
|
unlink($tmpfile) if -f $tmpfile; |
1221
|
8
|
50
|
|
|
|
416
|
return $collno ? ($collno, $count) : undef; |
1222
|
|
|
|
|
|
|
} |
1223
|
|
|
|
|
|
|
elsif ( $response->code == 304 ) { |
1224
|
1
|
50
|
|
|
|
16
|
print "INFO: $alias not modified since ".HTTP::Date::time2str($modtime)."\n" if $options{'verbose'}; |
1225
|
1
|
|
|
|
|
5
|
my $vt = $response->fresh_until(h_min => 1800, h_max => 6 * 86400); |
1226
|
1
|
50
|
|
|
|
276
|
printf(" %-30s %s\n", "Will not try again before", scalar localtime($vt)) if $options{'verbose'}; |
1227
|
1
|
|
|
|
|
4
|
my ($usth, $usthexpl) = $self->stmtHdl(<<"XxX"); |
1228
|
|
|
|
|
|
|
UPDATE OR FAIL repos SET utime=?,ustat=?,ruri=? WHERE seqno==?; |
1229
|
|
|
|
|
|
|
XxX |
1230
|
1
|
50
|
|
|
|
5
|
$self->stmtExplain($usthexpl, time(), $response->status_line, $nuri, $osq) if $ENV{'DBI_PROFILE'}; |
1231
|
|
|
|
|
|
|
$usth->execute(time(), $response->status_line, $nuri, $osq) |
1232
|
1
|
50
|
|
|
|
7
|
or croak("Could not execute >".$usth->{Statement}."<: ".$usth->errstr); |
1233
|
1
|
|
|
|
|
6594
|
return undef; |
1234
|
|
|
|
|
|
|
} |
1235
|
|
|
|
|
|
|
else { |
1236
|
0
|
|
|
|
|
0
|
print "WARNING: No access to $uri for $alias [".$response->status_line."]\n"; |
1237
|
0
|
|
|
|
|
0
|
print $response->headers_as_string, "\n"; |
1238
|
0
|
0
|
|
|
|
0
|
return undef unless $osq; |
1239
|
0
|
|
|
|
|
0
|
my ($usth, $usthexpl) = $self->stmtHdl(<<"XxX"); |
1240
|
|
|
|
|
|
|
UPDATE OR FAIL repos SET utime=?,ustat=?,ruri=? WHERE seqno==?; |
1241
|
|
|
|
|
|
|
XxX |
1242
|
0
|
0
|
|
|
|
0
|
$self->stmtExplain($usthexpl, time(), $response->status_line, $nuri, $osq) if $ENV{'DBI_PROFILE'}; |
1243
|
|
|
|
|
|
|
$usth->execute(time(), $response->status_line, $nuri, $osq) |
1244
|
0
|
0
|
|
|
|
0
|
or croak("Could not execute >".$usth->{Statement}."<: ".$usth->errstr); |
1245
|
0
|
|
|
|
|
0
|
return undef; |
1246
|
|
|
|
|
|
|
}; |
1247
|
|
|
|
|
|
|
} |
1248
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
sub utf_deduplicate { |
1252
|
7
|
|
|
7
|
0
|
16
|
my ($success, $stringref) = (0, @_); |
1253
|
7
|
50
|
|
|
|
33
|
if ( utf8::downgrade($$stringref, 1) ) { # 1 = FAIL_OK |
1254
|
7
|
|
|
|
|
14
|
my $prevlength = length($$stringref); |
1255
|
7
|
50
|
|
|
|
316
|
print "INFO: Downgrade was possible, length now $prevlength ", (utf8::is_utf8($$stringref) ? "characters" : "bytes"), "\n"; |
1256
|
7
|
|
|
|
|
42
|
while ( utf8::decode($$stringref) ) { |
1257
|
0
|
|
|
|
|
0
|
$success ++; |
1258
|
0
|
|
|
|
|
0
|
my $newlength = length($$stringref); |
1259
|
0
|
0
|
|
|
|
0
|
print "DEBUG: Reassembling as UTF-8 succeeded, length now $newlength ", (utf8::is_utf8($$stringref) ? "characters" : "bytes"), "\n"; |
1260
|
0
|
0
|
|
|
|
0
|
last if $newlength == $prevlength; |
1261
|
0
|
|
|
|
|
0
|
$prevlength = $newlength; |
1262
|
|
|
|
|
|
|
# last unless utf8::downgrade($$stringref, 1); |
1263
|
|
|
|
|
|
|
} |
1264
|
|
|
|
|
|
|
} |
1265
|
|
|
|
|
|
|
else { |
1266
|
0
|
|
|
|
|
0
|
print "WARNING: no downgrade possible, proceed with byte semantics"; |
1267
|
|
|
|
|
|
|
}; |
1268
|
7
|
|
|
|
|
11
|
return $success; |
1269
|
|
|
|
|
|
|
} |
1270
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
=head3 unload ( [ $seqno_or_alias, %options ] ) |
1272
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
Deletes the sequence(s). |
1274
|
|
|
|
|
|
|
|
1275
|
|
|
|
|
|
|
=over 8 |
1276
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
=item $seqno_or_alias |
1278
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
numeric sequence number, Alias or SQL pattern. |
1280
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
=item Supported options: |
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
force => (0|1) |
1284
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
Needed to purge the complete database ($seqno_or_alias empty) or to purge |
1286
|
|
|
|
|
|
|
more than one sequence ($seqno_or_alias yields more than one seqno). |
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
=back |
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
=cut |
1292
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
sub unload { |
1294
|
3
|
|
|
3
|
1
|
942
|
my ($self, $seqno_or_alias, %options) = @_; |
1295
|
3
|
50
|
|
|
|
20
|
$options{'verbose'} = $self->{'verbose'} unless exists $options{'verbose'}; |
1296
|
|
|
|
|
|
|
|
1297
|
3
|
|
|
|
|
7
|
my @seqnos = (); |
1298
|
3
|
50
|
33
|
|
|
43
|
if ( $seqno_or_alias && ($seqno_or_alias =~ /^\d+$/) ) { |
|
|
50
|
33
|
|
|
|
|
1299
|
0
|
|
|
|
|
0
|
@seqnos = ($seqno_or_alias)} |
1300
|
|
|
|
|
|
|
elsif ( $seqno_or_alias || $options{'force'} ) { |
1301
|
3
|
50
|
|
|
|
22
|
@seqnos = $self->Seqnos('_alias', $seqno_or_alias ? ($seqno_or_alias) : ()); |
1302
|
3
|
50
|
|
|
|
12
|
unless ( @seqnos ) { |
1303
|
0
|
|
|
|
|
0
|
carp("no Seqnos selected by $seqno_or_alias"); |
1304
|
0
|
|
|
|
|
0
|
return 0; |
1305
|
|
|
|
|
|
|
}; |
1306
|
3
|
50
|
33
|
|
|
25
|
unless ( $options{'force'} or (@seqnos == 1) ) { |
1307
|
0
|
|
|
|
|
0
|
carp("Use --force to purge more than one sequence (@seqnos)"); |
1308
|
0
|
|
|
|
|
0
|
return 0; |
1309
|
|
|
|
|
|
|
}; |
1310
|
|
|
|
|
|
|
} |
1311
|
|
|
|
|
|
|
else { |
1312
|
0
|
|
|
|
|
0
|
carp("Use --force to purge the complete database"); |
1313
|
0
|
|
|
|
|
0
|
return 0; |
1314
|
|
|
|
|
|
|
}; |
1315
|
|
|
|
|
|
|
|
1316
|
3
|
50
|
|
|
|
11
|
if ( $options{'force'} ) { |
1317
|
0
|
|
|
|
|
0
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1318
|
|
|
|
|
|
|
DELETE FROM beacons WHERE seqno==?; |
1319
|
|
|
|
|
|
|
XxX |
1320
|
0
|
|
|
|
|
0
|
foreach my $seqno ( @seqnos ) { |
1321
|
0
|
0
|
|
|
|
0
|
$self->stmtExplain($sthexpl, $seqno_or_alias) if $ENV{'DBI_PROFILE'}; |
1322
|
0
|
0
|
|
|
|
0
|
my $rows = $sth->execute($seqno_or_alias) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1323
|
0
|
0
|
|
|
|
0
|
print "INFO: $rows forced for $seqno\n" if $options{'verbose'}; |
1324
|
|
|
|
|
|
|
}; |
1325
|
|
|
|
|
|
|
}; |
1326
|
|
|
|
|
|
|
|
1327
|
3
|
|
|
|
|
17
|
my ($cond, @cval) = SeeAlso::Source::BeaconAggregator::mkConstraint($seqno_or_alias); |
1328
|
3
|
|
|
|
|
16
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1329
|
|
|
|
|
|
|
DELETE FROM repos $cond; |
1330
|
|
|
|
|
|
|
XxX |
1331
|
3
|
50
|
|
|
|
12
|
$self->stmtExplain($sthexpl, @cval) if $ENV{'DBI_PROFILE'}; |
1332
|
3
|
50
|
|
|
|
82491
|
my $rows = $sth->execute(@cval) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1333
|
3
|
50
|
|
|
|
30
|
$rows = 0 if $rows eq "0E0"; |
1334
|
|
|
|
|
|
|
|
1335
|
3
|
50
|
33
|
|
|
18
|
if ( $rows or $options{'force'} ) { |
1336
|
|
|
|
|
|
|
# if ( $options{'force'} ) { |
1337
|
|
|
|
|
|
|
# print "[ANALYZE ..." if $options{'verbose'}; |
1338
|
|
|
|
|
|
|
# $self->{dbh}->do("ANALYZE;"); |
1339
|
|
|
|
|
|
|
# print "]\n" if $options{'verbose'}; |
1340
|
|
|
|
|
|
|
# }; |
1341
|
|
|
|
|
|
|
|
1342
|
3
|
50
|
|
|
|
20
|
if ( $options{'nostat'} ) { # invalidate since they might have changed |
1343
|
0
|
|
|
|
|
0
|
$self->admin('gcounti', undef); |
1344
|
0
|
|
|
|
|
0
|
$self->admin('gcountu', undef); |
1345
|
|
|
|
|
|
|
} |
1346
|
|
|
|
|
|
|
else { |
1347
|
3
|
|
50
|
|
|
39
|
$self->admin('gcounti', $self->idStat(undef, 'distinct' => 0) || 0); |
1348
|
3
|
|
50
|
|
|
28
|
$self->admin('gcountu', $self->idStat(undef, 'distinct' => 1) || 0); |
1349
|
|
|
|
|
|
|
} |
1350
|
|
|
|
|
|
|
}; |
1351
|
|
|
|
|
|
|
|
1352
|
3
|
|
|
|
|
49
|
return $rows; |
1353
|
|
|
|
|
|
|
} |
1354
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
=head3 purge ( $seqno_or_alias[, %options ] ) |
1357
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
Deletes all identifiers from the database to the given pattern, |
1359
|
|
|
|
|
|
|
but leaves the stored header information intact, such that it |
1360
|
|
|
|
|
|
|
can be updated automatically. |
1361
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
=over 8 |
1363
|
|
|
|
|
|
|
|
1364
|
|
|
|
|
|
|
=item $seqno_or_alias |
1365
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
Pattern |
1367
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
=item Supported options: |
1369
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
force => (0|1) |
1371
|
|
|
|
|
|
|
|
1372
|
|
|
|
|
|
|
Allow purging of more than one sequence. |
1373
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
=back |
1375
|
|
|
|
|
|
|
|
1376
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
=cut |
1378
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
sub purge { |
1380
|
2
|
|
|
2
|
1
|
4405
|
my ($self, $seqno_or_alias, %options) = @_; |
1381
|
2
|
50
|
|
|
|
11
|
$options{'verbose'} = $self->{'verbose'} unless exists $options{'verbose'}; |
1382
|
2
|
|
|
|
|
101
|
my @seqnos; |
1383
|
2
|
50
|
33
|
|
|
33
|
if ( $seqno_or_alias && ($seqno_or_alias =~ /^\d+$/) ) { |
|
|
50
|
33
|
|
|
|
|
1384
|
0
|
|
|
|
|
0
|
@seqnos = ($seqno_or_alias)} |
1385
|
|
|
|
|
|
|
elsif ( $seqno_or_alias || $options{'force'} ) { |
1386
|
2
|
50
|
|
|
|
32
|
@seqnos = $self->Seqnos('_alias', $seqno_or_alias ? ($seqno_or_alias) : ()); |
1387
|
2
|
50
|
|
|
|
7
|
unless ( @seqnos ) { |
1388
|
0
|
|
|
|
|
0
|
carp("no Seqnos selected by $seqno_or_alias"); |
1389
|
0
|
|
|
|
|
0
|
return 0; |
1390
|
|
|
|
|
|
|
}; |
1391
|
2
|
50
|
33
|
|
|
16
|
unless ( $options{'force'} or (@seqnos == 1) ) { |
1392
|
0
|
|
|
|
|
0
|
carp("Use --force to purge more than one sequence (@seqnos)"); |
1393
|
0
|
|
|
|
|
0
|
return 0; |
1394
|
|
|
|
|
|
|
}; |
1395
|
|
|
|
|
|
|
} |
1396
|
|
|
|
|
|
|
else { |
1397
|
0
|
|
|
|
|
0
|
carp("Use --force to purge the complete database"); |
1398
|
0
|
|
|
|
|
0
|
return 0; |
1399
|
|
|
|
|
|
|
}; |
1400
|
2
|
|
|
|
|
10
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1401
|
|
|
|
|
|
|
DELETE FROM beacons WHERE seqno==?; |
1402
|
|
|
|
|
|
|
XxX |
1403
|
2
|
|
|
|
|
8
|
my ($usth, $usthexpl) = $self->stmtHdl(<<"XxX"); |
1404
|
|
|
|
|
|
|
UPDATE OR FAIL repos SET counti=?,countu=?,utime=?,ustat=? WHERE seqno==?; |
1405
|
|
|
|
|
|
|
XxX |
1406
|
2
|
|
|
|
|
5
|
my $trows = 0; |
1407
|
2
|
|
|
|
|
4
|
foreach my $seqno ( @seqnos ) { |
1408
|
2
|
50
|
|
|
|
6
|
$self->stmtExplain($sthexpl, $seqno) if $ENV{'DBI_PROFILE'}; |
1409
|
2
|
50
|
|
|
|
42422
|
my $rows = $sth->execute($seqno) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1410
|
2
|
50
|
|
|
|
17
|
$rows = "0" if $rows eq "0E0"; |
1411
|
2
|
50
|
|
|
|
12
|
print "INFO: $rows purged for $seqno\n" if $options{'verbose'}; |
1412
|
2
|
|
|
|
|
6
|
$trows += $rows; |
1413
|
2
|
50
|
|
|
|
12
|
$self->stmtExplain($usthexpl, 0, 0, time, "purged", $seqno) if $ENV{'DBI_PROFILE'}; |
1414
|
|
|
|
|
|
|
$usth->execute(0, 0, time, "purged", $seqno) |
1415
|
2
|
50
|
|
|
|
20066
|
or croak("Could not execute >".$usth->{Statement}."<: ".$usth->errstr); |
1416
|
|
|
|
|
|
|
}; |
1417
|
|
|
|
|
|
|
|
1418
|
2
|
50
|
33
|
|
|
32
|
if ( $trows or $options{'force'} ) { |
1419
|
|
|
|
|
|
|
# if ( $options{'force'} ) { |
1420
|
|
|
|
|
|
|
# print "[ANALYZE ..." if $options{'verbose'}; |
1421
|
|
|
|
|
|
|
# $self->{dbh}->do("ANALYZE;"); |
1422
|
|
|
|
|
|
|
# print "]\n" if $options{'verbose'}; |
1423
|
|
|
|
|
|
|
# }; |
1424
|
|
|
|
|
|
|
|
1425
|
2
|
50
|
|
|
|
14
|
if ( $options{'nostat'} ) { # invalidate since they might have changed |
1426
|
0
|
|
|
|
|
0
|
$self->admin('gcounti', undef); |
1427
|
0
|
|
|
|
|
0
|
$self->admin('gcountu', undef); |
1428
|
|
|
|
|
|
|
} |
1429
|
|
|
|
|
|
|
else { |
1430
|
2
|
|
50
|
|
|
21
|
$self->admin('gcounti', $self->idStat(undef, 'distinct' => 0) || 0); |
1431
|
2
|
|
50
|
|
|
16
|
$self->admin('gcountu', $self->idStat(undef, 'distinct' => 1) || 0); |
1432
|
|
|
|
|
|
|
} |
1433
|
|
|
|
|
|
|
}; |
1434
|
|
|
|
|
|
|
|
1435
|
2
|
|
|
|
|
38
|
return $trows; |
1436
|
|
|
|
|
|
|
} |
1437
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
=head2 Methods for headers |
1440
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
=head3 ($rows, @oldvalues) = headerfield ( $sq_or_alias, $key [, $value] ) |
1442
|
|
|
|
|
|
|
|
1443
|
|
|
|
|
|
|
Gets or sets an meta or admin Entry for the constituent file indicated by $sq_or_alias |
1444
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
=cut |
1446
|
|
|
|
|
|
|
|
1447
|
|
|
|
|
|
|
sub headerfield { |
1448
|
9
|
|
|
9
|
1
|
9295
|
my ($self, $sq_or_alias, $key, $value) = @_; |
1449
|
|
|
|
|
|
|
|
1450
|
9
|
|
|
|
|
13
|
my $dbkey = ""; |
1451
|
9
|
100
|
|
|
|
40
|
if ( $dbkey = SeeAlso::Source::BeaconAggregator->beaconfields($key) ) { |
|
|
50
|
|
|
|
|
|
1452
|
|
|
|
|
|
|
} |
1453
|
|
|
|
|
|
|
elsif ( $key =~ /_(\w+)$/ ) { |
1454
|
5
|
|
|
|
|
14
|
$dbkey = $1} |
1455
|
|
|
|
|
|
|
else { |
1456
|
0
|
|
|
|
|
0
|
carp "Field $key not known"; |
1457
|
0
|
|
|
|
|
0
|
return undef; |
1458
|
|
|
|
|
|
|
}; |
1459
|
|
|
|
|
|
|
|
1460
|
9
|
|
|
|
|
33
|
my ($cond, @cval) = SeeAlso::Source::BeaconAggregator::mkConstraint($sq_or_alias); |
1461
|
|
|
|
|
|
|
|
1462
|
9
|
|
|
|
|
53
|
my ($osth, $osthexpl) = $self->stmtHdl(<<"XxX"); |
1463
|
|
|
|
|
|
|
SELECT $dbkey FROM repos $cond; |
1464
|
|
|
|
|
|
|
XxX |
1465
|
9
|
50
|
|
|
|
24
|
$self->stmtExplain($osthexpl, @cval) if $ENV{'DBI_PROFILE'}; |
1466
|
9
|
50
|
|
|
|
606
|
$osth->execute(@cval) or croak("Could not execute >".$osth->{Statement}."<:".$osth->errstr); |
1467
|
9
|
|
|
|
|
132
|
my $tmpval = $osth->fetchall_arrayref(); |
1468
|
9
|
100
|
|
|
|
22
|
my @oval = map { hEncode($_, $key) } map { (defined $_->[0]) ? ($_->[0]) : () } @$tmpval; |
|
7
|
|
|
|
|
20
|
|
|
9
|
|
|
|
|
34
|
|
1469
|
9
|
|
|
|
|
12
|
my $rows = scalar @oval; |
1470
|
|
|
|
|
|
|
|
1471
|
9
|
100
|
66
|
|
|
48
|
if ( (defined $value) and ($value ne "") ) { # set |
|
|
50
|
|
|
|
|
|
1472
|
3
|
|
|
|
|
18
|
my ($usth, $usthexpl) = $self->stmtHdl(<<"XxX"); |
1473
|
|
|
|
|
|
|
UPDATE OR FAIL repos SET $dbkey=? $cond; |
1474
|
|
|
|
|
|
|
XxX |
1475
|
3
|
|
100
|
|
|
12
|
$value = hDecode($value, $key) || ""; |
1476
|
3
|
50
|
|
|
|
8
|
$self->stmtExplain($usthexpl, $value, @cval) if $ENV{'DBI_PROFILE'}; |
1477
|
3
|
50
|
|
|
|
32698
|
$rows = $usth->execute($value, @cval) or croak("Could not execute >".$usth->{Statement}."<:".$usth->errstr); |
1478
|
|
|
|
|
|
|
} |
1479
|
|
|
|
|
|
|
elsif ( defined $value ) { # clear |
1480
|
0
|
|
|
|
|
0
|
my ($dsth, $dsthexpl) = $self->stmtHdl(<<"XxX"); |
1481
|
|
|
|
|
|
|
UPDATE OR FAIL repos SET $dbkey=? $cond; |
1482
|
|
|
|
|
|
|
XxX |
1483
|
0
|
0
|
|
|
|
0
|
$self->stmtExplain($dsthexpl, undef, @cval) if $ENV{'DBI_PROFILE'}; |
1484
|
0
|
0
|
|
|
|
0
|
$rows = $dsth->execute(undef, @cval) or croak("Could not execute >".$dsth->{Statement}."<:".$dsth->errstr); |
1485
|
|
|
|
|
|
|
} |
1486
|
|
|
|
|
|
|
else { # read |
1487
|
|
|
|
|
|
|
} |
1488
|
|
|
|
|
|
|
|
1489
|
9
|
|
|
|
|
75
|
return ($rows, @oval); |
1490
|
|
|
|
|
|
|
} |
1491
|
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
|
=head3 ($resultref, $metaref) = headers ( [ $seqno_or_alias ] ) |
1493
|
|
|
|
|
|
|
|
1494
|
|
|
|
|
|
|
Iterates over all |
1495
|
|
|
|
|
|
|
|
1496
|
|
|
|
|
|
|
For each iteration returns two hash references: |
1497
|
|
|
|
|
|
|
|
1498
|
|
|
|
|
|
|
=over 8 |
1499
|
|
|
|
|
|
|
|
1500
|
|
|
|
|
|
|
=item 1 |
1501
|
|
|
|
|
|
|
all official beacon fields |
1502
|
|
|
|
|
|
|
|
1503
|
|
|
|
|
|
|
=item 2 |
1504
|
|
|
|
|
|
|
all administrative fields (_alias, ...) |
1505
|
|
|
|
|
|
|
|
1506
|
|
|
|
|
|
|
=back |
1507
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
=cut |
1509
|
|
|
|
|
|
|
|
1510
|
|
|
|
|
|
|
sub headers { |
1511
|
5
|
|
|
5
|
1
|
7167
|
my ($self, $seqno_or_alias) = @_; |
1512
|
|
|
|
|
|
|
|
1513
|
5
|
100
|
|
|
|
12
|
unless ( $self->{_iterator_info} ) { |
1514
|
1
|
|
|
|
|
4
|
my ($constraint, @cval) = SeeAlso::Source::BeaconAggregator::mkConstraint($seqno_or_alias); |
1515
|
1
|
|
|
|
|
5
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1516
|
|
|
|
|
|
|
SELECT * FROM repos $constraint; |
1517
|
|
|
|
|
|
|
XxX |
1518
|
1
|
50
|
|
|
|
5
|
$self->stmtExplain($sthexpl, @cval) if $ENV{'DBI_PROFILE'}; |
1519
|
1
|
50
|
|
|
|
47
|
$sth->execute(@cval) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1520
|
1
|
|
|
|
|
6
|
$self->{_iterator_info} = $sth; |
1521
|
|
|
|
|
|
|
}; |
1522
|
|
|
|
|
|
|
|
1523
|
5
|
|
|
|
|
209
|
my $info = $self->{_iterator_info}->fetchrow_hashref; |
1524
|
5
|
100
|
|
|
|
21
|
unless ( defined $info ) { |
1525
|
1
|
50
|
|
|
|
6
|
croak("Error listing Collections: $self->{_iterator_info}->errstr") if $self->{_iterator_info}->err; |
1526
|
1
|
|
|
|
|
1
|
delete $self->{_iterator_info}; |
1527
|
1
|
|
|
|
|
3
|
return undef; |
1528
|
|
|
|
|
|
|
} |
1529
|
|
|
|
|
|
|
|
1530
|
4
|
|
33
|
|
|
9
|
my $collno = $info->{seqno} || $seqno_or_alias; |
1531
|
4
|
|
|
|
|
9
|
my %meta = (_seqno => $collno); |
1532
|
4
|
|
|
|
|
5
|
my %result = (); |
1533
|
4
|
|
|
|
|
13
|
while ( my($key, $val) = each %$info ) { |
1534
|
136
|
100
|
|
|
|
218
|
next unless defined $val; |
1535
|
71
|
|
|
|
|
68
|
my $pval = hEncode($val, $key); |
1536
|
|
|
|
|
|
|
|
1537
|
71
|
100
|
|
|
|
109
|
if ( $key =~ /^bc(\w+)$/ ) { |
1538
|
25
|
|
|
|
|
85
|
$result{$1} = $pval} |
1539
|
|
|
|
|
|
|
else { |
1540
|
46
|
|
|
|
|
135
|
$meta{"_$key"} = $pval}; |
1541
|
|
|
|
|
|
|
} |
1542
|
4
|
|
|
|
|
24
|
return \%result, \%meta; |
1543
|
|
|
|
|
|
|
} |
1544
|
|
|
|
|
|
|
|
1545
|
|
|
|
|
|
|
=head3 listCollections ( [ $seqno_or_alias ] ) |
1546
|
|
|
|
|
|
|
|
1547
|
|
|
|
|
|
|
Iterates over all Sequences and returns on each call an array of |
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
Seqno, Alias, Uri, Modification time, Identifier Count and Unique identifier count |
1550
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
Returns undef if done. |
1552
|
|
|
|
|
|
|
|
1553
|
|
|
|
|
|
|
=cut |
1554
|
|
|
|
|
|
|
|
1555
|
|
|
|
|
|
|
sub listCollections { |
1556
|
28
|
|
|
28
|
1
|
14751
|
my ($self, $seqno_or_alias) = @_; |
1557
|
|
|
|
|
|
|
|
1558
|
28
|
100
|
|
|
|
73
|
unless ( $self->{_iterator_listCollections} ) { |
1559
|
7
|
|
|
|
|
26
|
my ($constraint, @cval) = SeeAlso::Source::BeaconAggregator::mkConstraint($seqno_or_alias); |
1560
|
7
|
|
|
|
|
33
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1561
|
|
|
|
|
|
|
SELECT seqno, alias, uri, mtime, counti, countu FROM repos $constraint; |
1562
|
|
|
|
|
|
|
XxX |
1563
|
7
|
50
|
|
|
|
32
|
$self->stmtExplain($sthexpl, @cval) if $ENV{'DBI_PROFILE'}; |
1564
|
7
|
50
|
|
|
|
433
|
$sth->execute(@cval) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1565
|
7
|
|
|
|
|
24
|
$self->{_iterator_listCollections} = $sth; |
1566
|
|
|
|
|
|
|
}; |
1567
|
28
|
|
|
|
|
268
|
my $onerow = $self->{_iterator_listCollections}->fetchrow_arrayref; |
1568
|
28
|
100
|
|
|
|
63
|
unless ( $onerow ) { |
1569
|
7
|
50
|
|
|
|
57
|
croak("Error listing Collections: $self->{_iterator_listCollections}->errstr") if $self->{_iterator_listCollections}->err; |
1570
|
7
|
|
|
|
|
11
|
delete $self->{_iterator_listCollections}; |
1571
|
7
|
|
|
|
|
19
|
return (); |
1572
|
|
|
|
|
|
|
}; |
1573
|
21
|
|
|
|
|
81
|
return @$onerow; |
1574
|
|
|
|
|
|
|
} |
1575
|
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
|
=head2 Statistics |
1577
|
|
|
|
|
|
|
|
1578
|
|
|
|
|
|
|
=head3 idStat ( [ $seqno_or_alias, %options ] ) |
1579
|
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
|
Count identifiers for the given pattern. |
1581
|
|
|
|
|
|
|
|
1582
|
|
|
|
|
|
|
=over 8 |
1583
|
|
|
|
|
|
|
|
1584
|
|
|
|
|
|
|
=item Supported options: |
1585
|
|
|
|
|
|
|
|
1586
|
|
|
|
|
|
|
distinct => (0|1) |
1587
|
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
|
Count multiple occurences only once |
1589
|
|
|
|
|
|
|
|
1590
|
|
|
|
|
|
|
verbose => (0|1) |
1591
|
|
|
|
|
|
|
|
1592
|
|
|
|
|
|
|
=back |
1593
|
|
|
|
|
|
|
|
1594
|
|
|
|
|
|
|
|
1595
|
|
|
|
|
|
|
=cut |
1596
|
|
|
|
|
|
|
|
1597
|
|
|
|
|
|
|
sub idStat { |
1598
|
61
|
|
|
61
|
1
|
9076
|
my ($self, $seqno_or_alias, %options) = @_; |
1599
|
61
|
50
|
|
|
|
248
|
$options{'verbose'} = $self->{'verbose'} unless exists $options{'verbose'}; |
1600
|
61
|
|
|
|
|
96
|
my $cond = ""; |
1601
|
61
|
100
|
100
|
|
|
369
|
if ( $seqno_or_alias && ($seqno_or_alias =~ /^\d+$/) ) { |
|
|
100
|
|
|
|
|
|
1602
|
17
|
|
|
|
|
32
|
$cond = "WHERE seqno==$seqno_or_alias"} |
1603
|
|
|
|
|
|
|
elsif ( $seqno_or_alias ) { |
1604
|
4
|
|
|
|
|
30
|
my @seqnos = $self->Seqnos('_alias', $seqno_or_alias); |
1605
|
4
|
50
|
|
|
|
10
|
if ( @seqnos ) { |
1606
|
4
|
|
|
|
|
19
|
$cond = "WHERE seqno IN (".join(",", @seqnos).")"} |
1607
|
|
|
|
|
|
|
else { |
1608
|
0
|
|
|
|
|
0
|
carp("no Seqnos selected by $seqno_or_alias"); |
1609
|
0
|
|
|
|
|
0
|
return 0; |
1610
|
|
|
|
|
|
|
}; |
1611
|
|
|
|
|
|
|
}; |
1612
|
|
|
|
|
|
|
# my $count_what = $options{'distinct'} ? "DISTINCT hash" : "*"; |
1613
|
|
|
|
|
|
|
# will not be optimized by SQLite or mySQL: SELECT COUNT($count_what) FROM beacons $cond; |
1614
|
|
|
|
|
|
|
# my $sth= $self->stmtHdl("SELECT COUNT($count_what) FROM beacons $cond LIMIT 1;"); |
1615
|
61
|
100
|
|
|
|
213
|
my $from = $options{'distinct'} ? "(SELECT DISTINCT hash FROM beacons $cond)" |
1616
|
|
|
|
|
|
|
: "beacons $cond"; |
1617
|
61
|
|
|
|
|
389
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1618
|
|
|
|
|
|
|
SELECT COUNT(*) FROM $from LIMIT 1; |
1619
|
|
|
|
|
|
|
XxX |
1620
|
61
|
50
|
|
|
|
168
|
$self->stmtExplain($sthexpl) if $ENV{'DBI_PROFILE'}; |
1621
|
61
|
50
|
|
|
|
3456
|
$sth->execute() or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1622
|
61
|
|
|
|
|
652
|
my $hits = $sth->fetchrow_arrayref; |
1623
|
|
|
|
|
|
|
|
1624
|
61
|
|
50
|
|
|
418
|
return $hits->[0] || 0; |
1625
|
|
|
|
|
|
|
}; |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
|
1628
|
|
|
|
|
|
|
=head3 idCounts ( [ $pattern, %options ] ) |
1629
|
|
|
|
|
|
|
|
1630
|
|
|
|
|
|
|
Iterates through the entries according to the optional id filter expression. |
1631
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
For each iteration the call returns a triple consisting of (identifier, |
1633
|
|
|
|
|
|
|
number of rows, and sum of all individual counts). |
1634
|
|
|
|
|
|
|
|
1635
|
|
|
|
|
|
|
=over 8 |
1636
|
|
|
|
|
|
|
|
1637
|
|
|
|
|
|
|
=item Supported options: |
1638
|
|
|
|
|
|
|
|
1639
|
|
|
|
|
|
|
distinct => (0|1) |
1640
|
|
|
|
|
|
|
|
1641
|
|
|
|
|
|
|
Count multiple occurences in one beacon file only once. |
1642
|
|
|
|
|
|
|
|
1643
|
|
|
|
|
|
|
=back |
1644
|
|
|
|
|
|
|
|
1645
|
|
|
|
|
|
|
=cut |
1646
|
|
|
|
|
|
|
|
1647
|
|
|
|
|
|
|
sub idCounts { |
1648
|
64
|
|
|
64
|
1
|
48054
|
my ($self, $pattern, %options) = @_; |
1649
|
64
|
50
|
|
|
|
112
|
my $cond = $pattern ? qq!WHERE hash LIKE "$pattern"! : ""; |
1650
|
64
|
100
|
|
|
|
96
|
my $count_what = $options{'distinct'} ? "DISTINCT seqno" : "seqno"; |
1651
|
64
|
100
|
|
|
|
118
|
unless ( $self->{_iterator_idCounts} ) { |
1652
|
8
|
|
|
|
|
37
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1653
|
|
|
|
|
|
|
SELECT hash, COUNT($count_what), SUM(hits) FROM beacons $cond GROUP BY hash ORDER BY hash; |
1654
|
|
|
|
|
|
|
XxX |
1655
|
8
|
50
|
|
|
|
23
|
$self->stmtExplain($sthexpl) if $ENV{'DBI_PROFILE'}; |
1656
|
8
|
50
|
|
|
|
743
|
$sth->execute() or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1657
|
8
|
|
|
|
|
23
|
$self->{_iterator_idCounts} = $sth; |
1658
|
8
|
100
|
|
|
|
25
|
unless ( defined $self->{identifierClass} ) { |
1659
|
2
|
|
|
|
|
10
|
my $package = $self->autoIdentifier(); |
1660
|
2
|
50
|
33
|
|
|
7
|
$options{'verbose'} && ref($package) && carp "Assuming identifiers of type ".ref($package)."\n"; |
1661
|
|
|
|
|
|
|
} |
1662
|
|
|
|
|
|
|
}; |
1663
|
64
|
|
|
|
|
1012
|
my $onerow = $self->{_iterator_idCounts}->fetchrow_arrayref; |
1664
|
64
|
100
|
|
|
|
114
|
unless ( $onerow ) { |
1665
|
8
|
50
|
|
|
|
51
|
croak("Error listing Collections: $self->{_iterator_idCounts}->errstr") if $self->{_iterator_idCounts}->err; |
1666
|
8
|
|
|
|
|
12
|
delete $self->{_iterator_idCounts}; |
1667
|
8
|
|
|
|
|
24
|
return (); |
1668
|
|
|
|
|
|
|
}; |
1669
|
56
|
100
|
|
|
|
105
|
if ( defined $self->{identifierClass} ) { |
1670
|
42
|
|
|
|
|
39
|
my $c = $self->{identifierClass}; |
1671
|
|
|
|
|
|
|
# compat: hash might not take an argument, must resort to value, has to be cleared before... |
1672
|
42
|
|
|
|
|
103
|
$c->value(""); |
1673
|
42
|
|
100
|
|
|
1230
|
my $did = $c->hash($onerow->[0]) || $c->value($onerow->[0]); |
1674
|
42
|
100
|
|
|
|
2011
|
$onerow->[0] = $c->can("pretty") ? $c->pretty() : $c->value(); |
1675
|
|
|
|
|
|
|
}; |
1676
|
56
|
|
|
|
|
465
|
return @$onerow; |
1677
|
|
|
|
|
|
|
}; |
1678
|
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
|
|
1680
|
|
|
|
|
|
|
=head3 idList ( [ $pattern ] ) |
1681
|
|
|
|
|
|
|
|
1682
|
|
|
|
|
|
|
Iterates through the entries according to the optional selection. |
1683
|
|
|
|
|
|
|
|
1684
|
|
|
|
|
|
|
For each iteration the call returns a tuple consisting of identifier and an |
1685
|
|
|
|
|
|
|
list of array references (Seqno, Hits, Info, explicit Link, AltId) or the emtpy list |
1686
|
|
|
|
|
|
|
if finished. |
1687
|
|
|
|
|
|
|
|
1688
|
|
|
|
|
|
|
Hits, Info, Link and AltId are normalized to the empty string if undefined (or < 2 for hits). |
1689
|
|
|
|
|
|
|
|
1690
|
|
|
|
|
|
|
It is important to finish all iterations before calling this method for "new" arguments: |
1691
|
|
|
|
|
|
|
|
1692
|
|
|
|
|
|
|
1 while $db->idList(); # flush pending results |
1693
|
|
|
|
|
|
|
|
1694
|
|
|
|
|
|
|
=cut |
1695
|
|
|
|
|
|
|
|
1696
|
|
|
|
|
|
|
sub idList { |
1697
|
44
|
|
|
44
|
1
|
87111
|
my ($self, $pattern) = @_; |
1698
|
44
|
50
|
|
|
|
110
|
my $cond = $pattern ? ($pattern =~ /%/ ? "WHERE hash LIKE ?" : qq"WHERE hash=?") |
|
|
100
|
|
|
|
|
|
1699
|
|
|
|
|
|
|
: ""; |
1700
|
44
|
100
|
|
|
|
95
|
unless ( $self->{_iterator_idList_handle} ) { |
1701
|
8
|
|
|
|
|
41
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1702
|
|
|
|
|
|
|
SELECT hash, seqno, hits, info, link, altid FROM beacons $cond ORDER BY hash, seqno, altid; |
1703
|
|
|
|
|
|
|
XxX |
1704
|
8
|
0
|
|
|
|
24
|
$self->stmtExplain($sthexpl, ($pattern ? ($pattern) : () )) if $ENV{'DBI_PROFILE'}; |
|
|
50
|
|
|
|
|
|
1705
|
8
|
100
|
|
|
|
691
|
$sth->execute(($pattern ? ($pattern) : () )) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
|
|
50
|
|
|
|
|
|
1706
|
8
|
|
|
|
|
22
|
$self->{_iterator_idList_handle} = $sth; |
1707
|
8
|
|
|
|
|
49
|
$self->{_iterator_idList_crosscheck} = $self->RepoCols("ALTTARGET"); |
1708
|
8
|
|
|
|
|
12
|
$self->{_iterator_idList_prefetch} = undef; |
1709
|
8
|
100
|
|
|
|
28
|
$self->autoIdentifier() unless defined $self->{identifierClass}; |
1710
|
|
|
|
|
|
|
}; |
1711
|
44
|
100
|
|
|
|
79
|
unless ( exists $self->{_iterator_idList_prefetch} ) { # deferred exit |
1712
|
8
|
|
|
|
|
13
|
delete $self->{_iterator_idList_handle}; |
1713
|
8
|
|
|
|
|
18
|
delete $self->{_iterator_idList_crosscheck}; |
1714
|
8
|
|
|
|
|
17
|
return (); |
1715
|
|
|
|
|
|
|
}; |
1716
|
36
|
|
|
|
|
36
|
my $pf = $self->{_iterator_idList_prefetch}; |
1717
|
36
|
|
|
|
|
379
|
while ( my $onerow = $self->{_iterator_idList_handle}->fetchrow_arrayref ) { |
1718
|
|
|
|
|
|
|
# $onerow->[2] = "" unless $self->{_iterator_idList_crosscheck}->{$onerow->[1]}; # kill artefacts |
1719
|
48
|
100
|
|
|
|
96
|
$onerow->[2] = "" unless $onerow->[2]; # kill artefacts |
1720
|
48
|
100
|
|
|
|
66
|
$onerow->[3] = "" unless defined $onerow->[3]; # kill artefacts |
1721
|
48
|
100
|
|
|
|
74
|
$onerow->[4] = "" unless defined $onerow->[4]; # kill artefacts |
1722
|
48
|
50
|
|
|
|
62
|
$onerow->[5] = "" unless defined $onerow->[5]; # kill artefacts |
1723
|
48
|
100
|
|
|
|
113
|
if ( defined $self->{identifierClass} ) { |
1724
|
36
|
|
|
|
|
32
|
my $c = $self->{identifierClass}; |
1725
|
|
|
|
|
|
|
# compat: hash might not take an argument, must resort to value, has to be cleared before... |
1726
|
36
|
|
|
|
|
72
|
$c->value(""); |
1727
|
36
|
|
100
|
|
|
921
|
my $did = $c->hash($onerow->[0]) || $c->value($onerow->[0]); |
1728
|
36
|
100
|
|
|
|
1382
|
$onerow->[0] = $c->can("pretty") ? $c->pretty() : $c->value(); |
1729
|
|
|
|
|
|
|
}; |
1730
|
48
|
100
|
|
|
|
318
|
if ( $pf ) { |
1731
|
40
|
100
|
|
|
|
95
|
if ( $pf->[0] eq $onerow->[0] ) { |
1732
|
12
|
|
|
|
|
47
|
push(@$pf, [@$onerow[1..@$onerow-1]]); |
1733
|
12
|
|
|
|
|
190
|
next; |
1734
|
|
|
|
|
|
|
} |
1735
|
|
|
|
|
|
|
else { |
1736
|
28
|
|
|
|
|
89
|
$self->{_iterator_idList_prefetch} = [$onerow->[0], [@$onerow[1..@$onerow-1]]]; |
1737
|
28
|
|
|
|
|
106
|
return @$pf; |
1738
|
|
|
|
|
|
|
} |
1739
|
|
|
|
|
|
|
} |
1740
|
|
|
|
|
|
|
else { |
1741
|
8
|
|
|
|
|
148
|
$pf = [$onerow->[0], [@$onerow[1..@$onerow-1]]]}; |
1742
|
|
|
|
|
|
|
}; |
1743
|
|
|
|
|
|
|
|
1744
|
8
|
50
|
|
|
|
45
|
if ( $self->{_iterator_idList_handle}->err ) { |
1745
|
0
|
|
|
|
|
0
|
croak("Error listing Collections: $self->{_iterator_idList_handle}->errstr"); |
1746
|
|
|
|
|
|
|
}; |
1747
|
8
|
|
|
|
|
12
|
delete $self->{_iterator_idList_prefetch}; |
1748
|
8
|
50
|
|
|
|
30
|
return $pf ? @$pf : (); |
1749
|
|
|
|
|
|
|
}; |
1750
|
|
|
|
|
|
|
|
1751
|
|
|
|
|
|
|
|
1752
|
|
|
|
|
|
|
=head2 Manipulation of global metadata: Open Search Description |
1753
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
=head3 setOSD ( $field, @values } |
1755
|
|
|
|
|
|
|
|
1756
|
|
|
|
|
|
|
Sets the field $field of the OpenSearchDescription to @value(s). |
1757
|
|
|
|
|
|
|
|
1758
|
|
|
|
|
|
|
=cut |
1759
|
|
|
|
|
|
|
|
1760
|
|
|
|
|
|
|
sub setOSD { |
1761
|
5
|
|
|
5
|
1
|
1816
|
my ($self) = shift; |
1762
|
5
|
100
|
|
|
|
9
|
$self->clearOSD($_[0]) or return undef; |
1763
|
3
|
100
|
|
|
|
12
|
return (defined $_[1]) ? $self->addOSD(@_) : 0; # value(s) to set |
1764
|
|
|
|
|
|
|
}; |
1765
|
|
|
|
|
|
|
|
1766
|
|
|
|
|
|
|
=head3 clearOSD ( $field } |
1767
|
|
|
|
|
|
|
|
1768
|
|
|
|
|
|
|
Clears the field $field of the OpenSearchDescription. |
1769
|
|
|
|
|
|
|
|
1770
|
|
|
|
|
|
|
=cut |
1771
|
|
|
|
|
|
|
|
1772
|
|
|
|
|
|
|
sub clearOSD { |
1773
|
8
|
|
|
8
|
1
|
2604
|
my ($self, $field) = @_; |
1774
|
8
|
100
|
|
|
|
201
|
$field || (carp("no OSD field name provided"), return undef); |
1775
|
6
|
100
|
|
|
|
24
|
defined $self->osdKeys($field) || (carp("no valid OSD field '$field'"), return undef); |
1776
|
4
|
|
|
|
|
11
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1777
|
|
|
|
|
|
|
DELETE FROM osd WHERE key=?; |
1778
|
|
|
|
|
|
|
XxX |
1779
|
4
|
50
|
|
|
|
9
|
$self->stmtExplain($sthexpl, $field) if $ENV{'DBI_PROFILE'}; |
1780
|
4
|
50
|
|
|
|
28343
|
$sth->execute($field) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1781
|
4
|
|
|
|
|
18
|
return 1; |
1782
|
|
|
|
|
|
|
} |
1783
|
|
|
|
|
|
|
|
1784
|
|
|
|
|
|
|
=head3 addOSD ( $field, @values } |
1785
|
|
|
|
|
|
|
|
1786
|
|
|
|
|
|
|
Adds more @value(s) as (repeatable) field $field of the OpenSearchDescription. |
1787
|
|
|
|
|
|
|
|
1788
|
|
|
|
|
|
|
=cut |
1789
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
sub addOSD { |
1791
|
10
|
|
|
10
|
1
|
2540
|
my ($self, $field, @values) = @_; |
1792
|
10
|
100
|
|
|
|
107
|
$field || (carp("no OSD field name provided"), return undef); |
1793
|
9
|
100
|
|
|
|
19
|
return 0 unless @values; |
1794
|
8
|
100
|
|
|
|
24
|
defined $self->osdKeys($field) || (carp("no valid OSD field '$field'"), return undef); |
1795
|
7
|
|
|
|
|
16
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1796
|
|
|
|
|
|
|
INSERT INTO osd ( key, val ) VALUES ( ?, ? ); |
1797
|
|
|
|
|
|
|
XxX |
1798
|
7
|
50
|
|
|
|
13
|
$self->stmtExplain($sthexpl, $field, $values[0]) if $ENV{'DBI_PROFILE'}; |
1799
|
7
|
|
|
|
|
8
|
my $tstatus = []; |
1800
|
7
|
50
|
|
|
|
31
|
my $tuples = $sth->execute_array({ArrayTupleStatus => $tstatus}, $field, \@values) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1801
|
7
|
|
|
|
|
269458
|
return $tuples; |
1802
|
|
|
|
|
|
|
} |
1803
|
|
|
|
|
|
|
|
1804
|
|
|
|
|
|
|
=head2 Manipulation of global metadata: Beacon Metadata |
1805
|
|
|
|
|
|
|
|
1806
|
|
|
|
|
|
|
These headers are used when you will be publishing a beacon file for the collection. |
1807
|
|
|
|
|
|
|
|
1808
|
|
|
|
|
|
|
=head3 setBeaconMeta ( $field, $value ) |
1809
|
|
|
|
|
|
|
|
1810
|
|
|
|
|
|
|
Sets the field $field of the Beacon meta table (used to generate a BEACON file for this |
1811
|
|
|
|
|
|
|
service) to $value. |
1812
|
|
|
|
|
|
|
|
1813
|
|
|
|
|
|
|
=cut |
1814
|
|
|
|
|
|
|
|
1815
|
|
|
|
|
|
|
sub setBeaconMeta { |
1816
|
4
|
|
|
4
|
1
|
531
|
my ($self) = shift; |
1817
|
4
|
100
|
|
|
|
10
|
$self->clearBeaconMeta(@_) or return undef; |
1818
|
2
|
100
|
|
|
|
11
|
return (defined $_[1]) ? $self->addBeaconMeta(@_) : 0; # value to set |
1819
|
|
|
|
|
|
|
}; |
1820
|
|
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
=head3 clearBeaconMeta ( $field } |
1822
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
Deletes the field $field of the Beacon meta table. |
1824
|
|
|
|
|
|
|
|
1825
|
|
|
|
|
|
|
=cut |
1826
|
|
|
|
|
|
|
|
1827
|
|
|
|
|
|
|
sub clearBeaconMeta { |
1828
|
7
|
|
|
7
|
1
|
669
|
my ($self, $rfield) = @_; |
1829
|
7
|
100
|
|
|
|
227
|
$rfield || (carp("no Beacon field name provided"), return undef); |
1830
|
5
|
100
|
|
|
|
26
|
my $field = $self->beaconfields($rfield) or (carp("no valid Beacon field '$rfield'"), return undef); |
1831
|
3
|
|
|
|
|
17
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1832
|
|
|
|
|
|
|
DELETE FROM osd WHERE key=?; |
1833
|
|
|
|
|
|
|
XxX |
1834
|
3
|
50
|
|
|
|
8
|
$self->stmtExplain($sthexpl, $field) if $ENV{'DBI_PROFILE'}; |
1835
|
3
|
50
|
|
|
|
35953
|
$sth->execute($field) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1836
|
3
|
|
|
|
|
55
|
return 1; |
1837
|
|
|
|
|
|
|
} |
1838
|
|
|
|
|
|
|
|
1839
|
|
|
|
|
|
|
=head3 addBeaconMeta ( $field, $value ) |
1840
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
Appends $value to the field $field of the BEACON meta table |
1842
|
|
|
|
|
|
|
|
1843
|
|
|
|
|
|
|
=cut |
1844
|
|
|
|
|
|
|
sub addBeaconMeta { |
1845
|
7
|
|
|
7
|
1
|
359
|
my ($self, $rfield, $value) = @_; |
1846
|
7
|
100
|
|
|
|
107
|
$rfield || (carp("no Beacon field name provided"), return undef); |
1847
|
6
|
100
|
|
|
|
32
|
my $field = $self->beaconfields($rfield) or (carp("no valid Beacon field '$rfield'"), return undef); |
1848
|
5
|
|
|
|
|
20
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1849
|
|
|
|
|
|
|
INSERT INTO osd ( key, val ) VALUES ( ?, ? ); |
1850
|
|
|
|
|
|
|
XxX |
1851
|
5
|
50
|
|
|
|
24
|
$self->stmtExplain($sthexpl, $field, $value) if $ENV{'DBI_PROFILE'}; |
1852
|
5
|
50
|
|
|
|
66570
|
$sth->execute($field, $value) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1853
|
5
|
|
|
|
|
127
|
return 1; |
1854
|
|
|
|
|
|
|
} |
1855
|
|
|
|
|
|
|
|
1856
|
|
|
|
|
|
|
=head3 admin ( [$field, [$value]] ) |
1857
|
|
|
|
|
|
|
|
1858
|
|
|
|
|
|
|
Manipulates the admin table. |
1859
|
|
|
|
|
|
|
|
1860
|
|
|
|
|
|
|
Yields a hashref to the admin table if called without arguments. |
1861
|
|
|
|
|
|
|
|
1862
|
|
|
|
|
|
|
If called with $field, returns the current value, and sets the |
1863
|
|
|
|
|
|
|
table entry to $value if defined. |
1864
|
|
|
|
|
|
|
|
1865
|
|
|
|
|
|
|
|
1866
|
|
|
|
|
|
|
=cut |
1867
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
sub admin { |
1869
|
33
|
|
|
33
|
1
|
2001
|
my ($self, $field, $value) = @_; |
1870
|
33
|
|
|
|
|
141
|
my $admref = $self->admhash(); |
1871
|
33
|
100
|
|
|
|
67
|
return $admref unless $field; |
1872
|
31
|
|
|
|
|
60
|
my $retval = $admref->{$field}; |
1873
|
31
|
100
|
|
|
|
108
|
return $retval unless defined $value; |
1874
|
|
|
|
|
|
|
|
1875
|
30
|
|
|
|
|
87
|
my ($admh, $admexpl) = $self->stmtHdl("INSERT OR REPLACE INTO admin VALUES (?, ?);"); |
1876
|
30
|
50
|
|
|
|
66
|
$self->stmtExplain($admexpl, $field, $value) if $ENV{'DBI_PROFILE'}; |
1877
|
30
|
50
|
|
|
|
407940
|
$admh->execute($field, $value) |
1878
|
|
|
|
|
|
|
or croak("Could not execute update to admin table: ".$admh->errstr); |
1879
|
30
|
100
|
|
|
|
398
|
return defined($retval) ? $retval : ""; |
1880
|
|
|
|
|
|
|
} |
1881
|
|
|
|
|
|
|
|
1882
|
|
|
|
|
|
|
|
1883
|
|
|
|
|
|
|
# on-the-fly conversions |
1884
|
|
|
|
|
|
|
|
1885
|
|
|
|
|
|
|
sub hDecode { # external time to numeric timestamp, printf placeholders |
1886
|
20
|
|
|
20
|
0
|
28
|
my ($val, $fnam) = @_; |
1887
|
20
|
50
|
|
|
|
36
|
return $val unless $fnam; |
1888
|
20
|
100
|
|
|
|
58
|
local($_) = (ref $val) ? $val->{$fnam} : $val; |
1889
|
20
|
50
|
|
|
|
37
|
return undef unless defined $_; |
1890
|
|
|
|
|
|
|
|
1891
|
20
|
100
|
|
|
|
99
|
if ( $fnam =~ /target$/i ) { s/%/%%/g; s/(\{id\}|\$PND)/%1\$s/gi; s/(\{altid\}|\$PND)/%2\$s/gi; } |
|
16
|
50
|
|
|
|
46
|
|
|
16
|
100
|
|
|
|
89
|
|
|
16
|
|
|
|
|
92
|
|
1892
|
0
|
|
|
|
|
0
|
elsif ( $fnam =~ /message$/i ) { s/%/%%/g; s/\{hits?\}/%s/gi; } |
|
0
|
|
|
|
|
0
|
|
1893
|
|
|
|
|
|
|
elsif ( $fnam =~ /time|revisit/i ) { |
1894
|
2
|
100
|
|
|
|
15
|
if ( /^\d+$/ ) { # legacy UNIX timestamp |
|
|
50
|
|
|
|
|
|
1895
|
|
|
|
|
|
|
} |
1896
|
|
|
|
|
|
|
elsif ( my $p = HTTP::Date::str2time($_, "GMT") ) { # all unqualified times are GMT |
1897
|
1
|
|
|
|
|
135
|
$_ = $p} |
1898
|
|
|
|
|
|
|
else { |
1899
|
0
|
|
|
|
|
0
|
carp("could not parse value '$_' as time in field $fnam"); |
1900
|
0
|
|
|
|
|
0
|
return undef; |
1901
|
|
|
|
|
|
|
}; |
1902
|
|
|
|
|
|
|
} |
1903
|
20
|
|
|
|
|
48
|
return $_; |
1904
|
|
|
|
|
|
|
} |
1905
|
|
|
|
|
|
|
|
1906
|
|
|
|
|
|
|
sub hEncode { # timestamp to beacon format |
1907
|
79
|
|
|
79
|
0
|
69
|
my ($val, $fnam) = @_; |
1908
|
79
|
50
|
|
|
|
109
|
local($_) = (ref $val) ? $val->{$fnam} : $val; |
1909
|
79
|
50
|
|
|
|
96
|
return undef unless defined $_; |
1910
|
79
|
100
|
|
|
|
360
|
if ( $fnam =~ /time|revisit/i ) { $_ = SeeAlso::Source::BeaconAggregator::tToISO($_) } |
|
15
|
100
|
|
|
|
56
|
|
|
|
100
|
|
|
|
|
|
1911
|
2
|
|
|
|
|
6
|
elsif ( $fnam =~ /message/i ) { s/%s/{hits}/; s/%%/%/g; } |
|
2
|
|
|
|
|
4
|
|
1912
|
6
|
|
|
|
|
12
|
elsif ( $fnam =~ /target/i ) { s/%s/{ID}/; s/%1\$s/{ID}/; s/%2\$s/{ALTID}/; s/%%/%/g; }; |
|
6
|
|
|
|
|
15
|
|
|
6
|
|
|
|
|
13
|
|
|
6
|
|
|
|
|
10
|
|
1913
|
79
|
|
|
|
|
118
|
return $_; |
1914
|
|
|
|
|
|
|
} |
1915
|
|
|
|
|
|
|
|
1916
|
|
|
|
|
|
|
=head1 AUTHOR |
1917
|
|
|
|
|
|
|
|
1918
|
|
|
|
|
|
|
Thomas Berger |
1919
|
|
|
|
|
|
|
CPAN ID: THB |
1920
|
|
|
|
|
|
|
gymel.com |
1921
|
|
|
|
|
|
|
THB@cpan.org |
1922
|
|
|
|
|
|
|
|
1923
|
|
|
|
|
|
|
=head1 COPYRIGHT |
1924
|
|
|
|
|
|
|
|
1925
|
|
|
|
|
|
|
This program is free software; you can redistribute |
1926
|
|
|
|
|
|
|
it and/or modify it under the same terms as Perl itself. |
1927
|
|
|
|
|
|
|
|
1928
|
|
|
|
|
|
|
The full text of the license can be found in the |
1929
|
|
|
|
|
|
|
LICENSE file included with this module. |
1930
|
|
|
|
|
|
|
|
1931
|
|
|
|
|
|
|
=cut |
1932
|
|
|
|
|
|
|
|
1933
|
|
|
|
|
|
|
1; |
1934
|
|
|
|
|
|
|
|