line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package SeeAlso::Source::BeaconAggregator::Maintenance; |
2
|
10
|
|
|
10
|
|
165616
|
use strict; |
|
10
|
|
|
|
|
16
|
|
|
10
|
|
|
|
|
274
|
|
3
|
10
|
|
|
10
|
|
30
|
use warnings; |
|
10
|
|
|
|
|
9
|
|
|
10
|
|
|
|
|
212
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
BEGIN { |
6
|
10
|
|
|
10
|
|
27
|
use Exporter (); |
|
10
|
|
|
|
|
9
|
|
|
10
|
|
|
|
|
174
|
|
7
|
10
|
|
|
10
|
|
32
|
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); |
|
10
|
|
|
|
|
11
|
|
|
10
|
|
|
|
|
862
|
|
8
|
10
|
|
|
10
|
|
16
|
$VERSION = '0.2_90'; |
9
|
10
|
|
|
|
|
62
|
@ISA = qw(Exporter); |
10
|
|
|
|
|
|
|
#Give a hoot don't pollute, do not export more than needed by default |
11
|
10
|
|
|
|
|
13
|
@EXPORT = qw(); |
12
|
10
|
|
|
|
|
7
|
@EXPORT_OK = qw(); |
13
|
10
|
|
|
|
|
133
|
%EXPORT_TAGS = (); |
14
|
|
|
|
|
|
|
} |
15
|
|
|
|
|
|
|
|
16
|
10
|
|
|
10
|
|
38
|
use base ("SeeAlso::Source::BeaconAggregator"); |
|
10
|
|
|
|
|
17
|
|
|
10
|
|
|
|
|
3570
|
|
17
|
|
|
|
|
|
|
use Carp; |
18
|
|
|
|
|
|
|
use HTTP::Date; # not perfect, but the module is commonly installed... |
19
|
|
|
|
|
|
|
use HTTP::Request; |
20
|
|
|
|
|
|
|
use LWP::UserAgent; |
21
|
|
|
|
|
|
|
use File::Temp; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 NAME |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
sasbactrl.pl - command line interface to SeeAlso::Source::BeaconAggregator and |
26
|
|
|
|
|
|
|
auxiliary classes |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 SYNOPSIS |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 DESCRIPTION |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
This Module allows a collection of BEACON files (cf. http://de.wikipedia.org/wiki/Wikipedia:BEACON) |
34
|
|
|
|
|
|
|
to be used as SeeAlso::Source (probably in the context of an SeeAlso::Server application). |
35
|
|
|
|
|
|
|
Therefore it implements the four methods documented in SeeAlso::Source |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
The BEACON files (lists of non-local identifiers of a certain type documenting the coverage of a given |
38
|
|
|
|
|
|
|
online database plus means for access) are imported by the methods provided by |
39
|
|
|
|
|
|
|
SeeAlso::Source::BeaconAggregator::Maintenance.pm, usually by employing the script sasbactrl.pl |
40
|
|
|
|
|
|
|
as command line client. |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Serving other formats than SeeAlso or providing a BEACON file with respect to this |
43
|
|
|
|
|
|
|
SeeAlso service is achieved by using SeeAlso::Source::BeaconAggregator::Publisher. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head1 USAGE |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
Use the C method inherited from C to |
49
|
|
|
|
|
|
|
access an existing database or create a new one. |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head2 Database Methods |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head3 init( [ %options] ) |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
Sets up and initializes the database structure for the object. |
57
|
|
|
|
|
|
|
This has to be done once after creating a new database and after |
58
|
|
|
|
|
|
|
upgrading this module. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
Valid options include: |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=over 8 |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=item verbose |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=item prepareRedirs |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=item identifierClass |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=back |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
The I table contains as columns all valid beacon fields plus |
74
|
|
|
|
|
|
|
the following administrative fields which have to be prefixed with |
75
|
|
|
|
|
|
|
"_" in the interface: |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=over 8 |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=item seqno |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Sequence number: Is incremented on any successfull load |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=item alias |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Unique key: On update older seqences with the same alias are |
86
|
|
|
|
|
|
|
automatically discarded. Most methods take an alias as |
87
|
|
|
|
|
|
|
argument thus obliterating the need to determine the sequence |
88
|
|
|
|
|
|
|
number. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=item sort |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
optional sort key |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=item uri |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
Overrides the #FEED header for updates |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=item ruri |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Real uri from which the last instance was loaded |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=item ftime |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Fetch time: Timestamp as to when this instance was loaded |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Clear this or mtime to force automatic reload. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=item fstat |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Short statistics line of last successful reload on update. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=item mtime |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Modification time: Timestamp of the file / HTTP object from which this instance was loaded. |
118
|
|
|
|
|
|
|
Identical to ftime if no timestamp is provided |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Clear this or ftime to force automatic reload on update. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=item utime |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Timestamp of last update attempt |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=item ustat |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
Short status line of last update attempt. |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=item counti |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
Identifier count |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item countu |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Unique identifier count |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=item admin |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Just to store some remarks. |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=back |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
The I table stores the individual beacon entries from the input files. |
148
|
|
|
|
|
|
|
Its columns are: |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=over 8 |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=item hash |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
Identifier. If a (subclass of) C instance is provided, |
155
|
|
|
|
|
|
|
this will be transformed by the C method. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=item seqno |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Sequence number of the beacon file in the database |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=item altid |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
optional identifier from an alternative identifier system for use |
164
|
|
|
|
|
|
|
with ALTTARGET templates. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=item hits |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
optional number of hits for this identifier in the given resource |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=item info |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
optional information text |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=item link |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
optional explicit URL |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=back |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
The I table contains C, C pairs for various metadata |
182
|
|
|
|
|
|
|
concerning the collection as such, notably the values needed for |
183
|
|
|
|
|
|
|
the Open Search Description and the Header fields needed in case |
184
|
|
|
|
|
|
|
of publishing a beacon file for this collection. |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
The I table stores (unique) C, C pairs for |
187
|
|
|
|
|
|
|
general persistent data. Currently the following keys are defined: |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=over 8 |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=item DATA_VERSION |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
Integer version number to migrate database layout. |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=item IDENTIFIER_CLASS |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
Name of the Identifier class to be used. |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=item REDIRECTION_INDEX |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
Control creation of an additional index for the I column |
202
|
|
|
|
|
|
|
(facialiates reverse lookups as needed for clustering). |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=back |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=cut |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
sub init { |
210
|
|
|
|
|
|
|
my ($self, %options) = @_; |
211
|
|
|
|
|
|
|
$options{'verbose'} = $self->{'verbose'} unless exists $options{'verbose'}; |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
my @fieldlist = SeeAlso::Source::BeaconAggregator->beaconfields(); |
214
|
|
|
|
|
|
|
my @bf = map{ join(" ", @{[SeeAlso::Source::BeaconAggregator->beaconfields($_)]}[0..1]) } @fieldlist; |
215
|
|
|
|
|
|
|
my $hdl = $self->{dbh} or croak("no database handle?"); |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
local($") = ",\n"; |
218
|
|
|
|
|
|
|
$hdl->do(<<"XxX" |
219
|
|
|
|
|
|
|
CREATE TABLE IF NOT EXISTS repos ( |
220
|
|
|
|
|
|
|
seqno INTEGER PRIMARY KEY AUTOINCREMENT, |
221
|
|
|
|
|
|
|
alias TEXT, |
222
|
|
|
|
|
|
|
sort TEXT, |
223
|
|
|
|
|
|
|
uri VARCHAR(512), |
224
|
|
|
|
|
|
|
ruri VARCHAR(512), |
225
|
|
|
|
|
|
|
mtime INTEGER, |
226
|
|
|
|
|
|
|
utime INTEGER, |
227
|
|
|
|
|
|
|
ftime INTEGER, |
228
|
|
|
|
|
|
|
counti INTEGER DEFAULT 0, |
229
|
|
|
|
|
|
|
countu INTEGER DEFAULT 0, |
230
|
|
|
|
|
|
|
fstat TEXT, |
231
|
|
|
|
|
|
|
ustat TEXT, |
232
|
|
|
|
|
|
|
admin VARCHAR(512), |
233
|
|
|
|
|
|
|
@bf |
234
|
|
|
|
|
|
|
); |
235
|
|
|
|
|
|
|
XxX |
236
|
|
|
|
|
|
|
) or croak("Setup error: ".$hdl->errstr); |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
$hdl->do("CREATE UNIQUE INDEX IF NOT EXISTS seqnos ON repos(seqno);") or croak("Setup error: ".$hdl->errstr); |
239
|
|
|
|
|
|
|
$hdl->do("CREATE INDEX IF NOT EXISTS aliases ON repos(alias);") or croak("Setup error: ".$hdl->errstr); |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
$hdl->do(<<"XxX" |
242
|
|
|
|
|
|
|
CREATE TABLE IF NOT EXISTS beacons ( |
243
|
|
|
|
|
|
|
hash CHARACTER(64) NOT NULL, |
244
|
|
|
|
|
|
|
seqno INTEGER REFERENCES repos(seqno) ON DELETE CASCADE, |
245
|
|
|
|
|
|
|
altid TEXT, |
246
|
|
|
|
|
|
|
hits INTEGER, |
247
|
|
|
|
|
|
|
info VARCHAR(255), |
248
|
|
|
|
|
|
|
link VARCHAR(1024) |
249
|
|
|
|
|
|
|
); |
250
|
|
|
|
|
|
|
XxX |
251
|
|
|
|
|
|
|
) or croak("Setup error: ".$hdl->errstr); |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
# Faciliate lookups |
255
|
|
|
|
|
|
|
$hdl->do("CREATE INDEX IF NOT EXISTS lookup ON beacons(hash);") or croak("Setup error: ".$hdl->errstr); |
256
|
|
|
|
|
|
|
# maintenance and enforce constraints |
257
|
|
|
|
|
|
|
# (Problem: Dupes w/o altid but differing in link *and* info fields should be legitimate, too) |
258
|
|
|
|
|
|
|
$hdl->do("CREATE UNIQUE INDEX IF NOT EXISTS mntnce ON beacons(seqno, hash, altid);") or croak("Setup error: ".$hdl->errstr); |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# foreign key on cascade does not work? |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
$hdl->do(<<"XxX" |
263
|
|
|
|
|
|
|
CREATE TRIGGER IF NOT EXISTS on_delete_seqno BEFORE DELETE ON repos FOR EACH ROW |
264
|
|
|
|
|
|
|
BEGIN |
265
|
|
|
|
|
|
|
DELETE FROM beacons WHERE seqno=OLD.seqno; |
266
|
|
|
|
|
|
|
END; |
267
|
|
|
|
|
|
|
XxX |
268
|
|
|
|
|
|
|
) or croak("Setup error: ".$hdl->errstr); |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
# OpenSearchDescription |
271
|
|
|
|
|
|
|
$hdl->do(<<"XxX" |
272
|
|
|
|
|
|
|
CREATE TABLE IF NOT EXISTS osd ( |
273
|
|
|
|
|
|
|
key CHAR(20) NOT NULL, |
274
|
|
|
|
|
|
|
val VARCHAR(1024) |
275
|
|
|
|
|
|
|
); |
276
|
|
|
|
|
|
|
XxX |
277
|
|
|
|
|
|
|
) or croak("Setup error: ".$hdl->errstr); |
278
|
|
|
|
|
|
|
$hdl->do("CREATE INDEX IF NOT EXISTS OSDKeys ON osd(key);") or croak("Setup error: ".$hdl->errstr); |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
# Admin Stuff |
281
|
|
|
|
|
|
|
$hdl->do(<<"XxX" |
282
|
|
|
|
|
|
|
CREATE TABLE IF NOT EXISTS admin ( |
283
|
|
|
|
|
|
|
key CHAR(20) PRIMARY KEY NOT NULL, |
284
|
|
|
|
|
|
|
val VARCHAR(1024) |
285
|
|
|
|
|
|
|
); |
286
|
|
|
|
|
|
|
XxX |
287
|
|
|
|
|
|
|
) or croak("Setup error: ".$hdl->errstr); |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
$hdl->do("CREATE UNIQUE INDEX IF NOT EXISTS ADMKeys ON admin(key);") or croak("Setup error: ".$hdl->errstr); |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
my $admref = $self->admhash(); |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
my $verkey = "DATA_VERSION"; |
294
|
|
|
|
|
|
|
my $goalver = $SeeAlso::Source::BeaconAggregator::DATA_VERSION; |
295
|
|
|
|
|
|
|
my $dbver = $admref->{$verkey} || 0; |
296
|
|
|
|
|
|
|
if ( $dbver != $goalver ) { |
297
|
|
|
|
|
|
|
print "NOTICE: Database version $dbver: Upgrading to $goalver\n"; |
298
|
|
|
|
|
|
|
# alter tables here |
299
|
|
|
|
|
|
|
if ( $dbver < 2 ) { |
300
|
|
|
|
|
|
|
# my ($at, $type) = SeeAlso::Source::BeaconAggregator->beaconfields("COUNT"); |
301
|
|
|
|
|
|
|
# $hdl->do("ALTER TABLE repos ADD COLUMN $at $type;"); |
302
|
|
|
|
|
|
|
# ($at, $type) = SeeAlso::Source::BeaconAggregator->beaconfields("REMARK"); |
303
|
|
|
|
|
|
|
# $hdl->do("ALTER TABLE repos ADD COLUMN $at $type;"); |
304
|
|
|
|
|
|
|
}; |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
elsif ( $options{'verbose'} ) { |
307
|
|
|
|
|
|
|
print "INFO: Database version $dbver is current\n"}; |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
unless ( $dbver == $goalver) { |
310
|
|
|
|
|
|
|
my $verh = $self->stmtHdl("INSERT OR REPLACE INTO admin VALUES (?, ?);", "update version statement"); |
311
|
|
|
|
|
|
|
$verh->execute($verkey, $goalver) |
312
|
|
|
|
|
|
|
or croak("Could not execute update version statement: ".$verh->errstr); |
313
|
|
|
|
|
|
|
}; |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
unless ( exists $options{'identifierClass'} ) { |
316
|
|
|
|
|
|
|
$options{'identifierClass'} = $self->{'identifierClass'} if exists $self->{'identifierClass'}; |
317
|
|
|
|
|
|
|
}; |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
my $ickey = "IDENTIFIER_CLASS"; |
320
|
|
|
|
|
|
|
if ( (exists $options{identifierClass}) and (my $wanttype = ref($options{'identifierClass'})) ) { |
321
|
|
|
|
|
|
|
if ( (exists $self->{identifierClass}) && (ref($self->{identifierClass}) ne $wanttype) ) { |
322
|
|
|
|
|
|
|
croak("Cannot override identifierClass set on new()")}; |
323
|
|
|
|
|
|
|
if ( my $oldtype = $admref->{$ickey} ) { |
324
|
|
|
|
|
|
|
croak ("Identifier mismatch: Cannot set to $wanttype since database already branded to $oldtype") |
325
|
|
|
|
|
|
|
unless($oldtype eq $wanttype); |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
else { |
328
|
|
|
|
|
|
|
print "fixing identifierClass as $wanttype\n" if $options{'verbose'}; |
329
|
|
|
|
|
|
|
my $ichdl = $self->stmtHdl("INSERT INTO admin VALUES (?, ?);", "fix identifier class statement"); |
330
|
|
|
|
|
|
|
$ichdl->execute($ickey, $wanttype) |
331
|
|
|
|
|
|
|
or croak("Could not execute fix identifier class statement: ".$ichdl->errstr); |
332
|
|
|
|
|
|
|
$self->{identifierClass} = $options{identifierClass}; |
333
|
|
|
|
|
|
|
}; |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
elsif ( (exists $options{identifierClass}) and (not $options{identifierClass}) ) { |
336
|
|
|
|
|
|
|
print "removing fixed identifierClass from admin table\n" if $options{'verbose'}; |
337
|
|
|
|
|
|
|
my $ichdl = $self->stmtHdl("DELETE FROM admin WHERE key=?;", "identifier class statement"); |
338
|
|
|
|
|
|
|
$ichdl->execute($ickey) |
339
|
|
|
|
|
|
|
or croak("Could not execute remove identifier class statement: ".$ichdl->errstr); |
340
|
|
|
|
|
|
|
delete $self->{identifierClass}; |
341
|
|
|
|
|
|
|
}; |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
my $rikey = "REDIRECTION_INDEX"; |
344
|
|
|
|
|
|
|
if ( exists $options{prepareRedirs} or exists $admref->{$rikey} ) { |
345
|
|
|
|
|
|
|
my $rihdl = $self->stmtHdl("INSERT OR REPLACE INTO admin VALUES (?, ?);", "fix redirection index statement"); |
346
|
|
|
|
|
|
|
if ( $options{prepareRedirs} or ( $admref->{$rikey} and not exists $options{prepareRedirs} ) ) { |
347
|
|
|
|
|
|
|
print "creating redirection index\n" if $options{prepareRedirs} and $options{'verbose'}; |
348
|
|
|
|
|
|
|
$hdl->do("CREATE INDEX IF NOT EXISTS redir ON beacons(altid,seqno);") or croak("Setup error: ".$hdl->errstr); |
349
|
|
|
|
|
|
|
$rihdl->execute($rikey, 1) |
350
|
|
|
|
|
|
|
or croak("Could not execute fix redirection index: ".$rihdl->errstr); |
351
|
|
|
|
|
|
|
} |
352
|
|
|
|
|
|
|
elsif ( not( $admref->{$rikey} and ($options{prepareRedirs} or (not exists $options{prepareRedirs})) ) ) { |
353
|
|
|
|
|
|
|
print "dropping redirection index\n" if $options{'verbose'}; |
354
|
|
|
|
|
|
|
$hdl->do("DROP INDEX IF EXISTS redir;") or croak("Setup error: ".$hdl->errstr); |
355
|
|
|
|
|
|
|
$rihdl->execute($rikey, 0) |
356
|
|
|
|
|
|
|
or croak("Could not execute fix redirection index: ".$rihdl->errstr); |
357
|
|
|
|
|
|
|
}; |
358
|
|
|
|
|
|
|
# $admref = $self->admhash(); |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
print "[ANALYZE ..." if $options{'verbose'}; |
362
|
|
|
|
|
|
|
$hdl->do("ANALYZE;"); |
363
|
|
|
|
|
|
|
print "]\n" if $options{'verbose'}; |
364
|
|
|
|
|
|
|
return 1; # o.k. |
365
|
|
|
|
|
|
|
}; |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=head3 deflate() |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
Maintenance action: performs VACCUUM, REINDEX and ANALYZE on the database |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=cut |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
sub deflate { |
375
|
|
|
|
|
|
|
my ($self) = @_; |
376
|
|
|
|
|
|
|
my $hdl = $self->{dbh} or croak("no handle?"); |
377
|
|
|
|
|
|
|
local $hdl->{AutoCommit} = 1; |
378
|
|
|
|
|
|
|
print "VACUUM\n"; |
379
|
|
|
|
|
|
|
$hdl->do("VACUUM") or carp("could not VACUUM: Skipping..."); |
380
|
|
|
|
|
|
|
print "REINDEX\n"; |
381
|
|
|
|
|
|
|
$hdl->do("REINDEX") or croak("could not REINDEX: Abort"); |
382
|
|
|
|
|
|
|
print "ANALYZE\n"; |
383
|
|
|
|
|
|
|
$hdl->do("ANALYZE;") or croak("could not ANALYZE: Abort"); |
384
|
|
|
|
|
|
|
return 1; |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=head2 Handling of beacon files |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=head3 loadFile ( $file, $fields, %options ) |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
Reads a physical beacon file and stores it with a new Sequence number in the |
393
|
|
|
|
|
|
|
database. |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Returns a triple: |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
my ($seqno, $rec_ok, $message) = loadFile ( $file, $fields, %options ) |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
$seqno is undef on error |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
$seqno and $rec_ok are zero with $message containing an explanation in case |
402
|
|
|
|
|
|
|
of no action taken. |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
$seqno is an positive integer if something was loaded: The L |
405
|
|
|
|
|
|
|
(internal unique identifier) for the representation of the beacon file in |
406
|
|
|
|
|
|
|
the database. |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
=over 8 |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=item $file |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
File to read: Must be a beacon file |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=item $fields |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
Hashref with additional meta and admin fields to store |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
=item Supported options: |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
verbose => (0|1) |
421
|
|
|
|
|
|
|
force => (0|1) process unconditionally without timestamp comparison |
422
|
|
|
|
|
|
|
nostat => (0|1) don't refresh global identifier counters |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
=back |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
If the file does not contain a minimal correct header (eg. is an empty file |
427
|
|
|
|
|
|
|
or an HTML error page accidentaly caught) no action is performed. |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
Otherwise, a fresh SeqNo (sequence number) is generated and meta and |
430
|
|
|
|
|
|
|
BEACON-Lines are stored in the appropriate tables in the database. |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
If the _alias field is provided, existing database entries for this |
433
|
|
|
|
|
|
|
Alias are updated, identifiers not accounted for any more are |
434
|
|
|
|
|
|
|
eventually discarded. |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=cut |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
sub loadFile { |
439
|
|
|
|
|
|
|
my ($self, $file, $fields, %options) = @_; |
440
|
|
|
|
|
|
|
$options{'verbose'} = $self->{'verbose'} unless exists $options{'verbose'}; |
441
|
|
|
|
|
|
|
$options{'verbose'} ||= 0; |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
if ( ! $file ) { |
444
|
|
|
|
|
|
|
croak("Missing file argument")} |
445
|
|
|
|
|
|
|
elsif ( ! -e $file ) { |
446
|
|
|
|
|
|
|
print "ERROR: no such file $file\n" && return undef} |
447
|
|
|
|
|
|
|
elsif ( ! -r _ ) { |
448
|
|
|
|
|
|
|
print "ERROR: no read permissions for $file\n" && return undef} |
449
|
|
|
|
|
|
|
elsif ( -z _ ) { |
450
|
|
|
|
|
|
|
print "WARNING: empty file $file\n"; |
451
|
|
|
|
|
|
|
return (0,0, "empty file: Will not process"); |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
my $mtime = (stat(_))[9]; |
454
|
|
|
|
|
|
|
open(BKN, "<:utf8", $file) or (print "ERROR: cannot read $file\n", return undef); |
455
|
|
|
|
|
|
|
local($.) = 0; |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
unless ( defined $self->{identifierClass} ) { |
458
|
|
|
|
|
|
|
my $package = $self->autoIdentifier(); |
459
|
|
|
|
|
|
|
$options{'verbose'} && ref($package) && print "Assuming identifiers of type ".ref($package)."\n"; |
460
|
|
|
|
|
|
|
}; |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
$fields = {} unless $fields; |
463
|
|
|
|
|
|
|
$fields->{'_ftime'} ||= time(); |
464
|
|
|
|
|
|
|
$fields->{'_mtime'} ||= $mtime; |
465
|
|
|
|
|
|
|
delete $fields->{_uri} unless $fields->{_uri}; |
466
|
|
|
|
|
|
|
delete $fields->{_alias} unless $fields->{_alias}; |
467
|
|
|
|
|
|
|
my $autopurge = $fields->{_alias} || ""; |
468
|
|
|
|
|
|
|
my $showme = $fields->{_alias} || $fields->{_uri} || $file; |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
if ( $options{'verbose'} ) { |
471
|
|
|
|
|
|
|
printf("* Loading %s from URI %s\n", $fields->{_alias} || "", $fields->{_uri} || ""); |
472
|
|
|
|
|
|
|
printf("* local input %s (%s)\n", $file, SeeAlso::Source::BeaconAggregator::tToISO($mtime)); |
473
|
|
|
|
|
|
|
}; |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
my ($collno, $inserthandle, $replacehandle, $err, $format); |
476
|
|
|
|
|
|
|
my ($linecount, $headerseen, $oseq) = (0, 0, 0); |
477
|
|
|
|
|
|
|
my ($reccount, $recill, $recign, $recnil, $recupd, $recnew, $recdupl, $recdel) = (0, 0, 0, 0, 0, 0, 0, 0); |
478
|
|
|
|
|
|
|
local($_); |
479
|
|
|
|
|
|
|
lines: |
480
|
|
|
|
|
|
|
while ( ) { |
481
|
|
|
|
|
|
|
s/[ \x0d\x0a]+$//; |
482
|
|
|
|
|
|
|
unless ( $linecount++ ) { |
483
|
|
|
|
|
|
|
if ( s/^\x{FEFF}// ) { # BOM-Character |
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
elsif ( s/^\xef\xbb\xbf// ) { # BOM-Bytes |
486
|
|
|
|
|
|
|
print "ERROR: cannot cope with doubly UTF-8 encoded $file\n"; |
487
|
|
|
|
|
|
|
return (undef, undef, "encoding trouble")}; |
488
|
|
|
|
|
|
|
if ( /^\s*$/ ) { |
489
|
|
|
|
|
|
|
print "WARNING: Discarding blank line before beacon header [$showme l.$.]\n"; |
490
|
|
|
|
|
|
|
next; |
491
|
|
|
|
|
|
|
}; |
492
|
|
|
|
|
|
|
}; |
493
|
|
|
|
|
|
|
if ( not defined $collno ) { # $collno used as flag: "still in header" |
494
|
|
|
|
|
|
|
if ( /^#\s*([A-Z][\w-]*):\s*(.*)$/ ) { |
495
|
|
|
|
|
|
|
$headerseen++; |
496
|
|
|
|
|
|
|
my ($field, $data) = ($1, $2); |
497
|
|
|
|
|
|
|
$field =~ s/^DATE$/TIMESTAMP/ && print "WARNING: corrected DATE to TIMESTAMP in Beacon-Header [$showme l.$.]\n"; |
498
|
|
|
|
|
|
|
$data =~ s/\s+$//; |
499
|
|
|
|
|
|
|
next if $data =~ /^\s*$/; |
500
|
|
|
|
|
|
|
if ( SeeAlso::Source::BeaconAggregator->beaconfields($field) ) { |
501
|
|
|
|
|
|
|
if ( $fields->{$field} ) { |
502
|
|
|
|
|
|
|
print "WARNING: Skipping already set $field [$showme l.$.]\n"} |
503
|
|
|
|
|
|
|
else { |
504
|
|
|
|
|
|
|
$fields->{$field} = $data} |
505
|
|
|
|
|
|
|
} |
506
|
|
|
|
|
|
|
else { |
507
|
|
|
|
|
|
|
print "WARNING: Ignoring unknown $field [$data] [$showme l.$.]\n"; |
508
|
|
|
|
|
|
|
}; |
509
|
|
|
|
|
|
|
} |
510
|
|
|
|
|
|
|
elsif ( /^(#[^:\s]+)/ ) { |
511
|
|
|
|
|
|
|
print "WARNING: Discarding unparseable line >$1...< in beacon header context [$showme l.$.]\n"} |
512
|
|
|
|
|
|
|
elsif ( /^\s*$/ ) { |
513
|
|
|
|
|
|
|
print "NOTICE: Discarding blank line in beacon header context [$showme l.$.]\n" if $options{'verbose'}} |
514
|
|
|
|
|
|
|
elsif ( ! $headerseen ) { |
515
|
|
|
|
|
|
|
print "ERROR: no header fields [$showme l.$.]\n"; |
516
|
|
|
|
|
|
|
return (0, 0, "no header fields: Will not proceed"); |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
else { |
519
|
|
|
|
|
|
|
($collno, $err, $format, $inserthandle, $replacehandle, $oseq) = $self->processbeaconheader($fields, %options); |
520
|
|
|
|
|
|
|
unless ( $collno ) { |
521
|
|
|
|
|
|
|
print "ERROR: metadata error [$showme l.$.]\n"; |
522
|
|
|
|
|
|
|
return (0, 0, "metadata error: $err"); |
523
|
|
|
|
|
|
|
}; |
524
|
|
|
|
|
|
|
$self->{dbh}->{AutoCommit} = 0; |
525
|
|
|
|
|
|
|
$linecount --; |
526
|
|
|
|
|
|
|
redo lines; |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
} |
529
|
|
|
|
|
|
|
else { |
530
|
|
|
|
|
|
|
s/^\s+//; s/\s+$//; |
531
|
|
|
|
|
|
|
my ($id, $altid, @rest); |
532
|
|
|
|
|
|
|
($id, @rest) = split(/\s*\|\s*/, $_, 4); |
533
|
|
|
|
|
|
|
($id, $altid) = split(/\s*=\s*/, $id, 2) if $id; |
534
|
|
|
|
|
|
|
$id || ($recnil++, next); |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
if ( $options{'filter'} ) { |
537
|
|
|
|
|
|
|
($id, $altid) = &{$options{'filter'}}($id, $altid, @rest); |
538
|
|
|
|
|
|
|
unless ( $id ) { |
539
|
|
|
|
|
|
|
$recign ++; |
540
|
|
|
|
|
|
|
unless ( ++$reccount % 10000 ) { |
541
|
|
|
|
|
|
|
$self->{dbh}->{AutoCommit} = 1; |
542
|
|
|
|
|
|
|
print "$reccount\n" if $options{'verbose'}; |
543
|
|
|
|
|
|
|
$self->{dbh}->{AutoCommit} = 0; |
544
|
|
|
|
|
|
|
}; |
545
|
|
|
|
|
|
|
next lines; |
546
|
|
|
|
|
|
|
}; |
547
|
|
|
|
|
|
|
}; |
548
|
|
|
|
|
|
|
$altid ||= ""; |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
my($hits, $info, $link); |
551
|
|
|
|
|
|
|
if ( @rest && ($rest[$#rest] =~ m!^\S+://\S+$!) ) { |
552
|
|
|
|
|
|
|
$link = pop @rest} |
553
|
|
|
|
|
|
|
elsif ( defined $rest[2] ) { |
554
|
|
|
|
|
|
|
print "WARNING: unparseable link content >$rest[2]< [$showme l.$.]"}; |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
if ( @rest && ($rest[0] =~ /^\d*$/) ) { |
557
|
|
|
|
|
|
|
$hits = shift @rest; |
558
|
|
|
|
|
|
|
# really disregard hits with explicit 0? |
559
|
|
|
|
|
|
|
if ( (!$altid) and (!$link) and ($format =~ /\baltTARGET\b/) ) { |
560
|
|
|
|
|
|
|
$altid = shift @rest || ""; |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
else { |
563
|
|
|
|
|
|
|
$info = shift @rest || ""; |
564
|
|
|
|
|
|
|
}; |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
elsif ( defined $rest[1] ) { |
567
|
|
|
|
|
|
|
$hits = ""; |
568
|
|
|
|
|
|
|
if ( (!$altid) and (!$link) and ($format =~ /\baltTARGET\b/) ) { |
569
|
|
|
|
|
|
|
$info = shift @rest; |
570
|
|
|
|
|
|
|
$altid = shift @rest; |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
else { |
573
|
|
|
|
|
|
|
shift @rest; |
574
|
|
|
|
|
|
|
$info = shift @rest; |
575
|
|
|
|
|
|
|
}; |
576
|
|
|
|
|
|
|
} |
577
|
|
|
|
|
|
|
elsif ( defined $rest[0] ) { |
578
|
|
|
|
|
|
|
$hits = ""; |
579
|
|
|
|
|
|
|
$info = shift @rest; |
580
|
|
|
|
|
|
|
}; |
581
|
|
|
|
|
|
|
if ( @rest ) { |
582
|
|
|
|
|
|
|
print "WARNING: unparseable content >$_< [$showme l.$.]"}; |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
unless ( $link ) { |
585
|
|
|
|
|
|
|
if ( ($format =~ /\bhasTARGET\b/) ) { # ok |
586
|
|
|
|
|
|
|
} |
587
|
|
|
|
|
|
|
elsif ( $altid && ($format =~ /\baltTARGET\b/) ) { # also ok |
588
|
|
|
|
|
|
|
} |
589
|
|
|
|
|
|
|
elsif ( $format =~ /\bnoTARGET\b/ ) { |
590
|
|
|
|
|
|
|
print "NOTICE: discarding >$id<".(defined $hits ? " ($hits)" : "")." without link [$showme l.$.]\n" if $options{'verbose'} > 1; |
591
|
|
|
|
|
|
|
$recill++; |
592
|
|
|
|
|
|
|
next lines; |
593
|
|
|
|
|
|
|
} |
594
|
|
|
|
|
|
|
else { |
595
|
|
|
|
|
|
|
print "WARNING: discarding >$id<".(defined $hits ? " ($hits)" : "")." without link [$showme l.$.] (assertion failed)\n"; |
596
|
|
|
|
|
|
|
$recill++; |
597
|
|
|
|
|
|
|
next lines; |
598
|
|
|
|
|
|
|
} |
599
|
|
|
|
|
|
|
}; |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
if ( $format !~ /\baltTARGET\b/ ) { # Allow certain duplicates (force disambiguisation) |
602
|
|
|
|
|
|
|
$altid ||= $info || $link} |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
$hits = "" unless defined $hits; |
605
|
|
|
|
|
|
|
($hits =~ /^0+/) && ($recnil++, next); # Explizit "0" => raus |
606
|
|
|
|
|
|
|
$hits = 0 if $hits eq ""; |
607
|
|
|
|
|
|
|
$altid ||= ""; |
608
|
|
|
|
|
|
|
my $hash; |
609
|
|
|
|
|
|
|
if ( defined $self->{identifierClass} ) { |
610
|
|
|
|
|
|
|
$self->{identifierClass}->value($id); |
611
|
|
|
|
|
|
|
unless ( $self->{identifierClass}->valid ) { |
612
|
|
|
|
|
|
|
print "NOTICE: invalid identifier >$id< ($hits) [$showme l.$.]\n" if $options{'verbose'}; |
613
|
|
|
|
|
|
|
$recill++; |
614
|
|
|
|
|
|
|
next lines; |
615
|
|
|
|
|
|
|
}; |
616
|
|
|
|
|
|
|
$hash = $self->{identifierClass}->hash(); |
617
|
|
|
|
|
|
|
} |
618
|
|
|
|
|
|
|
else { |
619
|
|
|
|
|
|
|
$hash = $id}; |
620
|
|
|
|
|
|
|
my $did; |
621
|
|
|
|
|
|
|
if ( $replacehandle && ($did = $replacehandle->execute($hits, $info, $link, $hash, $altid)) ) { # UPDATE OR FAIL old record |
622
|
|
|
|
|
|
|
if ( $replacehandle->err ) { |
623
|
|
|
|
|
|
|
carp("update in trouble: $replacehandle->errstring [$showme l.$.]"); |
624
|
|
|
|
|
|
|
$recdupl++; |
625
|
|
|
|
|
|
|
} |
626
|
|
|
|
|
|
|
elsif ( $did eq "0E0" ) { # not found, try insert |
627
|
|
|
|
|
|
|
$did = $inserthandle->execute($hash, $altid, $hits, $info, $link); |
628
|
|
|
|
|
|
|
if ( $did eq "0E0" ) { |
629
|
|
|
|
|
|
|
$recdupl++; |
630
|
|
|
|
|
|
|
if ( $altid ) { |
631
|
|
|
|
|
|
|
print "INFO: did not insert duplicate Id >$id< = >$altid< ($hits) [$showme l.$.]\n" if $options{'verbose'}} |
632
|
|
|
|
|
|
|
else { |
633
|
|
|
|
|
|
|
print "INFO: did not insert duplicate Id >$id< ($hits) [$showme l.$.]\n" if $options{'verbose'} > 1}; |
634
|
|
|
|
|
|
|
} |
635
|
|
|
|
|
|
|
else { |
636
|
|
|
|
|
|
|
$recnew++}; |
637
|
|
|
|
|
|
|
} |
638
|
|
|
|
|
|
|
else { |
639
|
|
|
|
|
|
|
$recupd++}; |
640
|
|
|
|
|
|
|
} |
641
|
|
|
|
|
|
|
elsif ( $did = $inserthandle->execute($hash, $altid, $hits, $info, $link) ) { # INSERT OR IGNORE new record |
642
|
|
|
|
|
|
|
if ( $did eq "0E0" ) { |
643
|
|
|
|
|
|
|
$recdupl++; |
644
|
|
|
|
|
|
|
print "INFO: did not insert duplicate Id $id ($hits) [$showme l.$.]\n" if $options{'verbose'} > 1; |
645
|
|
|
|
|
|
|
} |
646
|
|
|
|
|
|
|
else { |
647
|
|
|
|
|
|
|
$recnew++}; |
648
|
|
|
|
|
|
|
} |
649
|
|
|
|
|
|
|
elsif ( $inserthandle->errstr =~ /constraint/ ) { |
650
|
|
|
|
|
|
|
$recdupl++; |
651
|
|
|
|
|
|
|
print "INFO: duplicate Id $id ($hits): not inserting [$showme l.$.]\n" if $options{'verbose'} > 1; |
652
|
|
|
|
|
|
|
} |
653
|
|
|
|
|
|
|
else { |
654
|
|
|
|
|
|
|
croak("Could not insert: ($id, $hits, $info, $link): ".$inserthandle->errstr)}; |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
unless ( ++$reccount % 10000 ) { |
657
|
|
|
|
|
|
|
$self->{dbh}->{AutoCommit} = 1; |
658
|
|
|
|
|
|
|
print "$reccount\n" if $options{'verbose'}; |
659
|
|
|
|
|
|
|
$self->{dbh}->{AutoCommit} = 0; |
660
|
|
|
|
|
|
|
}; |
661
|
|
|
|
|
|
|
} |
662
|
|
|
|
|
|
|
}; |
663
|
|
|
|
|
|
|
if ( not defined $collno ) { |
664
|
|
|
|
|
|
|
if ( $headerseen ) { |
665
|
|
|
|
|
|
|
($collno, $err, $format, $inserthandle, $replacehandle, $oseq) = $self->processbeaconheader($fields, %options); |
666
|
|
|
|
|
|
|
if ( $collno ) { |
667
|
|
|
|
|
|
|
print "WARNING: no idn content in file [$showme l.$.]\n"} |
668
|
|
|
|
|
|
|
else { |
669
|
|
|
|
|
|
|
print "ERROR: metadata error [$showme l.$.]\n"; |
670
|
|
|
|
|
|
|
return (0,0, "metadata error: $err"); |
671
|
|
|
|
|
|
|
}; |
672
|
|
|
|
|
|
|
} |
673
|
|
|
|
|
|
|
elsif ( $. ) { |
674
|
|
|
|
|
|
|
print "ERROR: no header fields [$showme l.$.]\n"; |
675
|
|
|
|
|
|
|
return (0, 0, "no header fields: Will not proceed"); |
676
|
|
|
|
|
|
|
} |
677
|
|
|
|
|
|
|
else { |
678
|
|
|
|
|
|
|
print "WARNING: empty file [$showme]\n"; |
679
|
|
|
|
|
|
|
return (0,0, "empty file"); |
680
|
|
|
|
|
|
|
}; |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
$self->{dbh}->{AutoCommit} = 1; |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
if ( $autopurge ) { |
685
|
|
|
|
|
|
|
$self->{dbh}->{AutoCommit} = 0; |
686
|
|
|
|
|
|
|
if ( $oseq ) { |
687
|
|
|
|
|
|
|
my ($bcdelh, $bcdelexpl) = $self->stmtHdl("DELETE FROM beacons WHERE seqno==?"); |
688
|
|
|
|
|
|
|
$self->stmtExplain($bcdelexpl, $oseq) if $ENV{'DBI_PROFILE'}; |
689
|
|
|
|
|
|
|
my $rows = $bcdelh->execute($oseq) or croak("Could not execute >".$bcdelh->{Statement}."<: ".$bcdelh->errstr); |
690
|
|
|
|
|
|
|
$self->{dbh}->{AutoCommit} = 1; |
691
|
|
|
|
|
|
|
printf("INFO: Purged %s surplus identifiers from old sequence %u\n", $rows, $oseq) if $options{'verbose'}; |
692
|
|
|
|
|
|
|
$rows = "0" if $rows eq "0E0"; |
693
|
|
|
|
|
|
|
$recdel += $rows; |
694
|
|
|
|
|
|
|
}; |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
$self->{dbh}->{AutoCommit} = 0; |
697
|
|
|
|
|
|
|
my ($rpdelh, $rpdelexpl) = $self->stmtHdl("DELETE FROM repos WHERE (alias=?) AND (seqno);"); |
698
|
|
|
|
|
|
|
$self->stmtExplain($rpdelexpl, $autopurge, $collno) if $ENV{'DBI_PROFILE'}; |
699
|
|
|
|
|
|
|
my $rows = $rpdelh->execute($autopurge, $collno) or croak("Could not execute >".$rpdelh->{Statement}."<: ".$rpdelh->errstr); |
700
|
|
|
|
|
|
|
$self->{dbh}->{AutoCommit} = 1; |
701
|
|
|
|
|
|
|
$rows = "0" if $rows eq "0E0"; |
702
|
|
|
|
|
|
|
printf("INFO: %u old sequences discarded\n", $rows) if $options{'verbose'}; |
703
|
|
|
|
|
|
|
} |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
printf "NOTICE: New sequence %u for %s: processed %u Records from %u lines\n", |
706
|
|
|
|
|
|
|
$collno, $autopurge || "???", $reccount, $linecount; |
707
|
|
|
|
|
|
|
my $statline = sprintf "%u replaced, %u new, %u deleted, %u duplicate, %u nil, %u invalid, %u ignored", |
708
|
|
|
|
|
|
|
$recupd, $recnew, $recdel, $recdupl, $recnil, $recill, $recign; |
709
|
|
|
|
|
|
|
print " ($statline)\n"; |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
my $recok = $recupd + $recnew; |
712
|
|
|
|
|
|
|
my $numchg = ($recnew or $recdel) ? 1 : 0; |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
# my $ct1hdl = $self->stmtHdl("SELECT COUNT(*) FROM beacons WHERE seqno==? LIMIT 1;"); |
715
|
|
|
|
|
|
|
# $ct1hdl->execute($collno) or croak("could not execute live count: ".$ct1hdl->errstr); |
716
|
|
|
|
|
|
|
# my $ct1ref = $ct1hdl->fetchrow_arrayref(); |
717
|
|
|
|
|
|
|
# my $counti = $ct1ref->[0] || 0; |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
# my $ct2hdl = $self->stmtHdl("SELECT COUNT(DISTINCT hash) FROM beacons WHERE seqno==?"); |
720
|
|
|
|
|
|
|
# using subquery to trick SQLite into using indices |
721
|
|
|
|
|
|
|
# my $ct2hdl = $self->stmtHdl("SELECT COUNT(*) FROM (SELECT DISTINCT hash FROM beacons WHERE seqno==?) LIMIT 1;"); |
722
|
|
|
|
|
|
|
# $ct2hdl->execute($collno) or croak("could not execute live count: ".$ct2hdl->errstr); |
723
|
|
|
|
|
|
|
# my $ct2ref = $ct2hdl->fetchrow_arrayref(); |
724
|
|
|
|
|
|
|
# my $countu = $ct2ref->[0] || 0; |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
# combined query turned out as not as efficient |
727
|
|
|
|
|
|
|
# my $ct0hdl = $self->stmtHdl("SELECT COUNT(*), COUNT(DISTINCT hash) FROM beacons WHERE seqno==? LIMIT 1;"); |
728
|
|
|
|
|
|
|
# $ct0hdl->execute($collno) or croak("could not execute live count: ".$ct0hdl->errstr); |
729
|
|
|
|
|
|
|
# my $ct0ref = $ct0hdl->fetchrow_arrayref(); |
730
|
|
|
|
|
|
|
# my ($counti, $countu) = ($ct0ref->[0] || 0, $ct0ref->[1] || 0); |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
my ($updh, $updexpl) = $self->stmtHdl(<<"XxX"); |
733
|
|
|
|
|
|
|
UPDATE OR FAIL repos SET counti=?,countu=?,fstat=?,utime=?,ustat=?,sort=? WHERE seqno==?; |
734
|
|
|
|
|
|
|
XxX |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
my $counti = $self->idStat($collno, 'distinct' => 0) || 0; |
737
|
|
|
|
|
|
|
printf("WARNING: expected %u valid records, counted %u\n", $recok, $counti) if $recok != $counti; |
738
|
|
|
|
|
|
|
unless ( $numchg ) { |
739
|
|
|
|
|
|
|
$fields->{'_counti'} ||= 0; |
740
|
|
|
|
|
|
|
printf("WARNING: expected unchanged number %u valid records, counted %u\n", $fields->{'_counti'}, $counti) if $fields->{'_counti'} != $counti; |
741
|
|
|
|
|
|
|
}; |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
my $sort = $fields->{'_sort'} || ""; |
744
|
|
|
|
|
|
|
my $countu = $numchg ? ( $self->idStat($collno, 'distinct' => 1) || 0 ) |
745
|
|
|
|
|
|
|
: ( $fields->{'_countu'} || $self->idStat($collno, 'distinct' => 1) || 0 ); |
746
|
|
|
|
|
|
|
$self->stmtExplain($updexpl, $counti, $countu, $statline, time(), "successfully loaded", $sort, $collno) if $ENV{'DBI_PROFILE'}; |
747
|
|
|
|
|
|
|
$updh->execute($counti, $countu, $statline, time(), "successfully loaded", $sort, $collno) |
748
|
|
|
|
|
|
|
or croak("Could not execute >".$updh->{Statement}."<: ".$updh->errstr); |
749
|
|
|
|
|
|
|
close(BKN); |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
if ( $numchg or $options{'force'} ) { |
752
|
|
|
|
|
|
|
# if ( $options{'force'} ) { |
753
|
|
|
|
|
|
|
# print "[ANALYZE ..." if $options{'verbose'}; |
754
|
|
|
|
|
|
|
# $self->{dbh}->do("ANALYZE;"); |
755
|
|
|
|
|
|
|
# print "]\n" if $options{'verbose'}; |
756
|
|
|
|
|
|
|
# }; |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
if ( $options{'nostat'} ) { # invalidate since they might have changed |
759
|
|
|
|
|
|
|
$self->admin('gcounti', undef); |
760
|
|
|
|
|
|
|
$self->admin('gcountu', undef); |
761
|
|
|
|
|
|
|
} |
762
|
|
|
|
|
|
|
else { |
763
|
|
|
|
|
|
|
$self->admin('gcounti', $self->idStat(undef, 'distinct' => 0) || 0); |
764
|
|
|
|
|
|
|
$self->admin('gcountu', $self->idStat(undef, 'distinct' => 1) || 0); |
765
|
|
|
|
|
|
|
} |
766
|
|
|
|
|
|
|
}; |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
return ($collno, $recok, undef); |
769
|
|
|
|
|
|
|
} |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
=head4 processbeaconheader($self, $fieldref, [ %options] ) |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
Internal subroutine used by C. |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
=over 8 |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
=item $fieldref |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
Hash with raw fields. |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
=item Supported options: |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
verbose => (0|1) |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
Show seqnos of old instances which are met by the alias |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
=back |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
=cut |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
sub processbeaconheader { |
794
|
|
|
|
|
|
|
my ($self, $fieldref, %options) = @_; |
795
|
|
|
|
|
|
|
my $osq = 0; |
796
|
|
|
|
|
|
|
my @carp; |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
if ( my $alias = $fieldref->{_alias} ) { |
799
|
|
|
|
|
|
|
my $stampfield = SeeAlso::Source::BeaconAggregator->beaconfields("TIMESTAMP"); |
800
|
|
|
|
|
|
|
my ($listh, $listexpl) = $self->stmtHdl("SELECT seqno, $stampfield, mtime, counti, countu FROM repos WHERE alias=?;"); |
801
|
|
|
|
|
|
|
$self->stmtExplain($listexpl, $alias) if $ENV{'DBI_PROFILE'}; |
802
|
|
|
|
|
|
|
$listh->execute($alias) or croak("Could not execute >".$listh->{Statement}."<: ".$listh->errstr); |
803
|
|
|
|
|
|
|
my ($rowcnt, $ocounti, $ocountu); |
804
|
|
|
|
|
|
|
while ( my($row) = $listh->fetchrow_arrayref ) { |
805
|
|
|
|
|
|
|
last unless defined $row; |
806
|
|
|
|
|
|
|
$rowcnt ++; |
807
|
|
|
|
|
|
|
($ocounti, $ocountu) = ($row->[3], $row->[4]); |
808
|
|
|
|
|
|
|
if ( $options{'verbose'} ) { |
809
|
|
|
|
|
|
|
print "* Old Instances for $alias:\n" unless $osq; |
810
|
|
|
|
|
|
|
$osq = $row->[0]; |
811
|
|
|
|
|
|
|
print "+\t#$osq ", SeeAlso::Source::BeaconAggregator::tToISO($row->[1] || $row->[2]), " (", $row->[3] || "???", ")\n"; |
812
|
|
|
|
|
|
|
} |
813
|
|
|
|
|
|
|
else { |
814
|
|
|
|
|
|
|
$osq = $row->[0]}; |
815
|
|
|
|
|
|
|
} |
816
|
|
|
|
|
|
|
if ( $rowcnt && ($rowcnt == 1) ) { |
817
|
|
|
|
|
|
|
$fieldref->{_counti} ||= $ocounti if $ocounti; |
818
|
|
|
|
|
|
|
$fieldref->{_countu} ||= $ocountu if $ocountu; |
819
|
|
|
|
|
|
|
} |
820
|
|
|
|
|
|
|
}; |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
my $format = ""; |
823
|
|
|
|
|
|
|
if ( $fieldref->{'FORMAT'} && $self->{accept}->{'FORMAT'} ) { |
824
|
|
|
|
|
|
|
if ( $fieldref->{'FORMAT'} =~ $self->{accept}->{'FORMAT'} ) { |
825
|
|
|
|
|
|
|
$format = $fieldref->{'FORMAT'}} |
826
|
|
|
|
|
|
|
else { |
827
|
|
|
|
|
|
|
push(@carp, "ERROR: only FORMAT '".$self->{accept}->{'FORMAT'}."' are supported, this is ".$fieldref->{'FORMAT'})} |
828
|
|
|
|
|
|
|
} |
829
|
|
|
|
|
|
|
elsif ( $fieldref->{'FORMAT'} ) { |
830
|
|
|
|
|
|
|
$format = $fieldref->{'FORMAT'}} |
831
|
|
|
|
|
|
|
elsif ( $fieldref->{'VERSION'} or $fieldref->{'TARGET'} or $fieldref->{'PREFIX'} or $fieldref->{'MESSAGE'} ) { |
832
|
|
|
|
|
|
|
push(@carp, "WARNING: header line #FORMAT: BEACON should be supplied")} |
833
|
|
|
|
|
|
|
elsif ( $self->{accept}->{'FORMAT'} ) { |
834
|
|
|
|
|
|
|
push(@carp, "ERROR: header line #FORMAT is missing")} |
835
|
|
|
|
|
|
|
else { |
836
|
|
|
|
|
|
|
push(@carp, "WARNING: header line #FORMAT: BEACON should be supplied")}; |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
if ( $fieldref->{'FORMAT'} && ($fieldref->{'FORMAT'} =~ /v(?:ersion)?\s*(\d+(?:\.\d*)?)/i) ) { |
839
|
|
|
|
|
|
|
$fieldref->{'VERSION'} ||= $1}; |
840
|
|
|
|
|
|
|
unless ( $fieldref->{'VERSION'} ) { |
841
|
|
|
|
|
|
|
$fieldref->{'VERSION'} = $fieldref->{'FORMAT'} ? "0.1" : "1.0"; |
842
|
|
|
|
|
|
|
push(@carp, "NOTICE: added header field #VERSION as '".$fieldref->{'VERSION'}."'"); |
843
|
|
|
|
|
|
|
}; |
844
|
|
|
|
|
|
|
if ( $self->{accept}->{'VERSION'} ) { |
845
|
|
|
|
|
|
|
($fieldref->{'VERSION'} =~ $self->{accept}->{'VERSION'}) |
846
|
|
|
|
|
|
|
|| push(@carp, "ERROR: only VERSION '".$self->{accept}->{'VERSION'}."' is supported, this is ".$fieldref->{'VERSION'}); |
847
|
|
|
|
|
|
|
}; |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
if ( $fieldref->{'ALTTARGET'} ) { |
850
|
|
|
|
|
|
|
$fieldref->{'ALTTARGET'} = "" unless defined $fieldref->{'ALTTARGET'}; |
851
|
|
|
|
|
|
|
my $parsed = hDecode($fieldref, 'ALTTARGET'); |
852
|
|
|
|
|
|
|
if ( $parsed && ($parsed =~ /(^|[^%])(%.)*%\d\$s/) ) { |
853
|
|
|
|
|
|
|
$fieldref->{'ALTTARGET'} = $parsed; |
854
|
|
|
|
|
|
|
$format =~ s/\s*-altTARGET//; |
855
|
|
|
|
|
|
|
$format .= " -altTARGET"; |
856
|
|
|
|
|
|
|
($parsed =~ /(^|[^%])(%.)*%2\$s/) or |
857
|
|
|
|
|
|
|
push(@carp, "WARNING: header field #ALTTARGET should contain placeholder {ALTID}"); |
858
|
|
|
|
|
|
|
} |
859
|
|
|
|
|
|
|
elsif ( $parsed ) { |
860
|
|
|
|
|
|
|
push(@carp, "ERROR: header field #ALTTARGET must contain placeholder {ALTID} (or {ID})"); |
861
|
|
|
|
|
|
|
delete $fieldref->{'ALTTARGET'}; |
862
|
|
|
|
|
|
|
} |
863
|
|
|
|
|
|
|
else { |
864
|
|
|
|
|
|
|
push(@carp, "ERROR: could not parse header field #ALTTARGET: '".$fieldref->{'ALTTARGET'}."'"); |
865
|
|
|
|
|
|
|
delete $fieldref->{'ALTTARGET'}; |
866
|
|
|
|
|
|
|
} |
867
|
|
|
|
|
|
|
}; |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
if ( $fieldref->{'IMGTARGET'} ) { |
870
|
|
|
|
|
|
|
$fieldref->{'IMGTARGET'} = "" unless defined $fieldref->{'IMGTARGET'}; |
871
|
|
|
|
|
|
|
my $parsed = hDecode($fieldref, 'IMGTARGET'); |
872
|
|
|
|
|
|
|
if ( $parsed && ($parsed =~ /(^|[^%])(%.)*%\d\$s/) ) { |
873
|
|
|
|
|
|
|
$fieldref->{'IMGTARGET'} = $parsed; |
874
|
|
|
|
|
|
|
$format =~ s/\s*-imgTARGET//; |
875
|
|
|
|
|
|
|
$format .= " -imgTARGET"; |
876
|
|
|
|
|
|
|
} |
877
|
|
|
|
|
|
|
elsif ( $parsed ) { |
878
|
|
|
|
|
|
|
push(@carp, "WARNING: header field #IMGTARGET should contain placeholders {ID} or {ALTID}")} |
879
|
|
|
|
|
|
|
else { |
880
|
|
|
|
|
|
|
push(@carp, "ERROR: could not parse header field #IMGTARGET: '".$fieldref->{'IMGTARGET'}."'"); |
881
|
|
|
|
|
|
|
delete $fieldref->{'IMGTARGET'}; |
882
|
|
|
|
|
|
|
} |
883
|
|
|
|
|
|
|
}; |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
if ( exists $fieldref->{'TARGET'} ) { |
886
|
|
|
|
|
|
|
$fieldref->{'TARGET'} = "" unless defined $fieldref->{'TARGET'}; |
887
|
|
|
|
|
|
|
my $parsed = hDecode($fieldref, 'TARGET'); |
888
|
|
|
|
|
|
|
if ( $parsed && ($parsed =~ /(^|[^%])(%.)*%1\$s/) && ($parsed !~ /(^|[^%])(%.)*%[2-9]\$s/) ) { |
889
|
|
|
|
|
|
|
$fieldref->{'TARGET'} = $parsed; |
890
|
|
|
|
|
|
|
$format .= " -hasTARGET"; |
891
|
|
|
|
|
|
|
} |
892
|
|
|
|
|
|
|
elsif ( $parsed ) { |
893
|
|
|
|
|
|
|
if ( exists $fieldref->{'ALTTARGET'} ) { |
894
|
|
|
|
|
|
|
push(@carp, "ERROR: header field #TARGET must contain placeholder {ID} only"); |
895
|
|
|
|
|
|
|
} |
896
|
|
|
|
|
|
|
else { |
897
|
|
|
|
|
|
|
push(@carp, "WARNING: Adding implicit {ID} to #TARGET as #ALTTARGET"); |
898
|
|
|
|
|
|
|
$fieldref->{'ALTTARGET'} = $parsed."%2\$s"; |
899
|
|
|
|
|
|
|
$format .= " -altTARGET"; |
900
|
|
|
|
|
|
|
} |
901
|
|
|
|
|
|
|
delete $fieldref->{'TARGET'}; |
902
|
|
|
|
|
|
|
} |
903
|
|
|
|
|
|
|
else { |
904
|
|
|
|
|
|
|
push(@carp, "ERROR: could not parse header field #TARGET: '".$fieldref->{'TARGET'}."'"); |
905
|
|
|
|
|
|
|
delete $fieldref->{'TARGET'}; |
906
|
|
|
|
|
|
|
} |
907
|
|
|
|
|
|
|
} |
908
|
|
|
|
|
|
|
elsif ( $format =~ /^BEACON/ ) { |
909
|
|
|
|
|
|
|
push(@carp, "WARNING: header field #TARGET not set: ALL beacon lines will have to provide their link by other means!"); |
910
|
|
|
|
|
|
|
$format =~ s/\s*-noTARGET//; |
911
|
|
|
|
|
|
|
$format .= " -noTARGET"; |
912
|
|
|
|
|
|
|
} |
913
|
|
|
|
|
|
|
else { |
914
|
|
|
|
|
|
|
push(@carp, "ERROR: header field #TARGET is mandatory")}; |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
$fieldref->{'MESSAGE'} = hDecode($fieldref, 'MESSAGE') if $fieldref->{'MESSAGE'}; |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
if ( $fieldref->{'TIMESTAMP'} ) { |
920
|
|
|
|
|
|
|
if ( my $parsed = hDecode($fieldref, 'TIMESTAMP') ) { |
921
|
|
|
|
|
|
|
printf("* %-30s %s\n", "Beacon Timestamp:", hEncode($parsed, 'TIMESTAMP')) if $options{'verbose'}; |
922
|
|
|
|
|
|
|
$fieldref->{'TIMESTAMP'} = $parsed; |
923
|
|
|
|
|
|
|
} |
924
|
|
|
|
|
|
|
else { # unparseable => use current |
925
|
|
|
|
|
|
|
push(@carp, "WARNING: cannot parse TIMESTAMP '".$fieldref->{'TIMESTAMP'}."', using current time"); |
926
|
|
|
|
|
|
|
$fieldref->{'TIMESTAMP'} = $^T; |
927
|
|
|
|
|
|
|
}; |
928
|
|
|
|
|
|
|
} |
929
|
|
|
|
|
|
|
else { |
930
|
|
|
|
|
|
|
# $fieldref->{'TIMESTAMP'} = $fieldref->{'_mtime'} || $^T; |
931
|
|
|
|
|
|
|
push(@carp, "NOTICE: no header field #TIMESTAMP detected"); |
932
|
|
|
|
|
|
|
}; |
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
if ( $fieldref->{'REVISIT'} ) { |
935
|
|
|
|
|
|
|
if ( my $parsed = hDecode($fieldref, 'REVISIT') ) { |
936
|
|
|
|
|
|
|
if ( $parsed < $^T ) { |
937
|
|
|
|
|
|
|
printf("* %-30s %s [%s]\n", "STALE Revisit hint parsed as", hEncode($parsed, 'REVISIT'), $fieldref->{'REVISIT'})} # if $options{'verbose'} |
938
|
|
|
|
|
|
|
else { |
939
|
|
|
|
|
|
|
printf("* %-30s %s\n", "Revisit hint parsed as", hEncode($parsed, 'REVISIT')) if $options{'verbose'}}; |
940
|
|
|
|
|
|
|
$fieldref->{'REVISIT'} = $parsed; |
941
|
|
|
|
|
|
|
} |
942
|
|
|
|
|
|
|
else { # unparseable => discard |
943
|
|
|
|
|
|
|
push(@carp, "WARNING: cannot parse #REVISIT '".$fieldref->{'REVISIT'}."', discarding"); |
944
|
|
|
|
|
|
|
delete $fieldref->{'REVISIT'}; |
945
|
|
|
|
|
|
|
}; |
946
|
|
|
|
|
|
|
} |
947
|
|
|
|
|
|
|
else { |
948
|
|
|
|
|
|
|
push(@carp, "INFO: no header field #REVISIT detected"); |
949
|
|
|
|
|
|
|
}; |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
my $cancontinue = 1; |
952
|
|
|
|
|
|
|
my $err = ""; |
953
|
|
|
|
|
|
|
foreach ( @carp ) { |
954
|
|
|
|
|
|
|
print "$_\n"; |
955
|
|
|
|
|
|
|
if ( s/^ERROR: // ) { |
956
|
|
|
|
|
|
|
$cancontinue = 0; |
957
|
|
|
|
|
|
|
$err .= " | " if $err; |
958
|
|
|
|
|
|
|
$err .= $_; |
959
|
|
|
|
|
|
|
}; |
960
|
|
|
|
|
|
|
} |
961
|
|
|
|
|
|
|
unless ( $cancontinue or $options{'ignore-header-errors'} ) { |
962
|
|
|
|
|
|
|
print "CRITICAL: Aborting because of Header Errors\n"; |
963
|
|
|
|
|
|
|
return (undef, $err, $format); |
964
|
|
|
|
|
|
|
}; |
965
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
$fieldref->{'_uri'} ||= $fieldref->{'FEED'}; |
967
|
|
|
|
|
|
|
delete $fieldref->{'_uri'} unless $fieldref->{'_uri'}; |
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
$fieldref->{'_alias'} ||= $fieldref->{'FEED'} || $fieldref->{'TARGET'}; |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
my (@fn, @fd); |
972
|
|
|
|
|
|
|
while ( my ($key, $val) = each %$fieldref ) { |
973
|
|
|
|
|
|
|
next unless defined $val; |
974
|
|
|
|
|
|
|
my $dbkey = ""; |
975
|
|
|
|
|
|
|
if ( $dbkey = SeeAlso::Source::BeaconAggregator->beaconfields($key) ) { |
976
|
|
|
|
|
|
|
push(@fn, $dbkey)} |
977
|
|
|
|
|
|
|
elsif ( $key =~ /_(\w+)$/ ) { |
978
|
|
|
|
|
|
|
push(@fn, $1)} |
979
|
|
|
|
|
|
|
else { |
980
|
|
|
|
|
|
|
next}; |
981
|
|
|
|
|
|
|
my $myval = $val; |
982
|
|
|
|
|
|
|
unless ( $myval =~ /^\d+$/ ) { |
983
|
|
|
|
|
|
|
$myval =~ s/'/''/g; |
984
|
|
|
|
|
|
|
$myval = "'".$myval."'"; |
985
|
|
|
|
|
|
|
}; |
986
|
|
|
|
|
|
|
push(@fd, $myval); |
987
|
|
|
|
|
|
|
}; |
988
|
|
|
|
|
|
|
local($") = ",\n"; |
989
|
|
|
|
|
|
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
990
|
|
|
|
|
|
|
INSERT INTO repos ( seqno, @fn ) VALUES ( NULL, @fd ); |
991
|
|
|
|
|
|
|
XxX |
992
|
|
|
|
|
|
|
$self->stmtExplain($sthexpl) if $ENV{'DBI_PROFILE'}; |
993
|
|
|
|
|
|
|
$sth->execute() or croak("Could not execute >".$sth->{Statement}."<:".$sth->errstr); |
994
|
|
|
|
|
|
|
my $collno = $self->{dbh}->last_insert_id("", "", "", ""); |
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
my $rhandle; |
997
|
|
|
|
|
|
|
if ( $osq ) { |
998
|
|
|
|
|
|
|
$rhandle = $self->stmtHdl(<<"XxX"); |
999
|
|
|
|
|
|
|
UPDATE OR FAIL beacons SET seqno=$collno, hits=?, info=?, link=? WHERE hash=? AND seqno==$osq AND altid=?; |
1000
|
|
|
|
|
|
|
XxX |
1001
|
|
|
|
|
|
|
}; |
1002
|
|
|
|
|
|
|
my $ihandle = $self->stmtHdl(<<"XxX"); |
1003
|
|
|
|
|
|
|
INSERT OR IGNORE INTO beacons ( hash, seqno, altid, hits, info, link ) VALUES (?, $collno, ?, ?, ?, ?); |
1004
|
|
|
|
|
|
|
XxX |
1005
|
|
|
|
|
|
|
return ($collno, "", $format, $ihandle, $rhandle, $osq); |
1006
|
|
|
|
|
|
|
} |
1007
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
my ($lwpcarp817, $lwpcarp827); |
1012
|
|
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
=head3 update ($sq_or_alias, $params, %options) |
1014
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
Loads a beacon file into the database, possibly replacing a previous instance. |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
Some magic is employed to autoconvert ISO-8859-1 or doubly UTF-8 encoded files |
1018
|
|
|
|
|
|
|
back to UTF-8. |
1019
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
Returns undef, if something goes wrong, or the file was not modified since, |
1021
|
|
|
|
|
|
|
otherwise returns a pair (new seqence number, number of lines imported). |
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
=over 8 |
1025
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
=item $sq_or_alias |
1028
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
Sequence number or alias: Used to determine an existing instance. |
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
=item $params |
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
Hashref, containing |
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
agent => LWP::UserAgent to use |
1037
|
|
|
|
|
|
|
_uri => Feed URL to load from |
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
=item %options |
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
Hash, propagated to C |
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
verbose => (0|1) |
1044
|
|
|
|
|
|
|
force => (0|1) process unconditionally without timestamp comparison |
1045
|
|
|
|
|
|
|
nostat => (0|1) don't refresh global identifier counters |
1046
|
|
|
|
|
|
|
|
1047
|
|
|
|
|
|
|
=back |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
Incorporates a new beacon source from a URI in the database or updates an existing one. |
1050
|
|
|
|
|
|
|
For HTTP URIs care is taken not to reload an unmodified BEACON feed (unless the 'force' |
1051
|
|
|
|
|
|
|
option is provided). |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
If the feed appears to be newer than the previously loaded version it is fetched, |
1054
|
|
|
|
|
|
|
some UTF-8 adjustments are performed if necessary, then it is stored to a temporary file |
1055
|
|
|
|
|
|
|
and from there finally processed by the C method above. |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
The URI to load is determined by the following order of precedence: |
1058
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
=over 8 |
1060
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
=item 1 |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
_uri Option |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
=item 2 |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
admin field uri stored in the database |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
=item 3 |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
meta field #FEED taken from the database |
1072
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
=back |
1074
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
Typical use is with an alias, not with a sequence number: |
1076
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
$db->update('whatever'); |
1078
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
Can be used to initially load beacon files from URIs: |
1080
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
$db->update("new_alias", {_uri => $file_uri} ); |
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
=cut |
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
sub update { |
1086
|
|
|
|
|
|
|
my ($self, $sq_or_alias, $params, %options) = @_; |
1087
|
|
|
|
|
|
|
$params = {} unless $params; |
1088
|
|
|
|
|
|
|
$options{'verbose'} = $self->{'verbose'} unless exists $options{'verbose'}; |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
my $ua = $params->{'agent'}; |
1091
|
|
|
|
|
|
|
unless ( $ua ) { |
1092
|
|
|
|
|
|
|
require LWP::UserAgent; |
1093
|
|
|
|
|
|
|
$ua = LWP::UserAgent->new(agent => "SA-S-BeaconAggregator ", # end with space to get default agent appended |
1094
|
|
|
|
|
|
|
env_proxy => 1, |
1095
|
|
|
|
|
|
|
timeout => 300, |
1096
|
|
|
|
|
|
|
) or croak("cannot create UserAgent"); |
1097
|
|
|
|
|
|
|
}; |
1098
|
|
|
|
|
|
|
|
1099
|
|
|
|
|
|
|
my ($cond, @cval) = SeeAlso::Source::BeaconAggregator::mkConstraint($sq_or_alias); |
1100
|
|
|
|
|
|
|
my $alias = ($sq_or_alias =~ /^\d+$/) ? "" : $sq_or_alias; |
1101
|
|
|
|
|
|
|
my $feedname = SeeAlso::Source::BeaconAggregator->beaconfields("FEED"); |
1102
|
|
|
|
|
|
|
my ($ssth, $ssthexpl) = $self->stmtHdl(<<"XxX"); |
1103
|
|
|
|
|
|
|
SELECT seqno, uri, alias, $feedname, ftime, mtime, sort FROM repos $cond; |
1104
|
|
|
|
|
|
|
XxX |
1105
|
|
|
|
|
|
|
$self->stmtExplain($ssthexpl, @cval) if $ENV{'DBI_PROFILE'}; |
1106
|
|
|
|
|
|
|
$ssth->execute(@cval) or croak("Could not execute >".$ssth->{Statement}."<: ".$ssth->errstr); |
1107
|
|
|
|
|
|
|
croak("Select old instance error: ".$ssth->errstr) if $ssth->err; |
1108
|
|
|
|
|
|
|
my $aryref = $ssth->fetchrow_arrayref; |
1109
|
|
|
|
|
|
|
my ($osq, $ouri, $oalias, $feed, $fetchtime, $modtime, $osort) = $aryref ? @$aryref : (); |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
my $uri = $params->{'_uri'} || $ouri || $feed; |
1112
|
|
|
|
|
|
|
croak("Cannot update $sq_or_alias: URI not given nor determinable from previous content") unless $uri; |
1113
|
|
|
|
|
|
|
$uri =~ s/\s$//; |
1114
|
|
|
|
|
|
|
$alias ||= $oalias || ""; |
1115
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
print "Requesting $uri\n" if $options{'verbose'}; |
1117
|
|
|
|
|
|
|
my $rq = HTTP::Request->new('GET', $uri, ['Accept' => 'text/*']) or croak("could not construct request from $uri"); |
1118
|
|
|
|
|
|
|
if ( $fetchtime && $modtime && !$options{'force'} ) { # allow force-reload by deleting _ftime or _mtime |
1119
|
|
|
|
|
|
|
printf(" %-30s %s\n", "Old instance stamped", scalar localtime($modtime)) if $options{'verbose'}; |
1120
|
|
|
|
|
|
|
$rq->header('If-Modified-Since', HTTP::Date::time2str($modtime)); |
1121
|
|
|
|
|
|
|
}; |
1122
|
|
|
|
|
|
|
if ( $rq->can("accept_decodable") ) { # LWP 5.817 and newer |
1123
|
|
|
|
|
|
|
$rq->accept_decodable} |
1124
|
|
|
|
|
|
|
else { |
1125
|
|
|
|
|
|
|
carp("please upgrade to LWP >= 5.817 for compression negotiation") if $options{'verbose'} && (!$lwpcarp817++)}; |
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
my $response = $ua->request($rq); # Well, we hoggishly slurp everything into memory, |
1128
|
|
|
|
|
|
|
# however explicit decompression of an already dumped result would be PITA |
1129
|
|
|
|
|
|
|
my $nuri = ($response->request)->uri; |
1130
|
|
|
|
|
|
|
print "NOTICE: Differing result URI: $nuri\n" if $uri ne $nuri; |
1131
|
|
|
|
|
|
|
if ( $response->is_success ) { |
1132
|
|
|
|
|
|
|
print $osq ? "INFO: refreshing $alias sq $osq from $uri\n" |
1133
|
|
|
|
|
|
|
: "INFO: importing previously unseen $alias from $uri\n"; |
1134
|
|
|
|
|
|
|
my $charset; |
1135
|
|
|
|
|
|
|
if ( $response->can("content_charset") ) { # LWP 5.827 and above |
1136
|
|
|
|
|
|
|
$charset = $response->content_charset; |
1137
|
|
|
|
|
|
|
print "DEBUG: Content charset is $charset\n" if $charset && $options{'verbose'}; |
1138
|
|
|
|
|
|
|
} |
1139
|
|
|
|
|
|
|
else { |
1140
|
|
|
|
|
|
|
carp("please upgrade to LWP >= 5.827 for better detection of content_charset") if $options{'verbose'} && (!$lwpcarp827++)}; |
1141
|
|
|
|
|
|
|
$charset ||= "UTF-8"; |
1142
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
my $lm = $response->last_modified; |
1144
|
|
|
|
|
|
|
printf(" %-30s %s\n", "Last modified", scalar localtime($lm)) if $lm && $options{'verbose'}; |
1145
|
|
|
|
|
|
|
$lm ||= $^T; |
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
my $vt = $response->fresh_until(h_min => 1800, h_max => 30 * 86400); |
1148
|
|
|
|
|
|
|
printf(" %-30s %s\n", "Should be valid until", scalar localtime($vt)) if $vt && $options{'verbose'}; |
1149
|
|
|
|
|
|
|
$vt ||= 0; |
1150
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
# temporary file for dumped contents |
1152
|
|
|
|
|
|
|
my ($tmpfh, $tmpfile) = File::Temp::tempfile("BeaconAggregator-XXXXXXXX", SUFFIX => ".txt", TMPDIR => 1) or croak("Could not acquire temporary file for storage"); |
1153
|
|
|
|
|
|
|
my $contref; # reference to content buffer |
1154
|
|
|
|
|
|
|
if ( ! $response->content_is_text ) { |
1155
|
|
|
|
|
|
|
my $ct = $response->content_type; |
1156
|
|
|
|
|
|
|
print "WARNING: Response content is $ct, not text/*\n"; |
1157
|
|
|
|
|
|
|
if ( my $ce = $response->content_encoding ) { |
1158
|
|
|
|
|
|
|
print "NOTICE: Response is also Content-encoded: $ce\n"} |
1159
|
|
|
|
|
|
|
my $ctt = join("|", $response->decodable()); |
1160
|
|
|
|
|
|
|
if ( $ct =~ s!^(.+\/)?($ctt)$!$2! ) { |
1161
|
|
|
|
|
|
|
# yes: decode anyway since it could be a gzip-encoded .txt.gz file! |
1162
|
|
|
|
|
|
|
my $cr = $response->decoded_content( raise_error => 1, ref => 1); # method exists since LWP 5.802 (2004-11-30) |
1163
|
|
|
|
|
|
|
$response->remove_content_headers; |
1164
|
|
|
|
|
|
|
my $newresp = HTTP::Response->new($response->code, $response->message, $response->headers); |
1165
|
|
|
|
|
|
|
$newresp->content_type("text/plain; charset: $charset"); |
1166
|
|
|
|
|
|
|
$newresp->content_encoding($ct); |
1167
|
|
|
|
|
|
|
$newresp->content_ref($cr); |
1168
|
|
|
|
|
|
|
$response = $newresp; |
1169
|
|
|
|
|
|
|
} |
1170
|
|
|
|
|
|
|
}; |
1171
|
|
|
|
|
|
|
$contref = $response->decoded_content( raise_error => 1, ref => 1); # method exists since LWP 5.802 (2004-11-30) |
1172
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
if ( $$contref =~ /^\x{FFEF}/ ) { # properly encoded BOM => put Characters to file |
1174
|
|
|
|
|
|
|
binmode($tmpfh, ":utf8"); |
1175
|
|
|
|
|
|
|
print "INFO: properly encoded BOM detected: Groked UTF8\n"; # if $options{'verbose'}; |
1176
|
|
|
|
|
|
|
} |
1177
|
|
|
|
|
|
|
elsif ( $$contref =~ s/^\xef\xbb\xbf// ) { # BOM Bytes => put Bytes to file, re-read as UTF-8 |
1178
|
|
|
|
|
|
|
print "INFO: Byte coded BOM detected: trying to restitute character semantics\n"; # if $options{'verbose'}; |
1179
|
|
|
|
|
|
|
print "INFO: Length is ", length($$contref), " ", (utf8::is_utf8($$contref) ? "characters" : "bytes"), "\n"; |
1180
|
|
|
|
|
|
|
binmode($tmpfh, ":bytes"); |
1181
|
|
|
|
|
|
|
utf_deduplicate($contref) && binmode($tmpfh, ":utf8"); |
1182
|
|
|
|
|
|
|
} |
1183
|
|
|
|
|
|
|
elsif ( utf8::is_utf8($$contref) ) { # already Upgraded strings should be written as utf-8 |
1184
|
|
|
|
|
|
|
print "INFO: UTF8-ness already established\n" if $options{'verbose'}; |
1185
|
|
|
|
|
|
|
binmode($tmpfh, ":utf8"); |
1186
|
|
|
|
|
|
|
utf_deduplicate($contref); # but don't trust it (older LWP with file URLs, ...) |
1187
|
|
|
|
|
|
|
} |
1188
|
|
|
|
|
|
|
elsif ( utf8::decode($$contref) ) { # everything in character semantics now |
1189
|
|
|
|
|
|
|
print "INFO: Could decode bytes to UTF8-characters\n" if $options{'verbose'}; |
1190
|
|
|
|
|
|
|
binmode($tmpfh, ":utf8"); |
1191
|
|
|
|
|
|
|
} |
1192
|
|
|
|
|
|
|
else { # leave it alone |
1193
|
|
|
|
|
|
|
print "WARNING: No clue about character encoding: Assume ISO 8859-1\n"; # if $options{'verbose'}; |
1194
|
|
|
|
|
|
|
binmode($tmpfh, ":utf8"); |
1195
|
|
|
|
|
|
|
}; |
1196
|
|
|
|
|
|
|
print $tmpfh $$contref; |
1197
|
|
|
|
|
|
|
close($tmpfh); |
1198
|
|
|
|
|
|
|
# early cleanup since everything might be huge.... |
1199
|
|
|
|
|
|
|
$contref = $response = undef; |
1200
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
my ($collno, $count, $statref) = $self->loadFile($tmpfile, {_alias => $alias, _uri => $uri, _ruri => $nuri, _mtime => $lm, _sort => $osort}, %options); |
1202
|
|
|
|
|
|
|
if ( ! $collno && $osq ) { |
1203
|
|
|
|
|
|
|
my ($usth, $usthexpl) = $self->stmtHdl(<<"XxX"); |
1204
|
|
|
|
|
|
|
UPDATE OR FAIL repos SET utime=?,ustat=? WHERE seqno==?; |
1205
|
|
|
|
|
|
|
XxX |
1206
|
|
|
|
|
|
|
$self->stmtExplain($usthexpl, time(), ($statref ? "load error: $statref" : "internal error"), $osq) if $ENV{'DBI_PROFILE'}; |
1207
|
|
|
|
|
|
|
$usth->execute(time(), ($statref ? "load error: $statref" : "internal error"), $osq) |
1208
|
|
|
|
|
|
|
or croak("Could not execute >".$usth->{Statement}."<: ".$usth->errstr); |
1209
|
|
|
|
|
|
|
}; |
1210
|
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
|
unlink($tmpfile) if -f $tmpfile; |
1212
|
|
|
|
|
|
|
return $collno ? ($collno, $count) : undef; |
1213
|
|
|
|
|
|
|
} |
1214
|
|
|
|
|
|
|
elsif ( $response->code == 304 ) { |
1215
|
|
|
|
|
|
|
print "INFO: $alias not modified since ".HTTP::Date::time2str($modtime)."\n" if $options{'verbose'}; |
1216
|
|
|
|
|
|
|
my $vt = $response->fresh_until(h_min => 1800, h_max => 6 * 86400); |
1217
|
|
|
|
|
|
|
printf(" %-30s %s\n", "Will not try again before", scalar localtime($vt)) if $options{'verbose'}; |
1218
|
|
|
|
|
|
|
my ($usth, $usthexpl) = $self->stmtHdl(<<"XxX"); |
1219
|
|
|
|
|
|
|
UPDATE OR FAIL repos SET utime=?,ustat=?,ruri=? WHERE seqno==?; |
1220
|
|
|
|
|
|
|
XxX |
1221
|
|
|
|
|
|
|
$self->stmtExplain($usthexpl, time(), $response->status_line, $nuri, $osq) if $ENV{'DBI_PROFILE'}; |
1222
|
|
|
|
|
|
|
$usth->execute(time(), $response->status_line, $nuri, $osq) |
1223
|
|
|
|
|
|
|
or croak("Could not execute >".$usth->{Statement}."<: ".$usth->errstr); |
1224
|
|
|
|
|
|
|
return undef; |
1225
|
|
|
|
|
|
|
} |
1226
|
|
|
|
|
|
|
else { |
1227
|
|
|
|
|
|
|
print "WARNING: No access to $uri for $alias [".$response->status_line."]\n"; |
1228
|
|
|
|
|
|
|
print $response->headers_as_string, "\n"; |
1229
|
|
|
|
|
|
|
return undef unless $osq; |
1230
|
|
|
|
|
|
|
my ($usth, $usthexpl) = $self->stmtHdl(<<"XxX"); |
1231
|
|
|
|
|
|
|
UPDATE OR FAIL repos SET utime=?,ustat=?,ruri=? WHERE seqno==?; |
1232
|
|
|
|
|
|
|
XxX |
1233
|
|
|
|
|
|
|
$self->stmtExplain($usthexpl, time(), $response->status_line, $nuri, $osq) if $ENV{'DBI_PROFILE'}; |
1234
|
|
|
|
|
|
|
$usth->execute(time(), $response->status_line, $nuri, $osq) |
1235
|
|
|
|
|
|
|
or croak("Could not execute >".$usth->{Statement}."<: ".$usth->errstr); |
1236
|
|
|
|
|
|
|
return undef; |
1237
|
|
|
|
|
|
|
}; |
1238
|
|
|
|
|
|
|
} |
1239
|
|
|
|
|
|
|
|
1240
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
sub utf_deduplicate { |
1243
|
|
|
|
|
|
|
my ($success, $stringref) = (0, @_); |
1244
|
|
|
|
|
|
|
if ( utf8::downgrade($$stringref, 1) ) { # 1 = FAIL_OK |
1245
|
|
|
|
|
|
|
my $prevlength = length($$stringref); |
1246
|
|
|
|
|
|
|
print "INFO: Downgrade was possible, length now $prevlength ", (utf8::is_utf8($$stringref) ? "characters" : "bytes"), "\n"; |
1247
|
|
|
|
|
|
|
while ( utf8::decode($$stringref) ) { |
1248
|
|
|
|
|
|
|
$success ++; |
1249
|
|
|
|
|
|
|
my $newlength = length($$stringref); |
1250
|
|
|
|
|
|
|
print "DEBUG: Reassembling as UTF-8 succeeded, length now $newlength ", (utf8::is_utf8($$stringref) ? "characters" : "bytes"), "\n"; |
1251
|
|
|
|
|
|
|
last if $newlength == $prevlength; |
1252
|
|
|
|
|
|
|
$prevlength = $newlength; |
1253
|
|
|
|
|
|
|
# last unless utf8::downgrade($$stringref, 1); |
1254
|
|
|
|
|
|
|
} |
1255
|
|
|
|
|
|
|
} |
1256
|
|
|
|
|
|
|
else { |
1257
|
|
|
|
|
|
|
print "WARNING: no downgrade possible, proceed with byte semantics"; |
1258
|
|
|
|
|
|
|
}; |
1259
|
|
|
|
|
|
|
return $success; |
1260
|
|
|
|
|
|
|
} |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
=head3 unload ( [ $seqno_or_alias, %options ] ) |
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
Deletes the sequence(s). |
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
=over 8 |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
=item $seqno_or_alias |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
numeric sequence number, Alias or SQL pattern. |
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
=item Supported options: |
1273
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
force => (0|1) |
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
Needed to purge the complete database ($seqno_or_alias empty) or to purge |
1277
|
|
|
|
|
|
|
more than one sequence ($seqno_or_alias yields more than one seqno). |
1278
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
=back |
1280
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
=cut |
1283
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
sub unload { |
1285
|
|
|
|
|
|
|
my ($self, $seqno_or_alias, %options) = @_; |
1286
|
|
|
|
|
|
|
$options{'verbose'} = $self->{'verbose'} unless exists $options{'verbose'}; |
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
my @seqnos = (); |
1289
|
|
|
|
|
|
|
if ( $seqno_or_alias && ($seqno_or_alias =~ /^\d+$/) ) { |
1290
|
|
|
|
|
|
|
@seqnos = ($seqno_or_alias)} |
1291
|
|
|
|
|
|
|
elsif ( $seqno_or_alias || $options{'force'} ) { |
1292
|
|
|
|
|
|
|
@seqnos = $self->Seqnos('_alias', $seqno_or_alias ? ($seqno_or_alias) : ()); |
1293
|
|
|
|
|
|
|
unless ( @seqnos ) { |
1294
|
|
|
|
|
|
|
carp("no Seqnos selected by $seqno_or_alias"); |
1295
|
|
|
|
|
|
|
return 0; |
1296
|
|
|
|
|
|
|
}; |
1297
|
|
|
|
|
|
|
unless ( $options{'force'} or (@seqnos == 1) ) { |
1298
|
|
|
|
|
|
|
carp("Use --force to purge more than one sequence (@seqnos)"); |
1299
|
|
|
|
|
|
|
return 0; |
1300
|
|
|
|
|
|
|
}; |
1301
|
|
|
|
|
|
|
} |
1302
|
|
|
|
|
|
|
else { |
1303
|
|
|
|
|
|
|
carp("Use --force to purge the complete database"); |
1304
|
|
|
|
|
|
|
return 0; |
1305
|
|
|
|
|
|
|
}; |
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
if ( $options{'force'} ) { |
1308
|
|
|
|
|
|
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1309
|
|
|
|
|
|
|
DELETE FROM beacons WHERE seqno==?; |
1310
|
|
|
|
|
|
|
XxX |
1311
|
|
|
|
|
|
|
foreach my $seqno ( @seqnos ) { |
1312
|
|
|
|
|
|
|
$self->stmtExplain($sthexpl, $seqno_or_alias) if $ENV{'DBI_PROFILE'}; |
1313
|
|
|
|
|
|
|
my $rows = $sth->execute($seqno_or_alias) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1314
|
|
|
|
|
|
|
print "INFO: $rows forced for $seqno\n" if $options{'verbose'}; |
1315
|
|
|
|
|
|
|
}; |
1316
|
|
|
|
|
|
|
}; |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
my ($cond, @cval) = SeeAlso::Source::BeaconAggregator::mkConstraint($seqno_or_alias); |
1319
|
|
|
|
|
|
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1320
|
|
|
|
|
|
|
DELETE FROM repos $cond; |
1321
|
|
|
|
|
|
|
XxX |
1322
|
|
|
|
|
|
|
$self->stmtExplain($sthexpl, @cval) if $ENV{'DBI_PROFILE'}; |
1323
|
|
|
|
|
|
|
my $rows = $sth->execute(@cval) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1324
|
|
|
|
|
|
|
$rows = 0 if $rows eq "0E0"; |
1325
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
if ( $rows or $options{'force'} ) { |
1327
|
|
|
|
|
|
|
# if ( $options{'force'} ) { |
1328
|
|
|
|
|
|
|
# print "[ANALYZE ..." if $options{'verbose'}; |
1329
|
|
|
|
|
|
|
# $self->{dbh}->do("ANALYZE;"); |
1330
|
|
|
|
|
|
|
# print "]\n" if $options{'verbose'}; |
1331
|
|
|
|
|
|
|
# }; |
1332
|
|
|
|
|
|
|
|
1333
|
|
|
|
|
|
|
if ( $options{'nostat'} ) { # invalidate since they might have changed |
1334
|
|
|
|
|
|
|
$self->admin('gcounti', undef); |
1335
|
|
|
|
|
|
|
$self->admin('gcountu', undef); |
1336
|
|
|
|
|
|
|
} |
1337
|
|
|
|
|
|
|
else { |
1338
|
|
|
|
|
|
|
$self->admin('gcounti', $self->idStat(undef, 'distinct' => 0) || 0); |
1339
|
|
|
|
|
|
|
$self->admin('gcountu', $self->idStat(undef, 'distinct' => 1) || 0); |
1340
|
|
|
|
|
|
|
} |
1341
|
|
|
|
|
|
|
}; |
1342
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
return $rows; |
1344
|
|
|
|
|
|
|
} |
1345
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
=head3 purge ( $seqno_or_alias[, %options ] ) |
1348
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
Deletes all identifiers from the database to the given pattern, |
1350
|
|
|
|
|
|
|
but leaves the stored header information intact, such that it |
1351
|
|
|
|
|
|
|
can be updated automatically. |
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
=over 8 |
1354
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
=item $seqno_or_alias |
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
Pattern |
1358
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
=item Supported options: |
1360
|
|
|
|
|
|
|
|
1361
|
|
|
|
|
|
|
force => (0|1) |
1362
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
Allow purging of more than one sequence. |
1364
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
=back |
1366
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
=cut |
1369
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
sub purge { |
1371
|
|
|
|
|
|
|
my ($self, $seqno_or_alias, %options) = @_; |
1372
|
|
|
|
|
|
|
$options{'verbose'} = $self->{'verbose'} unless exists $options{'verbose'}; |
1373
|
|
|
|
|
|
|
my @seqnos; |
1374
|
|
|
|
|
|
|
if ( $seqno_or_alias && ($seqno_or_alias =~ /^\d+$/) ) { |
1375
|
|
|
|
|
|
|
@seqnos = ($seqno_or_alias)} |
1376
|
|
|
|
|
|
|
elsif ( $seqno_or_alias || $options{'force'} ) { |
1377
|
|
|
|
|
|
|
@seqnos = $self->Seqnos('_alias', $seqno_or_alias ? ($seqno_or_alias) : ()); |
1378
|
|
|
|
|
|
|
unless ( @seqnos ) { |
1379
|
|
|
|
|
|
|
carp("no Seqnos selected by $seqno_or_alias"); |
1380
|
|
|
|
|
|
|
return 0; |
1381
|
|
|
|
|
|
|
}; |
1382
|
|
|
|
|
|
|
unless ( $options{'force'} or (@seqnos == 1) ) { |
1383
|
|
|
|
|
|
|
carp("Use --force to purge more than one sequence (@seqnos)"); |
1384
|
|
|
|
|
|
|
return 0; |
1385
|
|
|
|
|
|
|
}; |
1386
|
|
|
|
|
|
|
} |
1387
|
|
|
|
|
|
|
else { |
1388
|
|
|
|
|
|
|
carp("Use --force to purge the complete database"); |
1389
|
|
|
|
|
|
|
return 0; |
1390
|
|
|
|
|
|
|
}; |
1391
|
|
|
|
|
|
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1392
|
|
|
|
|
|
|
DELETE FROM beacons WHERE seqno==?; |
1393
|
|
|
|
|
|
|
XxX |
1394
|
|
|
|
|
|
|
my ($usth, $usthexpl) = $self->stmtHdl(<<"XxX"); |
1395
|
|
|
|
|
|
|
UPDATE OR FAIL repos SET counti=?,countu=?,utime=?,ustat=? WHERE seqno==?; |
1396
|
|
|
|
|
|
|
XxX |
1397
|
|
|
|
|
|
|
my $trows = 0; |
1398
|
|
|
|
|
|
|
foreach my $seqno ( @seqnos ) { |
1399
|
|
|
|
|
|
|
$self->stmtExplain($sthexpl, $seqno) if $ENV{'DBI_PROFILE'}; |
1400
|
|
|
|
|
|
|
my $rows = $sth->execute($seqno) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1401
|
|
|
|
|
|
|
$rows = "0" if $rows eq "0E0"; |
1402
|
|
|
|
|
|
|
print "INFO: $rows purged for $seqno\n" if $options{'verbose'}; |
1403
|
|
|
|
|
|
|
$trows += $rows; |
1404
|
|
|
|
|
|
|
$self->stmtExplain($usthexpl, 0, 0, time, "purged", $seqno) if $ENV{'DBI_PROFILE'}; |
1405
|
|
|
|
|
|
|
$usth->execute(0, 0, time, "purged", $seqno) |
1406
|
|
|
|
|
|
|
or croak("Could not execute >".$usth->{Statement}."<: ".$usth->errstr); |
1407
|
|
|
|
|
|
|
}; |
1408
|
|
|
|
|
|
|
|
1409
|
|
|
|
|
|
|
if ( $trows or $options{'force'} ) { |
1410
|
|
|
|
|
|
|
# if ( $options{'force'} ) { |
1411
|
|
|
|
|
|
|
# print "[ANALYZE ..." if $options{'verbose'}; |
1412
|
|
|
|
|
|
|
# $self->{dbh}->do("ANALYZE;"); |
1413
|
|
|
|
|
|
|
# print "]\n" if $options{'verbose'}; |
1414
|
|
|
|
|
|
|
# }; |
1415
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
if ( $options{'nostat'} ) { # invalidate since they might have changed |
1417
|
|
|
|
|
|
|
$self->admin('gcounti', undef); |
1418
|
|
|
|
|
|
|
$self->admin('gcountu', undef); |
1419
|
|
|
|
|
|
|
} |
1420
|
|
|
|
|
|
|
else { |
1421
|
|
|
|
|
|
|
$self->admin('gcounti', $self->idStat(undef, 'distinct' => 0) || 0); |
1422
|
|
|
|
|
|
|
$self->admin('gcountu', $self->idStat(undef, 'distinct' => 1) || 0); |
1423
|
|
|
|
|
|
|
} |
1424
|
|
|
|
|
|
|
}; |
1425
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
return $trows; |
1427
|
|
|
|
|
|
|
} |
1428
|
|
|
|
|
|
|
|
1429
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
=head2 Methods for headers |
1431
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
=head3 ($rows, @oldvalues) = headerfield ( $sq_or_alias, $key [, $value] ) |
1433
|
|
|
|
|
|
|
|
1434
|
|
|
|
|
|
|
Gets or sets an meta or admin Entry for the constituent file indicated by $sq_or_alias |
1435
|
|
|
|
|
|
|
|
1436
|
|
|
|
|
|
|
=cut |
1437
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
sub headerfield { |
1439
|
|
|
|
|
|
|
my ($self, $sq_or_alias, $key, $value) = @_; |
1440
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
my $dbkey = ""; |
1442
|
|
|
|
|
|
|
if ( $dbkey = SeeAlso::Source::BeaconAggregator->beaconfields($key) ) { |
1443
|
|
|
|
|
|
|
} |
1444
|
|
|
|
|
|
|
elsif ( $key =~ /_(\w+)$/ ) { |
1445
|
|
|
|
|
|
|
$dbkey = $1} |
1446
|
|
|
|
|
|
|
else { |
1447
|
|
|
|
|
|
|
carp "Field $key not known"; |
1448
|
|
|
|
|
|
|
return undef; |
1449
|
|
|
|
|
|
|
}; |
1450
|
|
|
|
|
|
|
|
1451
|
|
|
|
|
|
|
my ($cond, @cval) = SeeAlso::Source::BeaconAggregator::mkConstraint($sq_or_alias); |
1452
|
|
|
|
|
|
|
|
1453
|
|
|
|
|
|
|
my ($osth, $osthexpl) = $self->stmtHdl(<<"XxX"); |
1454
|
|
|
|
|
|
|
SELECT $dbkey FROM repos $cond; |
1455
|
|
|
|
|
|
|
XxX |
1456
|
|
|
|
|
|
|
$self->stmtExplain($osthexpl, @cval) if $ENV{'DBI_PROFILE'}; |
1457
|
|
|
|
|
|
|
$osth->execute(@cval) or croak("Could not execute >".$osth->{Statement}."<:".$osth->errstr); |
1458
|
|
|
|
|
|
|
my $tmpval = $osth->fetchall_arrayref(); |
1459
|
|
|
|
|
|
|
my @oval = map { hEncode($_, $key) } map { (defined $_->[0]) ? ($_->[0]) : () } @$tmpval; |
1460
|
|
|
|
|
|
|
my $rows = scalar @oval; |
1461
|
|
|
|
|
|
|
|
1462
|
|
|
|
|
|
|
if ( (defined $value) and ($value ne "") ) { # set |
1463
|
|
|
|
|
|
|
my ($usth, $usthexpl) = $self->stmtHdl(<<"XxX"); |
1464
|
|
|
|
|
|
|
UPDATE OR FAIL repos SET $dbkey=? $cond; |
1465
|
|
|
|
|
|
|
XxX |
1466
|
|
|
|
|
|
|
$value = hDecode($value, $key) || ""; |
1467
|
|
|
|
|
|
|
$self->stmtExplain($usthexpl, $value, @cval) if $ENV{'DBI_PROFILE'}; |
1468
|
|
|
|
|
|
|
$rows = $usth->execute($value, @cval) or croak("Could not execute >".$usth->{Statement}."<:".$usth->errstr); |
1469
|
|
|
|
|
|
|
} |
1470
|
|
|
|
|
|
|
elsif ( defined $value ) { # clear |
1471
|
|
|
|
|
|
|
my ($dsth, $dsthexpl) = $self->stmtHdl(<<"XxX"); |
1472
|
|
|
|
|
|
|
UPDATE OR FAIL repos SET $dbkey=? $cond; |
1473
|
|
|
|
|
|
|
XxX |
1474
|
|
|
|
|
|
|
$self->stmtExplain($dsthexpl, undef, @cval) if $ENV{'DBI_PROFILE'}; |
1475
|
|
|
|
|
|
|
$rows = $dsth->execute(undef, @cval) or croak("Could not execute >".$dsth->{Statement}."<:".$dsth->errstr); |
1476
|
|
|
|
|
|
|
} |
1477
|
|
|
|
|
|
|
else { # read |
1478
|
|
|
|
|
|
|
} |
1479
|
|
|
|
|
|
|
|
1480
|
|
|
|
|
|
|
return ($rows, @oval); |
1481
|
|
|
|
|
|
|
} |
1482
|
|
|
|
|
|
|
|
1483
|
|
|
|
|
|
|
=head3 ($resultref, $metaref) = headers ( [ $seqno_or_alias ] ) |
1484
|
|
|
|
|
|
|
|
1485
|
|
|
|
|
|
|
Iterates over all |
1486
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
For each iteration returns two hash references: |
1488
|
|
|
|
|
|
|
|
1489
|
|
|
|
|
|
|
=over 8 |
1490
|
|
|
|
|
|
|
|
1491
|
|
|
|
|
|
|
=item 1 |
1492
|
|
|
|
|
|
|
all official beacon fields |
1493
|
|
|
|
|
|
|
|
1494
|
|
|
|
|
|
|
=item 2 |
1495
|
|
|
|
|
|
|
all administrative fields (_alias, ...) |
1496
|
|
|
|
|
|
|
|
1497
|
|
|
|
|
|
|
=back |
1498
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
=cut |
1500
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
sub headers { |
1502
|
|
|
|
|
|
|
my ($self, $seqno_or_alias) = @_; |
1503
|
|
|
|
|
|
|
|
1504
|
|
|
|
|
|
|
unless ( $self->{_iterator_info} ) { |
1505
|
|
|
|
|
|
|
my ($constraint, @cval) = SeeAlso::Source::BeaconAggregator::mkConstraint($seqno_or_alias); |
1506
|
|
|
|
|
|
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1507
|
|
|
|
|
|
|
SELECT * FROM repos $constraint; |
1508
|
|
|
|
|
|
|
XxX |
1509
|
|
|
|
|
|
|
$self->stmtExplain($sthexpl, @cval) if $ENV{'DBI_PROFILE'}; |
1510
|
|
|
|
|
|
|
$sth->execute(@cval) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1511
|
|
|
|
|
|
|
$self->{_iterator_info} = $sth; |
1512
|
|
|
|
|
|
|
}; |
1513
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
my $info = $self->{_iterator_info}->fetchrow_hashref; |
1515
|
|
|
|
|
|
|
unless ( defined $info ) { |
1516
|
|
|
|
|
|
|
croak("Error listing Collections: $self->{_iterator_info}->errstr") if $self->{_iterator_info}->err; |
1517
|
|
|
|
|
|
|
delete $self->{_iterator_info}; |
1518
|
|
|
|
|
|
|
return undef; |
1519
|
|
|
|
|
|
|
} |
1520
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
my $collno = $info->{seqno} || $seqno_or_alias; |
1522
|
|
|
|
|
|
|
my %meta = (_seqno => $collno); |
1523
|
|
|
|
|
|
|
my %result = (); |
1524
|
|
|
|
|
|
|
while ( my($key, $val) = each %$info ) { |
1525
|
|
|
|
|
|
|
next unless defined $val; |
1526
|
|
|
|
|
|
|
my $pval = hEncode($val, $key); |
1527
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
if ( $key =~ /^bc(\w+)$/ ) { |
1529
|
|
|
|
|
|
|
$result{$1} = $pval} |
1530
|
|
|
|
|
|
|
else { |
1531
|
|
|
|
|
|
|
$meta{"_$key"} = $pval}; |
1532
|
|
|
|
|
|
|
} |
1533
|
|
|
|
|
|
|
return \%result, \%meta; |
1534
|
|
|
|
|
|
|
} |
1535
|
|
|
|
|
|
|
|
1536
|
|
|
|
|
|
|
=head3 listCollections ( [ $seqno_or_alias ] ) |
1537
|
|
|
|
|
|
|
|
1538
|
|
|
|
|
|
|
Iterates over all Sequences and returns on each call an array of |
1539
|
|
|
|
|
|
|
|
1540
|
|
|
|
|
|
|
Seqno, Alias, Uri, Modification time, Identifier Count and Unique identifier count |
1541
|
|
|
|
|
|
|
|
1542
|
|
|
|
|
|
|
Returns undef if done. |
1543
|
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
|
=cut |
1545
|
|
|
|
|
|
|
|
1546
|
|
|
|
|
|
|
sub listCollections { |
1547
|
|
|
|
|
|
|
my ($self, $seqno_or_alias) = @_; |
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
unless ( $self->{_iterator_listCollections} ) { |
1550
|
|
|
|
|
|
|
my ($constraint, @cval) = SeeAlso::Source::BeaconAggregator::mkConstraint($seqno_or_alias); |
1551
|
|
|
|
|
|
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1552
|
|
|
|
|
|
|
SELECT seqno, alias, uri, mtime, counti, countu FROM repos $constraint; |
1553
|
|
|
|
|
|
|
XxX |
1554
|
|
|
|
|
|
|
$self->stmtExplain($sthexpl, @cval) if $ENV{'DBI_PROFILE'}; |
1555
|
|
|
|
|
|
|
$sth->execute(@cval) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1556
|
|
|
|
|
|
|
$self->{_iterator_listCollections} = $sth; |
1557
|
|
|
|
|
|
|
}; |
1558
|
|
|
|
|
|
|
my $onerow = $self->{_iterator_listCollections}->fetchrow_arrayref; |
1559
|
|
|
|
|
|
|
unless ( $onerow ) { |
1560
|
|
|
|
|
|
|
croak("Error listing Collections: $self->{_iterator_listCollections}->errstr") if $self->{_iterator_listCollections}->err; |
1561
|
|
|
|
|
|
|
delete $self->{_iterator_listCollections}; |
1562
|
|
|
|
|
|
|
return (); |
1563
|
|
|
|
|
|
|
}; |
1564
|
|
|
|
|
|
|
return @$onerow; |
1565
|
|
|
|
|
|
|
} |
1566
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
=head2 Statistics |
1568
|
|
|
|
|
|
|
|
1569
|
|
|
|
|
|
|
=head3 idStat ( [ $seqno_or_alias, %options ] ) |
1570
|
|
|
|
|
|
|
|
1571
|
|
|
|
|
|
|
Count identifiers for the given pattern. |
1572
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
=over 8 |
1574
|
|
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
=item Supported options: |
1576
|
|
|
|
|
|
|
|
1577
|
|
|
|
|
|
|
distinct => (0|1) |
1578
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
Count multiple occurences only once |
1580
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
verbose => (0|1) |
1582
|
|
|
|
|
|
|
|
1583
|
|
|
|
|
|
|
=back |
1584
|
|
|
|
|
|
|
|
1585
|
|
|
|
|
|
|
|
1586
|
|
|
|
|
|
|
=cut |
1587
|
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
|
sub idStat { |
1589
|
|
|
|
|
|
|
my ($self, $seqno_or_alias, %options) = @_; |
1590
|
|
|
|
|
|
|
$options{'verbose'} = $self->{'verbose'} unless exists $options{'verbose'}; |
1591
|
|
|
|
|
|
|
my $cond = ""; |
1592
|
|
|
|
|
|
|
if ( $seqno_or_alias && ($seqno_or_alias =~ /^\d+$/) ) { |
1593
|
|
|
|
|
|
|
$cond = "WHERE seqno==$seqno_or_alias"} |
1594
|
|
|
|
|
|
|
elsif ( $seqno_or_alias ) { |
1595
|
|
|
|
|
|
|
my @seqnos = $self->Seqnos('_alias', $seqno_or_alias); |
1596
|
|
|
|
|
|
|
if ( @seqnos ) { |
1597
|
|
|
|
|
|
|
$cond = "WHERE seqno IN (".join(",", @seqnos).")"} |
1598
|
|
|
|
|
|
|
else { |
1599
|
|
|
|
|
|
|
carp("no Seqnos selected by $seqno_or_alias"); |
1600
|
|
|
|
|
|
|
return 0; |
1601
|
|
|
|
|
|
|
}; |
1602
|
|
|
|
|
|
|
}; |
1603
|
|
|
|
|
|
|
# my $count_what = $options{'distinct'} ? "DISTINCT hash" : "*"; |
1604
|
|
|
|
|
|
|
# will not be optimized by SQLite or mySQL: SELECT COUNT($count_what) FROM beacons $cond; |
1605
|
|
|
|
|
|
|
# my $sth= $self->stmtHdl("SELECT COUNT($count_what) FROM beacons $cond LIMIT 1;"); |
1606
|
|
|
|
|
|
|
my $from = $options{'distinct'} ? "(SELECT DISTINCT hash FROM beacons $cond)" |
1607
|
|
|
|
|
|
|
: "beacons $cond"; |
1608
|
|
|
|
|
|
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1609
|
|
|
|
|
|
|
SELECT COUNT(*) FROM $from LIMIT 1; |
1610
|
|
|
|
|
|
|
XxX |
1611
|
|
|
|
|
|
|
$self->stmtExplain($sthexpl) if $ENV{'DBI_PROFILE'}; |
1612
|
|
|
|
|
|
|
$sth->execute() or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1613
|
|
|
|
|
|
|
my $hits = $sth->fetchrow_arrayref; |
1614
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
return $hits->[0] || 0; |
1616
|
|
|
|
|
|
|
}; |
1617
|
|
|
|
|
|
|
|
1618
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
=head3 idCounts ( [ $pattern, %options ] ) |
1620
|
|
|
|
|
|
|
|
1621
|
|
|
|
|
|
|
Iterates through the entries according to the optional id filter expression. |
1622
|
|
|
|
|
|
|
|
1623
|
|
|
|
|
|
|
For each iteration the call returns a triple consisting of (identifier, |
1624
|
|
|
|
|
|
|
number of rows, and sum of all individual counts). |
1625
|
|
|
|
|
|
|
|
1626
|
|
|
|
|
|
|
=over 8 |
1627
|
|
|
|
|
|
|
|
1628
|
|
|
|
|
|
|
=item Supported options: |
1629
|
|
|
|
|
|
|
|
1630
|
|
|
|
|
|
|
distinct => (0|1) |
1631
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
Count multiple occurences in one beacon file only once. |
1633
|
|
|
|
|
|
|
|
1634
|
|
|
|
|
|
|
=back |
1635
|
|
|
|
|
|
|
|
1636
|
|
|
|
|
|
|
=cut |
1637
|
|
|
|
|
|
|
|
1638
|
|
|
|
|
|
|
sub idCounts { |
1639
|
|
|
|
|
|
|
my ($self, $pattern, %options) = @_; |
1640
|
|
|
|
|
|
|
my $cond = $pattern ? qq!WHERE hash LIKE "$pattern"! : ""; |
1641
|
|
|
|
|
|
|
my $count_what = $options{'distinct'} ? "DISTINCT seqno" : "seqno"; |
1642
|
|
|
|
|
|
|
unless ( $self->{_iterator_idCounts} ) { |
1643
|
|
|
|
|
|
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1644
|
|
|
|
|
|
|
SELECT hash, COUNT($count_what), SUM(hits) FROM beacons $cond GROUP BY hash ORDER BY hash; |
1645
|
|
|
|
|
|
|
XxX |
1646
|
|
|
|
|
|
|
$self->stmtExplain($sthexpl) if $ENV{'DBI_PROFILE'}; |
1647
|
|
|
|
|
|
|
$sth->execute() or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1648
|
|
|
|
|
|
|
$self->{_iterator_idCounts} = $sth; |
1649
|
|
|
|
|
|
|
unless ( defined $self->{identifierClass} ) { |
1650
|
|
|
|
|
|
|
my $package = $self->autoIdentifier(); |
1651
|
|
|
|
|
|
|
$options{'verbose'} && ref($package) && carp "Assuming identifiers of type ".ref($package)."\n"; |
1652
|
|
|
|
|
|
|
} |
1653
|
|
|
|
|
|
|
}; |
1654
|
|
|
|
|
|
|
my $onerow = $self->{_iterator_idCounts}->fetchrow_arrayref; |
1655
|
|
|
|
|
|
|
unless ( $onerow ) { |
1656
|
|
|
|
|
|
|
croak("Error listing Collections: $self->{_iterator_idCounts}->errstr") if $self->{_iterator_idCounts}->err; |
1657
|
|
|
|
|
|
|
delete $self->{_iterator_idCounts}; |
1658
|
|
|
|
|
|
|
return (); |
1659
|
|
|
|
|
|
|
}; |
1660
|
|
|
|
|
|
|
if ( defined $self->{identifierClass} ) { |
1661
|
|
|
|
|
|
|
my $c = $self->{identifierClass}; |
1662
|
|
|
|
|
|
|
# compat: hash might not take an argument, must resort to value, has to be cleared before... |
1663
|
|
|
|
|
|
|
$c->value(""); |
1664
|
|
|
|
|
|
|
my $did = $c->hash($onerow->[0]) || $c->value($onerow->[0]); |
1665
|
|
|
|
|
|
|
$onerow->[0] = $c->can("pretty") ? $c->pretty() : $c->value(); |
1666
|
|
|
|
|
|
|
}; |
1667
|
|
|
|
|
|
|
return @$onerow; |
1668
|
|
|
|
|
|
|
}; |
1669
|
|
|
|
|
|
|
|
1670
|
|
|
|
|
|
|
|
1671
|
|
|
|
|
|
|
=head3 idList ( [ $pattern ] ) |
1672
|
|
|
|
|
|
|
|
1673
|
|
|
|
|
|
|
Iterates through the entries according to the optional selection. |
1674
|
|
|
|
|
|
|
|
1675
|
|
|
|
|
|
|
For each iteration the call returns a tuple consisting of identifier and an |
1676
|
|
|
|
|
|
|
list of array references (Seqno, Hits, Info, explicit Link, AltId) or the emtpy list |
1677
|
|
|
|
|
|
|
if finished. |
1678
|
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
|
Hits, Info, Link and AltId are normalized to the empty string if undefined (or < 2 for hits). |
1680
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
It is important to finish all iterations before calling this method for "new" arguments: |
1682
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
1 while $db->idList(); # flush pending results |
1684
|
|
|
|
|
|
|
|
1685
|
|
|
|
|
|
|
=cut |
1686
|
|
|
|
|
|
|
|
1687
|
|
|
|
|
|
|
sub idList { |
1688
|
|
|
|
|
|
|
my ($self, $pattern) = @_; |
1689
|
|
|
|
|
|
|
my $cond = $pattern ? ($pattern =~ /%/ ? "WHERE hash LIKE ?" : qq"WHERE hash=?") |
1690
|
|
|
|
|
|
|
: ""; |
1691
|
|
|
|
|
|
|
unless ( $self->{_iterator_idList_handle} ) { |
1692
|
|
|
|
|
|
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1693
|
|
|
|
|
|
|
SELECT hash, seqno, hits, info, link, altid FROM beacons $cond ORDER BY hash, seqno, altid; |
1694
|
|
|
|
|
|
|
XxX |
1695
|
|
|
|
|
|
|
$self->stmtExplain($sthexpl, ($pattern ? ($pattern) : () )) if $ENV{'DBI_PROFILE'}; |
1696
|
|
|
|
|
|
|
$sth->execute(($pattern ? ($pattern) : () )) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1697
|
|
|
|
|
|
|
$self->{_iterator_idList_handle} = $sth; |
1698
|
|
|
|
|
|
|
$self->{_iterator_idList_crosscheck} = $self->RepoCols("ALTTARGET"); |
1699
|
|
|
|
|
|
|
$self->{_iterator_idList_prefetch} = undef; |
1700
|
|
|
|
|
|
|
$self->autoIdentifier() unless defined $self->{identifierClass}; |
1701
|
|
|
|
|
|
|
}; |
1702
|
|
|
|
|
|
|
unless ( exists $self->{_iterator_idList_prefetch} ) { # deferred exit |
1703
|
|
|
|
|
|
|
delete $self->{_iterator_idList_handle}; |
1704
|
|
|
|
|
|
|
delete $self->{_iterator_idList_crosscheck}; |
1705
|
|
|
|
|
|
|
return (); |
1706
|
|
|
|
|
|
|
}; |
1707
|
|
|
|
|
|
|
my $pf = $self->{_iterator_idList_prefetch}; |
1708
|
|
|
|
|
|
|
while ( my $onerow = $self->{_iterator_idList_handle}->fetchrow_arrayref ) { |
1709
|
|
|
|
|
|
|
# $onerow->[2] = "" unless $self->{_iterator_idList_crosscheck}->{$onerow->[1]}; # kill artefacts |
1710
|
|
|
|
|
|
|
$onerow->[2] = "" unless $onerow->[2]; # kill artefacts |
1711
|
|
|
|
|
|
|
$onerow->[3] = "" unless defined $onerow->[3]; # kill artefacts |
1712
|
|
|
|
|
|
|
$onerow->[4] = "" unless defined $onerow->[4]; # kill artefacts |
1713
|
|
|
|
|
|
|
$onerow->[5] = "" unless defined $onerow->[5]; # kill artefacts |
1714
|
|
|
|
|
|
|
if ( defined $self->{identifierClass} ) { |
1715
|
|
|
|
|
|
|
my $c = $self->{identifierClass}; |
1716
|
|
|
|
|
|
|
# compat: hash might not take an argument, must resort to value, has to be cleared before... |
1717
|
|
|
|
|
|
|
$c->value(""); |
1718
|
|
|
|
|
|
|
my $did = $c->hash($onerow->[0]) || $c->value($onerow->[0]); |
1719
|
|
|
|
|
|
|
$onerow->[0] = $c->can("pretty") ? $c->pretty() : $c->value(); |
1720
|
|
|
|
|
|
|
}; |
1721
|
|
|
|
|
|
|
if ( $pf ) { |
1722
|
|
|
|
|
|
|
if ( $pf->[0] eq $onerow->[0] ) { |
1723
|
|
|
|
|
|
|
push(@$pf, [@$onerow[1..@$onerow-1]]); |
1724
|
|
|
|
|
|
|
next; |
1725
|
|
|
|
|
|
|
} |
1726
|
|
|
|
|
|
|
else { |
1727
|
|
|
|
|
|
|
$self->{_iterator_idList_prefetch} = [$onerow->[0], [@$onerow[1..@$onerow-1]]]; |
1728
|
|
|
|
|
|
|
return @$pf; |
1729
|
|
|
|
|
|
|
} |
1730
|
|
|
|
|
|
|
} |
1731
|
|
|
|
|
|
|
else { |
1732
|
|
|
|
|
|
|
$pf = [$onerow->[0], [@$onerow[1..@$onerow-1]]]}; |
1733
|
|
|
|
|
|
|
}; |
1734
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
if ( $self->{_iterator_idList_handle}->err ) { |
1736
|
|
|
|
|
|
|
croak("Error listing Collections: $self->{_iterator_idList_handle}->errstr"); |
1737
|
|
|
|
|
|
|
}; |
1738
|
|
|
|
|
|
|
delete $self->{_iterator_idList_prefetch}; |
1739
|
|
|
|
|
|
|
return $pf ? @$pf : (); |
1740
|
|
|
|
|
|
|
}; |
1741
|
|
|
|
|
|
|
|
1742
|
|
|
|
|
|
|
|
1743
|
|
|
|
|
|
|
=head2 Manipulation of global metadata: Open Search Description |
1744
|
|
|
|
|
|
|
|
1745
|
|
|
|
|
|
|
=head3 setOSD ( $field, @values } |
1746
|
|
|
|
|
|
|
|
1747
|
|
|
|
|
|
|
Sets the field $field of the OpenSearchDescription to @value(s). |
1748
|
|
|
|
|
|
|
|
1749
|
|
|
|
|
|
|
=cut |
1750
|
|
|
|
|
|
|
|
1751
|
|
|
|
|
|
|
sub setOSD { |
1752
|
|
|
|
|
|
|
my ($self) = shift; |
1753
|
|
|
|
|
|
|
$self->clearOSD($_[0]) or return undef; |
1754
|
|
|
|
|
|
|
return (defined $_[1]) ? $self->addOSD(@_) : 0; # value(s) to set |
1755
|
|
|
|
|
|
|
}; |
1756
|
|
|
|
|
|
|
|
1757
|
|
|
|
|
|
|
=head3 clearOSD ( $field } |
1758
|
|
|
|
|
|
|
|
1759
|
|
|
|
|
|
|
Clears the field $field of the OpenSearchDescription. |
1760
|
|
|
|
|
|
|
|
1761
|
|
|
|
|
|
|
=cut |
1762
|
|
|
|
|
|
|
|
1763
|
|
|
|
|
|
|
sub clearOSD { |
1764
|
|
|
|
|
|
|
my ($self, $field) = @_; |
1765
|
|
|
|
|
|
|
$field || (carp("no OSD field name provided"), return undef); |
1766
|
|
|
|
|
|
|
defined $self->osdKeys($field) || (carp("no valid OSD field '$field'"), return undef); |
1767
|
|
|
|
|
|
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1768
|
|
|
|
|
|
|
DELETE FROM osd WHERE key=?; |
1769
|
|
|
|
|
|
|
XxX |
1770
|
|
|
|
|
|
|
$self->stmtExplain($sthexpl, $field) if $ENV{'DBI_PROFILE'}; |
1771
|
|
|
|
|
|
|
$sth->execute($field) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1772
|
|
|
|
|
|
|
return 1; |
1773
|
|
|
|
|
|
|
} |
1774
|
|
|
|
|
|
|
|
1775
|
|
|
|
|
|
|
=head3 addOSD ( $field, @values } |
1776
|
|
|
|
|
|
|
|
1777
|
|
|
|
|
|
|
Adds more @value(s) as (repeatable) field $field of the OpenSearchDescription. |
1778
|
|
|
|
|
|
|
|
1779
|
|
|
|
|
|
|
=cut |
1780
|
|
|
|
|
|
|
|
1781
|
|
|
|
|
|
|
sub addOSD { |
1782
|
|
|
|
|
|
|
my ($self, $field, @values) = @_; |
1783
|
|
|
|
|
|
|
$field || (carp("no OSD field name provided"), return undef); |
1784
|
|
|
|
|
|
|
return 0 unless @values; |
1785
|
|
|
|
|
|
|
defined $self->osdKeys($field) || (carp("no valid OSD field '$field'"), return undef); |
1786
|
|
|
|
|
|
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1787
|
|
|
|
|
|
|
INSERT INTO osd ( key, val ) VALUES ( ?, ? ); |
1788
|
|
|
|
|
|
|
XxX |
1789
|
|
|
|
|
|
|
$self->stmtExplain($sthexpl, $field, $values[0]) if $ENV{'DBI_PROFILE'}; |
1790
|
|
|
|
|
|
|
my $tstatus = []; |
1791
|
|
|
|
|
|
|
my $tuples = $sth->execute_array({ArrayTupleStatus => $tstatus}, $field, \@values) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1792
|
|
|
|
|
|
|
return $tuples; |
1793
|
|
|
|
|
|
|
} |
1794
|
|
|
|
|
|
|
|
1795
|
|
|
|
|
|
|
=head2 Manipulation of global metadata: Beacon Metadata |
1796
|
|
|
|
|
|
|
|
1797
|
|
|
|
|
|
|
These headers are used when you will be publishing a beacon file for the collection. |
1798
|
|
|
|
|
|
|
|
1799
|
|
|
|
|
|
|
=head3 setBeaconMeta ( $field, $value ) |
1800
|
|
|
|
|
|
|
|
1801
|
|
|
|
|
|
|
Sets the field $field of the Beacon meta table (used to generate a BEACON file for this |
1802
|
|
|
|
|
|
|
service) to $value. |
1803
|
|
|
|
|
|
|
|
1804
|
|
|
|
|
|
|
=cut |
1805
|
|
|
|
|
|
|
|
1806
|
|
|
|
|
|
|
sub setBeaconMeta { |
1807
|
|
|
|
|
|
|
my ($self) = shift; |
1808
|
|
|
|
|
|
|
$self->clearBeaconMeta(@_) or return undef; |
1809
|
|
|
|
|
|
|
return (defined $_[1]) ? $self->addBeaconMeta(@_) : 0; # value to set |
1810
|
|
|
|
|
|
|
}; |
1811
|
|
|
|
|
|
|
|
1812
|
|
|
|
|
|
|
=head3 clearBeaconMeta ( $field } |
1813
|
|
|
|
|
|
|
|
1814
|
|
|
|
|
|
|
Deletes the field $field of the Beacon meta table. |
1815
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
=cut |
1817
|
|
|
|
|
|
|
|
1818
|
|
|
|
|
|
|
sub clearBeaconMeta { |
1819
|
|
|
|
|
|
|
my ($self, $rfield) = @_; |
1820
|
|
|
|
|
|
|
$rfield || (carp("no Beacon field name provided"), return undef); |
1821
|
|
|
|
|
|
|
my $field = $self->beaconfields($rfield) or (carp("no valid Beacon field '$rfield'"), return undef); |
1822
|
|
|
|
|
|
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1823
|
|
|
|
|
|
|
DELETE FROM osd WHERE key=?; |
1824
|
|
|
|
|
|
|
XxX |
1825
|
|
|
|
|
|
|
$self->stmtExplain($sthexpl, $field) if $ENV{'DBI_PROFILE'}; |
1826
|
|
|
|
|
|
|
$sth->execute($field) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1827
|
|
|
|
|
|
|
return 1; |
1828
|
|
|
|
|
|
|
} |
1829
|
|
|
|
|
|
|
|
1830
|
|
|
|
|
|
|
=head3 addBeaconMeta ( $field, $value ) |
1831
|
|
|
|
|
|
|
|
1832
|
|
|
|
|
|
|
Appends $value to the field $field of the BEACON meta table |
1833
|
|
|
|
|
|
|
|
1834
|
|
|
|
|
|
|
=cut |
1835
|
|
|
|
|
|
|
sub addBeaconMeta { |
1836
|
|
|
|
|
|
|
my ($self, $rfield, $value) = @_; |
1837
|
|
|
|
|
|
|
$rfield || (carp("no Beacon field name provided"), return undef); |
1838
|
|
|
|
|
|
|
my $field = $self->beaconfields($rfield) or (carp("no valid Beacon field '$rfield'"), return undef); |
1839
|
|
|
|
|
|
|
my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX"); |
1840
|
|
|
|
|
|
|
INSERT INTO osd ( key, val ) VALUES ( ?, ? ); |
1841
|
|
|
|
|
|
|
XxX |
1842
|
|
|
|
|
|
|
$self->stmtExplain($sthexpl, $field, $value) if $ENV{'DBI_PROFILE'}; |
1843
|
|
|
|
|
|
|
$sth->execute($field, $value) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr); |
1844
|
|
|
|
|
|
|
return 1; |
1845
|
|
|
|
|
|
|
} |
1846
|
|
|
|
|
|
|
|
1847
|
|
|
|
|
|
|
=head3 admin ( [$field, [$value]] ) |
1848
|
|
|
|
|
|
|
|
1849
|
|
|
|
|
|
|
Manipulates the admin table. |
1850
|
|
|
|
|
|
|
|
1851
|
|
|
|
|
|
|
Yields a hashref to the admin table if called without arguments. |
1852
|
|
|
|
|
|
|
|
1853
|
|
|
|
|
|
|
If called with $field, returns the current value, and sets the |
1854
|
|
|
|
|
|
|
table entry to $value if defined. |
1855
|
|
|
|
|
|
|
|
1856
|
|
|
|
|
|
|
|
1857
|
|
|
|
|
|
|
=cut |
1858
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
sub admin { |
1860
|
|
|
|
|
|
|
my ($self, $field, $value) = @_; |
1861
|
|
|
|
|
|
|
my $admref = $self->admhash(); |
1862
|
|
|
|
|
|
|
return $admref unless $field; |
1863
|
|
|
|
|
|
|
my $retval = $admref->{$field}; |
1864
|
|
|
|
|
|
|
return $retval unless defined $value; |
1865
|
|
|
|
|
|
|
|
1866
|
|
|
|
|
|
|
my ($admh, $admexpl) = $self->stmtHdl("INSERT OR REPLACE INTO admin VALUES (?, ?);"); |
1867
|
|
|
|
|
|
|
$self->stmtExplain($admexpl, $field, $value) if $ENV{'DBI_PROFILE'}; |
1868
|
|
|
|
|
|
|
$admh->execute($field, $value) |
1869
|
|
|
|
|
|
|
or croak("Could not execute update to admin table: ".$admh->errstr); |
1870
|
|
|
|
|
|
|
return defined($retval) ? $retval : ""; |
1871
|
|
|
|
|
|
|
} |
1872
|
|
|
|
|
|
|
|
1873
|
|
|
|
|
|
|
|
1874
|
|
|
|
|
|
|
# on-the-fly conversions |
1875
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
sub hDecode { # external time to numeric timestamp, printf placeholders |
1877
|
|
|
|
|
|
|
my ($val, $fnam) = @_; |
1878
|
|
|
|
|
|
|
return $val unless $fnam; |
1879
|
|
|
|
|
|
|
local($_) = (ref $val) ? $val->{$fnam} : $val; |
1880
|
|
|
|
|
|
|
return undef unless defined $_; |
1881
|
|
|
|
|
|
|
|
1882
|
|
|
|
|
|
|
if ( $fnam =~ /target$/i ) { s/%/%%/g; s/(\{id\}|\$PND)/%1\$s/gi; s/(\{altid\}|\$PND)/%2\$s/gi; } |
1883
|
|
|
|
|
|
|
elsif ( $fnam =~ /message$/i ) { s/%/%%/g; s/\{hits?\}/%s/gi; } |
1884
|
|
|
|
|
|
|
elsif ( $fnam =~ /time|revisit/i ) { |
1885
|
|
|
|
|
|
|
if ( /^\d+$/ ) { # legacy UNIX timestamp |
1886
|
|
|
|
|
|
|
} |
1887
|
|
|
|
|
|
|
elsif ( my $p = HTTP::Date::str2time($_, "GMT") ) { # all unqualified times are GMT |
1888
|
|
|
|
|
|
|
$_ = $p} |
1889
|
|
|
|
|
|
|
else { |
1890
|
|
|
|
|
|
|
carp("could not parse value '$_' as time in field $fnam"); |
1891
|
|
|
|
|
|
|
return undef; |
1892
|
|
|
|
|
|
|
}; |
1893
|
|
|
|
|
|
|
} |
1894
|
|
|
|
|
|
|
return $_; |
1895
|
|
|
|
|
|
|
} |
1896
|
|
|
|
|
|
|
|
1897
|
|
|
|
|
|
|
sub hEncode { # timestamp to beacon format |
1898
|
|
|
|
|
|
|
my ($val, $fnam) = @_; |
1899
|
|
|
|
|
|
|
local($_) = (ref $val) ? $val->{$fnam} : $val; |
1900
|
|
|
|
|
|
|
return undef unless defined $_; |
1901
|
|
|
|
|
|
|
if ( $fnam =~ /time|revisit/i ) { $_ = SeeAlso::Source::BeaconAggregator::tToISO($_) } |
1902
|
|
|
|
|
|
|
elsif ( $fnam =~ /message/i ) { s/%s/{hits}/; s/%%/%/g; } |
1903
|
|
|
|
|
|
|
elsif ( $fnam =~ /target/i ) { s/%s/{ID}/; s/%1\$s/{ID}/; s/%2\$s/{ALTID}/; s/%%/%/g; }; |
1904
|
|
|
|
|
|
|
return $_; |
1905
|
|
|
|
|
|
|
} |
1906
|
|
|
|
|
|
|
|
1907
|
|
|
|
|
|
|
=head1 AUTHOR |
1908
|
|
|
|
|
|
|
|
1909
|
|
|
|
|
|
|
Thomas Berger |
1910
|
|
|
|
|
|
|
CPAN ID: THB |
1911
|
|
|
|
|
|
|
gymel.com |
1912
|
|
|
|
|
|
|
THB@cpan.org |
1913
|
|
|
|
|
|
|
|
1914
|
|
|
|
|
|
|
=head1 COPYRIGHT |
1915
|
|
|
|
|
|
|
|
1916
|
|
|
|
|
|
|
This program is free software; you can redistribute |
1917
|
|
|
|
|
|
|
it and/or modify it under the same terms as Perl itself. |
1918
|
|
|
|
|
|
|
|
1919
|
|
|
|
|
|
|
The full text of the license can be found in the |
1920
|
|
|
|
|
|
|
LICENSE file included with this module. |
1921
|
|
|
|
|
|
|
|
1922
|
|
|
|
|
|
|
=cut |
1923
|
|
|
|
|
|
|
|
1924
|
|
|
|
|
|
|
1; |
1925
|
|
|
|
|
|
|
|