line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Fsdb.pm |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Copyright (C) 1991-2016 by John Heidemann |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or |
9
|
|
|
|
|
|
|
# modify it under the terms of the GNU General Public License, |
10
|
|
|
|
|
|
|
# version 2, as published by the Free Software Foundation. |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful, |
13
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
14
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15
|
|
|
|
|
|
|
# GNU General Public License for more details. |
16
|
|
|
|
|
|
|
# |
17
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License along |
18
|
|
|
|
|
|
|
# with this program; if not, write to the Free Software Foundation, Inc., |
19
|
|
|
|
|
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
20
|
|
|
|
|
|
|
# |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
package Fsdb; |
23
|
|
|
|
|
|
|
|
24
|
2
|
|
|
2
|
|
141648
|
use warnings; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
60
|
|
25
|
2
|
|
|
2
|
|
6
|
use strict; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
31
|
|
26
|
2
|
|
|
2
|
|
995
|
use utf8; |
|
2
|
|
|
|
|
18
|
|
|
2
|
|
|
|
|
8
|
|
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=encoding utf8 |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 NAME |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
Fsdb - a flat-text database for shell scripting |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=cut |
36
|
|
|
|
|
|
|
our $VERSION = '2.63'; |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 SYNOPSIS |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
Fsdb, the flatfile streaming database, is a package of commands |
41
|
|
|
|
|
|
|
for manipulating flat-ASCII databases from |
42
|
|
|
|
|
|
|
shell scripts. Fsdb is useful to process medium amounts of data (with |
43
|
|
|
|
|
|
|
very little data you'd do it by hand, with megabytes you might want a |
44
|
|
|
|
|
|
|
real database). |
45
|
|
|
|
|
|
|
Fsdb was known as Jdb from 1991 to Oct. 2008. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
Fsdb is very good at doing things like: |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=over 4 |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=item * |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
extracting measurements from experimental output |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=item * |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
examining data to address different hypotheses |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=item * |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
joining data from different experiments |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=item * |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
eliminating/detecting outliers |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=item * |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
computing statistics on data |
70
|
|
|
|
|
|
|
(mean, confidence intervals, correlations, histograms) |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=item * |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
reformatting data for graphing programs |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=back |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Fsdb is built around the idea of a flat text file as a database. |
79
|
|
|
|
|
|
|
Fsdb files (by convention, with the extension F<.fsdb>), |
80
|
|
|
|
|
|
|
have a header documenting the schema (what the columns mean), |
81
|
|
|
|
|
|
|
and then each line represents a database record (or row). |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
For example: |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
#fsdb experiment duration |
86
|
|
|
|
|
|
|
ufs_mab_sys 37.2 |
87
|
|
|
|
|
|
|
ufs_mab_sys 37.3 |
88
|
|
|
|
|
|
|
ufs_rcp_real 264.5 |
89
|
|
|
|
|
|
|
ufs_rcp_real 277.9 |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
Is a simple file with four experiments (the rows), |
92
|
|
|
|
|
|
|
each with a description and run time |
93
|
|
|
|
|
|
|
in the first and second columns. |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
Rather than hand-code scripts to do each special case, Fsdb provides |
96
|
|
|
|
|
|
|
higher-level functions. Although it's often easy to throw together a |
97
|
|
|
|
|
|
|
custom script to do any single task, I believe that there are several |
98
|
|
|
|
|
|
|
advantages to using Fsdb: |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=over 4 |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=item * |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
these programs provide a higher level interface than plain Perl, so |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=over 4 |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=item ** |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
Fewer lines of simpler code: |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
dbrow '_experiment eq "ufs_mab_sys"' | dbcolstats duration |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
Picks out just one type of experiment and computes statistics on it, |
115
|
|
|
|
|
|
|
rather than: |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
while (<>) { split; $sum+=$F[1]; $ss+=$F[1]**2; $n++; } |
118
|
|
|
|
|
|
|
$mean = $sum / $n; $std_dev = ... |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
in dozens of places. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=back |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=item * |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
the library uses names for columns, so |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=over 4 |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=item ** |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
No more C<$F[1]>, use C<_duration>. |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=item ** |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
New or different order columns? No changes to your scripts! |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=back |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Thus if your experiment gets more complicated with a size parameter, |
141
|
|
|
|
|
|
|
so your log changes to: |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
#fsdb experiment size duration |
144
|
|
|
|
|
|
|
ufs_mab_sys 1024 37.2 |
145
|
|
|
|
|
|
|
ufs_mab_sys 1024 37.3 |
146
|
|
|
|
|
|
|
ufs_rcp_real 1024 264.5 |
147
|
|
|
|
|
|
|
ufs_rcp_real 1024 277.9 |
148
|
|
|
|
|
|
|
ufs_mab_sys 2048 45.3 |
149
|
|
|
|
|
|
|
ufs_mab_sys 2048 44.2 |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Then the previous scripts still work, even though duration is |
152
|
|
|
|
|
|
|
now the third column, not the second. |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=item * |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
A series of actions are self-documenting (each program records what it does). |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=over 4 |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=item ** |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
No more wondering what hacks were used to compute the |
163
|
|
|
|
|
|
|
final data, just look at the comments at the end |
164
|
|
|
|
|
|
|
of the output. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=back |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
For example, the commands |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
dbrow '_experiment eq "ufs_mab_sys"' | dbcolstats duration |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
add to the end of the output the lines |
173
|
|
|
|
|
|
|
# | dbrow _experiment eq "ufs_mab_sys" |
174
|
|
|
|
|
|
|
# | dbcolstats duration |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=item * |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
The library is mature, supporting large datasets, |
180
|
|
|
|
|
|
|
corner cases, error handling, backed by an automated test suite. |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
=over 4 |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=item ** |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
No more puzzling about bad output because your custom script |
187
|
|
|
|
|
|
|
skimped on error checking. |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=item ** |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
No more memory thrashing when you try to sort ten million records. |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=back |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=item * |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
Fsdb-2.x supports Perl scripting (in addition to shell scripting), |
198
|
|
|
|
|
|
|
with libraries to do Fsdb input and output, and easy support for pipelines. |
199
|
|
|
|
|
|
|
The shell script |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
dbcol name test1 | dbroweval '_test1 += 5;' |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
can be written in perl as: |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
dbpipeline(dbcol(qw(name test1)), dbroweval('_test1 += 5;')); |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=back |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
(The disadvantage is that you need to learn what functions Fsdb provides.) |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Fsdb is built on flat-ASCII databases. By storing data in simple text |
212
|
|
|
|
|
|
|
files and processing it with pipelines it is easy to experiment (in |
213
|
|
|
|
|
|
|
the shell) and look at the output. |
214
|
|
|
|
|
|
|
To the best of my knowledge, the original implementation of |
215
|
|
|
|
|
|
|
this idea was C</rdb>, a commercial product described in the book |
216
|
|
|
|
|
|
|
I<UNIX Relational Database Management> |
217
|
|
|
|
|
|
|
by Rod Manis, Evan Schaffer, and Robert Jorgensen (and |
218
|
|
|
|
|
|
|
also at the web page L). Fsdb is an incompatible |
219
|
|
|
|
|
|
|
re-implementation of their idea without any accelerated indexing or |
220
|
|
|
|
|
|
|
forms support. (But it's free, and probably has better statistics!). |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
Fsdb-2.x will exploit multiple processors or cores, |
223
|
|
|
|
|
|
|
and provides Perl-level support for input, output, and threaded-pipelines. |
224
|
|
|
|
|
|
|
(As of Fsdb-2.44 it no longer uses Perl threading, just processes.) |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
Installation instructions follow at the end of this document. |
227
|
|
|
|
|
|
|
Fsdb-2.x requires Perl 5.8 to run. |
228
|
|
|
|
|
|
|
All commands have manual pages and provide usage with the C<--help> option. |
229
|
|
|
|
|
|
|
All commands are backed by an automated test suite. |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
The most recent version of Fsdb is available on the web at |
232
|
|
|
|
|
|
|
L. |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=head1 WHAT'S NEW |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=head2 2.63, 2017-02-03 |
238
|
|
|
|
|
|
|
Re-add some features supposedly in 2.62 but not, and add more --header options. |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=over 4 |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=item ENHANCEMENT |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
The option B<-j> is now a synonym for B<--parallelism>. |
245
|
|
|
|
|
|
|
(And several documentation bugs about this option are fixed.) |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=item ENHANCEMENT |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
Additional support for C<--header> in L, L, L, |
250
|
|
|
|
|
|
|
and L. |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=item BUG FIX |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
Version 2.62 was supposed to have this improvement, but did not (and now does): |
255
|
|
|
|
|
|
|
L now allows the B<--possible-pivots> option, |
256
|
|
|
|
|
|
|
and if it is provided processes the data in one pass. |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=item BUG FIX |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
Version 2.62 was supposed to have this improvement, but did not (and now does): |
261
|
|
|
|
|
|
|
L logs are now quoted. |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=back |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=head1 README CONTENTS |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=over 4 |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=item executive summary |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=item what's new |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=item README CONTENTS |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
=item installation |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=item basic data format |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=item basic data manipulation |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=item list of commands |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=item another example |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=item a gradebook example |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
=item a password example |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=item history |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
=item related work |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=item release notes |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
=item copyright |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=item comments |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=back |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=head1 INSTALLATION |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
Fsdb now uses the standard Perl build and installation from |
307
|
|
|
|
|
|
|
ExtUtils::MakeMaker(3), so the quick answer to installation is to type: |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
perl Makefile.PL |
310
|
|
|
|
|
|
|
make |
311
|
|
|
|
|
|
|
make test |
312
|
|
|
|
|
|
|
make install |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
Or, if you want to install it somewhere else, change the first line to |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
perl Makefile.PL PREFIX=$HOME |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
and it will go in your home directory's F, etc. |
319
|
|
|
|
|
|
|
(See L for more details.) |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
Fsdb requires perl 5.8 or later. |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
A test-suite is available, run it with |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
make test |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
A FreeBSD port to Fsdb is available, see |
328
|
|
|
|
|
|
|
L. |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
A Fink (MacOS X) port is available, see |
331
|
|
|
|
|
|
|
L. |
332
|
|
|
|
|
|
|
(Thanks to Lars Eggert for maintaining this port.) |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=head1 BASIC DATA FORMAT |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
These programs are based on the idea of storing data in simple ASCII |
338
|
|
|
|
|
|
|
files. A database is a file with one header line and then data or |
339
|
|
|
|
|
|
|
comment lines. For example: |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
#fsdb account passwd uid gid fullname homedir shell |
342
|
|
|
|
|
|
|
johnh * 2274 134 John_Heidemann /home/johnh /bin/bash |
343
|
|
|
|
|
|
|
greg * 2275 134 Greg_Johnson /home/greg /bin/bash |
344
|
|
|
|
|
|
|
root * 0 0 Root /root /bin/bash |
345
|
|
|
|
|
|
|
# this is a simple database |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
The header line must be first and begins with C<#fsdb>. |
348
|
|
|
|
|
|
|
There are rows (records) and columns (fields), |
349
|
|
|
|
|
|
|
just like in a normal database. |
350
|
|
|
|
|
|
|
Comment lines begin with C<#>. |
351
|
|
|
|
|
|
|
Column names are any string not containing spaces or single quote |
352
|
|
|
|
|
|
|
(although it is prudent to keep them alphanumeric with underscore). |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
By default, columns are delimited by whitespace. |
355
|
|
|
|
|
|
|
With this default configuration, the contents of a field |
356
|
|
|
|
|
|
|
cannot contain whitespace. |
357
|
|
|
|
|
|
|
However, this limitation can be relaxed by changing the field separator |
358
|
|
|
|
|
|
|
as described below. |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
The big advantage of simple flat-text databases is that |
361
|
|
|
|
|
|
|
it is usually easy to massage data into this format, |
362
|
|
|
|
|
|
|
and it's reasonably easy to take data out of this |
363
|
|
|
|
|
|
|
format into other (text-based) programs, like gnuplot, jgraph, and |
364
|
|
|
|
|
|
|
LaTeX. Think Unix. Think pipes. |
365
|
|
|
|
|
|
|
(Or even output to Excel and HTML if you prefer.) |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
Since no-whitespace in columns was a problem for some applications, |
368
|
|
|
|
|
|
|
there's an option which relaxes this rule. You can specify the field |
369
|
|
|
|
|
|
|
separator in the table header with C<-F x> where C<x> is |
370
|
|
|
|
|
|
|
a code for the new field separator. |
371
|
|
|
|
|
|
|
A full list of codes is at L, |
372
|
|
|
|
|
|
|
but two common special values are C<-F t> |
373
|
|
|
|
|
|
|
which is a separator of a single tab character, |
374
|
|
|
|
|
|
|
and C<-F S>, a separator of two spaces. |
375
|
|
|
|
|
|
|
Both allow (single) spaces in fields. An example: |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
#fsdb -F S account passwd uid gid fullname homedir shell |
378
|
|
|
|
|
|
|
johnh * 2274 134 John Heidemann /home/johnh /bin/bash |
379
|
|
|
|
|
|
|
greg * 2275 134 Greg Johnson /home/greg /bin/bash |
380
|
|
|
|
|
|
|
root * 0 0 Root /root /bin/bash |
381
|
|
|
|
|
|
|
# this is a simple database |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
See L for more details. Regardless of what the column |
384
|
|
|
|
|
|
|
separator is for the body of the data, it's always whitespace in the |
385
|
|
|
|
|
|
|
header. |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
There's also a third format: a "list". Because it's often hard to see |
388
|
|
|
|
|
|
|
what's in columns past the first two, in list format each "column" is on |
389
|
|
|
|
|
|
|
a separate line. The programs dblistize and dbcolize convert to and |
390
|
|
|
|
|
|
|
from this format, and all programs work with either format. |
391
|
|
|
|
|
|
|
The command |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
dbfilealter -R C < DATA/passwd.fsdb |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
outputs: |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
#fsdb -R C account passwd uid gid fullname homedir shell |
398
|
|
|
|
|
|
|
account: johnh |
399
|
|
|
|
|
|
|
passwd: * |
400
|
|
|
|
|
|
|
uid: 2274 |
401
|
|
|
|
|
|
|
gid: 134 |
402
|
|
|
|
|
|
|
fullname: John_Heidemann |
403
|
|
|
|
|
|
|
homedir: /home/johnh |
404
|
|
|
|
|
|
|
shell: /bin/bash |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
account: greg |
407
|
|
|
|
|
|
|
passwd: * |
408
|
|
|
|
|
|
|
uid: 2275 |
409
|
|
|
|
|
|
|
gid: 134 |
410
|
|
|
|
|
|
|
fullname: Greg_Johnson |
411
|
|
|
|
|
|
|
homedir: /home/greg |
412
|
|
|
|
|
|
|
shell: /bin/bash |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
account: root |
415
|
|
|
|
|
|
|
passwd: * |
416
|
|
|
|
|
|
|
uid: 0 |
417
|
|
|
|
|
|
|
gid: 0 |
418
|
|
|
|
|
|
|
fullname: Root |
419
|
|
|
|
|
|
|
homedir: /root |
420
|
|
|
|
|
|
|
shell: /bin/bash |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
# this is a simple database |
423
|
|
|
|
|
|
|
# | dblistize |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
See L for more details. |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=head1 BASIC DATA MANIPULATION |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
A number of programs exist to manipulate databases. |
431
|
|
|
|
|
|
|
Complex functions can be made by stringing together commands |
432
|
|
|
|
|
|
|
with shell pipelines. For example, to print the home |
433
|
|
|
|
|
|
|
directories of everyone with ``john'' in their names, |
434
|
|
|
|
|
|
|
you would do: |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
cat DATA/passwd | dbrow '_fullname =~ /John/' | dbcol homedir |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
The output might be: |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
#fsdb homedir |
441
|
|
|
|
|
|
|
/home/johnh |
442
|
|
|
|
|
|
|
/home/greg |
443
|
|
|
|
|
|
|
# this is a simple database |
444
|
|
|
|
|
|
|
# | dbrow _fullname =~ /John/ |
445
|
|
|
|
|
|
|
# | dbcol homedir |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
(Notice that comments are appended to the output listing each command, |
448
|
|
|
|
|
|
|
providing an automatic audit log.) |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
In addition to typical database functions (select, join, etc.) there |
451
|
|
|
|
|
|
|
are also a number of statistical functions. |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
The real power of Fsdb is that one can apply arbitrary code to rows |
454
|
|
|
|
|
|
|
to do powerful things. |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
cat DATA/passwd | dbroweval '_fullname =~ s/(\w+)_(\w+)/$2,_$1/' |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
converts "John_Heidemann" into "Heidemann,_John". |
459
|
|
|
|
|
|
|
Not too much more work could split fullname into firstname and lastname |
460
|
|
|
|
|
|
|
fields. |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=head1 TALKING ABOUT COLUMNS |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
An advantage of Fsdb is that you can talk about columns by name |
466
|
|
|
|
|
|
|
(symbolically) rather than simply by their positions. So in the above |
467
|
|
|
|
|
|
|
example, C<dbcol homedir> pulled out the home directory column, and |
468
|
|
|
|
|
|
|
C<dbrow '_fullname =~ /John/'> matched against column fullname. |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
In general, you can use the name of the column listed on the C<#fsdb> line |
471
|
|
|
|
|
|
|
to identify it in most programs, and _name to identify it in code. |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
Some alternatives for flexibility: |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
=over 4 |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=item * |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
Numeric values identify columns positionally, numbering from 0. |
480
|
|
|
|
|
|
|
So 0 or _0 is the first column, 1 is the second, etc. |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=item * |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
In code, _last_columnname gets the value from columnname's previous row. |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
=back |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
See L for more details about writing code. |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=head1 LIST OF COMMANDS |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
Enough said. I'll summarize the commands, and then you can |
495
|
|
|
|
|
|
|
experiment. For a detailed description of each command, see a summary |
496
|
|
|
|
|
|
|
by running it with the argument C<--help> (or C<-?> if you prefer.) |
497
|
|
|
|
|
|
|
Full manual pages can be found by running the command |
498
|
|
|
|
|
|
|
with the argument C<--man>, or running the Unix command C<man dbcol> |
499
|
|
|
|
|
|
|
or whatever program you want. |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=head2 TABLE CREATION |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=over 4 |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=item dbcolcreate |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
add columns to a database |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
=item dbcoldefine |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
set the column headings for a non-Fsdb file |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
=back |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
=head2 TABLE MANIPULATION |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=over 4 |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
=item dbcol |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
select columns from a table |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
=item dbrow |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
select rows from a table |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
=item dbsort |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
sort rows based on a set of columns |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
=item dbjoin |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
compute the natural join of two tables |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=item dbcolrename |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
rename a column |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
=item dbcolmerge |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
merge two columns into one |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
=item dbcolsplittocols |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
split one column into two or more columns |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=item dbcolsplittorows |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
split one column into multiple rows |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
=item dbfilepivot |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
"pivots" a file, converting multiple rows |
554
|
|
|
|
|
|
|
corresponding to the same entity into a single row with multiple columns. |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=item dbfilevalidate |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
check that db file doesn't have some common errors |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
=back |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
=head2 COMPUTATION AND STATISTICS |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
=over 4 |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
=item dbcolstats |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
compute statistics over a column (mean, etc., optionally median) |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
=item dbmultistats |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
group rows by some key value, then compute stats (mean, etc.) over each group |
573
|
|
|
|
|
|
|
(equivalent to dbmapreduce with dbcolstats as the reducer) |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
=item dbmapreduce |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
group rows (map) and then apply an arbitrary function to each group (reduce) |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
=item dbrvstatdiff |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
compare two sample distributions (mean/conf interval/T-test) |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
=item dbcolmovingstats |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
compute moving statistics over a column of data |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
=item dbcolstatscores |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
compute Z-scores and T-scores over one column of data |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
=item dbcolpercentile |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
compute the rank or percentile of a column |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
=item dbcolhisto |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
compute histograms over a column of data |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
=item dbcolscorrelate |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
compute the coefficient of correlation over several columns |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=item dbcolsregression |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
compute linear regression and correlation for two columns |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
=item dbrowaccumulate |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
compute a running sum over a column of data |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
=item dbrowcount |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
count the number of rows (a subset of dbcolstats) |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
=item dbrowdiff |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
compute differences in a column between rows of a table |
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
=item dbrowenumerate |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
number each row |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
=item dbroweval |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
run arbitrary Perl code on each row |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
=item dbrowuniq |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
count/eliminate identical rows (like Unix uniq(1)) |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
=item dbfilediff |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
compare fields on rows of a file (something like Unix diff(1)) |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
=back |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
=head2 OUTPUT CONTROL |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
=over 4 |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
=item dbcolneaten |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
pretty-print columns |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
=item dbfilealter |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
convert between column or list format, or change the column separator |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
=item dbfilestripcomments |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
remove comments from a table |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
=item dbformmail |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
generate a script that sends form mail based on each row |
656
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
=back |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
=head2 CONVERSIONS |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
(These programs convert data into fsdb. See their web pages for details.) |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
=over 4 |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
=item cgi_to_db |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
L |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=item combined_log_format_to_db |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
L |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
=item html_table_to_db |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
HTML tables to fsdb (assuming they're reasonably formatted). |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=item kitrace_to_db |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
L |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
=item ns_to_db |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
L |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
=item sqlselect_to_db |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
the output of SQL SELECT tables to db |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
=item tabdelim_to_db |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
spreadsheet tab-delimited files to db |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
=item tcpdump_to_db |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
(see man tcpdump(8) on any reasonable system) |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
=item xml_to_db |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
XML input to fsdb, assuming it is very regular |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
=back |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
(And out of fsdb:) |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
=over 4 |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
=item db_to_csv |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
Comma-separated-value format from fsdb. |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
=item db_to_html_table |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
simple conversion of Fsdb to html tables |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
=back |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
=head2 STANDARD OPTIONS |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
Many programs have common options: |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
=over 4 |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
=item B<-?> or B<--help> |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
Show basic usage. |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
=item B<-N> or B<--new-name> |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
When a command creates a new column like L's C, |
731
|
|
|
|
|
|
|
this option lets one override the default name of that new column. |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
=item B<-T TmpDir> |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
where to put tmp files. |
736
|
|
|
|
|
|
|
Also uses environment variable TMPDIR, if -T is |
737
|
|
|
|
|
|
|
not specified. |
738
|
|
|
|
|
|
|
Default is /tmp. |
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
Show basic usage. |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
=item B<-c FRACTION> or B<--confidence FRACTION> |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
Specify confidence interval FRACTION (L, L, etc.) |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
=item B<-C S> or B<--element-separator S> |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
Specify column separator S (L, L). |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
=item B<-d> or B<--debug> |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
Enable debugging (may be repeated for greater effect in some cases). |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
=item B<-a> or B<--include-non-numeric> |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
Compute stats over all data (treating non-numbers as zeros). |
757
|
|
|
|
|
|
|
(By default, things that can't be treated as numbers |
758
|
|
|
|
|
|
|
are ignored for stats purposes) |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
=item B<-S> or B<--pre-sorted> |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
Assume the data is pre-sorted. |
763
|
|
|
|
|
|
|
May be repeated to disable verification (saving a small amount of work). |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
=item B<-e E> or B<--empty E> |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
give value E as the value for empty (null) records |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
=item B<-i I> or B<--input I> |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
Input data from file I. |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
=item B<-o O> or B<--output O> |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
Write data out to file O. |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
=item B<--header> H |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
Use H as the full Fsdb header, rather than reading a header from |
780
|
|
|
|
|
|
|
the input. This option is particularly useful when using Fsdb |
781
|
|
|
|
|
|
|
under Hadoop, where split files don't have headers. |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
=item B<--nolog> |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
Skip logging the program in a trailing comment. |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
=back |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
When giving Perl code (in L<dbrow> and L<dbroweval>) |
790
|
|
|
|
|
|
|
column names can be embedded if preceded by underscores. |
791
|
|
|
|
|
|
|
(Look at L<dbrow> or L<dbroweval> for examples.) |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
Most programs run in constant memory and use temporary files if necessary. |
794
|
|
|
|
|
|
|
Exceptions are L, L, L, |
795
|
|
|
|
|
|
|
L, L. |
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=head1 ANOTHER EXAMPLE |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
Take the raw data in C<DATA/http_bandwidth>, |
801
|
|
|
|
|
|
|
put a header on it (C<dbcoldefine size bw>), |
802
|
|
|
|
|
|
|
take statistics of each category (C<dbmultistats -k size bw>), |
803
|
|
|
|
|
|
|
pick out the relevant fields (C<dbcol size mean stddev pct_rsd>), and you get: |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
#fsdb size mean stddev pct_rsd |
806
|
|
|
|
|
|
|
1024 1.4962e+06 2.8497e+05 19.047 |
807
|
|
|
|
|
|
|
10240 5.0286e+06 6.0103e+05 11.952 |
808
|
|
|
|
|
|
|
102400 4.9216e+06 3.0939e+05 6.2863 |
809
|
|
|
|
|
|
|
# | dbcoldefine size bw |
810
|
|
|
|
|
|
|
# | /home/johnh/BIN/DB/dbmultistats -k size bw |
811
|
|
|
|
|
|
|
# | /home/johnh/BIN/DB/dbcol size mean stddev pct_rsd |
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
(The whole command was: |
814
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
cat DATA/http_bandwidth | |
816
|
|
|
|
|
|
|
dbcoldefine size bw | |
817
|
|
|
|
|
|
|
dbmultistats -k size bw | |
818
|
|
|
|
|
|
|
dbcol size mean stddev pct_rsd |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
all on one line.) |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
Then post-process them to get rid of the exponential notation |
823
|
|
|
|
|
|
|
by adding this to the end of the pipeline: |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
dbroweval '_mean = sprintf("%8.0f", _mean); _stddev = sprintf("%8.0f", _stddev);' |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
(Actually, this step is no longer required since L |
828
|
|
|
|
|
|
|
now uses a different default format.) |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
giving: |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
#fsdb size mean stddev pct_rsd |
833
|
|
|
|
|
|
|
1024 1496200 284970 19.047 |
834
|
|
|
|
|
|
|
10240 5028600 601030 11.952 |
835
|
|
|
|
|
|
|
102400 4921600 309390 6.2863 |
836
|
|
|
|
|
|
|
# | dbcoldefine size bw |
837
|
|
|
|
|
|
|
# | dbmultistats -k size bw |
838
|
|
|
|
|
|
|
# | dbcol size mean stddev pct_rsd |
839
|
|
|
|
|
|
|
# | dbroweval { _mean = sprintf("%8.0f", _mean); _stddev = sprintf("%8.0f", _stddev); } |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
In a few lines, raw data is transformed to processed output. |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
Suppose you expect there is an odd distribution of results of one |
845
|
|
|
|
|
|
|
datapoint. Fsdb can easily produce a CDF (cumulative distribution |
846
|
|
|
|
|
|
|
function) of the data, suitable for graphing: |
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
cat DB/DATA/http_bandwidth | \ |
849
|
|
|
|
|
|
|
dbcoldefine size bw | \ |
850
|
|
|
|
|
|
|
dbrow '_size == 102400' | \ |
851
|
|
|
|
|
|
|
dbcol bw | \ |
852
|
|
|
|
|
|
|
dbsort -n bw | \ |
853
|
|
|
|
|
|
|
dbrowenumerate | \ |
854
|
|
|
|
|
|
|
dbcolpercentile count | \ |
855
|
|
|
|
|
|
|
dbcol bw percentile | \ |
856
|
|
|
|
|
|
|
xgraph |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
The steps, roughly: |
859
|
|
|
|
|
|
|
1. get the raw input data and turn it into fsdb format, |
860
|
|
|
|
|
|
|
2. pick out just the relevant column (for efficiency) and sort it, |
861
|
|
|
|
|
|
|
3. for each data point, assign a CDF percentage to it, |
862
|
|
|
|
|
|
|
4. pick out the two columns to graph and show them |
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
=head1 A GRADEBOOK EXAMPLE |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
The first commercial program I wrote was a gradebook, |
868
|
|
|
|
|
|
|
so here's how to do it with Fsdb. |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
Format your data like DATA/grades. |
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
#fsdb name email id test1 |
873
|
|
|
|
|
|
|
a a@ucla.example.edu 1 80 |
874
|
|
|
|
|
|
|
b b@usc.example.edu 2 70 |
875
|
|
|
|
|
|
|
c c@isi.example.edu 3 65 |
876
|
|
|
|
|
|
|
d d@lmu.example.edu 4 90 |
877
|
|
|
|
|
|
|
e e@caltech.example.edu 5 70 |
878
|
|
|
|
|
|
|
f f@oxy.example.edu 6 90 |
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
Or if your students have spaces in their names, use C<-F S> and two spaces |
881
|
|
|
|
|
|
|
to separate each column: |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
#fsdb -F S name email id test1 |
884
|
|
|
|
|
|
|
alfred aho a@ucla.example.edu 1 80 |
885
|
|
|
|
|
|
|
butler lampson b@usc.example.edu 2 70 |
886
|
|
|
|
|
|
|
david clark c@isi.example.edu 3 65 |
887
|
|
|
|
|
|
|
constantine drovolis d@lmu.example.edu 4 90 |
888
|
|
|
|
|
|
|
deborah estrin e@caltech.example.edu 5 70 |
889
|
|
|
|
|
|
|
sally floyd f@oxy.example.edu 6 90 |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
To compute statistics on an exam, do |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
cat DATA/grades | dbstats test1 |dblistize |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
giving |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
#fsdb -R C ... |
898
|
|
|
|
|
|
|
mean: 77.5 |
899
|
|
|
|
|
|
|
stddev: 10.84 |
900
|
|
|
|
|
|
|
pct_rsd: 13.987 |
901
|
|
|
|
|
|
|
conf_range: 11.377 |
902
|
|
|
|
|
|
|
conf_low: 66.123 |
903
|
|
|
|
|
|
|
conf_high: 88.877 |
904
|
|
|
|
|
|
|
conf_pct: 0.95 |
905
|
|
|
|
|
|
|
sum: 465 |
906
|
|
|
|
|
|
|
sum_squared: 36625 |
907
|
|
|
|
|
|
|
min: 65 |
908
|
|
|
|
|
|
|
max: 90 |
909
|
|
|
|
|
|
|
n: 6 |
910
|
|
|
|
|
|
|
... |
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
To do a histogram: |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
cat DATA/grades | dbcolhisto -n 5 -g test1 |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
giving |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
#fsdb low histogram |
919
|
|
|
|
|
|
|
65 * |
920
|
|
|
|
|
|
|
70 ** |
921
|
|
|
|
|
|
|
75 |
922
|
|
|
|
|
|
|
80 * |
923
|
|
|
|
|
|
|
85 |
924
|
|
|
|
|
|
|
90 ** |
925
|
|
|
|
|
|
|
# | /home/johnh/BIN/DB/dbhistogram -n 5 -g test1 |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
Now you want to send out grades to the students by e-mail. |
928
|
|
|
|
|
|
|
Create a form-letter (in the file F<test1.txt>): |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
To: _email (_name) |
931
|
|
|
|
|
|
|
From: J. Random Professor |
932
|
|
|
|
|
|
|
Subject: test1 scores |
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
_name, your score on test1 was _test1. |
935
|
|
|
|
|
|
|
86+ A |
936
|
|
|
|
|
|
|
75-85 B |
937
|
|
|
|
|
|
|
70-74 C |
938
|
|
|
|
|
|
|
0-69 F |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
Generate the shell script that will send the mail out: |
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
cat DATA/grades | dbformmail test1.txt > test1.sh |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
And run it: |
945
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
sh <test1.sh |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
The last two steps can be combined: |
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
cat DATA/grades | dbformmail test1.txt | sh |
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
but I like to keep a copy of exactly what I send. |
953
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
At the end of the semester you'll want to compute grade totals and |
956
|
|
|
|
|
|
|
assign letter grades. Both fall out of dbroweval. |
957
|
|
|
|
|
|
|
For example, to compute weighted total grades with a 40% midterm/60% |
958
|
|
|
|
|
|
|
final where the midterm is 84 possible points and the final 100: |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
dbcol -rv total | |
961
|
|
|
|
|
|
|
dbcolcreate total - | |
962
|
|
|
|
|
|
|
dbroweval ' |
963
|
|
|
|
|
|
|
_total = .40 * _midterm/84.0 + .60 * _final/100.0; |
964
|
|
|
|
|
|
|
_total = sprintf("%4.2f", _total); |
965
|
|
|
|
|
|
|
if (_final eq "-" || ( _name =~ /^_/)) { _total = "-"; };' | |
966
|
|
|
|
|
|
|
dbcolneaten |
967
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
If you got the data originally from a spreadsheet, save it in |
970
|
|
|
|
|
|
|
"tab-delimited" format and convert it with tabdelim_to_db |
971
|
|
|
|
|
|
|
(run tabdelim_to_db -? for examples). |
972
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
=head1 A PASSWORD EXAMPLE |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
To convert the Unix password file to db: |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
cat /etc/passwd | sed 's/:/ /g'| \ |
979
|
|
|
|
|
|
|
dbcoldefine -F S login password uid gid gecos home shell \ |
980
|
|
|
|
|
|
|
>passwd.fsdb |
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
To convert the group file |
983
|
|
|
|
|
|
|
|
984
|
|
|
|
|
|
|
cat /etc/group | sed 's/:/ /g' | \ |
985
|
|
|
|
|
|
|
dbcoldefine -F S group password gid members \ |
986
|
|
|
|
|
|
|
>group.fsdb |
987
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
To show the names of the groups that div7-members are in |
989
|
|
|
|
|
|
|
(assuming DIV7 is in the gecos field): |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
cat passwd.fsdb | dbrow '_gecos =~ /DIV7/' | dbcol login gid | \ |
992
|
|
|
|
|
|
|
dbjoin -i - -i group.fsdb gid | dbcol login group |
993
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
|
995
|
|
|
|
|
|
|
=head1 SHORT EXAMPLES |
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
Which Fsdb programs are the most complicated (based on number of test cases)? |
998
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
ls TEST/*.cmd | \ |
1000
|
|
|
|
|
|
|
dbcoldefine test | \ |
1001
|
|
|
|
|
|
|
dbroweval '_test =~ s@^TEST/([^_]+).*$@$1@' | \ |
1002
|
|
|
|
|
|
|
dbrowuniq -c | \ |
1003
|
|
|
|
|
|
|
dbsort -nr count | \ |
1004
|
|
|
|
|
|
|
dbcolneaten |
1005
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
(Answer: L, then L, L and L.) |
1007
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
Stats on an exam (in C<$FILE>, where C<$COLUMN> is the name of the exam)? |
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
cat $FILE | dbcolstats -q 4 $COLUMN | dblistize | dbstripcomments |
1012
|
|
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
cat $FILE | dbcolhisto -g -n 20 $COLUMN | dbcolneaten | dbstripcomments |
1014
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
Merging the hw1 column from file hw1.fsdb into grades.fsdb assuming |
1017
|
|
|
|
|
|
|
there's a common student id in column "id": |
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
dbcol id hw1 <hw1.fsdb >t.fsdb |
1020
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
dbjoin -a -e - grades.fsdb t.fsdb id | \ |
1022
|
|
|
|
|
|
|
dbsort name | \ |
1023
|
|
|
|
|
|
|
dbcolneaten >new_grades.fsdb |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
Merging two fsdb files with the same rows: |
1027
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
cat file1.fsdb file2.fsdb >output.fsdb |
1029
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
or if you want to clean things up a bit |
1031
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
cat file1.fsdb file2.fsdb | dbstripextraheaders >output.fsdb |
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
or if you want to know where the data came from |
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
for i in 1 2 |
1037
|
|
|
|
|
|
|
do |
1038
|
|
|
|
|
|
|
dbcolcreate source $i < file$i.fsdb |
1039
|
|
|
|
|
|
|
done >output.fsdb |
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
(assumes you're using a Bourne-shell compatible shell, not csh). |
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
=head1 WARNINGS |
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
As with any tool, one should (which means I<you>) understand |
1047
|
|
|
|
|
|
|
the limits of the tool. |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
All Fsdb tools should run in I<constant memory>. |
1050
|
|
|
|
|
|
|
In some cases (such as F<dbcolstats> with quartiles, where the whole input |
1051
|
|
|
|
|
|
|
must be re-read), programs will spool data to disk if necessary. |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
Most tools buffer one or a few lines of data, so memory |
1054
|
|
|
|
|
|
|
will scale with the size of each line. |
1055
|
|
|
|
|
|
|
(So lines with many columns, or when columns have lots of data, |
1056
|
|
|
|
|
|
|
may cause large memory consumption.) |
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
All Fsdb tools should run in constant or at worst C<O(n log n)> time. |
1059
|
|
|
|
|
|
|
|
1060
|
|
|
|
|
|
|
All Fsdb tools use normal Perl math routines for computation. |
1061
|
|
|
|
|
|
|
Although I make every attempt to choose numerically stable algorithms |
1062
|
|
|
|
|
|
|
(although I also welcome feedback and suggestions for improvement), |
1063
|
|
|
|
|
|
|
normal rounding due to computer floating point approximations |
1064
|
|
|
|
|
|
|
can result in inaccuracies when data spans a large range of precision. |
1065
|
|
|
|
|
|
|
(See for example the F test cases.) |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
Any requirements and limitations of each Fsdb tool |
1068
|
|
|
|
|
|
|
are documented on its manual page. |
1069
|
|
|
|
|
|
|
|
1070
|
|
|
|
|
|
|
If any Fsdb program violates these assumptions, |
1071
|
|
|
|
|
|
|
that is a bug that should be documented |
1072
|
|
|
|
|
|
|
on the tool's manual page or ideally fixed. |
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
Fsdb does depend on Perl's correctness, and Perl (and Fsdb) have |
1075
|
|
|
|
|
|
|
some bugs. Fsdb should work on perl from version 5.10 onward. |
1076
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
=head1 HISTORY |
1079
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
There have been three versions of Fsdb; |
1081
|
|
|
|
|
|
|
fsdb 1.0 is a complete re-write of the pre-1995 versions, |
1082
|
|
|
|
|
|
|
and was |
1083
|
|
|
|
|
|
|
distributed from 1995 to 2007. |
1084
|
|
|
|
|
|
|
Fsdb 2.0 is a significant re-write of the 1.x versions |
1085
|
|
|
|
|
|
|
for reasons described below. |
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
Fsdb (in its various forms) has been used extensively by its author |
1088
|
|
|
|
|
|
|
since 1991. Since 1995 it's been used by two other researchers at |
1089
|
|
|
|
|
|
|
UCLA and several at ISI. In February 1998 it was announced to the |
1090
|
|
|
|
|
|
|
Internet. Since then it has found a few users, some outside where I |
1091
|
|
|
|
|
|
|
work. |
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
=head2 Fsdb 2.0 Rationale |
1094
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
I've thought about fsdb-2.0 for many years, but it was started |
1096
|
|
|
|
|
|
|
in earnest in 2007. Fsdb-2.0 has the following goals: |
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
=over 4 |
1099
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
=item in-one-process processing |
1101
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
While fsdb is great on the Unix command line as a pipeline between |
1103
|
|
|
|
|
|
|
programs, it should I<also> be possible to set it up to run in a single |
1104
|
|
|
|
|
|
|
process. And if it does so, it should be able to avoid serializing |
1105
|
|
|
|
|
|
|
and deserializing (converting to and from text) data between each module. |
1106
|
|
|
|
|
|
|
(Accomplished in fsdb-2.0: see L, although still needs tuning.) |
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
=item clean IO API |
1109
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
Fsdb's roots go back to perl4 and 1991, so the fsdb-1.x library is |
1111
|
|
|
|
|
|
|
very, very crufty. More than just being ugly (but it was that too), |
1112
|
|
|
|
|
|
|
this made things like reading from one format file and writing to another |
1113
|
|
|
|
|
|
|
the application's job, when it should be the library's. |
1114
|
|
|
|
|
|
|
(Accomplished in fsdb-1.15 and improved in 2.0: see L.) |
1115
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
=item normalized module APIs |
1117
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
Because fsdb modules were added as needed over 10 years, |
1119
|
|
|
|
|
|
|
sometimes the module APIs became inconsistent. |
1120
|
|
|
|
|
|
|
(For example, the 1.x C<dbcolcreate> required an empty |
1121
|
|
|
|
|
|
|
value following the name of the new column, |
1122
|
|
|
|
|
|
|
but other programs specify empty values with the C<-e> argument.) |
1123
|
|
|
|
|
|
|
We should smooth over these inconsistencies. |
1124
|
|
|
|
|
|
|
(Accomplished as each module was ported in 2.0 through 2.7.) |
1125
|
|
|
|
|
|
|
|
1126
|
|
|
|
|
|
|
=item everyone handles all input formats |
1127
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
Given a clean IO API, the distinction between "colized" |
1129
|
|
|
|
|
|
|
and "listized" fsdb files should go away. Any program |
1130
|
|
|
|
|
|
|
should be able to read and write files in any format. |
1131
|
|
|
|
|
|
|
(Accomplished in fsdb-2.1.) |
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
=back |
1134
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
Fsdb-2.0 preserves backwards compatibility where possible, |
1136
|
|
|
|
|
|
|
but breaks it where necessary to accomplish the above goals. |
1137
|
|
|
|
|
|
|
In August 2008, Fsdb-2.7 was declared preferred over the 1.x versions. |
1138
|
|
|
|
|
|
|
Benchmarking in 2013 showed that threading performed much worse than |
1139
|
|
|
|
|
|
|
just using pipes, so Fsdb-2.44 uses threading "style", |
1140
|
|
|
|
|
|
|
but implemented with processes (via my "Freds" library). |
1141
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
=head2 Contributors |
1143
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
Fsdb includes code ported from Geoff Kuenning (C). |
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
Fsdb contributors: |
1147
|
|
|
|
|
|
|
Ashvin Goel F, |
1148
|
|
|
|
|
|
|
Geoff Kuenning F, |
1149
|
|
|
|
|
|
|
Vikram Visweswariah F, |
1150
|
|
|
|
|
|
|
Kannan Varadahan F, |
1151
|
|
|
|
|
|
|
Lars Eggert F, |
1152
|
|
|
|
|
|
|
Arkadi Gelfond F, |
1153
|
|
|
|
|
|
|
David Graff F, |
1154
|
|
|
|
|
|
|
Haobo Yu F, |
1155
|
|
|
|
|
|
|
Pavlin Radoslavov F, |
1156
|
|
|
|
|
|
|
Graham Phillips, |
1157
|
|
|
|
|
|
|
Yuri Pradkin, |
1158
|
|
|
|
|
|
|
Alefiya Hussain, |
1159
|
|
|
|
|
|
|
Ya Xu, |
1160
|
|
|
|
|
|
|
Michael Schwendt, |
1161
|
|
|
|
|
|
|
Fabio Silva F, |
1162
|
|
|
|
|
|
|
Jerry Zhao F, |
1163
|
|
|
|
|
|
|
Ning Xu F, |
1164
|
|
|
|
|
|
|
Martin Lukac F, |
1165
|
|
|
|
|
|
|
Xue Cai, |
1166
|
|
|
|
|
|
|
Michael McQuaid, |
1167
|
|
|
|
|
|
|
Christopher Meng, |
1168
|
|
|
|
|
|
|
Calvin Ardi, |
1169
|
|
|
|
|
|
|
H. Merijn Brand, |
1170
|
|
|
|
|
|
|
Lan Wei. |
1171
|
|
|
|
|
|
|
|
1172
|
|
|
|
|
|
|
Fsdb includes datasets contributed from NIST (F), |
1173
|
|
|
|
|
|
|
from |
1174
|
|
|
|
|
|
|
L, |
1175
|
|
|
|
|
|
|
the NIST/SEMATECH e-Handbook of Statistical Methods, section |
1176
|
|
|
|
|
|
|
1.4.2.8.1. Background and Data. The source is public domain, and |
1177
|
|
|
|
|
|
|
reproduced with permission. |
1178
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
=head1 RELATED WORK |
1183
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
As stated in the introduction, Fsdb is an incompatible reimplementation |
1185
|
|
|
|
|
|
|
of the ideas found in C</rdb>. By storing data in simple text files and |
1186
|
|
|
|
|
|
|
processing it with pipelines it is easy to experiment (in the shell) |
1187
|
|
|
|
|
|
|
and look at the output. The original implementation of this idea was |
1188
|
|
|
|
|
|
|
/rdb, a commercial product described in the book I<UNIX relational |
1189
|
|
|
|
|
|
|
database management: application development in the UNIX environment> |
1190
|
|
|
|
|
|
|
by Rod Manis, Evan Schaffer, and Robert Jorgensen (and also at the web |
1191
|
|
|
|
|
|
|
page L). |
1192
|
|
|
|
|
|
|
|
1193
|
|
|
|
|
|
|
While Fsdb is inspired by Rdb, it includes no code from it, |
1194
|
|
|
|
|
|
|
and Fsdb makes several different design choices. |
1195
|
|
|
|
|
|
|
In particular: rdb attempts to be closer to a "real" database, |
1196
|
|
|
|
|
|
|
with provision for locking and file indexing. |
1197
|
|
|
|
|
|
|
Fsdb focuses on single user use and so eschews these choices. |
1198
|
|
|
|
|
|
|
Rdb also has some support for interactive editing. |
1199
|
|
|
|
|
|
|
Fsdb leaves editing to text editors like emacs or vi. |
1200
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
In August, 2002 I found out Carlo Strozzi extended RDB with his |
1202
|
|
|
|
|
|
|
package NoSQL L. According to |
1203
|
|
|
|
|
|
|
Mr. Strozzi, he implemented NoSQL in awk to avoid the Perl start-up of |
1204
|
|
|
|
|
|
|
RDB. Although I haven't found Perl startup overhead to be a big |
1205
|
|
|
|
|
|
|
problem on my platforms (from old Sparcstation IPCs to 2GHz |
1206
|
|
|
|
|
|
|
Pentium-4s), you may want to evaluate his system. |
1207
|
|
|
|
|
|
|
The Linux Journal has a description of NoSQL |
1208
|
|
|
|
|
|
|
at L. |
1209
|
|
|
|
|
|
|
It seems quite similar to Fsdb. |
1210
|
|
|
|
|
|
|
Like /rdb, NoSQL supports indexing (not present in Fsdb). |
1211
|
|
|
|
|
|
|
Fsdb appears to have richer support for statistics, |
1212
|
|
|
|
|
|
|
and, as of Fsdb-2.x, its support for Perl threading may support |
1213
|
|
|
|
|
|
|
faster performance (one-process, less serialization and deserialization). |
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
=head1 RELEASE NOTES |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
Versions prior to 1.0 were released informally on my web page |
1219
|
|
|
|
|
|
|
but were not announced. |
1220
|
|
|
|
|
|
|
|
1221
|
|
|
|
|
|
|
=head2 0.0 1991 |
1222
|
|
|
|
|
|
|
|
1223
|
|
|
|
|
|
|
started for my own research use |
1224
|
|
|
|
|
|
|
|
1225
|
|
|
|
|
|
|
=head2 0.1 26-May-94 |
1226
|
|
|
|
|
|
|
|
1227
|
|
|
|
|
|
|
first check-in to RCS |
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
=head2 0.2 15-Mar-95 |
1230
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
parts now require perl5 |
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
=head2 1.0, 22-Jul-97 |
1234
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
adds autoconf support and a test script. |
1236
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
=head2 1.1, 20-Jan-98 |
1238
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
support for double space field separators, better tests |
1240
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
=head2 1.2, 11-Feb-98 |
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
minor changes and release on comp.lang.perl.announce |
1244
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
=head2 1.3, 17-Mar-98 |
1246
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
=over 4 |
1248
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
=item * |
1250
|
|
|
|
|
|
|
adds median and quartile options to dbstats |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
|
1253
|
|
|
|
|
|
|
=item * |
1254
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
adds dmalloc_to_db converter |
1256
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
=item * |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
fixes some warnings |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
=item * |
1264
|
|
|
|
|
|
|
|
1265
|
|
|
|
|
|
|
dbjoin now can run on unsorted input |
1266
|
|
|
|
|
|
|
|
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
=item * |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
fixes a dbjoin bug |
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
=item * |
1274
|
|
|
|
|
|
|
|
1275
|
|
|
|
|
|
|
some more tests in the test suite |
1276
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
=back |
1278
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
=head2 1.4, 27-Mar-98 |
1280
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
=over 4 |
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
=item * |
1284
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
improves error messages (all should now report the program that makes the error) |
1286
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
=item * |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
fixed a bug in dbstats output when the mean is zero |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
=back |
1292
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
=head2 1.5, 25-Jun-98 |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
=over 4 |
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
=item BUG FIX |
1298
|
|
|
|
|
|
|
dbcolhisto, dbcolpercentile now handle non-numeric values like dbstats |
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
=item NEW |
1301
|
|
|
|
|
|
|
dbcolstats computes zscores and tscores over a column |
1302
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
=item NEW |
1304
|
|
|
|
|
|
|
dbcolscorrelate computes correlation coefficients between two columns |
1305
|
|
|
|
|
|
|
|
1306
|
|
|
|
|
|
|
=item INTERNAL |
1307
|
|
|
|
|
|
|
ficus_getopt.pl has been replaced by DbGetopt.pm |
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
=item BUG FIX |
1310
|
|
|
|
|
|
|
all tests are now ``portable'' (previously some tests ran only on my system) |
1311
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
=item BUG FIX |
1313
|
|
|
|
|
|
|
you no longer need to have the db programs in your path (fix arose from a discussion with Arkadi Gelfond) |
1314
|
|
|
|
|
|
|
|
1315
|
|
|
|
|
|
|
=item BUG FIX |
1316
|
|
|
|
|
|
|
installation no longer uses cp -f (to work on SunOS 4) |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
=back |
1319
|
|
|
|
|
|
|
|
1320
|
|
|
|
|
|
|
=head2 1.6, 24-May-99 |
1321
|
|
|
|
|
|
|
|
1322
|
|
|
|
|
|
|
=over 4 |
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
=item NEW |
1325
|
|
|
|
|
|
|
dbsort, dbstats, dbmultistats now run in constant memory (using tmp files if necessary) |
1326
|
|
|
|
|
|
|
|
1327
|
|
|
|
|
|
|
=item NEW |
1328
|
|
|
|
|
|
|
dbcolmovingstats does moving means over a series of data |
1329
|
|
|
|
|
|
|
|
1330
|
|
|
|
|
|
|
=item NEW |
1331
|
|
|
|
|
|
|
dbcol has a -v option to get all columns except those listed |
1332
|
|
|
|
|
|
|
|
1333
|
|
|
|
|
|
|
=item NEW |
1334
|
|
|
|
|
|
|
dbmultistats does quartiles and medians |
1335
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
=item NEW |
1337
|
|
|
|
|
|
|
dbstripextraheaders now also cleans up bogus comments before the first header |
1338
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
=item BUG FIX |
1340
|
|
|
|
|
|
|
dbcolneaten works better with double-space-separated data |
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
=back |
1343
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
=head2 1.7, 5-Jan-00 |
1345
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
=over 4 |
1347
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
=item NEW |
1349
|
|
|
|
|
|
|
dbcolize now detects and rejects lines that contain embedded copies of the field separator |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
=item NEW |
1352
|
|
|
|
|
|
|
configure tries harder to prevent people from improperly configuring/installing fsdb |
1353
|
|
|
|
|
|
|
|
1354
|
|
|
|
|
|
|
=item NEW |
1355
|
|
|
|
|
|
|
tcpdump_to_db converter (incomplete) |
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
=item NEW |
1358
|
|
|
|
|
|
|
tabdelim_to_db converter: from spreadsheet tab-delimited files to db |
1359
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
=item NEW |
1361
|
|
|
|
|
|
|
mailing lists for fsdb are C and C |
1362
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
To subscribe to either, send mail to C or C with "subscribe" in the BODY of the message. |
1364
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
=item BUG FIX |
1366
|
|
|
|
|
|
|
dbjoin used to produce incorrect output if there were extra, unmatched values in the 2nd table. Thanks to Graham Phillips for providing a test case. |
1367
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
=item BUG FIX |
1369
|
|
|
|
|
|
|
the sample commands in the usage strings now all should explicitly include the source of data (typically from "cat foo.fsdb |"). Thanks to Ya Xu for pointing out this documentation deficiency. |
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
=item BUG FIX (DOCUMENTATION) |
1372
|
|
|
|
|
|
|
dbcolmovingstats had incorrect sample output. |
1373
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
=back |
1375
|
|
|
|
|
|
|
|
1376
|
|
|
|
|
|
|
=head2 1.8, 28-Jun-00 |
1377
|
|
|
|
|
|
|
|
1378
|
|
|
|
|
|
|
=over 4 |
1379
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
=item BUG FIX |
1381
|
|
|
|
|
|
|
header options are now preserved when writing with dblistize |
1382
|
|
|
|
|
|
|
|
1383
|
|
|
|
|
|
|
=item NEW |
1384
|
|
|
|
|
|
|
dbrowuniq now optionally checks for uniqueness only on certain fields |
1385
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
=item NEW |
1387
|
|
|
|
|
|
|
dbrowsplituniq makes one pass through a file and splits it into separate files based on the given fields |
1388
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
=item NEW |
1390
|
|
|
|
|
|
|
converter for "crl" format network traces |
1391
|
|
|
|
|
|
|
|
1392
|
|
|
|
|
|
|
=item NEW |
1393
|
|
|
|
|
|
|
anywhere you use arbitrary code (like dbroweval), _last_foo now maps to the last row's value for field _foo. |
1394
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
=item OPTIMIZATION |
1396
|
|
|
|
|
|
|
comment processing slightly changed so that dbmultistats now is much faster on files with lots of comments (for example, ~100k lines of comments and 700 lines of data!) (Thanks to Graham Phillips for pointing out this performance problem.) |
1397
|
|
|
|
|
|
|
|
1398
|
|
|
|
|
|
|
=item BUG FIX |
1399
|
|
|
|
|
|
|
dbstats with median/quartiles now correctly handles singleton data points. |
1400
|
|
|
|
|
|
|
|
1401
|
|
|
|
|
|
|
=back |
1402
|
|
|
|
|
|
|
|
1403
|
|
|
|
|
|
|
=head2 1.9, 6-Nov-00 |
1404
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
=over 4 |
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
=item NEW |
1408
|
|
|
|
|
|
|
dbfilesplit, split a single input file into multiple output files (based on code contributed by Pavlin Radoslavov). |
1409
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
=item BUG FIX |
1411
|
|
|
|
|
|
|
dbsort now works with perl-5.6 |
1412
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
=back |
1414
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
=head2 1.10, 10-Apr-01 |
1416
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
=over 4 |
1418
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
=item BUG FIX |
1420
|
|
|
|
|
|
|
dbstats now handles the case where there are more n-tiles than data |
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
=item NEW |
1423
|
|
|
|
|
|
|
dbstats now includes a -S option to optimize work on pre-sorted data (inspired by code contributed by Haobo Yu) |
1424
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
=item BUG FIX |
1426
|
|
|
|
|
|
|
dbsort now has a better estimate of memory usage when run on data with very short records (problem detected by Haobo Yu) |
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
=item BUG FIX |
1429
|
|
|
|
|
|
|
cleanup of temporary files is slightly better |
1430
|
|
|
|
|
|
|
|
1431
|
|
|
|
|
|
|
=back |
1432
|
|
|
|
|
|
|
|
1433
|
|
|
|
|
|
|
=head2 1.11, 2-Nov-01 |
1434
|
|
|
|
|
|
|
|
1435
|
|
|
|
|
|
|
=over 4 |
1436
|
|
|
|
|
|
|
|
1437
|
|
|
|
|
|
|
=item BUG FIX |
1438
|
|
|
|
|
|
|
dbcolneaten now runs in constant memory |
1439
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
=item NEW |
1441
|
|
|
|
|
|
|
dbcolneaten now supports "field specifiers" that allow some control over how wide columns should be |
1442
|
|
|
|
|
|
|
|
1443
|
|
|
|
|
|
|
=item OPTIMIZATION |
1444
|
|
|
|
|
|
|
dbsort now tries hard to be filesystem cache-friendly (inspired by "Information and Control in Gray-box Systems" by the Arpaci-Dusseau's at SOSP 2001) |
1445
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
=item INTERNAL |
1447
|
|
|
|
|
|
|
t_distr now ported to perl5 module DbTDistr |
1448
|
|
|
|
|
|
|
|
1449
|
|
|
|
|
|
|
=back |
1450
|
|
|
|
|
|
|
|
1451
|
|
|
|
|
|
|
=head2 1.12, 30-Oct-02 |
1452
|
|
|
|
|
|
|
|
1453
|
|
|
|
|
|
|
=over 4 |
1454
|
|
|
|
|
|
|
|
1455
|
|
|
|
|
|
|
=item BUG FIX |
1456
|
|
|
|
|
|
|
dbmultistats documentation typo fixed |
1457
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
=item NEW |
1459
|
|
|
|
|
|
|
dbcolmultiscale |
1460
|
|
|
|
|
|
|
|
1461
|
|
|
|
|
|
|
=item NEW |
1462
|
|
|
|
|
|
|
dbcol has -r option for "relaxed error checking" |
1463
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
=item NEW |
1465
|
|
|
|
|
|
|
dbcolneaten has new -e option to strip end-of-line spaces |
1466
|
|
|
|
|
|
|
|
1467
|
|
|
|
|
|
|
=item NEW |
1468
|
|
|
|
|
|
|
dbrow finally has a -v option to negate the test |
1469
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
=item BUG FIX |
1471
|
|
|
|
|
|
|
math bug in dbcoldiff fixed by Ashvin Goel (need to check Scheaffer test cases) |
1472
|
|
|
|
|
|
|
|
1473
|
|
|
|
|
|
|
=item BUG FIX |
1474
|
|
|
|
|
|
|
some patches to run with Perl 5.8. Note: some programs (dbcolmultiscale, dbmultistats, dbrowsplituniq) generate warnings like: "Use of uninitialized value in concatenation (.)" or "string at /usr/lib/perl5/5.8.0/FileCache.pm line 98, line 2". Please ignore this until I figure out how to suppress it. (Thanks to Jerry Zhao for noticing perl-5.8 problems.) |
1475
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
=item BUG FIX |
1477
|
|
|
|
|
|
|
fixed an autoconf problem where configure would fail to find a reasonable prefix (thanks to Fabio Silva for reporting the problem) |
1478
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
=item NEW |
1480
|
|
|
|
|
|
|
db_to_html_table: simple conversion to html tables (NO fancy stuff) |
1481
|
|
|
|
|
|
|
|
1482
|
|
|
|
|
|
|
=item NEW |
1483
|
|
|
|
|
|
|
dblib now has a function dblib_text2html() that will do simple conversion of iso-8859-1 to HTML |
1484
|
|
|
|
|
|
|
|
1485
|
|
|
|
|
|
|
=back |
1486
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
|
1488
|
|
|
|
|
|
|
=head2 1.13, 4-Feb-04 |
1489
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
|
1491
|
|
|
|
|
|
|
=over 4 |
1492
|
|
|
|
|
|
|
|
1493
|
|
|
|
|
|
|
=item NEW |
1494
|
|
|
|
|
|
|
fsdb added to the freebsd ports tree L. Maintainer: C |
1495
|
|
|
|
|
|
|
|
1496
|
|
|
|
|
|
|
=item BUG FIX |
1497
|
|
|
|
|
|
|
properly handle trailing spaces when data must be numeric (ex. dbstats with -FS, see test dbstats_trailing_spaces). Fix from Ning Xu C. |
1498
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
=item NEW |
1500
|
|
|
|
|
|
|
dbcolize error message improved (bug report from Terrence Brannon), and list format documented in the README. |
1501
|
|
|
|
|
|
|
|
1502
|
|
|
|
|
|
|
=item NEW |
1503
|
|
|
|
|
|
|
cgi_to_db converts CGI.pm-format storage to fsdb list format |
1504
|
|
|
|
|
|
|
|
1505
|
|
|
|
|
|
|
=item BUG FIX |
1506
|
|
|
|
|
|
|
handle numeric synonyms for column names in dbcol properly |
1507
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
=item ENHANCEMENT |
1509
|
|
|
|
|
|
|
"talking about columns" section added to README. Lack of documentation pointed out by Lars Eggert. |
1510
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
=item CHANGE |
1512
|
|
|
|
|
|
|
dbformmail now defaults to using Mail ("Berkeley Mail") to send mail, rather than sendmail (sendmail is still an option, but mail doesn't require running as root) |
1513
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
=item NEW |
1515
|
|
|
|
|
|
|
on platforms that support it (i.e., with perl 5.8), fsdb works fine with unicode |
1516
|
|
|
|
|
|
|
|
1517
|
|
|
|
|
|
|
=item NEW |
1518
|
|
|
|
|
|
|
dbfilevalidate: check a db file for some common errors |
1519
|
|
|
|
|
|
|
|
1520
|
|
|
|
|
|
|
=back |
1521
|
|
|
|
|
|
|
|
1522
|
|
|
|
|
|
|
|
1523
|
|
|
|
|
|
|
=head2 1.14, 24-Aug-06 |
1524
|
|
|
|
|
|
|
|
1525
|
|
|
|
|
|
|
=over 4 |
1526
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
=item ENHANCEMENT |
1529
|
|
|
|
|
|
|
README cleanup |
1530
|
|
|
|
|
|
|
|
1531
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1532
|
|
|
|
|
|
|
dbcolsplit renamed dbcolsplittocols |
1533
|
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
|
=item NEW |
1535
|
|
|
|
|
|
|
dbcolsplittorows split one column into multiple rows |
1536
|
|
|
|
|
|
|
|
1537
|
|
|
|
|
|
|
=item NEW |
1538
|
|
|
|
|
|
|
dbcolsregression compute linear regression and correlation for two columns |
1539
|
|
|
|
|
|
|
|
1540
|
|
|
|
|
|
|
=item ENHANCEMENT |
1541
|
|
|
|
|
|
|
csv_to_db: better error handling, normalize field names, skip blank lines |
1542
|
|
|
|
|
|
|
|
1543
|
|
|
|
|
|
|
=item ENHANCEMENT |
1544
|
|
|
|
|
|
|
dbjoin now detects (and fails) if non-joined files have duplicate names |
1545
|
|
|
|
|
|
|
|
1546
|
|
|
|
|
|
|
=item BUG FIX |
1547
|
|
|
|
|
|
|
minor bug fixed in calculation of Student t-distributions (doesn't change any test output, but may have caused small errors) |
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
=back |
1550
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
=head2 1.15, 12-Nov-07 |
1552
|
|
|
|
|
|
|
|
1553
|
|
|
|
|
|
|
=over 4 |
1554
|
|
|
|
|
|
|
|
1555
|
|
|
|
|
|
|
=item NEW |
1556
|
|
|
|
|
|
|
fsdb-1.14 added to the MacOS Fink system L. (Thanks to Lars Eggert for maintaining this port.) |
1557
|
|
|
|
|
|
|
|
1558
|
|
|
|
|
|
|
=item NEW |
1559
|
|
|
|
|
|
|
Fsdb::IO::Reader and Fsdb::IO::Writer now provide reasonably clean OO I/O interfaces to Fsdb files. Highly recommended if you use fsdb directly from perl. In the fullness of time I expect to reimplement the entire thing using these APIs to replace the current dblib.pl which is still hobbled by its roots in perl4. |
1560
|
|
|
|
|
|
|
|
1561
|
|
|
|
|
|
|
=item NEW |
1562
|
|
|
|
|
|
|
dbmapreduce now implements a Google-style map/reduce abstraction, generalizing dbmultistats. |
1563
|
|
|
|
|
|
|
|
1564
|
|
|
|
|
|
|
=item ENHANCEMENT |
1565
|
|
|
|
|
|
|
fsdb now uses the Perl build system (Makefile.PL, etc.), instead of autoconf. This change paves the way to better perl-5-style modularization, proper manual pages, input of both listize and colize format for every program, and world peace. |
1566
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
=item ENHANCEMENT |
1568
|
|
|
|
|
|
|
dblib.pl is now moved to Fsdb::Old.pm. |
1569
|
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
|
=item BUG FIX |
1571
|
|
|
|
|
|
|
dbmultistats now propagates its format argument (-f). Bug and fix from Martin Lukac (thanks!). |
1572
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
=item ENHANCEMENT |
1574
|
|
|
|
|
|
|
dbformmail documentation now is clearer that it doesn't send the mail, you have to run the shell script it writes. (Problem observed by Unkyu Park.) |
1575
|
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
|
=item ENHANCEMENT |
1577
|
|
|
|
|
|
|
adapted to autoconf-2.61 (and then these changes were discarded in favor of The Perl Way). |
1578
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
=item BUG FIX |
1580
|
|
|
|
|
|
|
dbmultistats memory usage corrected (O(# tags), not O(1)) |
1581
|
|
|
|
|
|
|
|
1582
|
|
|
|
|
|
|
=item ENHANCEMENT |
1583
|
|
|
|
|
|
|
dbmultistats can now optionally run with pre-grouped input in O(1) memory |
1584
|
|
|
|
|
|
|
|
1585
|
|
|
|
|
|
|
=item ENHANCEMENT |
1586
|
|
|
|
|
|
|
dbroweval -N was finally implemented (eat comments) |
1587
|
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
|
=back |
1589
|
|
|
|
|
|
|
|
1590
|
|
|
|
|
|
|
=head2 2.0, 25-Jan-08 |
1591
|
|
|
|
|
|
|
|
1592
|
|
|
|
|
|
|
2.0, 25-Jan-08 --- a quiet 2.0 release (gearing up towards complete) |
1593
|
|
|
|
|
|
|
|
1594
|
|
|
|
|
|
|
=over 4 |
1595
|
|
|
|
|
|
|
|
1596
|
|
|
|
|
|
|
=item ENHANCEMENT: |
1597
|
|
|
|
|
|
|
shifting old programs to Perl modules, with |
1598
|
|
|
|
|
|
|
the front-end program as just a wrapper. |
1599
|
|
|
|
|
|
|
In the short-term, this change just means programs have real man pages. |
1600
|
|
|
|
|
|
|
In the long-run, it will mean that one can run a pipeline in a single |
1601
|
|
|
|
|
|
|
Perl program. |
1602
|
|
|
|
|
|
|
So far: |
1603
|
|
|
|
|
|
|
L, |
1604
|
|
|
|
|
|
|
L, |
1605
|
|
|
|
|
|
|
the new L. |
1606
|
|
|
|
|
|
|
L |
1607
|
|
|
|
|
|
|
the new L, |
1608
|
|
|
|
|
|
|
the old C (renamed L), |
1609
|
|
|
|
|
|
|
L, |
1610
|
|
|
|
|
|
|
L, |
1611
|
|
|
|
|
|
|
|
1612
|
|
|
|
|
|
|
=item NEW: |
1613
|
|
|
|
|
|
|
L is an internal-only module that lets one |
1614
|
|
|
|
|
|
|
use fsdb commands from within perl (via threads). |
1615
|
|
|
|
|
|
|
|
1616
|
|
|
|
|
|
|
It also provides perl function aliases for the internal modules, |
1617
|
|
|
|
|
|
|
so a string of fsdb commands in perl are nearly as terse as in the |
1618
|
|
|
|
|
|
|
shell: |
1619
|
|
|
|
|
|
|
|
1620
|
|
|
|
|
|
|
use Fsdb::Filter::dbpipeline qw(:all); |
1621
|
|
|
|
|
|
|
dbpipeline( |
1622
|
|
|
|
|
|
|
dbrow(qw(name test1)), |
1623
|
|
|
|
|
|
|
dbroweval('_test1 += 5;') |
1624
|
|
|
|
|
|
|
); |
1625
|
|
|
|
|
|
|
|
1626
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE: |
1627
|
|
|
|
|
|
|
The old L has been renamed L. |
1628
|
|
|
|
|
|
|
The new L does the same thing as the old L. |
1629
|
|
|
|
|
|
|
This incompatibility is unfortunate but normalizes program names. |
1630
|
|
|
|
|
|
|
|
1631
|
|
|
|
|
|
|
=item CHANGE: |
1632
|
|
|
|
|
|
|
The new L program |
1633
|
|
|
|
|
|
|
always outputs C<-> (the default empty value) for |
1634
|
|
|
|
|
|
|
statistics it cannot compute (for example, standard deviation |
1635
|
|
|
|
|
|
|
if there is only one row), |
1636
|
|
|
|
|
|
|
instead of the old mix of C<-> and "na". |
1637
|
|
|
|
|
|
|
|
1638
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE: |
1639
|
|
|
|
|
|
|
The old L program, now called L, |
1640
|
|
|
|
|
|
|
also has different arguments. The C<-t mean,stddev> option is now |
1641
|
|
|
|
|
|
|
C<--tmean mean --tstddev stddev>. See L for details. |
1642
|
|
|
|
|
|
|
|
1643
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE: |
1644
|
|
|
|
|
|
|
L now assumes all new columns get the default |
1645
|
|
|
|
|
|
|
value rather than requiring each column to have an initial constant value. |
1646
|
|
|
|
|
|
|
To change the initial value, use the new C<-e> option. |
1647
|
|
|
|
|
|
|
|
1648
|
|
|
|
|
|
|
=item NEW: |
1649
|
|
|
|
|
|
|
L counts rows, an almost-subset of L's C output |
1650
|
|
|
|
|
|
|
(except without differentiating numeric/non-numeric input), |
1651
|
|
|
|
|
|
|
or the equivalent of C. |
1652
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
=item NEW: |
1654
|
|
|
|
|
|
|
L merges two sorted files. |
1655
|
|
|
|
|
|
|
This functionality was previously embedded in L. |
1656
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE: |
1658
|
|
|
|
|
|
|
L's C<-i> option to include non-matches |
1659
|
|
|
|
|
|
|
is now renamed C<-a>, so as to not conflict with the new |
1660
|
|
|
|
|
|
|
standard option C<-i> for input file. |
1661
|
|
|
|
|
|
|
|
1662
|
|
|
|
|
|
|
=back |
1663
|
|
|
|
|
|
|
|
1664
|
|
|
|
|
|
|
=head2 2.1, 6-Apr-08 |
1665
|
|
|
|
|
|
|
|
1666
|
|
|
|
|
|
|
2.1, 6-Apr-08 --- another alpha 2.0, but now all converted programs understand both listize and colize format |
1667
|
|
|
|
|
|
|
|
1668
|
|
|
|
|
|
|
=over 4 |
1669
|
|
|
|
|
|
|
|
1670
|
|
|
|
|
|
|
=item ENHANCEMENT: |
1671
|
|
|
|
|
|
|
shifting more old programs to Perl modules. |
1672
|
|
|
|
|
|
|
New in 2.1: |
1673
|
|
|
|
|
|
|
L, |
1674
|
|
|
|
|
|
|
L, |
1675
|
|
|
|
|
|
|
L, |
1676
|
|
|
|
|
|
|
L, |
1677
|
|
|
|
|
|
|
L, |
1678
|
|
|
|
|
|
|
L |
1679
|
|
|
|
|
|
|
|
1680
|
|
|
|
|
|
|
=item ENHANCEMENT |
1681
|
|
|
|
|
|
|
L now handles an arbitrary number of input files, |
1682
|
|
|
|
|
|
|
not just exactly two. |
1683
|
|
|
|
|
|
|
|
1684
|
|
|
|
|
|
|
=item NEW |
1685
|
|
|
|
|
|
|
L is an internal routine that handles merging exactly two files. |
1686
|
|
|
|
|
|
|
|
1687
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1688
|
|
|
|
|
|
|
L now specifies inputs like L, |
1689
|
|
|
|
|
|
|
rather than assuming the first two arguments were tables (as in fsdb-1). |
1690
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
The old L argument C<-i> is now C<-a> or C<--type=outer>. |
1692
|
|
|
|
|
|
|
|
1693
|
|
|
|
|
|
|
A minor change: comments in the source files for |
1694
|
|
|
|
|
|
|
L are now intermixed with output |
1695
|
|
|
|
|
|
|
rather than being delayed until the end. |
1696
|
|
|
|
|
|
|
|
1697
|
|
|
|
|
|
|
=item ENHANCEMENT |
1698
|
|
|
|
|
|
|
L now no longer produces warnings when null values are |
1699
|
|
|
|
|
|
|
passed to numeric comparisons. |
1700
|
|
|
|
|
|
|
|
1701
|
|
|
|
|
|
|
=item BUG FIX |
1702
|
|
|
|
|
|
|
L now once again works with code that lacks a trailing semicolon. |
1703
|
|
|
|
|
|
|
(This bug fixes a regression from 1.15.) |
1704
|
|
|
|
|
|
|
|
1705
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1706
|
|
|
|
|
|
|
L's old C<-e> option (to avoid end-of-line spaces) is now C<-E> |
1707
|
|
|
|
|
|
|
to avoid conflicts with the standard empty field argument. |
1708
|
|
|
|
|
|
|
|
1709
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1710
|
|
|
|
|
|
|
L's old C<-e> option is now C<-E> to avoid conflicts. |
1711
|
|
|
|
|
|
|
And its C<-n>, C<-s>, and C<-w> are now |
1712
|
|
|
|
|
|
|
C<-N>, C<-S>, and C<-W> to correspond. |
1713
|
|
|
|
|
|
|
|
1714
|
|
|
|
|
|
|
=item NEW |
1715
|
|
|
|
|
|
|
L replaces L, L, and L, |
1716
|
|
|
|
|
|
|
but with different options. |
1717
|
|
|
|
|
|
|
|
1718
|
|
|
|
|
|
|
=item ENHANCEMENT |
1719
|
|
|
|
|
|
|
The library routines C now understand both list-format |
1720
|
|
|
|
|
|
|
and column-format data, so all converted programs can now |
1721
|
|
|
|
|
|
|
I read either format. This capability was one |
1722
|
|
|
|
|
|
|
of the milestone goals for 2.0, so yea! |
1723
|
|
|
|
|
|
|
|
1724
|
|
|
|
|
|
|
=back |
1725
|
|
|
|
|
|
|
|
1726
|
|
|
|
|
|
|
=head2 2.2, 23-May-08 |
1727
|
|
|
|
|
|
|
|
1728
|
|
|
|
|
|
|
Release 2.2 is another 2.x alpha release. Now I<most> of the |
1729
|
|
|
|
|
|
|
commands are ported, but a few remain, and I plan one last |
1730
|
|
|
|
|
|
|
incompatible change (to the file header) before 2.x final. |
1731
|
|
|
|
|
|
|
|
1732
|
|
|
|
|
|
|
=over 4 |
1733
|
|
|
|
|
|
|
|
1734
|
|
|
|
|
|
|
=item ENHANCEMENT |
1735
|
|
|
|
|
|
|
|
1736
|
|
|
|
|
|
|
shifting more old programs to Perl modules. |
1737
|
|
|
|
|
|
|
New in 2.2: |
1738
|
|
|
|
|
|
|
L, |
1739
|
|
|
|
|
|
|
L. |
1740
|
|
|
|
|
|
|
L. |
1741
|
|
|
|
|
|
|
L. |
1742
|
|
|
|
|
|
|
L. |
1743
|
|
|
|
|
|
|
L. |
1744
|
|
|
|
|
|
|
L. |
1745
|
|
|
|
|
|
|
L. |
1746
|
|
|
|
|
|
|
L. |
1747
|
|
|
|
|
|
|
L. |
1748
|
|
|
|
|
|
|
L. |
1749
|
|
|
|
|
|
|
Also |
1750
|
|
|
|
|
|
|
L |
1751
|
|
|
|
|
|
|
exists only as a front-end (command-line) program. |
1752
|
|
|
|
|
|
|
|
1753
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1754
|
|
|
|
|
|
|
|
1755
|
|
|
|
|
|
|
The following programs have been dropped from fsdb-2.x: |
1756
|
|
|
|
|
|
|
L, |
1757
|
|
|
|
|
|
|
L, |
1758
|
|
|
|
|
|
|
L, |
1759
|
|
|
|
|
|
|
L. |
1760
|
|
|
|
|
|
|
|
1761
|
|
|
|
|
|
|
=item NEW |
1762
|
|
|
|
|
|
|
|
1763
|
|
|
|
|
|
|
L to convert Apache logfiles |
1764
|
|
|
|
|
|
|
|
1765
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1766
|
|
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
Options to L are now B<-B> and B<-I>, |
1768
|
|
|
|
|
|
|
not B<-a> and B<-i>. |
1769
|
|
|
|
|
|
|
|
1770
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1771
|
|
|
|
|
|
|
|
1772
|
|
|
|
|
|
|
L is now L. |
1773
|
|
|
|
|
|
|
|
1774
|
|
|
|
|
|
|
=item BUG FIXES |
1775
|
|
|
|
|
|
|
|
1776
|
|
|
|
|
|
|
L better handles empty columns; |
1777
|
|
|
|
|
|
|
L warning suppressed (actually a bug in high-bucket handling). |
1778
|
|
|
|
|
|
|
|
1779
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1780
|
|
|
|
|
|
|
|
1781
|
|
|
|
|
|
|
L now requires a C<-k> option in front of the |
1782
|
|
|
|
|
|
|
key (tag) field, or if none is given, it will group by the first field |
1783
|
|
|
|
|
|
|
(both like L). |
1784
|
|
|
|
|
|
|
|
1785
|
|
|
|
|
|
|
=item KNOWN BUG |
1786
|
|
|
|
|
|
|
|
1787
|
|
|
|
|
|
|
L with quantile option doesn't work currently. |
1788
|
|
|
|
|
|
|
|
1789
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1790
|
|
|
|
|
|
|
|
1791
|
|
|
|
|
|
|
L is renamed L. |
1792
|
|
|
|
|
|
|
|
1793
|
|
|
|
|
|
|
=item BUG FIXES |
1794
|
|
|
|
|
|
|
|
1795
|
|
|
|
|
|
|
L was leaving its log message as a command, not a comment. |
1796
|
|
|
|
|
|
|
Oops. No longer. |
1797
|
|
|
|
|
|
|
|
1798
|
|
|
|
|
|
|
=back |
1799
|
|
|
|
|
|
|
|
1800
|
|
|
|
|
|
|
=head2 2.3, 27-May-08 (alpha) |
1801
|
|
|
|
|
|
|
|
1802
|
|
|
|
|
|
|
Another alpha release, this one just to fix the critical dbjoin bug |
1803
|
|
|
|
|
|
|
listed below (that happens to have blocked my MP3 jukebox :-). |
1804
|
|
|
|
|
|
|
|
1805
|
|
|
|
|
|
|
=over 4 |
1806
|
|
|
|
|
|
|
|
1807
|
|
|
|
|
|
|
=item BUG FIX |
1808
|
|
|
|
|
|
|
|
1809
|
|
|
|
|
|
|
Dbsort no longer hangs if given an input file with no rows. |
1810
|
|
|
|
|
|
|
|
1811
|
|
|
|
|
|
|
=item BUG FIX |
1812
|
|
|
|
|
|
|
|
1813
|
|
|
|
|
|
|
Dbjoin now works with unsorted input coming from a pipeline (like stdin). |
1814
|
|
|
|
|
|
|
Perl-5.8.8 has a bug (?) that was making this case fail---opening |
1815
|
|
|
|
|
|
|
stdin in one thread, reading some, then reading more in a different |
1816
|
|
|
|
|
|
|
thread caused an lseek which works on files, but fails on pipes like stdin. |
1817
|
|
|
|
|
|
|
Go figure. |
1818
|
|
|
|
|
|
|
|
1819
|
|
|
|
|
|
|
=item BUG FIX / KNOWN BUG |
1820
|
|
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
The dbjoin fix also fixed dbmultistats -q |
1822
|
|
|
|
|
|
|
(it now gives the right answer). |
1823
|
|
|
|
|
|
|
Although a new bug appeared, messages like: |
1824
|
|
|
|
|
|
|
Attempt to free unreferenced scalar: SV 0xa9dd0c4, Perl interpreter: 0xa8350b8 during global destruction. |
1825
|
|
|
|
|
|
|
So the dbmultistats_quartile test is still disabled. |
1826
|
|
|
|
|
|
|
|
1827
|
|
|
|
|
|
|
=back |
1828
|
|
|
|
|
|
|
|
1829
|
|
|
|
|
|
|
=head2 2.4, 18-Jun-08 |
1830
|
|
|
|
|
|
|
|
1831
|
|
|
|
|
|
|
Another alpha release, mostly to fix minor usability |
1832
|
|
|
|
|
|
|
problems in dbmapreduce and client functions. |
1833
|
|
|
|
|
|
|
|
1834
|
|
|
|
|
|
|
=over 4 |
1835
|
|
|
|
|
|
|
|
1836
|
|
|
|
|
|
|
=item ENHANCEMENT |
1837
|
|
|
|
|
|
|
|
1838
|
|
|
|
|
|
|
L now defaults to running user supplied code without warnings |
1839
|
|
|
|
|
|
|
(as with fsdb-1.x). |
1840
|
|
|
|
|
|
|
Use C<--warnings> or C<-w> to turn them back on. |
1841
|
|
|
|
|
|
|
|
1842
|
|
|
|
|
|
|
=item ENHANCEMENT |
1843
|
|
|
|
|
|
|
|
1844
|
|
|
|
|
|
|
L can now write different format output |
1845
|
|
|
|
|
|
|
than the input, using the C<-m> option. |
1846
|
|
|
|
|
|
|
|
1847
|
|
|
|
|
|
|
=item KNOWN BUG |
1848
|
|
|
|
|
|
|
|
1849
|
|
|
|
|
|
|
L emits warnings on perl 5.10.0 |
1850
|
|
|
|
|
|
|
about "Unbalanced string table refcount" and "Scalars leaked" |
1851
|
|
|
|
|
|
|
when run with an external program as a reducer. |
1852
|
|
|
|
|
|
|
|
1853
|
|
|
|
|
|
|
L emits the warning "Attempt to free unreferenced scalar" |
1854
|
|
|
|
|
|
|
when run with quartiles. |
1855
|
|
|
|
|
|
|
|
1856
|
|
|
|
|
|
|
In each case the output is correct. |
1857
|
|
|
|
|
|
|
I believe these can be ignored. |
1858
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
=item CHANGE |
1860
|
|
|
|
|
|
|
|
1861
|
|
|
|
|
|
|
L no longer logs a line for each reducer that is invoked. |
1862
|
|
|
|
|
|
|
|
1863
|
|
|
|
|
|
|
=back |
1864
|
|
|
|
|
|
|
|
1865
|
|
|
|
|
|
|
|
1866
|
|
|
|
|
|
|
=head2 2.5, 24-Jun-08 |
1867
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
Another alpha release, fixing more minor bugs in |
1869
|
|
|
|
|
|
|
C and lossage in C. |
1870
|
|
|
|
|
|
|
|
1871
|
|
|
|
|
|
|
=over 4 |
1872
|
|
|
|
|
|
|
|
1873
|
|
|
|
|
|
|
=item ENHANCEMENT |
1874
|
|
|
|
|
|
|
|
1875
|
|
|
|
|
|
|
L can now tolerate non-map-aware reducers |
1876
|
|
|
|
|
|
|
that pass back the key column in output. |
1877
|
|
|
|
|
|
|
It also passes the current key as the last argument to |
1878
|
|
|
|
|
|
|
external reducers. |
1879
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
=item BUG FIX |
1881
|
|
|
|
|
|
|
|
1882
|
|
|
|
|
|
|
L, correctly handle C<-header> option again. |
1883
|
|
|
|
|
|
|
(Broken since fsdb-2.3.) |
1884
|
|
|
|
|
|
|
|
1885
|
|
|
|
|
|
|
=back |
1886
|
|
|
|
|
|
|
|
1887
|
|
|
|
|
|
|
|
1888
|
|
|
|
|
|
|
=head2 2.6, 11-Jul-08 |
1889
|
|
|
|
|
|
|
|
1890
|
|
|
|
|
|
|
Another alpha release, needed to fix DaGronk. |
1891
|
|
|
|
|
|
|
One new port, small bug fixes, and important fix to L. |
1892
|
|
|
|
|
|
|
|
1893
|
|
|
|
|
|
|
=over 4 |
1894
|
|
|
|
|
|
|
|
1895
|
|
|
|
|
|
|
=item ENHANCEMENT |
1896
|
|
|
|
|
|
|
|
1897
|
|
|
|
|
|
|
shifting more old programs to Perl modules. |
1898
|
|
|
|
|
|
|
New in 2.6: |
1899
|
|
|
|
|
|
|
L. |
1900
|
|
|
|
|
|
|
|
1901
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE and ENHANCEMENTS |
1902
|
|
|
|
|
|
|
L arguments changed, |
1903
|
|
|
|
|
|
|
use C<--rank> to require ranking instead of C<-r>. |
1904
|
|
|
|
|
|
|
Also, C<--ascending> and C<--descending> can now be specified separately, |
1905
|
|
|
|
|
|
|
both for C<--percentile> and C<--rank>. |
1906
|
|
|
|
|
|
|
|
1907
|
|
|
|
|
|
|
=item BUG FIX |
1908
|
|
|
|
|
|
|
|
1909
|
|
|
|
|
|
|
Sigh, the sense of the --warnings option in L was inverted. No longer. |
1910
|
|
|
|
|
|
|
|
1911
|
|
|
|
|
|
|
=item BUG FIX |
1912
|
|
|
|
|
|
|
|
1913
|
|
|
|
|
|
|
I found and fixed the string leaks (errors like "Unbalanced string |
1914
|
|
|
|
|
|
|
table refcount" and "Scalars leaked") in L and L. |
1915
|
|
|
|
|
|
|
(All Cs in threads must be manually destroyed.) |
1916
|
|
|
|
|
|
|
|
1917
|
|
|
|
|
|
|
=item BUG FIX |
1918
|
|
|
|
|
|
|
|
1919
|
|
|
|
|
|
|
The C<-C> option to specify the column separator in L |
1920
|
|
|
|
|
|
|
now works again (broken since it was ported). |
1921
|
|
|
|
|
|
|
|
1922
|
|
|
|
|
|
|
=back |
1923
|
|
|
|
|
|
|
|
1924
|
|
|
|
|
|
|
=head2 2.7, 30-Jul-08 beta |
1925
|
|
|
|
|
|
|
|
1926
|
|
|
|
|
|
|
The beta release of fsdb-2.x. Finally, all programs are ported. |
1927
|
|
|
|
|
|
|
As statistics, the number of lines of non-library code doubled from |
1928
|
|
|
|
|
|
|
7.5k to 15.5k. The libraries are much more complete, |
1929
|
|
|
|
|
|
|
going from 866 to 5164 lines. |
1930
|
|
|
|
|
|
|
The overall number of programs is about the same, |
1931
|
|
|
|
|
|
|
although 19 were dropped and 11 were added. |
1932
|
|
|
|
|
|
|
The number of test cases has grown from 116 to 175. |
1933
|
|
|
|
|
|
|
All programs are now in perl-5, no more shell scripts or perl-4. |
1934
|
|
|
|
|
|
|
All programs now have manual pages. |
1935
|
|
|
|
|
|
|
|
1936
|
|
|
|
|
|
|
Although this is a major step forward, I still expect |
1937
|
|
|
|
|
|
|
to rename "jdb" to "fsdb". |
1938
|
|
|
|
|
|
|
|
1939
|
|
|
|
|
|
|
=over 4 |
1940
|
|
|
|
|
|
|
|
1941
|
|
|
|
|
|
|
=item ENHANCEMENT |
1942
|
|
|
|
|
|
|
|
1943
|
|
|
|
|
|
|
shifting more old programs to Perl modules. |
1944
|
|
|
|
|
|
|
New in 2.7: |
1945
|
|
|
|
|
|
|
L. |
1946
|
|
|
|
|
|
|
L. |
1947
|
|
|
|
|
|
|
L. |
1948
|
|
|
|
|
|
|
L. |
1949
|
|
|
|
|
|
|
L. |
1950
|
|
|
|
|
|
|
L, |
1951
|
|
|
|
|
|
|
L, |
1952
|
|
|
|
|
|
|
L, |
1953
|
|
|
|
|
|
|
L, |
1954
|
|
|
|
|
|
|
L, |
1955
|
|
|
|
|
|
|
L. |
1956
|
|
|
|
|
|
|
|
1957
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1958
|
|
|
|
|
|
|
|
1959
|
|
|
|
|
|
|
The following programs have been dropped from fsdb-2.x: |
1960
|
|
|
|
|
|
|
L, |
1961
|
|
|
|
|
|
|
L, |
1962
|
|
|
|
|
|
|
L. |
1963
|
|
|
|
|
|
|
L. |
1964
|
|
|
|
|
|
|
They may come back, but seemed overly specialized. |
1965
|
|
|
|
|
|
|
The following program |
1966
|
|
|
|
|
|
|
L |
1967
|
|
|
|
|
|
|
was dropped because it is superseded by L. |
1968
|
|
|
|
|
|
|
L |
1969
|
|
|
|
|
|
|
was dropped pending test cases and examples. |
1970
|
|
|
|
|
|
|
|
1971
|
|
|
|
|
|
|
=item ENHANCEMENT |
1972
|
|
|
|
|
|
|
|
1973
|
|
|
|
|
|
|
L now has a C<-c> option to correct errors. |
1974
|
|
|
|
|
|
|
|
1975
|
|
|
|
|
|
|
=item NEW |
1976
|
|
|
|
|
|
|
|
1977
|
|
|
|
|
|
|
L provides the inverse of |
1978
|
|
|
|
|
|
|
L. |
1979
|
|
|
|
|
|
|
|
1980
|
|
|
|
|
|
|
=back |
1981
|
|
|
|
|
|
|
|
1982
|
|
|
|
|
|
|
|
1983
|
|
|
|
|
|
|
=head2 2.8, 5-Aug-08 |
1984
|
|
|
|
|
|
|
|
1985
|
|
|
|
|
|
|
Change header format, preserving forwards compatibility. |
1986
|
|
|
|
|
|
|
|
1987
|
|
|
|
|
|
|
=over 4 |
1988
|
|
|
|
|
|
|
|
1989
|
|
|
|
|
|
|
=item BUG FIX |
1990
|
|
|
|
|
|
|
|
1991
|
|
|
|
|
|
|
Complete editing pass over the manual, making sure it aligns |
1992
|
|
|
|
|
|
|
with fsdb-2.x. |
1993
|
|
|
|
|
|
|
|
1994
|
|
|
|
|
|
|
=item SEMI-COMPATIBLE CHANGE |
1995
|
|
|
|
|
|
|
|
1996
|
|
|
|
|
|
|
The header of fsdb files has changed, it is now #fsdb, not #h (or #L) |
1997
|
|
|
|
|
|
|
and parsing of -F and -R are also different. |
1998
|
|
|
|
|
|
|
See L for the new specification. |
1999
|
|
|
|
|
|
|
The v1 file format will be read, compatibly, but |
2000
|
|
|
|
|
|
|
not written. |
2001
|
|
|
|
|
|
|
|
2002
|
|
|
|
|
|
|
=item BUG FIX |
2003
|
|
|
|
|
|
|
|
2004
|
|
|
|
|
|
|
L now tolerates comments that precede the first key, |
2005
|
|
|
|
|
|
|
instead of failing with an error message. |
2006
|
|
|
|
|
|
|
|
2007
|
|
|
|
|
|
|
=back |
2008
|
|
|
|
|
|
|
|
2009
|
|
|
|
|
|
|
|
2010
|
|
|
|
|
|
|
=head2 2.9, 6-Aug-08 |
2011
|
|
|
|
|
|
|
|
2012
|
|
|
|
|
|
|
Still in beta; just a quick bug-fix for L. |
2013
|
|
|
|
|
|
|
|
2014
|
|
|
|
|
|
|
=over 4 |
2015
|
|
|
|
|
|
|
|
2016
|
|
|
|
|
|
|
=item ENHANCEMENT |
2017
|
|
|
|
|
|
|
|
2018
|
|
|
|
|
|
|
L now generates plausible output when given no rows |
2019
|
|
|
|
|
|
|
of input. |
2020
|
|
|
|
|
|
|
|
2021
|
|
|
|
|
|
|
=back |
2022
|
|
|
|
|
|
|
|
2023
|
|
|
|
|
|
|
=head2 2.10, 23-Sep-08 |
2024
|
|
|
|
|
|
|
|
2025
|
|
|
|
|
|
|
Still in beta, but picking up some bug fixes. |
2026
|
|
|
|
|
|
|
|
2027
|
|
|
|
|
|
|
=over 4 |
2028
|
|
|
|
|
|
|
|
2029
|
|
|
|
|
|
|
=item ENHANCEMENT |
2030
|
|
|
|
|
|
|
|
2031
|
|
|
|
|
|
|
L now generates plausible output when given no rows |
2032
|
|
|
|
|
|
|
of input. |
2033
|
|
|
|
|
|
|
|
2034
|
|
|
|
|
|
|
=item ENHANCEMENT |
2035
|
|
|
|
|
|
|
|
2036
|
|
|
|
|
|
|
L the warnings option was backwards; |
2037
|
|
|
|
|
|
|
now corrected. As a result, warnings in user code now default off |
2038
|
|
|
|
|
|
|
(like in fsdb-1.x). |
2039
|
|
|
|
|
|
|
|
2040
|
|
|
|
|
|
|
=item BUG FIX |
2041
|
|
|
|
|
|
|
|
2042
|
|
|
|
|
|
|
L now defaults to assuming the target column is numeric. |
2043
|
|
|
|
|
|
|
The new option C<-N> allows selection of a non-numeric target. |
2044
|
|
|
|
|
|
|
|
2045
|
|
|
|
|
|
|
=item BUG FIX |
2046
|
|
|
|
|
|
|
|
2047
|
|
|
|
|
|
|
L now includes C<--sample> and C<--nosample> options |
2048
|
|
|
|
|
|
|
to compute the sample or full population correlation coefficients. |
2049
|
|
|
|
|
|
|
Thanks to Xue Cai for finding this bug. |
2050
|
|
|
|
|
|
|
|
2051
|
|
|
|
|
|
|
=back |
2052
|
|
|
|
|
|
|
|
2053
|
|
|
|
|
|
|
|
2054
|
|
|
|
|
|
|
=head2 2.11, 14-Oct-08 |
2055
|
|
|
|
|
|
|
|
2056
|
|
|
|
|
|
|
Still in beta, but picking up some bug fixes. |
2057
|
|
|
|
|
|
|
|
2058
|
|
|
|
|
|
|
=over 4 |
2059
|
|
|
|
|
|
|
|
2060
|
|
|
|
|
|
|
=item ENHANCEMENT |
2061
|
|
|
|
|
|
|
|
2062
|
|
|
|
|
|
|
L is now more aggressive about filling in empty cells |
2063
|
|
|
|
|
|
|
with the official empty value, rather than leaving them blank or as whitespace. |
2064
|
|
|
|
|
|
|
|
2065
|
|
|
|
|
|
|
=item ENHANCEMENT |
2066
|
|
|
|
|
|
|
|
2067
|
|
|
|
|
|
|
L now catches failures during pipeline element setup |
2068
|
|
|
|
|
|
|
and exits reasonably gracefully. |
2069
|
|
|
|
|
|
|
|
2070
|
|
|
|
|
|
|
=item BUG FIX |
2071
|
|
|
|
|
|
|
|
2072
|
|
|
|
|
|
|
L now reaps child processes, thus avoiding |
2073
|
|
|
|
|
|
|
running out of processes when used a lot. |
2074
|
|
|
|
|
|
|
|
2075
|
|
|
|
|
|
|
=back |
2076
|
|
|
|
|
|
|
|
2077
|
|
|
|
|
|
|
=head2 2.12, 16-Oct-08 |
2078
|
|
|
|
|
|
|
|
2079
|
|
|
|
|
|
|
Finally, a full (non-beta) 2.x release! |
2080
|
|
|
|
|
|
|
|
2081
|
|
|
|
|
|
|
=over 4 |
2082
|
|
|
|
|
|
|
|
2083
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
2084
|
|
|
|
|
|
|
|
2085
|
|
|
|
|
|
|
Jdb has been renamed Fsdb, the flatfile-streaming database. |
2086
|
|
|
|
|
|
|
This change affects all internal Perl APIs, |
2087
|
|
|
|
|
|
|
but no shell command-level APIs. |
2088
|
|
|
|
|
|
|
While Jdb served well for more than ten years, |
2089
|
|
|
|
|
|
|
it is easily confused with the Java debugger (even though Jdb was there first!). |
2090
|
|
|
|
|
|
|
It also is too generic to work well in web search engines. |
2091
|
|
|
|
|
|
|
Finally, Jdb stands for ``John's database'', and we're a bit beyond that. |
2092
|
|
|
|
|
|
|
(However, some call me the ``file-system guy'', so |
2093
|
|
|
|
|
|
|
one could argue it retains that meaning.) |
2094
|
|
|
|
|
|
|
|
2095
|
|
|
|
|
|
|
If you just used the shell commands, this change should not affect you. |
2096
|
|
|
|
|
|
|
If you used the Perl-level libraries directly in your code, |
2097
|
|
|
|
|
|
|
you should be able to rename "Jdb" to "Fsdb" to move to 2.12. |
2098
|
|
|
|
|
|
|
|
2099
|
|
|
|
|
|
|
The jdb-announce list has not yet been renamed, but it will be shortly. |
2100
|
|
|
|
|
|
|
|
2101
|
|
|
|
|
|
|
With this release I've accomplished everything I wanted to |
2102
|
|
|
|
|
|
|
in fsdb-2.x. I therefore expect to return to boring, bugfix releases. |
2103
|
|
|
|
|
|
|
|
2104
|
|
|
|
|
|
|
=back |
2105
|
|
|
|
|
|
|
|
2106
|
|
|
|
|
|
|
=head2 2.13, 30-Oct-08 |
2107
|
|
|
|
|
|
|
|
2108
|
|
|
|
|
|
|
=over 4 |
2109
|
|
|
|
|
|
|
|
2110
|
|
|
|
|
|
|
=item BUG FIX |
2111
|
|
|
|
|
|
|
|
2112
|
|
|
|
|
|
|
L now treats non-numeric data as zero by default. |
2113
|
|
|
|
|
|
|
|
2114
|
|
|
|
|
|
|
=item BUG FIX |
2115
|
|
|
|
|
|
|
|
2116
|
|
|
|
|
|
|
Fixed a perl-5.10ism in L that |
2117
|
|
|
|
|
|
|
breaks that program under 5.8. |
2118
|
|
|
|
|
|
|
Thanks to Martin Lukac for reporting the bug. |
2119
|
|
|
|
|
|
|
|
2120
|
|
|
|
|
|
|
=back |
2121
|
|
|
|
|
|
|
|
2122
|
|
|
|
|
|
|
=head2 2.14, 26-Nov-08 |
2123
|
|
|
|
|
|
|
|
2124
|
|
|
|
|
|
|
=over 4 |
2125
|
|
|
|
|
|
|
|
2126
|
|
|
|
|
|
|
=item BUG FIX |
2127
|
|
|
|
|
|
|
|
2128
|
|
|
|
|
|
|
Improved documentation for L's C<-f> option. |
2129
|
|
|
|
|
|
|
|
2130
|
|
|
|
|
|
|
=item ENHANCEMENT |
2131
|
|
|
|
|
|
|
|
2132
|
|
|
|
|
|
|
L now computes a moving standard deviation in addition |
2133
|
|
|
|
|
|
|
to a moving mean. |
2134
|
|
|
|
|
|
|
|
2135
|
|
|
|
|
|
|
=back |
2136
|
|
|
|
|
|
|
|
2137
|
|
|
|
|
|
|
|
2138
|
|
|
|
|
|
|
=head2 2.15, 13-Apr-09 |
2139
|
|
|
|
|
|
|
|
2140
|
|
|
|
|
|
|
=over 4 |
2141
|
|
|
|
|
|
|
|
2142
|
|
|
|
|
|
|
=item BUG FIX |
2143
|
|
|
|
|
|
|
|
2144
|
|
|
|
|
|
|
Fix a F bug reported by Shalindra Fernando. |
2145
|
|
|
|
|
|
|
|
2146
|
|
|
|
|
|
|
=back |
2147
|
|
|
|
|
|
|
|
2148
|
|
|
|
|
|
|
|
2149
|
|
|
|
|
|
|
=head2 2.16, 14-Apr-09 |
2150
|
|
|
|
|
|
|
|
2151
|
|
|
|
|
|
|
=over 4 |
2152
|
|
|
|
|
|
|
|
2153
|
|
|
|
|
|
|
=item BUG FIX |
2154
|
|
|
|
|
|
|
|
2155
|
|
|
|
|
|
|
Another minor release bug: on some systems F loses |
2156
|
|
|
|
|
|
|
executable permissions. Again reported by Shalindra Fernando. |
2157
|
|
|
|
|
|
|
|
2158
|
|
|
|
|
|
|
=back |
2159
|
|
|
|
|
|
|
|
2160
|
|
|
|
|
|
|
=head2 2.17, 25-Jun-09 |
2161
|
|
|
|
|
|
|
|
2162
|
|
|
|
|
|
|
=over 4 |
2163
|
|
|
|
|
|
|
|
2164
|
|
|
|
|
|
|
=item TYPO FIXES |
2165
|
|
|
|
|
|
|
|
2166
|
|
|
|
|
|
|
Typo in the F manual fixed. |
2167
|
|
|
|
|
|
|
|
2168
|
|
|
|
|
|
|
=item IMPROVEMENT |
2169
|
|
|
|
|
|
|
|
2170
|
|
|
|
|
|
|
There is no longer a comment line to label columns |
2171
|
|
|
|
|
|
|
in F, instead the header line is tweaked to |
2172
|
|
|
|
|
|
|
line up. This change restores the Jdb-1.x behavior, and |
2173
|
|
|
|
|
|
|
means that repeated runs of dbcolneaten no longer add comment lines |
2174
|
|
|
|
|
|
|
each time. |
2175
|
|
|
|
|
|
|
|
2176
|
|
|
|
|
|
|
=item BUG FIX |
2177
|
|
|
|
|
|
|
|
2178
|
|
|
|
|
|
|
It turns out F was not correctly handling trailing spaces |
2179
|
|
|
|
|
|
|
when given the C<-E> option to suppress them. This regression is now |
2180
|
|
|
|
|
|
|
fixed. |
2181
|
|
|
|
|
|
|
|
2182
|
|
|
|
|
|
|
=item EXTENSION |
2183
|
|
|
|
|
|
|
|
2184
|
|
|
|
|
|
|
L can now handle direct references to the last row |
2185
|
|
|
|
|
|
|
via F<$lfref>, a dubious but now documented feature. |
2186
|
|
|
|
|
|
|
|
2187
|
|
|
|
|
|
|
=item BUG FIXES |
2188
|
|
|
|
|
|
|
|
2189
|
|
|
|
|
|
|
Separators set with C<-C> in F and F |
2190
|
|
|
|
|
|
|
were not properly |
2191
|
|
|
|
|
|
|
setting the heading, and null fields were not recognized. |
2192
|
|
|
|
|
|
|
The first bug was reported by Martin Lukac. |
2193
|
|
|
|
|
|
|
|
2194
|
|
|
|
|
|
|
=back |
2195
|
|
|
|
|
|
|
|
2196
|
|
|
|
|
|
|
=head2 2.18, 1-Jul-09 A minor release |
2197
|
|
|
|
|
|
|
|
2198
|
|
|
|
|
|
|
=over 4 |
2199
|
|
|
|
|
|
|
|
2200
|
|
|
|
|
|
|
=item IMPROVEMENT |
2201
|
|
|
|
|
|
|
|
2202
|
|
|
|
|
|
|
Documentation for F has been improved. |
2203
|
|
|
|
|
|
|
|
2204
|
|
|
|
|
|
|
=item IMPROVEMENT |
2205
|
|
|
|
|
|
|
|
2206
|
|
|
|
|
|
|
The package should now be PGP-signed. |
2207
|
|
|
|
|
|
|
|
2208
|
|
|
|
|
|
|
=back |
2209
|
|
|
|
|
|
|
|
2210
|
|
|
|
|
|
|
|
2211
|
|
|
|
|
|
|
=head2 2.19, 10-Jul-09 |
2212
|
|
|
|
|
|
|
|
2213
|
|
|
|
|
|
|
=over 4 |
2214
|
|
|
|
|
|
|
|
2215
|
|
|
|
|
|
|
=item BUG FIX |
2216
|
|
|
|
|
|
|
|
2217
|
|
|
|
|
|
|
Internal improvements to debugging output and robustness of |
2218
|
|
|
|
|
|
|
F and F. |
2219
|
|
|
|
|
|
|
F re-enabled. |
2220
|
|
|
|
|
|
|
|
2221
|
|
|
|
|
|
|
=back |
2222
|
|
|
|
|
|
|
|
2223
|
|
|
|
|
|
|
|
2224
|
|
|
|
|
|
|
=head2 2.20, 30-Nov-09 |
2225
|
|
|
|
|
|
|
(A collection of minor bugfixes, plus a build against Fedora 12.) |
2226
|
|
|
|
|
|
|
|
2227
|
|
|
|
|
|
|
=over 4 |
2228
|
|
|
|
|
|
|
|
2229
|
|
|
|
|
|
|
=item BUG FIX |
2230
|
|
|
|
|
|
|
|
2231
|
|
|
|
|
|
|
Logging for |
2232
|
|
|
|
|
|
|
F |
2233
|
|
|
|
|
|
|
with code refs is now stable |
2234
|
|
|
|
|
|
|
(it no longer includes a hex pointer to the code reference). |
2235
|
|
|
|
|
|
|
|
2236
|
|
|
|
|
|
|
=item BUG FIX |
2237
|
|
|
|
|
|
|
|
2238
|
|
|
|
|
|
|
Better handling of mixed blank lines in F |
2239
|
|
|
|
|
|
|
(see test case F). |
2240
|
|
|
|
|
|
|
|
2241
|
|
|
|
|
|
|
=item BUG FIX |
2242
|
|
|
|
|
|
|
|
2243
|
|
|
|
|
|
|
F now handles multi-line input better, |
2244
|
|
|
|
|
|
|
and handles tables with COLSPAN. |
2245
|
|
|
|
|
|
|
|
2246
|
|
|
|
|
|
|
=item BUG FIX |
2247
|
|
|
|
|
|
|
|
2248
|
|
|
|
|
|
|
F now cleans up threads in an C |
2249
|
|
|
|
|
|
|
to prevent "cannot detach a joined thread" errors that popped |
2250
|
|
|
|
|
|
|
up in perl-5.10. Hopefully this prevents a race condition |
2251
|
|
|
|
|
|
|
that causes the test suites to hang about 20% of the time |
2252
|
|
|
|
|
|
|
(in F). |
2253
|
|
|
|
|
|
|
|
2254
|
|
|
|
|
|
|
=item IMPROVEMENT |
2255
|
|
|
|
|
|
|
|
2256
|
|
|
|
|
|
|
F now detects and correctly fails |
2257
|
|
|
|
|
|
|
when the input and reducer have incompatible |
2258
|
|
|
|
|
|
|
field separators. |
2259
|
|
|
|
|
|
|
|
2260
|
|
|
|
|
|
|
=item IMPROVEMENT |
2261
|
|
|
|
|
|
|
|
2262
|
|
|
|
|
|
|
F, F, F, F, |
2263
|
|
|
|
|
|
|
and F |
2264
|
|
|
|
|
|
|
now all take an C<-F> option to let one specify the output field separator |
2265
|
|
|
|
|
|
|
(so they work better with F). |
2266
|
|
|
|
|
|
|
|
2267
|
|
|
|
|
|
|
=item BUG FIX |
2268
|
|
|
|
|
|
|
|
2269
|
|
|
|
|
|
|
An omitted C<-k> from the manual page of F |
2270
|
|
|
|
|
|
|
is now there. Bug reported by Unkyu Park. |
2271
|
|
|
|
|
|
|
|
2272
|
|
|
|
|
|
|
=back |
2273
|
|
|
|
|
|
|
|
2274
|
|
|
|
|
|
|
|
2275
|
|
|
|
|
|
|
=head2 2.21, 17-Apr-10 |
2276
|
|
|
|
|
|
|
bug fix release |
2277
|
|
|
|
|
|
|
|
2278
|
|
|
|
|
|
|
=over 4 |
2279
|
|
|
|
|
|
|
|
2280
|
|
|
|
|
|
|
=item BUG FIX |
2281
|
|
|
|
|
|
|
|
2282
|
|
|
|
|
|
|
F now no longer fails with -outputheader => never |
2283
|
|
|
|
|
|
|
(an obscure bug). |
2284
|
|
|
|
|
|
|
|
2285
|
|
|
|
|
|
|
=item IMPROVEMENT |
2286
|
|
|
|
|
|
|
|
2287
|
|
|
|
|
|
|
F (in the warnings section) |
2288
|
|
|
|
|
|
|
and F now more carefully document how they |
2289
|
|
|
|
|
|
|
handle (and do not handle) numerical precision problems, |
2290
|
|
|
|
|
|
|
and other general limits. Thanks to Yuri Pradkin for prompting |
2291
|
|
|
|
|
|
|
this documentation. |
2292
|
|
|
|
|
|
|
|
2293
|
|
|
|
|
|
|
=item IMPROVEMENT |
2294
|
|
|
|
|
|
|
|
2295
|
|
|
|
|
|
|
C |
2296
|
|
|
|
|
|
|
is now restored from C. |
2297
|
|
|
|
|
|
|
|
2298
|
|
|
|
|
|
|
=item IMPROVEMENT |
2299
|
|
|
|
|
|
|
|
2300
|
|
|
|
|
|
|
Documentation for multiple styles of input approaches |
2301
|
|
|
|
|
|
|
(including performance description) added to L. |
2302
|
|
|
|
|
|
|
|
2303
|
|
|
|
|
|
|
=back |
2304
|
|
|
|
|
|
|
|
2305
|
|
|
|
|
|
|
=head2 2.22, 2010-10-31 |
2306
|
|
|
|
|
|
|
One new tool F and several bug fixes for Perl 5.10. |
2307
|
|
|
|
|
|
|
|
2308
|
|
|
|
|
|
|
=over 4 |
2309
|
|
|
|
|
|
|
|
2310
|
|
|
|
|
|
|
=item BUG FIX |
2311
|
|
|
|
|
|
|
|
2312
|
|
|
|
|
|
|
F now correctly handles n-way merges. |
2313
|
|
|
|
|
|
|
Bug reported by Yuri Pradkin. |
2314
|
|
|
|
|
|
|
|
2315
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
2316
|
|
|
|
|
|
|
|
2317
|
|
|
|
|
|
|
F now defaults to I<not> padding the last column. |
2318
|
|
|
|
|
|
|
|
2319
|
|
|
|
|
|
|
=item ADDITION |
2320
|
|
|
|
|
|
|
|
2321
|
|
|
|
|
|
|
F now takes B<-N NewColumn> to give the new |
2322
|
|
|
|
|
|
|
column a name other than "count". Feature requested by Mike Rouch |
2323
|
|
|
|
|
|
|
in January 2005. |
2324
|
|
|
|
|
|
|
|
2325
|
|
|
|
|
|
|
=item ADDITION |
2326
|
|
|
|
|
|
|
|
2327
|
|
|
|
|
|
|
New program F copies the last value of a column |
2328
|
|
|
|
|
|
|
into a new column copylast_column of the next row. |
2329
|
|
|
|
|
|
|
New program requested by Fabio Silva; |
2330
|
|
|
|
|
|
|
useful for converting dbmultistats output into dbrvstatdiff input. |
2331
|
|
|
|
|
|
|
|
2332
|
|
|
|
|
|
|
=item BUG FIX |
2333
|
|
|
|
|
|
|
|
2334
|
|
|
|
|
|
|
Several tools (particularly F and F) would |
2335
|
|
|
|
|
|
|
report errors like "Unbalanced string table refcount: (1) for "STDOUT" |
2336
|
|
|
|
|
|
|
during global destruction" on exit, at least on certain versions |
2337
|
|
|
|
|
|
|
of Perl (for me on 5.10.1), but similar errors have been off-and-on |
2338
|
|
|
|
|
|
|
for several Perl releases. Although I think my code looked |
2339
|
|
|
|
|
|
|
OK, I worked around this problem with a different way of handling |
2340
|
|
|
|
|
|
|
standard IO redirection. |
2341
|
|
|
|
|
|
|
|
2342
|
|
|
|
|
|
|
=back |
2343
|
|
|
|
|
|
|
|
2344
|
|
|
|
|
|
|
|
2345
|
|
|
|
|
|
|
=head2 2.23, 2011-03-10 |
2346
|
|
|
|
|
|
|
Several small portability bugfixes; improved F for large datasets |
2347
|
|
|
|
|
|
|
|
2348
|
|
|
|
|
|
|
=over 4 |
2349
|
|
|
|
|
|
|
|
2350
|
|
|
|
|
|
|
=item IMPROVEMENT |
2351
|
|
|
|
|
|
|
|
2352
|
|
|
|
|
|
|
Documentation to F was changed to use "sd" to refer to |
2353
|
|
|
|
|
|
|
standard deviation, not "ss" (which might be confused with sum-of-squares). |
2354
|
|
|
|
|
|
|
|
2355
|
|
|
|
|
|
|
=item BUG FIX |
2356
|
|
|
|
|
|
|
|
2357
|
|
|
|
|
|
|
This documentation about F was missing the F<-k> option |
2358
|
|
|
|
|
|
|
in some cases. |
2359
|
|
|
|
|
|
|
|
2360
|
|
|
|
|
|
|
=item BUG FIX |
2361
|
|
|
|
|
|
|
|
2362
|
|
|
|
|
|
|
F was failing on MacOS-10.6.3 for some tests with |
2363
|
|
|
|
|
|
|
the error |
2364
|
|
|
|
|
|
|
|
2365
|
|
|
|
|
|
|
dbmapreduce: cannot run external dbmapreduce reduce program (perl TEST/dbmapreduce_external_with_key.pl) |
2366
|
|
|
|
|
|
|
|
2367
|
|
|
|
|
|
|
The problem seemed to be only in the error, not in operation. |
2368
|
|
|
|
|
|
|
On MacOS, the error is now suppressed. |
2369
|
|
|
|
|
|
|
Thanks to Alefiya Hussain for providing access to a Mac system |
2370
|
|
|
|
|
|
|
that allowed debugging of this problem. |
2371
|
|
|
|
|
|
|
|
2372
|
|
|
|
|
|
|
=item IMPROVEMENT |
2373
|
|
|
|
|
|
|
|
2374
|
|
|
|
|
|
|
The F command requires an external |
2375
|
|
|
|
|
|
|
Perl library (F). On computers that |
2376
|
|
|
|
|
|
|
lack this optional library, previously Fsdb would configure |
2377
|
|
|
|
|
|
|
with a warning and then test cases would fail. |
2378
|
|
|
|
|
|
|
Now those test cases are skipped with an additional warning. |
2379
|
|
|
|
|
|
|
|
2380
|
|
|
|
|
|
|
=item BUG FIX |
2381
|
|
|
|
|
|
|
|
2382
|
|
|
|
|
|
|
The test suite now supports alternative valid output, as a hack |
2383
|
|
|
|
|
|
|
to account for last-digit floating point differences. |
2384
|
|
|
|
|
|
|
(Not very satisfying :-( ) |
2385
|
|
|
|
|
|
|
|
2386
|
|
|
|
|
|
|
=item BUG FIX |
2387
|
|
|
|
|
|
|
|
2388
|
|
|
|
|
|
|
F output for confidence intervals on very large |
2389
|
|
|
|
|
|
|
datasets has changed. Previously it failed for more than 2^31-1 |
2390
|
|
|
|
|
|
|
records, and handling of T-Distributions with thousands of rows |
2391
|
|
|
|
|
|
|
was a bit dubious. Now datasets with more than 10000 are considered |
2392
|
|
|
|
|
|
|
infinitely large and hopefully correctly handled. |
2393
|
|
|
|
|
|
|
|
2394
|
|
|
|
|
|
|
=back |
2395
|
|
|
|
|
|
|
|
2396
|
|
|
|
|
|
|
=head2 2.24, 2011-04-15 |
2397
|
|
|
|
|
|
|
Improvements to fix an old bug in dbmapreduce with different field separators |
2398
|
|
|
|
|
|
|
|
2399
|
|
|
|
|
|
|
=over 4 |
2400
|
|
|
|
|
|
|
|
2401
|
|
|
|
|
|
|
=item IMPROVEMENT |
2402
|
|
|
|
|
|
|
|
2403
|
|
|
|
|
|
|
The F command had a C<--correct> option to |
2404
|
|
|
|
|
|
|
work around incompatible field-separators, |
2405
|
|
|
|
|
|
|
but it did nothing. Now it does the correct but sad, data-losing |
2406
|
|
|
|
|
|
|
thing. |
2407
|
|
|
|
|
|
|
|
2408
|
|
|
|
|
|
|
=item IMPROVEMENT |
2409
|
|
|
|
|
|
|
|
2410
|
|
|
|
|
|
|
The F command |
2411
|
|
|
|
|
|
|
previously failed with an error message when invoked |
2412
|
|
|
|
|
|
|
on input with a non-default field separator. |
2413
|
|
|
|
|
|
|
The root cause was the underlying F |
2414
|
|
|
|
|
|
|
that did not handle the case of reducers that generated |
2415
|
|
|
|
|
|
|
output with a different field separator than the input. |
2416
|
|
|
|
|
|
|
We now detect and repair incompatible field separators. |
2417
|
|
|
|
|
|
|
This change corrects a problem originally documented and detected |
2418
|
|
|
|
|
|
|
in Fsdb-2.20. |
2419
|
|
|
|
|
|
|
Bug re-reported by Unkyu Park. |
2420
|
|
|
|
|
|
|
|
2421
|
|
|
|
|
|
|
=back |
2422
|
|
|
|
|
|
|
|
2423
|
|
|
|
|
|
|
=head2 2.25, 2011-08-07 |
2424
|
|
|
|
|
|
|
Two new tools, F and F, and a bugfix for two people. |
2425
|
|
|
|
|
|
|
|
2426
|
|
|
|
|
|
|
=over 4 |
2427
|
|
|
|
|
|
|
|
2428
|
|
|
|
|
|
|
=item IMPROVEMENT |
2429
|
|
|
|
|
|
|
|
2430
|
|
|
|
|
|
|
F now supports a F<--utc> option, |
2431
|
|
|
|
|
|
|
which also fixes this test case for users outside of the Pacific |
2432
|
|
|
|
|
|
|
time zone. Bug reported by David Graff, and also by Peter Desnoyers |
2433
|
|
|
|
|
|
|
(within a week of each other :-) |
2434
|
|
|
|
|
|
|
|
2435
|
|
|
|
|
|
|
=item NEW |
2436
|
|
|
|
|
|
|
|
2437
|
|
|
|
|
|
|
F can convert simple, very regular XML files into Fsdb. |
2438
|
|
|
|
|
|
|
|
2439
|
|
|
|
|
|
|
=item NEW |
2440
|
|
|
|
|
|
|
|
2441
|
|
|
|
|
|
|
F "pivots" a file, converting multiple rows |
2442
|
|
|
|
|
|
|
corresponding to the same entity into a single row with multiple columns. |
2443
|
|
|
|
|
|
|
|
2444
|
|
|
|
|
|
|
=back |
2445
|
|
|
|
|
|
|
|
2446
|
|
|
|
|
|
|
=head2 2.26, 2011-12-12 |
2447
|
|
|
|
|
|
|
Bug fixes, particularly for perl-5.14.2. |
2448
|
|
|
|
|
|
|
|
2449
|
|
|
|
|
|
|
=over 4 |
2450
|
|
|
|
|
|
|
|
2451
|
|
|
|
|
|
|
=item BUG FIX |
2452
|
|
|
|
|
|
|
|
2453
|
|
|
|
|
|
|
Bugs fixed in L manual page. |
2454
|
|
|
|
|
|
|
|
2455
|
|
|
|
|
|
|
=item BUG FIX |
2456
|
|
|
|
|
|
|
|
2457
|
|
|
|
|
|
|
Fixed problems where L was truncating floating point numbers |
2458
|
|
|
|
|
|
|
when sorting. This strange behavior happens as of perl-5.14.2 and |
2459
|
|
|
|
|
|
|
it I<seems> like a Perl bug. I've worked around it for the test suites, |
2460
|
|
|
|
|
|
|
but I'm a bit nervous. |
2461
|
|
|
|
|
|
|
|
2462
|
|
|
|
|
|
|
=back |
2463
|
|
|
|
|
|
|
|
2464
|
|
|
|
|
|
|
=head2 2.27, 2012-11-15 |
2465
|
|
|
|
|
|
|
Accumulated bug fixes. |
2466
|
|
|
|
|
|
|
|
2467
|
|
|
|
|
|
|
=over 4 |
2468
|
|
|
|
|
|
|
|
2469
|
|
|
|
|
|
|
=item IMPROVEMENT |
2470
|
|
|
|
|
|
|
|
2471
|
|
|
|
|
|
|
F now reports errors in CSV input with real diagnostics. |
2472
|
|
|
|
|
|
|
|
2473
|
|
|
|
|
|
|
=item IMPROVEMENT |
2474
|
|
|
|
|
|
|
|
2475
|
|
|
|
|
|
|
F can now compute median, when given the C<-m> option. |
2476
|
|
|
|
|
|
|
|
2477
|
|
|
|
|
|
|
=item BUG FIX |
2478
|
|
|
|
|
|
|
|
2479
|
|
|
|
|
|
|
F non-numeric handling (the C<-a> option) now works properly. |
2480
|
|
|
|
|
|
|
|
2481
|
|
|
|
|
|
|
=item DOCUMENTATION |
2482
|
|
|
|
|
|
|
|
2483
|
|
|
|
|
|
|
The internal |
2484
|
|
|
|
|
|
|
F test framework |
2485
|
|
|
|
|
|
|
is now documented. |
2486
|
|
|
|
|
|
|
|
2487
|
|
|
|
|
|
|
=item BUG FIX |
2488
|
|
|
|
|
|
|
|
2489
|
|
|
|
|
|
|
F now correctly handles the case where there is no input |
2490
|
|
|
|
|
|
|
(previously it output a blank line, which is a malformed fsdb file). |
2491
|
|
|
|
|
|
|
Thanks to Yuri Pradkin for reporting this bug. |
2492
|
|
|
|
|
|
|
|
2493
|
|
|
|
|
|
|
=back |
2494
|
|
|
|
|
|
|
|
2495
|
|
|
|
|
|
|
=head2 2.28, 2012-11-15 |
2496
|
|
|
|
|
|
|
A quick release to fix most rpmlint errors. |
2497
|
|
|
|
|
|
|
|
2498
|
|
|
|
|
|
|
=over 4 |
2499
|
|
|
|
|
|
|
|
2500
|
|
|
|
|
|
|
=item BUG FIX |
2501
|
|
|
|
|
|
|
|
2502
|
|
|
|
|
|
|
Fixed a number of minor release problems (wrong permissions, old FSF |
2503
|
|
|
|
|
|
|
address, etc.) found by rpmlint. |
2504
|
|
|
|
|
|
|
|
2505
|
|
|
|
|
|
|
=back |
2506
|
|
|
|
|
|
|
|
2507
|
|
|
|
|
|
|
=head2 2.29, 2012-11-20 |
2508
|
|
|
|
|
|
|
a quick release for CPAN testing |
2509
|
|
|
|
|
|
|
|
2510
|
|
|
|
|
|
|
=over 4 |
2511
|
|
|
|
|
|
|
|
2512
|
|
|
|
|
|
|
=item IMPROVEMENT |
2513
|
|
|
|
|
|
|
|
2514
|
|
|
|
|
|
|
Tweaked the RPM spec. |
2515
|
|
|
|
|
|
|
|
2516
|
|
|
|
|
|
|
=item IMPROVEMENT |
2517
|
|
|
|
|
|
|
|
2518
|
|
|
|
|
|
|
Modified F to fail gracefully on Perl installations |
2519
|
|
|
|
|
|
|
that lack threads. (Without this fix, I get massive failures |
2520
|
|
|
|
|
|
|
in the non-ithreads test system.) |
2521
|
|
|
|
|
|
|
|
2522
|
|
|
|
|
|
|
=back |
2523
|
|
|
|
|
|
|
|
2524
|
|
|
|
|
|
|
=head2 2.30, 2012-11-25 |
2525
|
|
|
|
|
|
|
improvements to perl portability |
2526
|
|
|
|
|
|
|
|
2527
|
|
|
|
|
|
|
=over 4 |
2528
|
|
|
|
|
|
|
|
2529
|
|
|
|
|
|
|
=item BUG FIX |
2530
|
|
|
|
|
|
|
|
2531
|
|
|
|
|
|
|
Removed unicode character in documentation of F |
2532
|
|
|
|
|
|
|
so pod tests will pass. (Sigh, that should work :-( ) |
2533
|
|
|
|
|
|
|
|
2534
|
|
|
|
|
|
|
=item BUG FIX |
2535
|
|
|
|
|
|
|
|
2536
|
|
|
|
|
|
|
Fixed test suite failures on 5 tests (F |
2537
|
|
|
|
|
|
|
was the first) due to L's addition of a period. |
2538
|
|
|
|
|
|
|
This problem was breaking Fsdb on perl-5.17. |
2539
|
|
|
|
|
|
|
Thanks to Michael McQuaid for helping diagnose this problem. |
2540
|
|
|
|
|
|
|
|
2541
|
|
|
|
|
|
|
=item IMPROVEMENT |
2542
|
|
|
|
|
|
|
|
2543
|
|
|
|
|
|
|
The test suite now prints out the names of tests it tries. |
2544
|
|
|
|
|
|
|
|
2545
|
|
|
|
|
|
|
=back |
2546
|
|
|
|
|
|
|
|
2547
|
|
|
|
|
|
|
=head2 2.31, 2012-11-28 |
2548
|
|
|
|
|
|
|
A release with actual improvements to dbfilepivot and dbrowuniq. |
2549
|
|
|
|
|
|
|
|
2550
|
|
|
|
|
|
|
=over 4 |
2551
|
|
|
|
|
|
|
|
2552
|
|
|
|
|
|
|
=item BUG FIX |
2553
|
|
|
|
|
|
|
|
2554
|
|
|
|
|
|
|
Documentation fixes: typos in L, |
2555
|
|
|
|
|
|
|
bugs in L, |
2556
|
|
|
|
|
|
|
clarification for comment handling in L. |
2557
|
|
|
|
|
|
|
|
2558
|
|
|
|
|
|
|
=item IMPROVEMENT |
2559
|
|
|
|
|
|
|
|
2560
|
|
|
|
|
|
|
Previously L assumed the input was grouped by keys |
2561
|
|
|
|
|
|
|
and didn't verify that pre-condition. |
2562
|
|
|
|
|
|
|
Now there is no pre-condition (it will sort the input by default), |
2563
|
|
|
|
|
|
|
and it checks if the invariant is violated. |
2564
|
|
|
|
|
|
|
|
2565
|
|
|
|
|
|
|
=item BUG FIX |
2566
|
|
|
|
|
|
|
|
2567
|
|
|
|
|
|
|
Previously L failed if the input had comments (oops :-); |
2568
|
|
|
|
|
|
|
no longer. |
2569
|
|
|
|
|
|
|
|
2570
|
|
|
|
|
|
|
=item IMPROVEMENT |
2571
|
|
|
|
|
|
|
|
2572
|
|
|
|
|
|
|
Now L has the C<-L> option to preserve the last |
2573
|
|
|
|
|
|
|
unique row (instead of the first), a common idiom. |
2574
|
|
|
|
|
|
|
|
2575
|
|
|
|
|
|
|
=back |
2576
|
|
|
|
|
|
|
|
2577
|
|
|
|
|
|
|
=head2 2.32, 2012-12-21 |
2578
|
|
|
|
|
|
|
Test suites should now be more numerically robust. |
2579
|
|
|
|
|
|
|
|
2580
|
|
|
|
|
|
|
=over 4 |
2581
|
|
|
|
|
|
|
|
2582
|
|
|
|
|
|
|
=item NEW |
2583
|
|
|
|
|
|
|
|
2584
|
|
|
|
|
|
|
New L does fsdb-aware file differencing. |
2585
|
|
|
|
|
|
|
It does not do smart intuition of add/removes like Unix diff(1), |
2586
|
|
|
|
|
|
|
but it does know about columns, and with C<-E>, it does |
2587
|
|
|
|
|
|
|
numeric-aware differences. |
2588
|
|
|
|
|
|
|
|
2589
|
|
|
|
|
|
|
=item IMPROVEMENT |
2590
|
|
|
|
|
|
|
|
2591
|
|
|
|
|
|
|
Test suites that are numeric now use L to do numeric-aware |
2592
|
|
|
|
|
|
|
comparisons, so the test suite should now be robust to slightly different |
2593
|
|
|
|
|
|
|
computers and operating systems and compilers than what I use. |
2594
|
|
|
|
|
|
|
|
2595
|
|
|
|
|
|
|
=back |
2596
|
|
|
|
|
|
|
|
2597
|
|
|
|
|
|
|
=head2 2.33, 2012-12-23 |
2598
|
|
|
|
|
|
|
Minor fixes to some test cases. |
2599
|
|
|
|
|
|
|
|
2600
|
|
|
|
|
|
|
=over 4 |
2601
|
|
|
|
|
|
|
|
2602
|
|
|
|
|
|
|
=item IMPROVEMENT |
2603
|
|
|
|
|
|
|
|
2604
|
|
|
|
|
|
|
L and L |
2605
|
|
|
|
|
|
|
now support the C<-N> option to give the new column a |
2606
|
|
|
|
|
|
|
different name. (And test cases where this duplication mattered |
2607
|
|
|
|
|
|
|
have been fixed.) |
2608
|
|
|
|
|
|
|
|
2609
|
|
|
|
|
|
|
=item IMPROVEMENT |
2610
|
|
|
|
|
|
|
|
2611
|
|
|
|
|
|
|
L now shows the t-test breakpoint with a reasonable number of |
2612
|
|
|
|
|
|
|
floating point digits. |
2613
|
|
|
|
|
|
|
|
2614
|
|
|
|
|
|
|
=item BUG FIX |
2615
|
|
|
|
|
|
|
|
2616
|
|
|
|
|
|
|
Fixed a numerical stability problem in the F test case. |
2617
|
|
|
|
|
|
|
|
2618
|
|
|
|
|
|
|
=back |
2619
|
|
|
|
|
|
|
|
2620
|
|
|
|
|
|
|
=head1 WHAT'S NEW |
2621
|
|
|
|
|
|
|
|
2622
|
|
|
|
|
|
|
=head2 2.34, 2013-02-10 |
2623
|
|
|
|
|
|
|
Parallelism in L. |
2624
|
|
|
|
|
|
|
|
2625
|
|
|
|
|
|
|
=over 4 |
2626
|
|
|
|
|
|
|
|
2627
|
|
|
|
|
|
|
=item IMPROVEMENT |
2628
|
|
|
|
|
|
|
|
2629
|
|
|
|
|
|
|
Documentation for L now includes resource requirements. |
2630
|
|
|
|
|
|
|
|
2631
|
|
|
|
|
|
|
=item IMPROVEMENT |
2632
|
|
|
|
|
|
|
|
2633
|
|
|
|
|
|
|
Default memory usage for L is now about 256MB. |
2634
|
|
|
|
|
|
|
(The world keeps moving forward.) |
2635
|
|
|
|
|
|
|
|
2636
|
|
|
|
|
|
|
=item IMPROVEMENT |
2637
|
|
|
|
|
|
|
|
2638
|
|
|
|
|
|
|
L now does merging in parallel. |
2639
|
|
|
|
|
|
|
As a side-effect, L should be faster when |
2640
|
|
|
|
|
|
|
input overflows memory. The level of parallelism |
2641
|
|
|
|
|
|
|
can be limited with the C<--parallelism> option. |
2642
|
|
|
|
|
|
|
(There is more work to do here, but we're off to a start.) |
2643
|
|
|
|
|
|
|
|
2644
|
|
|
|
|
|
|
=back |
2645
|
|
|
|
|
|
|
|
2646
|
|
|
|
|
|
|
=head2 2.35, 2013-02-23 |
2647
|
|
|
|
|
|
|
Improvements to dbmerge parallelism |
2648
|
|
|
|
|
|
|
|
2649
|
|
|
|
|
|
|
=over 4 |
2650
|
|
|
|
|
|
|
|
2651
|
|
|
|
|
|
|
=item BUG FIX |
2652
|
|
|
|
|
|
|
|
2653
|
|
|
|
|
|
|
Fsdb temporary files are now created more securely (with File::Temp). |
2654
|
|
|
|
|
|
|
|
2655
|
|
|
|
|
|
|
=item IMPROVEMENT |
2656
|
|
|
|
|
|
|
|
2657
|
|
|
|
|
|
|
Programs that sort or merge on fields (L, L, L, |
2658
|
|
|
|
|
|
|
L) now report an error if no fields on which to join or merge |
2659
|
|
|
|
|
|
|
are given. |
2660
|
|
|
|
|
|
|
|
2661
|
|
|
|
|
|
|
=item IMPROVEMENT |
2662
|
|
|
|
|
|
|
|
2663
|
|
|
|
|
|
|
Parallelism in L should now be more consistent, |
2664
|
|
|
|
|
|
|
with less starting and stopping. |
2665
|
|
|
|
|
|
|
|
2666
|
|
|
|
|
|
|
=item IMPROVEMENT |
2667
|
|
|
|
|
|
|
In L, the C<--xargs> option lets one give input filenames on |
2668
|
|
|
|
|
|
|
standard input, rather than the command line. |
2669
|
|
|
|
|
|
|
This feature paves the way for faster dbsort for large inputs |
2670
|
|
|
|
|
|
|
(by pipelining sorting and merging), expected in the next release. |
2671
|
|
|
|
|
|
|
|
2672
|
|
|
|
|
|
|
=back |
2673
|
|
|
|
|
|
|
|
2674
|
|
|
|
|
|
|
|
2675
|
|
|
|
|
|
|
=head2 2.36, 2013-02-25 |
2676
|
|
|
|
|
|
|
dbsort pipelines with dbmerge |
2677
|
|
|
|
|
|
|
|
2678
|
|
|
|
|
|
|
=over 4 |
2679
|
|
|
|
|
|
|
|
2680
|
|
|
|
|
|
|
=item IMPROVEMENT |
2681
|
|
|
|
|
|
|
For large inputs, |
2682
|
|
|
|
|
|
|
L now pipelines sorting and merging, |
2683
|
|
|
|
|
|
|
allowing earlier processing. |
2684
|
|
|
|
|
|
|
|
2685
|
|
|
|
|
|
|
=item BUG FIX |
2686
|
|
|
|
|
|
|
Since 2.35, L delayed cleanup of intermediate files, |
2687
|
|
|
|
|
|
|
thereby requiring extra disk space. |
2688
|
|
|
|
|
|
|
|
2689
|
|
|
|
|
|
|
=back |
2690
|
|
|
|
|
|
|
|
2691
|
|
|
|
|
|
|
=head2 2.37, 2013-02-26 |
2692
|
|
|
|
|
|
|
quick bugfix to support parallel sort and merge from recent releases |
2693
|
|
|
|
|
|
|
|
2694
|
|
|
|
|
|
|
=over 4 |
2695
|
|
|
|
|
|
|
|
2696
|
|
|
|
|
|
|
=item BUG FIX |
2697
|
|
|
|
|
|
|
Since 2.35, L delayed removal of input files given by |
2698
|
|
|
|
|
|
|
C<--xargs>. This problem is now fixed. |
2699
|
|
|
|
|
|
|
|
2700
|
|
|
|
|
|
|
=back |
2701
|
|
|
|
|
|
|
|
2702
|
|
|
|
|
|
|
|
2703
|
|
|
|
|
|
|
=head2 2.38, 2013-04-29 |
2704
|
|
|
|
|
|
|
minor bug fixes |
2705
|
|
|
|
|
|
|
|
2706
|
|
|
|
|
|
|
=over 4 |
2707
|
|
|
|
|
|
|
|
2708
|
|
|
|
|
|
|
=item CLARIFICATION |
2709
|
|
|
|
|
|
|
|
2710
|
|
|
|
|
|
|
Configure now rejects Windows since tests seem to hang |
2711
|
|
|
|
|
|
|
on some versions of Windows. |
2712
|
|
|
|
|
|
|
(I would love help from a Windows developer to get this problem fixed, |
2713
|
|
|
|
|
|
|
but I cannot do it.) See F. |
2714
|
|
|
|
|
|
|
|
2715
|
|
|
|
|
|
|
=item IMPROVEMENT |
2716
|
|
|
|
|
|
|
|
2717
|
|
|
|
|
|
|
All programs that use temporary files |
2718
|
|
|
|
|
|
|
(L, L, L, L) |
2719
|
|
|
|
|
|
|
now take the C<-T> option |
2720
|
|
|
|
|
|
|
and set the temporary directory consistently. |
2721
|
|
|
|
|
|
|
|
2722
|
|
|
|
|
|
|
In addition, error messages are better when the temporary directory |
2723
|
|
|
|
|
|
|
has problems. Problem reported by Liang Zhu. |
2724
|
|
|
|
|
|
|
|
2725
|
|
|
|
|
|
|
=item BUG FIX |
2726
|
|
|
|
|
|
|
|
2727
|
|
|
|
|
|
|
L was failing with external, map-reduce aware reducers |
2728
|
|
|
|
|
|
|
(when invoked with -M and an external program). |
2729
|
|
|
|
|
|
|
(Sigh, did this case ever work?) |
2730
|
|
|
|
|
|
|
This case should now work. |
2731
|
|
|
|
|
|
|
Thanks to Yuri Pradkin for reporting this bug (in 2011). |
2732
|
|
|
|
|
|
|
|
2733
|
|
|
|
|
|
|
=item BUG FIX |
2734
|
|
|
|
|
|
|
|
2735
|
|
|
|
|
|
|
Fixed perl-5.10 problem with L. |
2736
|
|
|
|
|
|
|
Thanks to Yuri Pradkin for reporting this bug (in 2013). |
2737
|
|
|
|
|
|
|
|
2738
|
|
|
|
|
|
|
=back |
2739
|
|
|
|
|
|
|
|
2740
|
|
|
|
|
|
|
=head2 2.39, date 2013-05-31 |
2741
|
|
|
|
|
|
|
quick release for the dbrowuniq extension |
2742
|
|
|
|
|
|
|
|
2743
|
|
|
|
|
|
|
=over 4 |
2744
|
|
|
|
|
|
|
|
2745
|
|
|
|
|
|
|
=item BUG FIX |
2746
|
|
|
|
|
|
|
|
2747
|
|
|
|
|
|
|
Actually in 2.38, the Fedora F<.spec> got cleaner dependencies. |
2748
|
|
|
|
|
|
|
Suggestion from Christopher Meng via L. |
2749
|
|
|
|
|
|
|
|
2750
|
|
|
|
|
|
|
=item ENHANCEMENT |
2751
|
|
|
|
|
|
|
|
2752
|
|
|
|
|
|
|
Fsdb files are now explicitly set into UTF-8 encoding, |
2753
|
|
|
|
|
|
|
unless one specifies C<-encoding> to C. |
2754
|
|
|
|
|
|
|
|
2755
|
|
|
|
|
|
|
=item ENHANCEMENT |
2756
|
|
|
|
|
|
|
|
2757
|
|
|
|
|
|
|
L now supports C<-I> for incremental counting. |
2758
|
|
|
|
|
|
|
|
2759
|
|
|
|
|
|
|
=back |
2760
|
|
|
|
|
|
|
|
2761
|
|
|
|
|
|
|
=head2 2.40, 2013-07-13 |
2762
|
|
|
|
|
|
|
small bug fixes |
2763
|
|
|
|
|
|
|
|
2764
|
|
|
|
|
|
|
=over 4 |
2765
|
|
|
|
|
|
|
|
2766
|
|
|
|
|
|
|
=item BUG FIX |
2767
|
|
|
|
|
|
|
|
2768
|
|
|
|
|
|
|
L now has more respect for a user-given temporary directory; |
2769
|
|
|
|
|
|
|
it no longer is ignored for merging. |
2770
|
|
|
|
|
|
|
|
2771
|
|
|
|
|
|
|
=item IMPROVEMENT |
2772
|
|
|
|
|
|
|
|
2773
|
|
|
|
|
|
|
L now has options to output the first, last, and both first |
2774
|
|
|
|
|
|
|
and last rows of a run (C<-F>, C<-L>, and C<-B>). |
2775
|
|
|
|
|
|
|
|
2776
|
|
|
|
|
|
|
=item BUG FIX |
2777
|
|
|
|
|
|
|
|
2778
|
|
|
|
|
|
|
L now correctly handles C<-N>. Sigh, it didn't work before. |
2779
|
|
|
|
|
|
|
|
2780
|
|
|
|
|
|
|
=back |
2781
|
|
|
|
|
|
|
|
2782
|
|
|
|
|
|
|
=head2 2.41, 2013-07-29 |
2783
|
|
|
|
|
|
|
small bug and packaging fixes |
2784
|
|
|
|
|
|
|
|
2785
|
|
|
|
|
|
|
=over 4 |
2786
|
|
|
|
|
|
|
|
2787
|
|
|
|
|
|
|
=item ENHANCEMENT |
2788
|
|
|
|
|
|
|
|
2789
|
|
|
|
|
|
|
Documentation to L improved |
2790
|
|
|
|
|
|
|
(inspired by questions from Qian Kun). |
2791
|
|
|
|
|
|
|
|
2792
|
|
|
|
|
|
|
=item BUG FIX |
2793
|
|
|
|
|
|
|
|
2794
|
|
|
|
|
|
|
L no longer duplicates |
2795
|
|
|
|
|
|
|
singleton unique lines when outputting both (with C<-B>). |
2796
|
|
|
|
|
|
|
|
2797
|
|
|
|
|
|
|
=item BUG FIX |
2798
|
|
|
|
|
|
|
|
2799
|
|
|
|
|
|
|
Add missing C dependency to F. |
2800
|
|
|
|
|
|
|
|
2801
|
|
|
|
|
|
|
=item ENHANCEMENT |
2802
|
|
|
|
|
|
|
|
2803
|
|
|
|
|
|
|
Tests now show the diff of the failing output |
2804
|
|
|
|
|
|
|
if run with C. |
2805
|
|
|
|
|
|
|
|
2806
|
|
|
|
|
|
|
=item ENHANCEMENT |
2807
|
|
|
|
|
|
|
|
2808
|
|
|
|
|
|
|
L now includes documentation for how to output extra rows. |
2809
|
|
|
|
|
|
|
Suggestion from Yuri Pradkin. |
2810
|
|
|
|
|
|
|
|
2811
|
|
|
|
|
|
|
=item BUG FIX |
2812
|
|
|
|
|
|
|
|
2813
|
|
|
|
|
|
|
Several improvements to the Fedora package |
2814
|
|
|
|
|
|
|
from Michael Schwendt |
2815
|
|
|
|
|
|
|
via L, |
2816
|
|
|
|
|
|
|
and from the harsh master that is F. |
2817
|
|
|
|
|
|
|
(I am stymied at teaching it that "outliers" is spelled correctly. |
2818
|
|
|
|
|
|
|
Maybe I should send it Schneier's book. And an unresolvable |
2819
|
|
|
|
|
|
|
invalid-spec-name lurks in the SRPM.) |
2820
|
|
|
|
|
|
|
|
2821
|
|
|
|
|
|
|
=back |
2822
|
|
|
|
|
|
|
|
2823
|
|
|
|
|
|
|
=head2 2.42, 2013-07-31 |
2824
|
|
|
|
|
|
|
A bug fix and packaging release. |
2825
|
|
|
|
|
|
|
|
2826
|
|
|
|
|
|
|
=over 4 |
2827
|
|
|
|
|
|
|
|
2828
|
|
|
|
|
|
|
=item ENHANCEMENT |
2829
|
|
|
|
|
|
|
|
2830
|
|
|
|
|
|
|
Documentation to L improved |
2831
|
|
|
|
|
|
|
to better memory usage. |
2832
|
|
|
|
|
|
|
(Based on problem report by Lin Quan.) |
2833
|
|
|
|
|
|
|
|
2834
|
|
|
|
|
|
|
=item BUG FIX |
2835
|
|
|
|
|
|
|
|
2836
|
|
|
|
|
|
|
The F<.spec> is now F |
2837
|
|
|
|
|
|
|
to satisfy F. |
2838
|
|
|
|
|
|
|
Thanks to Christopher Meng for a specific bug report. |
2839
|
|
|
|
|
|
|
|
2840
|
|
|
|
|
|
|
=item BUG FIX |
2841
|
|
|
|
|
|
|
|
2842
|
|
|
|
|
|
|
Test F no longer has a column |
2843
|
|
|
|
|
|
|
that caused failures because of numerical instability. |
2844
|
|
|
|
|
|
|
|
2845
|
|
|
|
|
|
|
=item BUG FIX |
2846
|
|
|
|
|
|
|
|
2847
|
|
|
|
|
|
|
Some tests now better handle bugs in old versions of perl (5.10, 5.12). |
2848
|
|
|
|
|
|
|
Thanks to Calvin Ardi for help debugging this on a Mac with perl-5.12, |
2849
|
|
|
|
|
|
|
but the fix should affect other platforms. |
2850
|
|
|
|
|
|
|
|
2851
|
|
|
|
|
|
|
=back |
2852
|
|
|
|
|
|
|
|
2853
|
|
|
|
|
|
|
=head2 2.43, 2013-08-27 |
2854
|
|
|
|
|
|
|
Adds in-file compression. |
2855
|
|
|
|
|
|
|
|
2856
|
|
|
|
|
|
|
=over 4 |
2857
|
|
|
|
|
|
|
|
2858
|
|
|
|
|
|
|
=item BUG FIX |
2859
|
|
|
|
|
|
|
|
2860
|
|
|
|
|
|
|
Changed the sort on F to strings |
2861
|
|
|
|
|
|
|
(from numerics) so we're less susceptible to false test-failures |
2862
|
|
|
|
|
|
|
due to floating point IO differences. |
2863
|
|
|
|
|
|
|
|
2864
|
|
|
|
|
|
|
=item EXPERIMENTAL ENHANCEMENT |
2865
|
|
|
|
|
|
|
|
2866
|
|
|
|
|
|
|
Yet more parallelism in L: |
2867
|
|
|
|
|
|
|
new "endgame-mode" builds a merge tree of processes at the end |
2868
|
|
|
|
|
|
|
of large merge tasks to get maximal parallelism. |
2869
|
|
|
|
|
|
|
Currently this feature is off by default |
2870
|
|
|
|
|
|
|
because it can hang for some inputs. |
2871
|
|
|
|
|
|
|
Enable this experimental feature with C<--endgame>. |
2872
|
|
|
|
|
|
|
|
2873
|
|
|
|
|
|
|
=item ENHANCEMENT |
2874
|
|
|
|
|
|
|
|
2875
|
|
|
|
|
|
|
C now handles being given C objects |
2876
|
|
|
|
|
|
|
(as exercised by L). |
2877
|
|
|
|
|
|
|
|
2878
|
|
|
|
|
|
|
=item BUG FIX |
2879
|
|
|
|
|
|
|
|
2880
|
|
|
|
|
|
|
Handling of NamedTmpfiles now supports concurrency. |
2881
|
|
|
|
|
|
|
This fix will hopefully fix occasional |
2882
|
|
|
|
|
|
|
"Use of uninitialized value $_ in string ne at ...NamedTmpfile.pm line 93." |
2883
|
|
|
|
|
|
|
errors. |
2884
|
|
|
|
|
|
|
|
2885
|
|
|
|
|
|
|
=item BUG FIX |
2886
|
|
|
|
|
|
|
|
2887
|
|
|
|
|
|
|
Fsdb now requires perl 5.10. |
2888
|
|
|
|
|
|
|
This is a bug fix because some test cases used to require it, |
2889
|
|
|
|
|
|
|
but this fact was not properly documented. |
2890
|
|
|
|
|
|
|
(Back-porting to 5.008 would require removing all C<//> operators.) |
2891
|
|
|
|
|
|
|
|
2892
|
|
|
|
|
|
|
=item ENHANCEMENT |
2893
|
|
|
|
|
|
|
|
2894
|
|
|
|
|
|
|
Fsdb now handles automatic compression of file contents. |
2895
|
|
|
|
|
|
|
Enable compression with C |
2896
|
|
|
|
|
|
|
(or C or C). |
2897
|
|
|
|
|
|
|
All programs should operate on compressed files |
2898
|
|
|
|
|
|
|
and leave the output with the same level of compression. |
2899
|
|
|
|
|
|
|
C is recommended as fastest and most efficient. |
2900
|
|
|
|
|
|
|
C produces unrepeatable output (and so has no |
2901
|
|
|
|
|
|
|
output test), it seems to insist on adding a timestamp. |
2902
|
|
|
|
|
|
|
|
2903
|
|
|
|
|
|
|
=back |
2904
|
|
|
|
|
|
|
|
2905
|
|
|
|
|
|
|
=head2 2.44, 2013-10-02 |
2906
|
|
|
|
|
|
|
A major change--all threads are gone. |
2907
|
|
|
|
|
|
|
|
2908
|
|
|
|
|
|
|
=over 4 |
2909
|
|
|
|
|
|
|
|
2910
|
|
|
|
|
|
|
=item ENHANCEMENT |
2911
|
|
|
|
|
|
|
|
2912
|
|
|
|
|
|
|
Fsdb is now thread free and only uses processes for parallelism. |
2913
|
|
|
|
|
|
|
This change is a big change--the entire motivation for Fsdb-2 |
2914
|
|
|
|
|
|
|
was to exploit parallelism via threading. |
2915
|
|
|
|
|
|
|
Parallelism--good, but perl threading--bad for performance. |
2916
|
|
|
|
|
|
|
Horribly bad for performance. |
2917
|
|
|
|
|
|
|
About 20x worse than pipes on my box. |
2918
|
|
|
|
|
|
|
(See perl bug #119445 for the discussion.) |
2919
|
|
|
|
|
|
|
|
2920
|
|
|
|
|
|
|
=item NEW |
2921
|
|
|
|
|
|
|
|
2922
|
|
|
|
|
|
|
C provides a thread-like abstraction over forking, |
2923
|
|
|
|
|
|
|
with some nice support for callbacks in the parent upon child termination. |
2924
|
|
|
|
|
|
|
|
2925
|
|
|
|
|
|
|
=item ENHANCEMENT |
2926
|
|
|
|
|
|
|
|
2927
|
|
|
|
|
|
|
Details about removing threads: |
2928
|
|
|
|
|
|
|
C is thread free, |
2929
|
|
|
|
|
|
|
and new tests to verify each of its parts. |
2930
|
|
|
|
|
|
|
The easy cases are C, |
2931
|
|
|
|
|
|
|
C, C, C, and |
2932
|
|
|
|
|
|
|
C, each of which use it in simple ways (2013-09-09). |
2933
|
|
|
|
|
|
|
C is now thread free (2013-09-13), |
2934
|
|
|
|
|
|
|
but was a significant rewrite, |
2935
|
|
|
|
|
|
|
which brought C along. |
2936
|
|
|
|
|
|
|
C is partly thread free (2013-09-21), |
2937
|
|
|
|
|
|
|
again as a rewrite, |
2938
|
|
|
|
|
|
|
and it brings C along. |
2939
|
|
|
|
|
|
|
Full C support took much longer (2013-10-02). |
2940
|
|
|
|
|
|
|
|
2941
|
|
|
|
|
|
|
=item BUG FIX |
2942
|
|
|
|
|
|
|
|
2943
|
|
|
|
|
|
|
When running with user-only output (C<-n>), |
2944
|
|
|
|
|
|
|
L now resets the output vector C<$ofref> |
2945
|
|
|
|
|
|
|
after it has been output. |
2946
|
|
|
|
|
|
|
|
2947
|
|
|
|
|
|
|
=item NEW |
2948
|
|
|
|
|
|
|
|
2949
|
|
|
|
|
|
|
L will create all columns at the head of each row |
2950
|
|
|
|
|
|
|
with the C<--first> option. |
2951
|
|
|
|
|
|
|
|
2952
|
|
|
|
|
|
|
=item NEW |
2953
|
|
|
|
|
|
|
|
2954
|
|
|
|
|
|
|
L will concatenate two files, |
2955
|
|
|
|
|
|
|
verifying that they have the same schema. |
2956
|
|
|
|
|
|
|
|
2957
|
|
|
|
|
|
|
=item ENHANCEMENT |
2958
|
|
|
|
|
|
|
|
2959
|
|
|
|
|
|
|
L now passes comments through, |
2960
|
|
|
|
|
|
|
rather than eating them as before. |
2961
|
|
|
|
|
|
|
|
2962
|
|
|
|
|
|
|
Also, L now supports a C<--> option to prevent misinterpreting |
2963
|
|
|
|
|
|
|
sub-program parameters as for dbmapreduce. |
2964
|
|
|
|
|
|
|
|
2965
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
2966
|
|
|
|
|
|
|
|
2967
|
|
|
|
|
|
|
L no longer figures out if it needs to add the key |
2968
|
|
|
|
|
|
|
to the output. For multi-key-aware reducers, it never does |
2969
|
|
|
|
|
|
|
(and cannot). For non-multi-key-aware reducers, |
2970
|
|
|
|
|
|
|
it defaults to add the key and will now fail if the reducer adds the key |
2971
|
|
|
|
|
|
|
(with error "dbcolcreate: attempt to create pre-existing column..."). |
2972
|
|
|
|
|
|
|
In such cases, one must disable adding the key with the new |
2973
|
|
|
|
|
|
|
option C<--no-prepend-key>. |
2974
|
|
|
|
|
|
|
|
2975
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
2976
|
|
|
|
|
|
|
|
2977
|
|
|
|
|
|
|
L no longer copies the input field separator by default. |
2978
|
|
|
|
|
|
|
For multi-key-aware reducers, it never does |
2979
|
|
|
|
|
|
|
(and cannot). For non-multi-key-aware reducers, |
2980
|
|
|
|
|
|
|
it defaults to I<not> copying the field separator, |
2981
|
|
|
|
|
|
|
but it will copy it (the old default) with the C<--copy-fs> option. |
2982
|
|
|
|
|
|
|
|
2983
|
|
|
|
|
|
|
=back |
2984
|
|
|
|
|
|
|
|
2985
|
|
|
|
|
|
|
=head2 2.45, 2013-10-07 |
2986
|
|
|
|
|
|
|
cleanup from de-thread-ification |
2987
|
|
|
|
|
|
|
|
2988
|
|
|
|
|
|
|
=over 4 |
2989
|
|
|
|
|
|
|
|
2990
|
|
|
|
|
|
|
=item BUG FIX |
2991
|
|
|
|
|
|
|
|
2992
|
|
|
|
|
|
|
Corrected a fast busy-wait in L. |
2993
|
|
|
|
|
|
|
|
2994
|
|
|
|
|
|
|
=item ENHANCEMENT |
2995
|
|
|
|
|
|
|
|
2996
|
|
|
|
|
|
|
Endgame mode enabled in L; it (and also large cases of L) |
2997
|
|
|
|
|
|
|
should now exploit greater parallelism. |
2998
|
|
|
|
|
|
|
|
2999
|
|
|
|
|
|
|
=item BUG FIX |
3000
|
|
|
|
|
|
|
|
3001
|
|
|
|
|
|
|
Test case with C (gone since 2.44) now removed. |
3002
|
|
|
|
|
|
|
|
3003
|
|
|
|
|
|
|
=back |
3004
|
|
|
|
|
|
|
|
3005
|
|
|
|
|
|
|
=head2 2.46, 2013-10-08 |
3006
|
|
|
|
|
|
|
continuing cleanup of our no-threads version |
3007
|
|
|
|
|
|
|
|
3008
|
|
|
|
|
|
|
=over 4 |
3009
|
|
|
|
|
|
|
|
3010
|
|
|
|
|
|
|
=item BUG FIX |
3011
|
|
|
|
|
|
|
|
3012
|
|
|
|
|
|
|
Fixed some packaging details. |
3013
|
|
|
|
|
|
|
(Really, threads are no longer required, |
3014
|
|
|
|
|
|
|
missing tests in the MANIFEST.) |
3015
|
|
|
|
|
|
|
|
3016
|
|
|
|
|
|
|
=item IMPROVEMENT |
3017
|
|
|
|
|
|
|
|
3018
|
|
|
|
|
|
|
L now better communicates with the merge process to avoid |
3019
|
|
|
|
|
|
|
bursty parallelism. |
3020
|
|
|
|
|
|
|
|
3021
|
|
|
|
|
|
|
L now can take C<< -autoflush => 1 >> |
3022
|
|
|
|
|
|
|
for line-buffered IO. |
3023
|
|
|
|
|
|
|
|
3024
|
|
|
|
|
|
|
=back |
3025
|
|
|
|
|
|
|
|
3026
|
|
|
|
|
|
|
=head2 2.47, 2013-10-12 |
3027
|
|
|
|
|
|
|
test suite cleanup for non-threaded perls |
3028
|
|
|
|
|
|
|
|
3029
|
|
|
|
|
|
|
=over 4 |
3030
|
|
|
|
|
|
|
|
3031
|
|
|
|
|
|
|
=item BUG FIX |
3032
|
|
|
|
|
|
|
|
3033
|
|
|
|
|
|
|
Removed some stray "use threads" in some test cases. |
3034
|
|
|
|
|
|
|
We didn't need them, and these were breaking non-threaded perls. |
3035
|
|
|
|
|
|
|
|
3036
|
|
|
|
|
|
|
=item BUG FIX |
3037
|
|
|
|
|
|
|
|
3038
|
|
|
|
|
|
|
Better handling of Fred cleanup; |
3039
|
|
|
|
|
|
|
should fix intermittent L failures on BSD. |
3040
|
|
|
|
|
|
|
|
3041
|
|
|
|
|
|
|
=item ENHANCEMENT |
3042
|
|
|
|
|
|
|
|
3043
|
|
|
|
|
|
|
Improved test framework to show output when tests fail. |
3044
|
|
|
|
|
|
|
(This time, for real.) |
3045
|
|
|
|
|
|
|
|
3046
|
|
|
|
|
|
|
=back |
3047
|
|
|
|
|
|
|
|
3048
|
|
|
|
|
|
|
=head2 2.48, 2014-01-03 |
3049
|
|
|
|
|
|
|
small bugfixes and improved release engineering |
3050
|
|
|
|
|
|
|
|
3051
|
|
|
|
|
|
|
=over 4 |
3052
|
|
|
|
|
|
|
|
3053
|
|
|
|
|
|
|
=item ENHANCEMENT |
3054
|
|
|
|
|
|
|
|
3055
|
|
|
|
|
|
|
Test suites now skip tests for libraries that are missing. |
3056
|
|
|
|
|
|
|
(Patch for missing C contributed by Calvin Ardi.) |
3057
|
|
|
|
|
|
|
|
3058
|
|
|
|
|
|
|
=item ENHANCEMENT |
3059
|
|
|
|
|
|
|
|
3060
|
|
|
|
|
|
|
Removed references to Jdb in the package specification. |
3061
|
|
|
|
|
|
|
Since the name was changed in 2008, there's no longer a huge |
3062
|
|
|
|
|
|
|
need for backwards compatibility. |
3063
|
|
|
|
|
|
|
(Suggestion from Petr Šabata.) |
3064
|
|
|
|
|
|
|
|
3065
|
|
|
|
|
|
|
=item ENHANCEMENT |
3066
|
|
|
|
|
|
|
|
3067
|
|
|
|
|
|
|
Test suites now invoke the perl using the path from C<$Config{perlpath}>. |
3068
|
|
|
|
|
|
|
Hopefully this helps testing in environments where there are multiple installed |
3069
|
|
|
|
|
|
|
perls and the default perl is not the same as the perl-under-test |
3070
|
|
|
|
|
|
|
(as happens in cpantesters.org). |
3071
|
|
|
|
|
|
|
|
3072
|
|
|
|
|
|
|
=item BUG FIX |
3073
|
|
|
|
|
|
|
|
3074
|
|
|
|
|
|
|
Added specific encoding to this manpage to account for |
3075
|
|
|
|
|
|
|
Unicode. Required to build correctly against perl-5.18. |
3076
|
|
|
|
|
|
|
|
3077
|
|
|
|
|
|
|
=back |
3078
|
|
|
|
|
|
|
|
3079
|
|
|
|
|
|
|
=head2 2.49, 2014-01-04 |
3080
|
|
|
|
|
|
|
bugfix to unicode handling in Fsdb IO (plus minor packaging fixes) |
3081
|
|
|
|
|
|
|
|
3082
|
|
|
|
|
|
|
=over 4 |
3083
|
|
|
|
|
|
|
|
3084
|
|
|
|
|
|
|
=item BUG FIX |
3085
|
|
|
|
|
|
|
|
3086
|
|
|
|
|
|
|
Restored a line in the F<.spec> to chmod g-s. |
3087
|
|
|
|
|
|
|
|
3088
|
|
|
|
|
|
|
=item BUG FIX |
3089
|
|
|
|
|
|
|
|
3090
|
|
|
|
|
|
|
Unicode decoding is now handled correctly for programs that read |
3091
|
|
|
|
|
|
|
from standard input. |
3092
|
|
|
|
|
|
|
(Also: New test scripts cover unicode input and output.) |
3093
|
|
|
|
|
|
|
|
3094
|
|
|
|
|
|
|
=item BUG FIX |
3095
|
|
|
|
|
|
|
|
3096
|
|
|
|
|
|
|
Fix to L documentation encoding line. |
3097
|
|
|
|
|
|
|
Addresses test failure in perl-5.16 and earlier. |
3098
|
|
|
|
|
|
|
(Who knew "encoding" had to be followed by a blank line.) |
3099
|
|
|
|
|
|
|
|
3100
|
|
|
|
|
|
|
=back |
3101
|
|
|
|
|
|
|
|
3102
|
|
|
|
|
|
|
=head1 WHAT'S NEW |
3103
|
|
|
|
|
|
|
|
3104
|
|
|
|
|
|
|
=head2 2.50, 2014-05-27 |
3105
|
|
|
|
|
|
|
a quick release for spec tweaks |
3106
|
|
|
|
|
|
|
|
3107
|
|
|
|
|
|
|
=over 4 |
3108
|
|
|
|
|
|
|
|
3109
|
|
|
|
|
|
|
=item ENHANCEMENT |
3110
|
|
|
|
|
|
|
|
3111
|
|
|
|
|
|
|
In L, the C<-N> (no output, even comments) option now |
3112
|
|
|
|
|
|
|
implies C<-n>, and it now suppresses the header and trailer. |
3113
|
|
|
|
|
|
|
|
3114
|
|
|
|
|
|
|
=item BUG FIX |
3115
|
|
|
|
|
|
|
|
3116
|
|
|
|
|
|
|
A few more tweaks to the F from Petr Šabata. |
3117
|
|
|
|
|
|
|
|
3118
|
|
|
|
|
|
|
=item BUG FIX |
3119
|
|
|
|
|
|
|
|
3120
|
|
|
|
|
|
|
Fixed 3 uses of C |
3121
|
|
|
|
|
|
|
failures (due to warnings, not real failures) on some platforms. |
3122
|
|
|
|
|
|
|
|
3123
|
|
|
|
|
|
|
=back |
3124
|
|
|
|
|
|
|
|
3125
|
|
|
|
|
|
|
=head2 2.51, 2014-09-05 |
3126
|
|
|
|
|
|
|
Feature enhancements to L, L, L, and new L |
3127
|
|
|
|
|
|
|
|
3128
|
|
|
|
|
|
|
=over 4 |
3129
|
|
|
|
|
|
|
|
3130
|
|
|
|
|
|
|
=item ENHANCEMENT |
3131
|
|
|
|
|
|
|
|
3132
|
|
|
|
|
|
|
L now has a C<--no-recreate-fatal> |
3133
|
|
|
|
|
|
|
that causes it to ignore creation of existing columns |
3134
|
|
|
|
|
|
|
(instead of failing). |
3135
|
|
|
|
|
|
|
|
3136
|
|
|
|
|
|
|
=item ENHANCEMENT |
3137
|
|
|
|
|
|
|
|
3138
|
|
|
|
|
|
|
L once again is robust to reducers |
3139
|
|
|
|
|
|
|
that output the key; |
3140
|
|
|
|
|
|
|
C<--no-prepend-key> is no longer mandatory. |
3141
|
|
|
|
|
|
|
|
3142
|
|
|
|
|
|
|
=item ENHANCEMENT |
3143
|
|
|
|
|
|
|
|
3144
|
|
|
|
|
|
|
L can now enumerate the output rows with C<-E>. |
3145
|
|
|
|
|
|
|
|
3146
|
|
|
|
|
|
|
=item BUG FIX |
3147
|
|
|
|
|
|
|
|
3148
|
|
|
|
|
|
|
L is more mathematically robust. |
3149
|
|
|
|
|
|
|
Previously for some inputs and some platforms, |
3150
|
|
|
|
|
|
|
floating point rounding could |
3151
|
|
|
|
|
|
|
sometimes cause square roots of negative numbers. |
3152
|
|
|
|
|
|
|
|
3153
|
|
|
|
|
|
|
=item NEW |
3154
|
|
|
|
|
|
|
|
3155
|
|
|
|
|
|
|
L converts the output of the MySQL or MariaDB |
3156
|
|
|
|
|
|
|
select command into fsdb format. |
3157
|
|
|
|
|
|
|
|
3158
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
3159
|
|
|
|
|
|
|
|
3160
|
|
|
|
|
|
|
L now outputs the I row |
3161
|
|
|
|
|
|
|
when doing sloppy numeric comparisons, |
3162
|
|
|
|
|
|
|
to better support test suites. |
3163
|
|
|
|
|
|
|
|
3164
|
|
|
|
|
|
|
=back |
3165
|
|
|
|
|
|
|
|
3166
|
|
|
|
|
|
|
=head2 2.52, 2014-11-03 |
3167
|
|
|
|
|
|
|
Fixing the test suite for line number changes. |
3168
|
|
|
|
|
|
|
|
3169
|
|
|
|
|
|
|
=over 4 |
3170
|
|
|
|
|
|
|
|
3171
|
|
|
|
|
|
|
=item ENHANCEMENT |
3172
|
|
|
|
|
|
|
|
3173
|
|
|
|
|
|
|
Test suites changes to be robust to exact line numbers of failures, |
3174
|
|
|
|
|
|
|
since different Perl releases fail on different lines. |
3175
|
|
|
|
|
|
|
L |
3176
|
|
|
|
|
|
|
|
3177
|
|
|
|
|
|
|
=back |
3178
|
|
|
|
|
|
|
|
3179
|
|
|
|
|
|
|
|
3180
|
|
|
|
|
|
|
=head2 2.53, 2014-11-26 |
3181
|
|
|
|
|
|
|
bug fixes and stability improvements to dbmapreduce |
3182
|
|
|
|
|
|
|
|
3183
|
|
|
|
|
|
|
=over 4 |
3184
|
|
|
|
|
|
|
|
3185
|
|
|
|
|
|
|
=item ENHANCEMENT |
3186
|
|
|
|
|
|
|
|
3187
|
|
|
|
|
|
|
The L now supports a C<--quiet> option. |
3188
|
|
|
|
|
|
|
|
3189
|
|
|
|
|
|
|
=item ENHANCEMENT |
3190
|
|
|
|
|
|
|
|
3191
|
|
|
|
|
|
|
Better documentation of L. |
3192
|
|
|
|
|
|
|
|
3193
|
|
|
|
|
|
|
=item BUGFIX |
3194
|
|
|
|
|
|
|
|
3195
|
|
|
|
|
|
|
Added groff-base and perl-podlators to the Fedora package spec. |
3196
|
|
|
|
|
|
|
Fixes L. |
3197
|
|
|
|
|
|
|
(Also in package 2.52-2.) |
3198
|
|
|
|
|
|
|
|
3199
|
|
|
|
|
|
|
=item BUGFIX |
3200
|
|
|
|
|
|
|
|
3201
|
|
|
|
|
|
|
An important stability improvement to L. |
3202
|
|
|
|
|
|
|
It, plus L, and L now support |
3203
|
|
|
|
|
|
|
controlled parallelism with the C<--parallelism=N> option. |
3204
|
|
|
|
|
|
|
They default to run with the number of available CPUs. |
3205
|
|
|
|
|
|
|
L also moderates its level of parallelism. |
3206
|
|
|
|
|
|
|
Previously it would create reducers as needed, |
3207
|
|
|
|
|
|
|
causing CPU thrashing if reducers ran much slower than data production. |
3208
|
|
|
|
|
|
|
|
3209
|
|
|
|
|
|
|
=item BUGFIX |
3210
|
|
|
|
|
|
|
|
3211
|
|
|
|
|
|
|
The combination of L with L now works |
3212
|
|
|
|
|
|
|
as it should. (The obscure bug was an interaction with L |
3213
|
|
|
|
|
|
|
with non-multi-key reducers that output their own key. L |
3214
|
|
|
|
|
|
|
has too many useful corner cases.) |
3215
|
|
|
|
|
|
|
|
3216
|
|
|
|
|
|
|
=back |
3217
|
|
|
|
|
|
|
|
3218
|
|
|
|
|
|
|
=head2 2.54, 2014-11-28 |
3219
|
|
|
|
|
|
|
fix for the test suite to correct failing tests on not-my-platform |
3220
|
|
|
|
|
|
|
|
3221
|
|
|
|
|
|
|
=over 4 |
3222
|
|
|
|
|
|
|
|
3223
|
|
|
|
|
|
|
=item BUGFIX |
3224
|
|
|
|
|
|
|
|
3225
|
|
|
|
|
|
|
Sigh, the test suite now has a test suite. |
3226
|
|
|
|
|
|
|
Because, yes, I broke it, causing many incorrect failures |
3227
|
|
|
|
|
|
|
at cpantesters. |
3228
|
|
|
|
|
|
|
Now fixed. |
3229
|
|
|
|
|
|
|
|
3230
|
|
|
|
|
|
|
=back |
3231
|
|
|
|
|
|
|
|
3232
|
|
|
|
|
|
|
=head2 2.55, 2015-01-05 |
3233
|
|
|
|
|
|
|
many spelling fixes and L tests are more robust to different numeric precision |
3234
|
|
|
|
|
|
|
|
3235
|
|
|
|
|
|
|
=over 4 |
3236
|
|
|
|
|
|
|
|
3237
|
|
|
|
|
|
|
=item ENHANCEMENT |
3238
|
|
|
|
|
|
|
|
3239
|
|
|
|
|
|
|
L now can be extra quiet, as I continue to try to track down |
3240
|
|
|
|
|
|
|
a numeric difference on FreeBSD AMD boxes. |
3241
|
|
|
|
|
|
|
|
3242
|
|
|
|
|
|
|
=item ENHANCEMENT |
3243
|
|
|
|
|
|
|
|
3244
|
|
|
|
|
|
|
L gave different test output |
3245
|
|
|
|
|
|
|
(just reflecting rounding error) |
3246
|
|
|
|
|
|
|
when stddev approaches zero. We now detect and handle this case. |
3247
|
|
|
|
|
|
|
See |
3248
|
|
|
|
|
|
|
and thanks to H. Merijn Brand for the bug report. |
3249
|
|
|
|
|
|
|
|
3250
|
|
|
|
|
|
|
=item BUG FIX |
3251
|
|
|
|
|
|
|
|
3252
|
|
|
|
|
|
|
Many, many spelling bugs found by |
3253
|
|
|
|
|
|
|
H. Merijn Brand; thanks for the bug report. |
3254
|
|
|
|
|
|
|
|
3255
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
3256
|
|
|
|
|
|
|
|
3257
|
|
|
|
|
|
|
A number of programs had misspelled "separator" |
3258
|
|
|
|
|
|
|
in C<--fieldseparator> and C<--columnseparator> options as "seperator". |
3259
|
|
|
|
|
|
|
These are now correctly spelled. |
3260
|
|
|
|
|
|
|
|
3261
|
|
|
|
|
|
|
=back |
3262
|
|
|
|
|
|
|
|
3263
|
|
|
|
|
|
|
=head2 2.56, 2015-02-03 |
3264
|
|
|
|
|
|
|
fix against Getopt::Long-2.43's stricter error checking |
3265
|
|
|
|
|
|
|
|
3266
|
|
|
|
|
|
|
=over 4 |
3267
|
|
|
|
|
|
|
|
3268
|
|
|
|
|
|
|
=item BUG FIX |
3269
|
|
|
|
|
|
|
|
3270
|
|
|
|
|
|
|
Internal argument parsing uses Getopt::Long, but mixed pass-through and E<lt>E<gt>. |
3271
|
|
|
|
|
|
|
Bug reported by Petr Pisar at L. |
3272
|
|
|
|
|
|
|
|
3273
|
|
|
|
|
|
|
=item BUG FIX |
3274
|
|
|
|
|
|
|
|
3275
|
|
|
|
|
|
|
Added missing BuildRequires for C. |
3276
|
|
|
|
|
|
|
|
3277
|
|
|
|
|
|
|
=back |
3278
|
|
|
|
|
|
|
|
3279
|
|
|
|
|
|
|
=head2 2.57, 2015-04-29 |
3280
|
|
|
|
|
|
|
Minor changes, with better performance from L. |
3281
|
|
|
|
|
|
|
|
3282
|
|
|
|
|
|
|
=over 4 |
3283
|
|
|
|
|
|
|
|
3284
|
|
|
|
|
|
|
=item BUG FIX |
3285
|
|
|
|
|
|
|
|
3286
|
|
|
|
|
|
|
L now honors C<--remove-inputs> (previously it didn't). |
3287
|
|
|
|
|
|
|
This omission meant that L (and L) would accumulate |
3288
|
|
|
|
|
|
|
files in F when running. Bad news for inputs with 4M keys. |
3289
|
|
|
|
|
|
|
|
3290
|
|
|
|
|
|
|
=item ENHANCEMENT |
3291
|
|
|
|
|
|
|
|
3292
|
|
|
|
|
|
|
L should be faster with lots of small keys. |
3293
|
|
|
|
|
|
|
L now supports C<-k> to get some of the functionality of |
3294
|
|
|
|
|
|
|
L (if data is pre-sorted and median/quartiles are not required). |
3295
|
|
|
|
|
|
|
|
3296
|
|
|
|
|
|
|
L now honors C<--remove-inputs> (previously it didn't). |
3297
|
|
|
|
|
|
|
This omission meant that L (and L) would accumulate |
3298
|
|
|
|
|
|
|
files in F when running. Bad news for inputs with 4M keys. |
3299
|
|
|
|
|
|
|
|
3300
|
|
|
|
|
|
|
=back |
3301
|
|
|
|
|
|
|
|
3302
|
|
|
|
|
|
|
|
3303
|
|
|
|
|
|
|
=head2 2.58, 2015-04-30 |
3304
|
|
|
|
|
|
|
Bugfix in L |
3305
|
|
|
|
|
|
|
|
3306
|
|
|
|
|
|
|
=over 4 |
3307
|
|
|
|
|
|
|
|
3308
|
|
|
|
|
|
|
=item BUG FIX |
3309
|
|
|
|
|
|
|
|
3310
|
|
|
|
|
|
|
Fixed a case where L suffered mojibake in endgame mode. |
3311
|
|
|
|
|
|
|
This bug surfaced when L was applied to large files |
3312
|
|
|
|
|
|
|
(big enough to require merging) with unicode in them; |
3313
|
|
|
|
|
|
|
the symptom was something like: |
3314
|
|
|
|
|
|
|
Wide character in print at /usr/lib64/perl5/IO/Handle.pm line 420, line 111. |
3315
|
|
|
|
|
|
|
|
3316
|
|
|
|
|
|
|
=back |
3317
|
|
|
|
|
|
|
|
3318
|
|
|
|
|
|
|
|
3319
|
|
|
|
|
|
|
=head2 2.59, 2016-09-01 |
3320
|
|
|
|
|
|
|
Collect a few small bug fixes and documentation improvements. |
3321
|
|
|
|
|
|
|
|
3322
|
|
|
|
|
|
|
=over 4 |
3323
|
|
|
|
|
|
|
|
3324
|
|
|
|
|
|
|
=item BUG FIX |
3325
|
|
|
|
|
|
|
|
3326
|
|
|
|
|
|
|
More IO is explicitly marked UTF-8 to avoid Perl's tendency to |
3327
|
|
|
|
|
|
|
mojibake on otherwise valid unicode input. |
3328
|
|
|
|
|
|
|
This change helps L. |
3329
|
|
|
|
|
|
|
|
3330
|
|
|
|
|
|
|
=item ENHANCEMENT |
3331
|
|
|
|
|
|
|
|
3332
|
|
|
|
|
|
|
L now cross-references L. |
3333
|
|
|
|
|
|
|
|
3334
|
|
|
|
|
|
|
=item ENHANCEMENT |
3335
|
|
|
|
|
|
|
|
3336
|
|
|
|
|
|
|
Documentation for L now clarifies that the default is baseline mode. |
3337
|
|
|
|
|
|
|
|
3338
|
|
|
|
|
|
|
=item BUG FIX |
3339
|
|
|
|
|
|
|
|
3340
|
|
|
|
|
|
|
L now propagates C<-T> into the sorting process (if it is required). |
3341
|
|
|
|
|
|
|
Thanks to Lan Wei for reporting this bug. |
3342
|
|
|
|
|
|
|
|
3343
|
|
|
|
|
|
|
=back |
3344
|
|
|
|
|
|
|
|
3345
|
|
|
|
|
|
|
|
3346
|
|
|
|
|
|
|
=head2 2.60, 2016-09-04 |
3347
|
|
|
|
|
|
|
Adds support for hash joins. |
3348
|
|
|
|
|
|
|
|
3349
|
|
|
|
|
|
|
=over 4 |
3350
|
|
|
|
|
|
|
|
3351
|
|
|
|
|
|
|
=item ENHANCEMENT |
3352
|
|
|
|
|
|
|
|
3353
|
|
|
|
|
|
|
L now supports hash joins |
3354
|
|
|
|
|
|
|
with C<-t lefthash> and C<-t righthash>. |
3355
|
|
|
|
|
|
|
Hash joins cache a table in memory, but do not require |
3356
|
|
|
|
|
|
|
that the other table be sorted. |
3357
|
|
|
|
|
|
|
They are ideal when joining a large table against a small one. |
3358
|
|
|
|
|
|
|
|
3359
|
|
|
|
|
|
|
=back |
3360
|
|
|
|
|
|
|
|
3361
|
|
|
|
|
|
|
=head2 2.61, 2016-09-05 |
3362
|
|
|
|
|
|
|
Support left and right outer joins. |
3363
|
|
|
|
|
|
|
|
3364
|
|
|
|
|
|
|
=over 4 |
3365
|
|
|
|
|
|
|
|
3366
|
|
|
|
|
|
|
=item ENHANCEMENT |
3367
|
|
|
|
|
|
|
|
3368
|
|
|
|
|
|
|
L now handles left and right outer joins |
3369
|
|
|
|
|
|
|
with C<-t left> and C<-t right>. |
3370
|
|
|
|
|
|
|
|
3371
|
|
|
|
|
|
|
=item ENHANCEMENT |
3372
|
|
|
|
|
|
|
|
3373
|
|
|
|
|
|
|
L hash joins are now selected |
3374
|
|
|
|
|
|
|
with C<-m lefthash> and C<-m righthash> |
3375
|
|
|
|
|
|
|
(not the shortlived C<-t righthash> option). |
3376
|
|
|
|
|
|
|
(Technically this change is incompatible with Fsdb-2.60, but |
3377
|
|
|
|
|
|
|
no one but me ever used that version.) |
3378
|
|
|
|
|
|
|
|
3379
|
|
|
|
|
|
|
=back |
3380
|
|
|
|
|
|
|
|
3381
|
|
|
|
|
|
|
=head2 2.62, 2016-11-29 |
3382
|
|
|
|
|
|
|
A new L and other minor improvements. |
3383
|
|
|
|
|
|
|
|
3384
|
|
|
|
|
|
|
=over 4 |
3385
|
|
|
|
|
|
|
|
3386
|
|
|
|
|
|
|
=item ENHANCEMENT |
3387
|
|
|
|
|
|
|
|
3388
|
|
|
|
|
|
|
Documentation for L now includes sample output. |
3389
|
|
|
|
|
|
|
|
3390
|
|
|
|
|
|
|
=item NEW |
3391
|
|
|
|
|
|
|
|
3392
|
|
|
|
|
|
|
L converts a specific form of YAML to fsdb. |
3393
|
|
|
|
|
|
|
|
3394
|
|
|
|
|
|
|
=item BUG FIX |
3395
|
|
|
|
|
|
|
|
3396
|
|
|
|
|
|
|
The test suite now uses C rather than C |
3397
|
|
|
|
|
|
|
to make OpenBSD-5.9 happier, I hope. |
3398
|
|
|
|
|
|
|
|
3399
|
|
|
|
|
|
|
=item ENHANCEMENT |
3400
|
|
|
|
|
|
|
|
3401
|
|
|
|
|
|
|
Comments that log operations at the end of each file now do simple |
3402
|
|
|
|
|
|
|
quoting of spaces. (It is not guaranteed to be fully shell-compliant.) |
3403
|
|
|
|
|
|
|
|
3404
|
|
|
|
|
|
|
=item ENHANCEMENT |
3405
|
|
|
|
|
|
|
|
3406
|
|
|
|
|
|
|
There is a new standard option, C<--header>, |
3407
|
|
|
|
|
|
|
allowing one to specify an Fsdb header for inputs that lack it. |
3408
|
|
|
|
|
|
|
Currently it is supported by L, |
3409
|
|
|
|
|
|
|
L, L, L, L, |
3410
|
|
|
|
|
|
|
L. |
3411
|
|
|
|
|
|
|
|
3412
|
|
|
|
|
|
|
=item ENHANCEMENT |
3413
|
|
|
|
|
|
|
|
3414
|
|
|
|
|
|
|
L now allows the B<--possible-pivots> option, |
3415
|
|
|
|
|
|
|
and if it is provided processes the data in one pass. |
3416
|
|
|
|
|
|
|
|
3417
|
|
|
|
|
|
|
=item ENHANCEMENT |
3418
|
|
|
|
|
|
|
|
3419
|
|
|
|
|
|
|
L logs are now quoted. |
3420
|
|
|
|
|
|
|
|
3421
|
|
|
|
|
|
|
=back |
3422
|
|
|
|
|
|
|
|
3423
|
|
|
|
|
|
|
=head1 AUTHOR |
3424
|
|
|
|
|
|
|
|
3425
|
|
|
|
|
|
|
John Heidemann, C |
3426
|
|
|
|
|
|
|
|
3427
|
|
|
|
|
|
|
See L for the many people who have contributed |
3428
|
|
|
|
|
|
|
bug reports and fixes. |
3429
|
|
|
|
|
|
|
|
3430
|
|
|
|
|
|
|
|
3431
|
|
|
|
|
|
|
=head1 COPYRIGHT |
3432
|
|
|
|
|
|
|
|
3433
|
|
|
|
|
|
|
Fsdb is Copyright (C) 1991-2016 by John Heidemann . |
3434
|
|
|
|
|
|
|
|
3435
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify |
3436
|
|
|
|
|
|
|
it under the terms of version 2 of the GNU General Public License as |
3437
|
|
|
|
|
|
|
published by the Free Software Foundation. |
3438
|
|
|
|
|
|
|
|
3439
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but |
3440
|
|
|
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of |
3441
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
3442
|
|
|
|
|
|
|
General Public License for more details. |
3443
|
|
|
|
|
|
|
|
3444
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License |
3445
|
|
|
|
|
|
|
along with this program; if not, write to the Free Software |
3446
|
|
|
|
|
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
3447
|
|
|
|
|
|
|
|
3448
|
|
|
|
|
|
|
A copy of the GNU General Public License can be found in the file |
3449
|
|
|
|
|
|
|
``COPYING''. |
3450
|
|
|
|
|
|
|
|
3451
|
|
|
|
|
|
|
|
3452
|
|
|
|
|
|
|
|
3453
|
|
|
|
|
|
|
=head1 COMMENTS and BUG REPORTS |
3454
|
|
|
|
|
|
|
|
3455
|
|
|
|
|
|
|
Any comments about these programs should be sent to John Heidemann |
3456
|
|
|
|
|
|
|
C. |
3457
|
|
|
|
|
|
|
|
3458
|
|
|
|
|
|
|
|
3459
|
|
|
|
|
|
|
=cut |
3460
|
|
|
|
|
|
|
|
3461
|
|
|
|
|
|
|
1; # End of Fsdb |
3462
|
|
|
|
|
|
|
|
3463
|
|
|
|
|
|
|
# LocalWords: Exp rdb Manis Evan Schaffer passwd uid gid fullname homedir greg |
3464
|
|
|
|
|
|
|
# LocalWords: gnuplot jgraph dbrow dbcol dbcolcreate dbcoldefine FSDB README un |
3465
|
|
|
|
|
|
|
# LocalWords: dbcolrename dbcolmerge dbcolsplit dbjoin dbsort dbcoldiff Perl bw |
3466
|
|
|
|
|
|
|
# LocalWords: dbmultistats dbrowdiff dbrowenumerate dbroweval dbstats dblistize |
3467
|
|
|
|
|
|
|
# LocalWords: dbcolneaten dbcoltighten dbstripcomments dbstripextraheaders pct |
3468
|
|
|
|
|
|
|
# LocalWords: dbstripleadingspace stddev rsd dbsetheader sprintf LIBDIR BINDIR |
3469
|
|
|
|
|
|
|
# LocalWords: LocalWords isi URL com dbpercentile dbhistogram GRADEBOOK min ss |
3470
|
|
|
|
|
|
|
# LocalWords: gradebook conf std dev dbrowaccumulate dbcolpercentile db dcliff |
3471
|
|
|
|
|
|
|
# LocalWords: dbuniq uniq dbcolize distr pl Apr autoconf Jul html printf Fx fsdb |
3472
|
|
|
|
|
|
|
# LocalWords: printfs dbrowuniq dbrecolize dbformmail kitrace geoff ns berkeley |
3473
|
|
|
|
|
|
|
# LocalWords: comp lang perl Haobo Yu outliers Jorgensen csh dbrowsplituniq crl |
3474
|
|
|
|
|
|
|
# LocalWords: dbcolmovingstats dbcolstats zscores tscores dbcolhisto columnar |
3475
|
|
|
|
|
|
|
# LocalWords: dmalloc tabdelim stats numerics datapoint CDF xgraph max txt sed |
3476
|
|
|
|
|
|
|
# LocalWords: login gecos div cmd nr hw hw assuing Kuenning Vikram Visweswariah |
3477
|
|
|
|
|
|
|
# LocalWords: Kannan Varadahan Arkadi Gelfond Pavlin Radoslavov quartile getopt |
3478
|
|
|
|
|
|
|
# LocalWords: dbcolscorrelate DbGetopt cp tmp nd Ya Xu dbfilesplit |
3479
|
|
|
|
|
|
|
# LocalWords: MERCHANTABILITY tba dbcolsplittocols dbcolsplittorows cvs johnh |
3480
|
|
|
|
|
|
|
# LocalWords: dbcolsregression datasets whitespace LaTeX FS columnname cgi pre |
3481
|
|
|
|
|
|
|
# LocalWords: columname's dbfilevalidate tcpdump http rv eq Bourne DbTDistr |
3482
|
|
|
|
|
|
|
# LocalWords: Goel Eggert Ning Strozzi NoSQL awk startup Sparcstation IPCs GHz |
3483
|
|
|
|
|
|
|
# LocalWords: SunOS Arpaci Dusseau's SOSP Scheaffer STDIN dblib iso freebsd OO |
3484
|
|
|
|
|
|
|
# LocalWords: sendmail unicode Makefile dbmapreduce dbcolmultiscale andersen |
3485
|
|
|
|
|
|
|
# LocalWords: lampson chen drovolis estrin floyd Lukac NIST SEMATECH RCS qw |
3486
|
|
|
|
|
|
|
# LocalWords: listize colize Unkyu dbpipeline ithreads dbfilealter dbrowcount |
3487
|
|
|
|
|
|
|
# LocalWords: dbrvstatdiff dbcolstatscores dbfilestripcomments csv nolog aho |
3488
|
|
|
|
|
|
|
# LocalWords: alfred david clark constantine debrorah Fsdb's colized listized |
3489
|
|
|
|
|
|
|
# LocalWords: Ashvin dbmerge na tmean tstddev wc logfiles stdin lseek SV xa |
3490
|
|
|
|
|
|
|
# LocalWords: refcount lossage DaGronk dbcolscorellate ipchain |