line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Fsdb.pm |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Copyright (C) 1991-2016 by John Heidemann |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or |
9
|
|
|
|
|
|
|
# modify it under the terms of the GNU General Public License, |
10
|
|
|
|
|
|
|
# version 2, as published by the Free Software Foundation. |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful, |
13
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
14
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15
|
|
|
|
|
|
|
# GNU General Public License for more details. |
16
|
|
|
|
|
|
|
# |
17
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License along |
18
|
|
|
|
|
|
|
# with this program; if not, write to the Free Software Foundation, Inc., |
19
|
|
|
|
|
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
20
|
|
|
|
|
|
|
# |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
package Fsdb; |
23
|
|
|
|
|
|
|
|
24
|
2
|
|
|
2
|
|
145848
|
use warnings; |
|
2
|
|
|
|
|
7
|
|
|
2
|
|
|
|
|
100
|
|
25
|
2
|
|
|
2
|
|
17
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
63
|
|
26
|
2
|
|
|
2
|
|
1070
|
use utf8; |
|
2
|
|
|
|
|
39
|
|
|
2
|
|
|
|
|
14
|
|
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=encoding utf8 |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 NAME |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
Fsdb - a flat-text database for shell scripting |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=cut |
36
|
|
|
|
|
|
|
our $VERSION = '2.64'; |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 SYNOPSIS |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
Fsdb, the flatfile streaming database, is a package of commands |
41
|
|
|
|
|
|
|
for manipulating flat-ASCII databases from |
42
|
|
|
|
|
|
|
shell scripts. Fsdb is useful to process medium amounts of data (with |
43
|
|
|
|
|
|
|
very little data you'd do it by hand, with megabytes you might want a |
44
|
|
|
|
|
|
|
real database). |
45
|
|
|
|
|
|
|
Fsdb was known as Jdb from 1991 to Oct. 2008. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
Fsdb is very good at doing things like: |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=over 4 |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=item * |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
extracting measurements from experimental output |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=item * |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
examining data to address different hypotheses |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=item * |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
joining data from different experiments |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=item * |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
eliminating/detecting outliers |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=item * |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
computing statistics on data |
70
|
|
|
|
|
|
|
(mean, confidence intervals, correlations, histograms) |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=item * |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
reformatting data for graphing programs |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=back |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Fsdb is built around the idea of a flat text file as a database. |
79
|
|
|
|
|
|
|
Fsdb files (by convention, with the extension F<.fsdb>), |
80
|
|
|
|
|
|
|
have a header documenting the schema (what the columns mean), |
81
|
|
|
|
|
|
|
and then each line represents a database record (or row). |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
For example: |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
#fsdb experiment duration |
86
|
|
|
|
|
|
|
ufs_mab_sys 37.2 |
87
|
|
|
|
|
|
|
ufs_mab_sys 37.3 |
88
|
|
|
|
|
|
|
ufs_rcp_real 264.5 |
89
|
|
|
|
|
|
|
ufs_rcp_real 277.9 |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
Is a simple file with four experiments (the rows), |
92
|
|
|
|
|
|
|
each with a description and run time |
93
|
|
|
|
|
|
|
in the first and second columns. |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
Rather than hand-code scripts to do each special case, Fsdb provides |
96
|
|
|
|
|
|
|
higher-level functions. Although it's often easy to throw together a |
97
|
|
|
|
|
|
|
custom script to do any single task, I believe that there are several |
98
|
|
|
|
|
|
|
advantages to using Fsdb: |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=over 4 |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=item * |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
these programs provide a higher level interface than plain Perl, so |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=over 4 |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=item ** |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
Fewer lines of simpler code: |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
dbrow '_experiment eq "ufs_mab_sys"' | dbcolstats duration |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
Picks out just one type of experiment and computes statistics on it, |
115
|
|
|
|
|
|
|
rather than: |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
while (<>) { split; $sum+=$F[1]; $ss+=$F[1]**2; $n++; } |
118
|
|
|
|
|
|
|
$mean = $sum / $n; $std_dev = ... |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
in dozens of places. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=back |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=item * |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
the library uses names for columns, so |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=over 4 |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=item ** |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
No more C<$F[1]>, use C<_duration>. |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=item ** |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
New or different order columns? No changes to your scripts! |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=back |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Thus if your experiment gets more complicated with a size parameter, |
141
|
|
|
|
|
|
|
so your log changes to: |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
#fsdb experiment size duration |
144
|
|
|
|
|
|
|
ufs_mab_sys 1024 37.2 |
145
|
|
|
|
|
|
|
ufs_mab_sys 1024 37.3 |
146
|
|
|
|
|
|
|
ufs_rcp_real 1024 264.5 |
147
|
|
|
|
|
|
|
ufs_rcp_real 1024 277.9 |
148
|
|
|
|
|
|
|
ufs_mab_sys 2048 45.3 |
149
|
|
|
|
|
|
|
ufs_mab_sys 2048 44.2 |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Then the previous scripts still work, even though duration is |
152
|
|
|
|
|
|
|
now the third column, not the second. |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=item * |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
A series of actions are self-documenting (each program records what it does). |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=over 4 |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=item ** |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
No more wondering what hacks were used to compute the |
163
|
|
|
|
|
|
|
final data, just look at the comments at the end |
164
|
|
|
|
|
|
|
of the output. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=back |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
For example, the commands |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
dbrow '_experiment eq "ufs_mab_sys"' | dbcolstats duration |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
add to the end of the output the lines |
173
|
|
|
|
|
|
|
# | dbrow _experiment eq "ufs_mab_sys" |
174
|
|
|
|
|
|
|
# | dbcolstats duration |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=item * |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
The library is mature, supporting large datasets (more than 100GB), |
180
|
|
|
|
|
|
|
corner cases, error handling, backed by an automated test suite. |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
=over 4 |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=item ** |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
No more puzzling about bad output because your custom script |
187
|
|
|
|
|
|
|
skimped on error checking. |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=item ** |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
No more memory thrashing when you try to sort ten million records. |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=back |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=item * |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
Fsdb-2.x supports Perl scripting (in addition to shell scripting), |
198
|
|
|
|
|
|
|
with libraries to do Fsdb input and output, and easy support for pipelines. |
199
|
|
|
|
|
|
|
The shell script |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
dbcol name test1 | dbroweval '_test1 += 5;' |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
can be written in perl as: |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
dbpipeline(dbcol(qw(name test1)), dbroweval('_test1 += 5;')); |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=back |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
(The disadvantage is that you need to learn what functions Fsdb provides.) |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Fsdb is built on flat-ASCII databases. By storing data in simple text |
212
|
|
|
|
|
|
|
files and processing it with pipelines it is easy to experiment (in |
213
|
|
|
|
|
|
|
the shell) and look at the output. |
214
|
|
|
|
|
|
|
To the best of my knowledge, the original implementation of |
215
|
|
|
|
|
|
|
this idea was C</rdb>, a commercial product described in the book |
216
|
|
|
|
|
|
|
I<UNIX relational database management: application development in the UNIX environment> |
217
|
|
|
|
|
|
|
by Rod Manis, Evan Schaffer, and Robert Jorgensen (and |
218
|
|
|
|
|
|
|
also at the web page L). Fsdb is an incompatible |
219
|
|
|
|
|
|
|
re-implementation of their idea without any accelerated indexing or |
220
|
|
|
|
|
|
|
forms support. (But it's free, and probably has better statistics!). |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
Fsdb-2.x will exploit multiple processors or cores, |
223
|
|
|
|
|
|
|
and provides Perl-level support for input, output, and threaded-pipelines. |
224
|
|
|
|
|
|
|
(As of Fsdb-2.44 it no longer uses Perl threading, just processes, |
225
|
|
|
|
|
|
|
since they are faster.) |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
Installation instructions follow at the end of this document. |
228
|
|
|
|
|
|
|
Fsdb-2.x requires Perl 5.8 to run. |
229
|
|
|
|
|
|
|
All commands have manual pages and provide usage with the C<--help> option. |
230
|
|
|
|
|
|
|
All commands are backed by an automated test suite. |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
The most recent version of Fsdb is available on the web at |
233
|
|
|
|
|
|
|
L. |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=head1 WHAT'S NEW |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=head2 2.64, 2017-11-20 |
239
|
|
|
|
|
|
|
several small bugfixes and enhancements |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
=over 4 |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=item BUG FIX |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
In L, the C option previously did not |
246
|
|
|
|
|
|
|
correctly set up C<_last_fieldname>. It now does. |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=item ENHANCEMENT |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
The L converter now has an optional C<-F x> option |
251
|
|
|
|
|
|
|
to set the field separator. |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=item ENHANCEMENT |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
Finally L has a C<--header> option, |
256
|
|
|
|
|
|
|
and a new C<-N> option to give the list of resulting output columns. |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
Now L and L produce no output |
261
|
|
|
|
|
|
|
(but a schema) when given no input but a schema. |
262
|
|
|
|
|
|
|
Previously they gave a null row of output. |
263
|
|
|
|
|
|
|
The C<--output-on-no-input> and C<--no-output-on-no-input> |
264
|
|
|
|
|
|
|
options can control this behavior. |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=back |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head1 README CONTENTS |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=over 4 |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
=item executive summary |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=item what's new |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=item README CONTENTS |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=item installation |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=item basic data format |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=item basic data manipulation |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=item list of commands |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=item another example |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=item a gradebook example |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=item a password example |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
=item history |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
=item related work |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=item release notes |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
=item copyright |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=item comments |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=back |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=head1 INSTALLATION |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
Fsdb now uses the standard Perl build and installation from |
310
|
|
|
|
|
|
|
ExtUtils::MakeMaker(3), so the quick answer to installation is to type: |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
perl Makefile.PL |
313
|
|
|
|
|
|
|
make |
314
|
|
|
|
|
|
|
make test |
315
|
|
|
|
|
|
|
make install |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
Or, if you want to install it somewhere else, change the first line to |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
perl Makefile.PL PREFIX=$HOME |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
and it will go in your home directory's F, etc. |
322
|
|
|
|
|
|
|
(See L for more details.) |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
Fsdb requires perl 5.8 or later. |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
A test-suite is available, run it with |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
make test |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
A FreeBSD port to Fsdb is available, see |
331
|
|
|
|
|
|
|
L. |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
A Fink (MacOS X) port is available, see |
334
|
|
|
|
|
|
|
L. |
335
|
|
|
|
|
|
|
(Thanks to Lars Eggert for maintaining this port.) |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=head1 BASIC DATA FORMAT |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
These programs are based on the idea of storing data in simple ASCII |
341
|
|
|
|
|
|
|
files. A database is a file with one header line and then data or |
342
|
|
|
|
|
|
|
comment lines. For example: |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
#fsdb account passwd uid gid fullname homedir shell |
345
|
|
|
|
|
|
|
johnh * 2274 134 John_Heidemann /home/johnh /bin/bash |
346
|
|
|
|
|
|
|
greg * 2275 134 Greg_Johnson /home/greg /bin/bash |
347
|
|
|
|
|
|
|
root * 0 0 Root /root /bin/bash |
348
|
|
|
|
|
|
|
# this is a simple database |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
The header line must be first and begins with C<#fsdb>. |
351
|
|
|
|
|
|
|
There are rows (records) and columns (fields), |
352
|
|
|
|
|
|
|
just like in a normal database. |
353
|
|
|
|
|
|
|
Comment lines begin with C<#>. |
354
|
|
|
|
|
|
|
Column names are any string not containing spaces or single quotes |
355
|
|
|
|
|
|
|
(although it is prudent to keep them alphanumeric with underscore). |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
By default, columns are delimited by whitespace. |
358
|
|
|
|
|
|
|
With this default configuration, the contents of a field |
359
|
|
|
|
|
|
|
cannot contain whitespace. |
360
|
|
|
|
|
|
|
However, this limitation can be relaxed by changing the field separator |
361
|
|
|
|
|
|
|
as described below. |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
The big advantage of simple flat-text databases is that |
364
|
|
|
|
|
|
|
it is usually easy to massage data into this format, |
365
|
|
|
|
|
|
|
and it's reasonably easy to take data out of this |
366
|
|
|
|
|
|
|
format into other (text-based) programs, like gnuplot, jgraph, and |
367
|
|
|
|
|
|
|
LaTeX. Think Unix. Think pipes. |
368
|
|
|
|
|
|
|
(Or even output to Excel and HTML if you prefer.) |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
Since no-whitespace in columns was a problem for some applications, |
371
|
|
|
|
|
|
|
there's an option which relaxes this rule. You can specify the field |
372
|
|
|
|
|
|
|
separator in the table header with C<-F x> where C<x> is |
373
|
|
|
|
|
|
|
a code for the new field separator. |
374
|
|
|
|
|
|
|
A full list of codes is at L, |
375
|
|
|
|
|
|
|
but two common special values are C<-F t> |
376
|
|
|
|
|
|
|
which is a separator of a single tab character, |
377
|
|
|
|
|
|
|
and C<-F S>, a separator of two spaces. |
378
|
|
|
|
|
|
|
Both allow (single) spaces in fields. An example: |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
#fsdb -F S account passwd uid gid fullname homedir shell |
381
|
|
|
|
|
|
|
johnh * 2274 134 John Heidemann /home/johnh /bin/bash |
382
|
|
|
|
|
|
|
greg * 2275 134 Greg Johnson /home/greg /bin/bash |
383
|
|
|
|
|
|
|
root * 0 0 Root /root /bin/bash |
384
|
|
|
|
|
|
|
# this is a simple database |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
See L for more details. Regardless of what the column |
387
|
|
|
|
|
|
|
separator is for the body of the data, it's always whitespace in the |
388
|
|
|
|
|
|
|
header. |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
There's also a third format: a "list". Because it's often hard to see |
391
|
|
|
|
|
|
|
what's in columns past the first two, in list format each "column" is on |
392
|
|
|
|
|
|
|
a separate line. The programs dblistize and dbcolize convert to and |
393
|
|
|
|
|
|
|
from this format, and all programs work with either format. |
394
|
|
|
|
|
|
|
The command |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
dbfilealter -R C < DATA/passwd.fsdb |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
outputs: |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
#fsdb -R C account passwd uid gid fullname homedir shell |
401
|
|
|
|
|
|
|
account: johnh |
402
|
|
|
|
|
|
|
passwd: * |
403
|
|
|
|
|
|
|
uid: 2274 |
404
|
|
|
|
|
|
|
gid: 134 |
405
|
|
|
|
|
|
|
fullname: John_Heidemann |
406
|
|
|
|
|
|
|
homedir: /home/johnh |
407
|
|
|
|
|
|
|
shell: /bin/bash |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
account: greg |
410
|
|
|
|
|
|
|
passwd: * |
411
|
|
|
|
|
|
|
uid: 2275 |
412
|
|
|
|
|
|
|
gid: 134 |
413
|
|
|
|
|
|
|
fullname: Greg_Johnson |
414
|
|
|
|
|
|
|
homedir: /home/greg |
415
|
|
|
|
|
|
|
shell: /bin/bash |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
account: root |
418
|
|
|
|
|
|
|
passwd: * |
419
|
|
|
|
|
|
|
uid: 0 |
420
|
|
|
|
|
|
|
gid: 0 |
421
|
|
|
|
|
|
|
fullname: Root |
422
|
|
|
|
|
|
|
homedir: /root |
423
|
|
|
|
|
|
|
shell: /bin/bash |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
# this is a simple database |
426
|
|
|
|
|
|
|
# | dblistize |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
See L for more details. |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=head1 BASIC DATA MANIPULATION |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
A number of programs exist to manipulate databases. |
434
|
|
|
|
|
|
|
Complex functions can be made by stringing together commands |
435
|
|
|
|
|
|
|
with shell pipelines. For example, to print the home |
436
|
|
|
|
|
|
|
directories of everyone with ``john'' in their names, |
437
|
|
|
|
|
|
|
you would do: |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
cat DATA/passwd | dbrow '_fullname =~ /John/' | dbcol homedir |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
The output might be: |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
#fsdb homedir |
444
|
|
|
|
|
|
|
/home/johnh |
445
|
|
|
|
|
|
|
/home/greg |
446
|
|
|
|
|
|
|
# this is a simple database |
447
|
|
|
|
|
|
|
# | dbrow _fullname =~ /John/ |
448
|
|
|
|
|
|
|
# | dbcol homedir |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
(Notice that comments are appended to the output listing each command, |
451
|
|
|
|
|
|
|
providing an automatic audit log.) |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
In addition to typical database functions (select, join, etc.) there |
454
|
|
|
|
|
|
|
are also a number of statistical functions. |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
The real power of Fsdb is that one can apply arbitrary code to rows |
457
|
|
|
|
|
|
|
to do powerful things. |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
cat DATA/passwd | dbroweval '_fullname =~ s/(\w+)_(\w+)/$2,_$1/' |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
converts "John_Heidemann" into "Heidemann,_John". |
462
|
|
|
|
|
|
|
Not too much more work could split fullname into firstname and lastname |
463
|
|
|
|
|
|
|
fields. |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=head1 TALKING ABOUT COLUMNS |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
An advantage of Fsdb is that you can talk about columns by name |
469
|
|
|
|
|
|
|
(symbolically) rather than simply by their positions. So in the above |
470
|
|
|
|
|
|
|
example, C<dbcol homedir> pulled out the home directory column, and |
471
|
|
|
|
|
|
|
C<dbrow '_fullname =~ /John/'> matched against column fullname. |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
In general, you can use the name of the column listed on the C<#fsdb> line |
474
|
|
|
|
|
|
|
to identify it in most programs, and _name to identify it in code. |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
Some alternatives for flexibility: |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=over 4 |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=item * |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
Numeric values identify columns positionally, numbering from 0. |
483
|
|
|
|
|
|
|
So 0 or _0 is the first column, 1 is the second, etc. |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=item * |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
In code, _last_columnname gets the value from columnname's previous row. |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=back |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
See L for more details about writing code. |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
=head1 LIST OF COMMANDS |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
Enough said. I'll summarize the commands, and then you can |
498
|
|
|
|
|
|
|
experiment. For a detailed description of each command, see a summary |
499
|
|
|
|
|
|
|
by running it with the argument C<--help> (or C<-?> if you prefer.) |
500
|
|
|
|
|
|
|
Full manual pages can be found by running the command |
501
|
|
|
|
|
|
|
with the argument C<--man>, or running the Unix command C<man dbcol> |
502
|
|
|
|
|
|
|
or whatever program you want. |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
=head2 TABLE CREATION |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=over 4 |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=item dbcolcreate |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
add columns to a database |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
=item dbcoldefine |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
set the column headings for a non-Fsdb file |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=back |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=head2 TABLE MANIPULATION |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
=over 4 |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=item dbcol |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
select columns from a table |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=item dbrow |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
select rows from a table |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=item dbsort |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
sort rows based on a set of columns |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=item dbjoin |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
compute the natural join of two tables |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=item dbcolrename |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
rename a column |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
=item dbcolmerge |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
merge two columns into one |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
=item dbcolsplittocols |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
split one column into two or more columns |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=item dbcolsplittorows |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
split one column into multiple rows |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=item dbfilepivot |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
"pivots" a file, converting multiple rows |
557
|
|
|
|
|
|
|
corresponding to the same entity into a single row with multiple columns. |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
=item dbfilevalidate |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
check that db file doesn't have some common errors |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
=back |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
=head2 COMPUTATION AND STATISTICS |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
=over 4 |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
=item dbcolstats |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
compute statistics over a column (mean,etc.,optionally median) |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
=item dbmultistats |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
group rows by some key value, then compute stats (mean, etc.) over each group |
576
|
|
|
|
|
|
|
(equivalent to dbmapreduce with dbcolstats as the reducer) |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
=item dbmapreduce |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
group rows (map) and then apply an arbitrary function to each group (reduce) |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
=item dbrvstatdiff |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
compare two sample distributions (mean/conf interval/T-test) |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
=item dbcolmovingstats |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
compute moving statistics over a column of data |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
=item dbcolstatscores |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
compute Z-scores and T-scores over one column of data |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
=item dbcolpercentile |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
compute the rank or percentile of a column |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
=item dbcolhisto |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
compute histograms over a column of data |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
=item dbcolscorrelate |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
compute the coefficient of correlation over several columns |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
=item dbcolsregression |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
compute linear regression and correlation for two columns |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
=item dbrowaccumulate |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
compute a running sum over a column of data |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
=item dbrowcount |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
count the number of rows (a subset of dbcolstats) |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=item dbrowdiff |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
compute differences of a column between rows of a table |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
=item dbrowenumerate |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
number each row |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
=item dbroweval |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
run arbitrary Perl code on each row |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
=item dbrowuniq |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
count/eliminate identical rows (like Unix uniq(1)) |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
=item dbfilediff |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
compare fields on rows of a file (something like Unix diff(1)) |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
=back |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
=head2 OUTPUT CONTROL |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
=over 4 |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
=item dbcolneaten |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
pretty-print columns |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
=item dbfilealter |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
convert between column or list format, or change the column separator |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
=item dbfilestripcomments |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
remove comments from a table |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
=item dbformmail |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
generate a script that sends form mail based on each row |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
=back |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=head2 CONVERSIONS |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
(These programs convert data into fsdb. See their web pages for details.) |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
=over 4 |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
=item cgi_to_db |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
L |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
=item combined_log_format_to_db |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
L |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
=item html_table_to_db |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
HTML tables to fsdb (assuming they're reasonably formatted). |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
=item kitrace_to_db |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
L |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
=item ns_to_db |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
L |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
=item sqlselect_to_db |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
the output of SQL SELECT tables to db |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
=item tabdelim_to_db |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
spreadsheet tab-delimited files to db |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
=item tcpdump_to_db |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
(see man tcpdump(8) on any reasonable system) |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
=item xml_to_db |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
XML input to fsdb, assuming they're very regular |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
=back |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
(And out of fsdb:) |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
=over 4 |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
=item db_to_csv |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
Comma-separated-value format from fsdb. |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
=item db_to_html_table |
716
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
simple conversion of Fsdb to html tables |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
=back |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
=head2 STANDARD OPTIONS |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
Many programs have common options: |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
=over 4 |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
=item B<-?> or B<--help> |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
Show basic usage. |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
=item B<-N> or B<--new-name> |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
When a command creates a new column like L's C, |
734
|
|
|
|
|
|
|
this option lets one override the default name of that new column. |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
=item B<-T TmpDir> |
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
where to put tmp files. |
739
|
|
|
|
|
|
|
Also uses environment variable TMPDIR, if -T is |
740
|
|
|
|
|
|
|
not specified. |
741
|
|
|
|
|
|
|
Default is /tmp. |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
Show basic usage. |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
=item B<-c FRACTION> or B<--confidence FRACTION> |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
Specify confidence interval FRACTION (L, L, etc.) |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
=item B<-C S> or B<--element-separator S> |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
Specify column separator S (L, L). |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
=item B<-d> or B<--debug> |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
Enable debugging (may be repeated for greater effect in some cases). |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
=item B<-a> or B<--include-non-numeric> |
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
Compute stats over all data (treating non-numbers as zeros). |
760
|
|
|
|
|
|
|
(By default, things that can't be treated as numbers |
761
|
|
|
|
|
|
|
are ignored for stats purposes) |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
=item B<-S> or B<--pre-sorted> |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
Assume the data is pre-sorted. |
766
|
|
|
|
|
|
|
May be repeated to disable verification (saving a small amount of work). |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
=item B<-e E> or B<--empty E> |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
give value E as the value for empty (null) records |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
=item B<-i I> or B<--input I> |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
Input data from file I. |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
=item B<-o O> or B<--output O> |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
Write data out to file O. |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
=item B<--header> H |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
Use H as the full Fsdb header, rather than reading a header from |
783
|
|
|
|
|
|
|
the input. This option is particularly useful when using Fsdb |
784
|
|
|
|
|
|
|
under Hadoop, where split files don't have headers. |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
=item B<--nolog> |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
Skip logging the program in a trailing comment. |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
=back |
791
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
When giving Perl code (in L<dbrow> and L<dbroweval>) |
793
|
|
|
|
|
|
|
column names can be embedded if preceded by underscores. |
794
|
|
|
|
|
|
|
(Look at L<dbrow> or L<dbroweval> for examples.) |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
Most programs run in constant memory and use temporary files if necessary. |
797
|
|
|
|
|
|
|
Exceptions are L, L, L, |
798
|
|
|
|
|
|
|
L, L. |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
=head1 ANOTHER EXAMPLE |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
Take the raw data in C<DATA/http_bandwidth>, |
804
|
|
|
|
|
|
|
put a header on it (C<dbcoldefine size bw>), |
805
|
|
|
|
|
|
|
take statistics of each category (C<dbmultistats -k size bw>), |
806
|
|
|
|
|
|
|
pick out the relevant fields (C<dbcol size mean stddev pct_rsd>), and you get: |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
#fsdb size mean stddev pct_rsd |
809
|
|
|
|
|
|
|
1024 1.4962e+06 2.8497e+05 19.047 |
810
|
|
|
|
|
|
|
10240 5.0286e+06 6.0103e+05 11.952 |
811
|
|
|
|
|
|
|
102400 4.9216e+06 3.0939e+05 6.2863 |
812
|
|
|
|
|
|
|
# | dbcoldefine size bw |
813
|
|
|
|
|
|
|
# | /home/johnh/BIN/DB/dbmultistats -k size bw |
814
|
|
|
|
|
|
|
# | /home/johnh/BIN/DB/dbcol size mean stddev pct_rsd |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
(The whole command was: |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
cat DATA/http_bandwidth | |
819
|
|
|
|
|
|
|
dbcoldefine size bw | |
820
|
|
|
|
|
|
|
dbmultistats -k size bw | |
821
|
|
|
|
|
|
|
dbcol size mean stddev pct_rsd |
822
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
all on one line.) |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
Then post-process them to get rid of the exponential notation |
826
|
|
|
|
|
|
|
by adding this to the end of the pipeline: |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
dbroweval '_mean = sprintf("%8.0f", _mean); _stddev = sprintf("%8.0f", _stddev);' |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
(Actually, this step is no longer required since L |
831
|
|
|
|
|
|
|
now uses a different default format.) |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
giving: |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
#fsdb size mean stddev pct_rsd |
836
|
|
|
|
|
|
|
1024 1496200 284970 19.047 |
837
|
|
|
|
|
|
|
10240 5028600 601030 11.952 |
838
|
|
|
|
|
|
|
102400 4921600 309390 6.2863 |
839
|
|
|
|
|
|
|
# | dbcoldefine size bw |
840
|
|
|
|
|
|
|
# | dbmultistats -k size bw |
841
|
|
|
|
|
|
|
# | dbcol size mean stddev pct_rsd |
842
|
|
|
|
|
|
|
# | dbroweval { _mean = sprintf("%8.0f", _mean); _stddev = sprintf("%8.0f", _stddev); } |
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
In a few lines, raw data is transformed to processed output. |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
Suppose you expect there is an odd distribution of results of one |
848
|
|
|
|
|
|
|
datapoint. Fsdb can easily produce a CDF (cumulative distribution |
849
|
|
|
|
|
|
|
function) of the data, suitable for graphing: |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
cat DB/DATA/http_bandwidth | \ |
852
|
|
|
|
|
|
|
dbcoldefine size bw | \ |
853
|
|
|
|
|
|
|
dbrow '_size == 102400' | \ |
854
|
|
|
|
|
|
|
dbcol bw | \ |
855
|
|
|
|
|
|
|
dbsort -n bw | \ |
856
|
|
|
|
|
|
|
dbrowenumerate | \ |
857
|
|
|
|
|
|
|
dbcolpercentile count | \ |
858
|
|
|
|
|
|
|
dbcol bw percentile | \ |
859
|
|
|
|
|
|
|
xgraph |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
The steps, roughly: |
862
|
|
|
|
|
|
|
1. get the raw input data and turn it into fsdb format, |
863
|
|
|
|
|
|
|
2. pick out just the relevant column (for efficiency) and sort it, |
864
|
|
|
|
|
|
|
3. for each data point, assign a CDF percentage to it, |
865
|
|
|
|
|
|
|
4. pick out the two columns to graph and show them |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
=head1 A GRADEBOOK EXAMPLE |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
The first commercial program I wrote was a gradebook, |
871
|
|
|
|
|
|
|
so here's how to do it with Fsdb. |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
Format your data like DATA/grades. |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
#fsdb name email id test1 |
876
|
|
|
|
|
|
|
a a@ucla.example.edu 1 80 |
877
|
|
|
|
|
|
|
b b@usc.example.edu 2 70 |
878
|
|
|
|
|
|
|
c c@isi.example.edu 3 65 |
879
|
|
|
|
|
|
|
d d@lmu.example.edu 4 90 |
880
|
|
|
|
|
|
|
e e@caltech.example.edu 5 70 |
881
|
|
|
|
|
|
|
f f@oxy.example.edu 6 90 |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
Or if your students have spaces in their names, use C<-F S> and two spaces |
884
|
|
|
|
|
|
|
to separate each column: |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
#fsdb -F S name email id test1 |
887
|
|
|
|
|
|
|
alfred aho a@ucla.example.edu 1 80 |
888
|
|
|
|
|
|
|
butler lampson b@usc.example.edu 2 70 |
889
|
|
|
|
|
|
|
david clark c@isi.example.edu 3 65 |
890
|
|
|
|
|
|
|
constantine drovolis d@lmu.example.edu 4 90 |
891
|
|
|
|
|
|
|
debrorah estrin e@caltech.example.edu 5 70 |
892
|
|
|
|
|
|
|
sally floyd f@oxy.example.edu 6 90 |
893
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
To compute statistics on an exam, do |
895
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
cat DATA/grades | dbstats test1 |dblistize |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
giving |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
#fsdb -R C ... |
901
|
|
|
|
|
|
|
mean: 77.5 |
902
|
|
|
|
|
|
|
stddev: 10.84 |
903
|
|
|
|
|
|
|
pct_rsd: 13.987 |
904
|
|
|
|
|
|
|
conf_range: 11.377 |
905
|
|
|
|
|
|
|
conf_low: 66.123 |
906
|
|
|
|
|
|
|
conf_high: 88.877 |
907
|
|
|
|
|
|
|
conf_pct: 0.95 |
908
|
|
|
|
|
|
|
sum: 465 |
909
|
|
|
|
|
|
|
sum_squared: 36625 |
910
|
|
|
|
|
|
|
min: 65 |
911
|
|
|
|
|
|
|
max: 90 |
912
|
|
|
|
|
|
|
n: 6 |
913
|
|
|
|
|
|
|
... |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
To do a histogram: |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
cat DATA/grades | dbcolhisto -n 5 -g test1 |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
giving |
920
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
#fsdb low histogram |
922
|
|
|
|
|
|
|
65 * |
923
|
|
|
|
|
|
|
70 ** |
924
|
|
|
|
|
|
|
75 |
925
|
|
|
|
|
|
|
80 * |
926
|
|
|
|
|
|
|
85 |
927
|
|
|
|
|
|
|
90 ** |
928
|
|
|
|
|
|
|
# | /home/johnh/BIN/DB/dbhistogram -n 5 -g test1 |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
Now you want to send out grades to the students by e-mail. |
931
|
|
|
|
|
|
|
Create a form-letter (in the file F<test1.txt>): |
932
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
To: _email (_name) |
934
|
|
|
|
|
|
|
From: J. Random Professor |
935
|
|
|
|
|
|
|
Subject: test1 scores |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
_name, your score on test1 was _test1. |
938
|
|
|
|
|
|
|
86+ A |
939
|
|
|
|
|
|
|
75-85 B |
940
|
|
|
|
|
|
|
70-74 C |
941
|
|
|
|
|
|
|
0-69 F |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
Generate the shell script that will send the mail out: |
944
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
cat DATA/grades | dbformmail test1.txt > test1.sh |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
And run it: |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
sh <test1.sh |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
The last two steps can be combined: |
952
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
cat DATA/grades | dbformmail test1.txt | sh |
954
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
but I like to keep a copy of exactly what I send. |
956
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
At the end of the semester you'll want to compute grade totals and |
959
|
|
|
|
|
|
|
assign letter grades. Both fall out of dbroweval. |
960
|
|
|
|
|
|
|
For example, to compute weighted total grades with a 40% midterm/60% |
961
|
|
|
|
|
|
|
final where the midterm is 84 possible points and the final 100: |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
dbcol -rv total | |
964
|
|
|
|
|
|
|
dbcolcreate total - | |
965
|
|
|
|
|
|
|
dbroweval ' |
966
|
|
|
|
|
|
|
_total = .40 * _midterm/84.0 + .60 * _final/100.0; |
967
|
|
|
|
|
|
|
_total = sprintf("%4.2f", _total); |
968
|
|
|
|
|
|
|
if (_final eq "-" || ( _name =~ /^_/)) { _total = "-"; };' | |
969
|
|
|
|
|
|
|
dbcolneaten |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
If you got the data originally from a spreadsheet, save it in |
973
|
|
|
|
|
|
|
"tab-delimited" format and convert it with tabdelim_to_db |
974
|
|
|
|
|
|
|
(run tabdelim_to_db -? for examples). |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
=head1 A PASSWORD EXAMPLE |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
To convert the Unix password file to db: |
980
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
cat /etc/passwd | sed 's/:/ /g'| \ |
982
|
|
|
|
|
|
|
dbcoldefine -F S login password uid gid gecos home shell \ |
983
|
|
|
|
|
|
|
>passwd.fsdb |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
To convert the group file |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
cat /etc/group | sed 's/:/ /g' | \ |
988
|
|
|
|
|
|
|
dbcoldefine -F S group password gid members \ |
989
|
|
|
|
|
|
|
>group.fsdb |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
To show the names of the groups that div7-members are in |
992
|
|
|
|
|
|
|
(assuming DIV7 is in the gecos field): |
993
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
cat passwd.fsdb | dbrow '_gecos =~ /DIV7/' | dbcol login gid | \ |
995
|
|
|
|
|
|
|
dbjoin -i - -i group.fsdb gid | dbcol login group |
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
=head1 SHORT EXAMPLES |
999
|
|
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
Which Fsdb programs are the most complicated (based on number of test cases)? |
1001
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
ls TEST/*.cmd | \ |
1003
|
|
|
|
|
|
|
dbcoldefine test | \ |
1004
|
|
|
|
|
|
|
dbroweval '_test =~ s@^TEST/([^_]+).*$@$1@' | \ |
1005
|
|
|
|
|
|
|
dbrowuniq -c | \ |
1006
|
|
|
|
|
|
|
dbsort -nr count | \ |
1007
|
|
|
|
|
|
|
dbcolneaten |
1008
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
(Answer: L, then L, L and L.) |
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
Stats on an exam (in C<$FILE>, where C<$COLUMN> is the name of the exam)? |
1013
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
cat $FILE | dbcolstats -q 4 $COLUMN | dblistize | dbstripcomments |
1015
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
cat $FILE | dbcolhisto -g -n 20 $COLUMN | dbcolneaten | dbstripcomments |
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
Merging the hw1 column from file hw1.fsdb into grades.fsdb assuming |
1020
|
|
|
|
|
|
|
there's a common student id in column "id": |
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
dbcol id hw1 <hw1.fsdb >t.fsdb |
1023
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
dbjoin -a -e - grades.fsdb t.fsdb id | \ |
1025
|
|
|
|
|
|
|
dbsort name | \ |
1026
|
|
|
|
|
|
|
dbcolneaten >new_grades.fsdb |
1027
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
Merging two fsdb files with the same columns: |
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
cat file1.fsdb file2.fsdb >output.fsdb |
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
or if you want to clean things up a bit |
1034
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
cat file1.fsdb file2.fsdb | dbstripextraheaders >output.fsdb |
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
or if you want to know where the data came from |
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
for i in 1 2 |
1040
|
|
|
|
|
|
|
do |
1041
|
|
|
|
|
|
|
dbcolcreate source $i < file$i.fsdb |
1042
|
|
|
|
|
|
|
done >output.fsdb |
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
(assumes you're using a Bourne-shell compatible shell, not csh). |
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
|
1047
|
|
|
|
|
|
|
=head1 WARNINGS |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
As with any tool, one should (which means I) understand |
1050
|
|
|
|
|
|
|
the limits of the tool. |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
All Fsdb tools should run in I<constant memory>. |
1053
|
|
|
|
|
|
|
In some cases (such as F<dbcolstats> with quartiles, where the whole input |
1054
|
|
|
|
|
|
|
must be re-read), programs will spool data to disk if necessary. |
1055
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
Most tools buffer one or a few lines of data, so memory |
1057
|
|
|
|
|
|
|
will scale with the size of each line. |
1058
|
|
|
|
|
|
|
(So lines with many columns, or when columns have lots of data, |
1059
|
|
|
|
|
|
|
may cause large memory consumption.) |
1060
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
All Fsdb tools should run in constant or at worst C time. |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
All Fsdb tools use normal Perl math routines for computation. |
1064
|
|
|
|
|
|
|
Although I make every attempt to choose numerically stable algorithms |
1065
|
|
|
|
|
|
|
(although I also welcome feedback and suggestions for improvement), |
1066
|
|
|
|
|
|
|
normal rounding due to computer floating point approximations |
1067
|
|
|
|
|
|
|
can result in inaccuracies when data spans a large range of precision. |
1068
|
|
|
|
|
|
|
(See for example the F test cases.) |
1069
|
|
|
|
|
|
|
|
1070
|
|
|
|
|
|
|
Any requirements and limitations of each Fsdb tool |
1071
|
|
|
|
|
|
|
are documented on its manual page. |
1072
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
If any Fsdb program violates these assumptions, |
1074
|
|
|
|
|
|
|
that is a bug that should be documented |
1075
|
|
|
|
|
|
|
on the tool's manual page or ideally fixed. |
1076
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
Fsdb does depend on Perl's correctness, and Perl (and Fsdb) have |
1078
|
|
|
|
|
|
|
some bugs. Fsdb should work on perl from version 5.10 onward. |
1079
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
=head1 HISTORY |
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
There have been three versions of Fsdb; |
1084
|
|
|
|
|
|
|
fsdb 1.0 is a complete re-write of the pre-1995 versions, |
1085
|
|
|
|
|
|
|
and was |
1086
|
|
|
|
|
|
|
distributed from 1995 to 2007. |
1087
|
|
|
|
|
|
|
Fsdb 2.0 is a significant re-write of the 1.x versions |
1088
|
|
|
|
|
|
|
for reasons described below. |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
Fsdb (in its various forms) has been used extensively by its author |
1091
|
|
|
|
|
|
|
since 1991. Since 1995 it's been used by two other researchers at |
1092
|
|
|
|
|
|
|
UCLA and several at ISI. In February 1998 it was announced to the |
1093
|
|
|
|
|
|
|
Internet. Since then it has found a few users, some outside where I |
1094
|
|
|
|
|
|
|
work. |
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
=head2 Fsdb 2.0 Rationale |
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
I've thought about fsdb-2.0 for many years, but it was started |
1099
|
|
|
|
|
|
|
in earnest in 2007. Fsdb-2.0 has the following goals: |
1100
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
=over 4 |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
=item in-one-process processing |
1104
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
While fsdb is great on the Unix command line as a pipeline between |
1106
|
|
|
|
|
|
|
programs, it should I<also> be possible to set it up to run in a single |
1107
|
|
|
|
|
|
|
process. And if it does so, it should be able to avoid serializing |
1108
|
|
|
|
|
|
|
and deserializing (converting to and from text) data between each module. |
1109
|
|
|
|
|
|
|
(Accomplished in fsdb-2.0: see L, although still needs tuning.) |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
=item clean IO API |
1112
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
Fsdb's roots go back to perl4 and 1991, so the fsdb-1.x library is |
1114
|
|
|
|
|
|
|
very, very crufty. More than just being ugly (but it was that too), |
1115
|
|
|
|
|
|
|
this made things like reading from one format file and writing to another |
1116
|
|
|
|
|
|
|
the application's job, when it should be the library's. |
1117
|
|
|
|
|
|
|
(Accomplished in fsdb-1.15 and improved in 2.0: see L.) |
1118
|
|
|
|
|
|
|
|
1119
|
|
|
|
|
|
|
=item normalized module APIs |
1120
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
Because fsdb modules were added as needed over 10 years, |
1122
|
|
|
|
|
|
|
sometimes the module APIs became inconsistent. |
1123
|
|
|
|
|
|
|
(For example, the 1.x C<dbcolcreate> required an empty |
1124
|
|
|
|
|
|
|
value following the name of the new column, |
1125
|
|
|
|
|
|
|
but other programs specify empty values with the C<-e> argument.) |
1126
|
|
|
|
|
|
|
We should smooth over these inconsistencies. |
1127
|
|
|
|
|
|
|
(Accomplished as each module was ported in 2.0 through 2.7.) |
1128
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
=item everyone handles all input formats |
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
Given a clean IO API, the distinction between "colized" |
1132
|
|
|
|
|
|
|
and "listized" fsdb files should go away. Any program |
1133
|
|
|
|
|
|
|
should be able to read and write files in any format. |
1134
|
|
|
|
|
|
|
(Accomplished in fsdb-2.1.) |
1135
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
=back |
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
Fsdb-2.0 preserves backwards compatibility where possible, |
1139
|
|
|
|
|
|
|
but breaks it where necessary to accomplish the above goals. |
1140
|
|
|
|
|
|
|
In August 2008, Fsdb-2.7 was declared preferred over the 1.x versions. |
1141
|
|
|
|
|
|
|
Benchmarking in 2013 showed that threading performed much worse than |
1142
|
|
|
|
|
|
|
just using pipes, so Fsdb-2.44 uses threading "style", |
1143
|
|
|
|
|
|
|
but implemented with processes (via my "Freds" library). |
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
=head2 Contributors |
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
Fsdb includes code ported from Geoff Kuenning (C). |
1148
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
Fsdb contributors: |
1150
|
|
|
|
|
|
|
Ashvin Goel F, |
1151
|
|
|
|
|
|
|
Geoff Kuenning F, |
1152
|
|
|
|
|
|
|
Vikram Visweswariah F, |
1153
|
|
|
|
|
|
|
Kannan Varadahan F, |
1154
|
|
|
|
|
|
|
Lars Eggert F, |
1155
|
|
|
|
|
|
|
Arkadi Gelfond F, |
1156
|
|
|
|
|
|
|
David Graff F, |
1157
|
|
|
|
|
|
|
Haobo Yu F, |
1158
|
|
|
|
|
|
|
Pavlin Radoslavov F, |
1159
|
|
|
|
|
|
|
Graham Phillips, |
1160
|
|
|
|
|
|
|
Yuri Pradkin, |
1161
|
|
|
|
|
|
|
Alefiya Hussain, |
1162
|
|
|
|
|
|
|
Ya Xu, |
1163
|
|
|
|
|
|
|
Michael Schwendt, |
1164
|
|
|
|
|
|
|
Fabio Silva F, |
1165
|
|
|
|
|
|
|
Jerry Zhao F, |
1166
|
|
|
|
|
|
|
Ning Xu F, |
1167
|
|
|
|
|
|
|
Martin Lukac F, |
1168
|
|
|
|
|
|
|
Xue Cai, |
1169
|
|
|
|
|
|
|
Michael McQuaid, |
1170
|
|
|
|
|
|
|
Christopher Meng, |
1171
|
|
|
|
|
|
|
Calvin Ardi, |
1172
|
|
|
|
|
|
|
H. Merijn Brand, |
1173
|
|
|
|
|
|
|
Lan Wei. |
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
Fsdb includes datasets contributed from NIST (F), |
1176
|
|
|
|
|
|
|
from |
1177
|
|
|
|
|
|
|
L, |
1178
|
|
|
|
|
|
|
the NIST/SEMATECH e-Handbook of Statistical Methods, section |
1179
|
|
|
|
|
|
|
1.4.2.8.1. Background and Data. The source is public domain, and |
1180
|
|
|
|
|
|
|
reproduced with permission. |
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
=head1 RELATED WORK |
1186
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
As stated in the introduction, Fsdb is an incompatible reimplementation |
1188
|
|
|
|
|
|
|
of the ideas found in C</rdb>. By storing data in simple text files and |
1189
|
|
|
|
|
|
|
processing it with pipelines it is easy to experiment (in the shell) |
1190
|
|
|
|
|
|
|
and look at the output. The original implementation of this idea was |
1191
|
|
|
|
|
|
|
/rdb, a commercial product described in the book I<UNIX relational |
1192
|
|
|
|
|
|
|
database management: application development in the UNIX environment> |
1193
|
|
|
|
|
|
|
by Rod Manis, Evan Schaffer, and Robert Jorgensen (and also at the web |
1194
|
|
|
|
|
|
|
page L). |
1195
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
While Fsdb is inspired by Rdb, it includes no code from it, |
1197
|
|
|
|
|
|
|
and Fsdb makes several different design choices. |
1198
|
|
|
|
|
|
|
In particular: rdb attempts to be closer to a "real" database, |
1199
|
|
|
|
|
|
|
with provision for locking and file indexing. |
1200
|
|
|
|
|
|
|
Fsdb focuses on single user use and so eschews these choices. |
1201
|
|
|
|
|
|
|
Rdb also has some support for interactive editing. |
1202
|
|
|
|
|
|
|
Fsdb leaves editing to text editors like emacs or vi. |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
In August, 2002 I found out Carlo Strozzi extended RDB with his |
1205
|
|
|
|
|
|
|
package NoSQL L. According to |
1206
|
|
|
|
|
|
|
Mr. Strozzi, he implemented NoSQL in awk to avoid the Perl start-up of |
1207
|
|
|
|
|
|
|
RDB. Although I haven't found Perl startup overhead to be a big |
1208
|
|
|
|
|
|
|
problem on my platforms (from old Sparcstation IPCs to 2GHz |
1209
|
|
|
|
|
|
|
Pentium-4s), you may want to evaluate his system. |
1210
|
|
|
|
|
|
|
The Linux Journal has a description of NoSQL |
1211
|
|
|
|
|
|
|
at L. |
1212
|
|
|
|
|
|
|
It seems quite similar to Fsdb. |
1213
|
|
|
|
|
|
|
Like /rdb, NoSQL supports indexing (not present in Fsdb). |
1214
|
|
|
|
|
|
|
Fsdb appears to have richer support for statistics, |
1215
|
|
|
|
|
|
|
and, as of Fsdb-2.x, its support for Perl threading may support |
1216
|
|
|
|
|
|
|
faster performance (one-process, less serialization and deserialization). |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
|
1219
|
|
|
|
|
|
|
=head1 RELEASE NOTES |
1220
|
|
|
|
|
|
|
|
1221
|
|
|
|
|
|
|
Versions prior to 1.0 were released informally on my web page |
1222
|
|
|
|
|
|
|
but were not announced. |
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
=head2 0.0 1991 |
1225
|
|
|
|
|
|
|
|
1226
|
|
|
|
|
|
|
started for my own research use |
1227
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
=head2 0.1 26-May-94 |
1229
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
first check-in to RCS |
1231
|
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
|
=head2 0.2 15-Mar-95 |
1233
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
parts now require perl5 |
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
=head2 1.0, 22-Jul-97 |
1237
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
adds autoconf support and a test script. |
1239
|
|
|
|
|
|
|
|
1240
|
|
|
|
|
|
|
=head2 1.1, 20-Jan-98 |
1241
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
support for double space field separators, better tests |
1243
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
=head2 1.2, 11-Feb-98 |
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
minor changes and release on comp.lang.perl.announce |
1247
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
=head2 1.3, 17-Mar-98 |
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
=over 4 |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
=item * |
1253
|
|
|
|
|
|
|
adds median and quartile options to dbstats |
1254
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
=item * |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
adds dmalloc_to_db converter |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
|
1261
|
|
|
|
|
|
|
=item * |
1262
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
fixes some warnings |
1264
|
|
|
|
|
|
|
|
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
=item * |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
dbjoin now can run on unsorted input |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
=item * |
1272
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
fixes a dbjoin bug |
1274
|
|
|
|
|
|
|
|
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
=item * |
1277
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
some more tests in the test suite |
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
=back |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
=head2 1.4, 27-Mar-98 |
1283
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
=over 4 |
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
=item * |
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
improves error messages (all should now report the program that makes the error) |
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
=item * |
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
fixed a bug in dbstats output when the mean is zero |
1293
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
=back |
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
=head2 1.5, 25-Jun-98 |
1297
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
=over 4 |
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
=item BUG FIX |
1301
|
|
|
|
|
|
|
dbcolhisto, dbcolpercentile now handle non-numeric values like dbstats |
1302
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
=item NEW |
1304
|
|
|
|
|
|
|
dbcolstats computes zscores and tscores over a column |
1305
|
|
|
|
|
|
|
|
1306
|
|
|
|
|
|
|
=item NEW |
1307
|
|
|
|
|
|
|
dbcolscorrelate computes correlation coefficients between two columns |
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
=item INTERNAL |
1310
|
|
|
|
|
|
|
ficus_getopt.pl has been replaced by DbGetopt.pm |
1311
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
=item BUG FIX |
1313
|
|
|
|
|
|
|
all tests are now ``portable'' (previously some tests ran only on my system) |
1314
|
|
|
|
|
|
|
|
1315
|
|
|
|
|
|
|
=item BUG FIX |
1316
|
|
|
|
|
|
|
you no longer need to have the db programs in your path (fix arose from a discussion with Arkadi Gelfond) |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
=item BUG FIX |
1319
|
|
|
|
|
|
|
installation no longer uses cp -f (to work on SunOS 4) |
1320
|
|
|
|
|
|
|
|
1321
|
|
|
|
|
|
|
=back |
1322
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
=head2 1.6, 24-May-99 |
1324
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
=over 4 |
1326
|
|
|
|
|
|
|
|
1327
|
|
|
|
|
|
|
=item NEW |
1328
|
|
|
|
|
|
|
dbsort, dbstats, dbmultistats now run in constant memory (using tmp files if necessary) |
1329
|
|
|
|
|
|
|
|
1330
|
|
|
|
|
|
|
=item NEW |
1331
|
|
|
|
|
|
|
dbcolmovingstats does moving means over a series of data |
1332
|
|
|
|
|
|
|
|
1333
|
|
|
|
|
|
|
=item NEW |
1334
|
|
|
|
|
|
|
dbcol has a -v option to get all columns except those listed |
1335
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
=item NEW |
1337
|
|
|
|
|
|
|
dbmultistats does quartiles and medians |
1338
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
=item NEW |
1340
|
|
|
|
|
|
|
dbstripextraheaders now also cleans up bogus comments before the first header |
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
=item BUG FIX |
1343
|
|
|
|
|
|
|
dbcolneaten works better with double-space-separated data |
1344
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
=back |
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
=head2 1.7, 5-Jan-00 |
1348
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
=over 4 |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
=item NEW |
1352
|
|
|
|
|
|
|
dbcolize now detects and rejects lines that contain embedded copies of the field separator |
1353
|
|
|
|
|
|
|
|
1354
|
|
|
|
|
|
|
=item NEW |
1355
|
|
|
|
|
|
|
configure tries harder to prevent people from improperly configuring/installing fsdb |
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
=item NEW |
1358
|
|
|
|
|
|
|
tcpdump_to_db converter (incomplete) |
1359
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
=item NEW |
1361
|
|
|
|
|
|
|
tabdelim_to_db converter: from spreadsheet tab-delimited files to db |
1362
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
=item NEW |
1364
|
|
|
|
|
|
|
mailing lists for fsdb are C and C |
1365
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
To subscribe to either, send mail to C or C with "subscribe" in the BODY of the message. |
1367
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
=item BUG FIX |
1369
|
|
|
|
|
|
|
dbjoin used to produce incorrect output if there were extra, unmatched values in the 2nd table. Thanks to Graham Phillips for providing a test case. |
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
=item BUG FIX |
1372
|
|
|
|
|
|
|
the sample commands in the usage strings now all should explicitly include the source of data (typically from "cat foo.fsdb |"). Thanks to Ya Xu for pointing out this documentation deficiency. |
1373
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
=item BUG FIX (DOCUMENTATION) |
1375
|
|
|
|
|
|
|
dbcolmovingstats had incorrect sample output. |
1376
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
=back |
1378
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
=head2 1.8, 28-Jun-00 |
1380
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
=over 4 |
1382
|
|
|
|
|
|
|
|
1383
|
|
|
|
|
|
|
=item BUG FIX |
1384
|
|
|
|
|
|
|
header options are now preserved when writing with dblistize |
1385
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
=item NEW |
1387
|
|
|
|
|
|
|
dbrowuniq now optionally checks for uniqueness only on certain fields |
1388
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
=item NEW |
1390
|
|
|
|
|
|
|
dbrowsplituniq makes one pass through a file and splits it into separate files based on the given fields |
1391
|
|
|
|
|
|
|
|
1392
|
|
|
|
|
|
|
=item NEW |
1393
|
|
|
|
|
|
|
converter for "crl" format network traces |
1394
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
=item NEW |
1396
|
|
|
|
|
|
|
anywhere you use arbitrary code (like dbroweval), _last_foo now maps to the last row's value for field _foo. |
1397
|
|
|
|
|
|
|
|
1398
|
|
|
|
|
|
|
=item OPTIMIZATION |
1399
|
|
|
|
|
|
|
comment processing slightly changed so that dbmultistats now is much faster on files with lots of comments (for example, ~100k lines of comments and 700 lines of data!) (Thanks to Graham Phillips for pointing out this performance problem.) |
1400
|
|
|
|
|
|
|
|
1401
|
|
|
|
|
|
|
=item BUG FIX |
1402
|
|
|
|
|
|
|
dbstats with median/quartiles now correctly handles singleton data points. |
1403
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
=back |
1405
|
|
|
|
|
|
|
|
1406
|
|
|
|
|
|
|
=head2 1.9, 6-Nov-00 |
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
=over 4 |
1409
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
=item NEW |
1411
|
|
|
|
|
|
|
dbfilesplit, split a single input file into multiple output files (based on code contributed by Pavlin Radoslavov). |
1412
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
=item BUG FIX |
1414
|
|
|
|
|
|
|
dbsort now works with perl-5.6 |
1415
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
=back |
1417
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
=head2 1.10, 10-Apr-01 |
1419
|
|
|
|
|
|
|
|
1420
|
|
|
|
|
|
|
=over 4 |
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
=item BUG FIX |
1423
|
|
|
|
|
|
|
dbstats now handles the case where there are more n-tiles than data |
1424
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
=item NEW |
1426
|
|
|
|
|
|
|
dbstats now includes a -S option to optimize work on pre-sorted data (inspired by code contributed by Haobo Yu) |
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
=item BUG FIX |
1429
|
|
|
|
|
|
|
dbsort now has a better estimate of memory usage when run on data with very short records (problem detected by Haobo Yu) |
1430
|
|
|
|
|
|
|
|
1431
|
|
|
|
|
|
|
=item BUG FIX |
1432
|
|
|
|
|
|
|
cleanup of temporary files is slightly better |
1433
|
|
|
|
|
|
|
|
1434
|
|
|
|
|
|
|
=back |
1435
|
|
|
|
|
|
|
|
1436
|
|
|
|
|
|
|
=head2 1.11, 2-Nov-01 |
1437
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
=over 4 |
1439
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
=item BUG FIX |
1441
|
|
|
|
|
|
|
dbcolneaten now runs in constant memory |
1442
|
|
|
|
|
|
|
|
1443
|
|
|
|
|
|
|
=item NEW |
1444
|
|
|
|
|
|
|
dbcolneaten now supports "field specifiers" that allow some control over how wide columns should be |
1445
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
=item OPTIMIZATION |
1447
|
|
|
|
|
|
|
dbsort now tries hard to be filesystem cache-friendly (inspired by "Information and Control in Gray-box Systems" by the Arpaci-Dusseau's at SOSP 2001) |
1448
|
|
|
|
|
|
|
|
1449
|
|
|
|
|
|
|
=item INTERNAL |
1450
|
|
|
|
|
|
|
t_distr now ported to perl5 module DbTDistr |
1451
|
|
|
|
|
|
|
|
1452
|
|
|
|
|
|
|
=back |
1453
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
=head2 1.12, 30-Oct-02 |
1455
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
=over 4 |
1457
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
=item BUG FIX |
1459
|
|
|
|
|
|
|
dbmultistats documentation typo fixed |
1460
|
|
|
|
|
|
|
|
1461
|
|
|
|
|
|
|
=item NEW |
1462
|
|
|
|
|
|
|
dbcolmultiscale |
1463
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
=item NEW |
1465
|
|
|
|
|
|
|
dbcol has -r option for "relaxed error checking" |
1466
|
|
|
|
|
|
|
|
1467
|
|
|
|
|
|
|
=item NEW |
1468
|
|
|
|
|
|
|
dbcolneaten has new -e option to strip end-of-line spaces |
1469
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
=item NEW |
1471
|
|
|
|
|
|
|
dbrow finally has a -v option to negate the test |
1472
|
|
|
|
|
|
|
|
1473
|
|
|
|
|
|
|
=item BUG FIX |
1474
|
|
|
|
|
|
|
math bug in dbcoldiff fixed by Ashvin Goel (need to check Scheaffer test cases) |
1475
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
=item BUG FIX |
1477
|
|
|
|
|
|
|
some patches to run with Perl 5.8. Note: some programs (dbcolmultiscale, dbmultistats, dbrowsplituniq) generate warnings like: "Use of uninitialized value in concatenation (.)" or "string at /usr/lib/perl5/5.8.0/FileCache.pm line 98, line 2". Please ignore this until I figure out how to suppress it. (Thanks to Jerry Zhao for noticing perl-5.8 problems.) |
1478
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
=item BUG FIX |
1480
|
|
|
|
|
|
|
fixed an autoconf problem where configure would fail to find a reasonable prefix (thanks to Fabio Silva for reporting the problem) |
1481
|
|
|
|
|
|
|
|
1482
|
|
|
|
|
|
|
=item NEW |
1483
|
|
|
|
|
|
|
db_to_html_table: simple conversion to html tables (NO fancy stuff) |
1484
|
|
|
|
|
|
|
|
1485
|
|
|
|
|
|
|
=item NEW |
1486
|
|
|
|
|
|
|
dblib now has a function dblib_text2html() that will do simple conversion of iso-8859-1 to HTML |
1487
|
|
|
|
|
|
|
|
1488
|
|
|
|
|
|
|
=back |
1489
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
|
1491
|
|
|
|
|
|
|
=head2 1.13, 4-Feb-04 |
1492
|
|
|
|
|
|
|
|
1493
|
|
|
|
|
|
|
|
1494
|
|
|
|
|
|
|
=over 4 |
1495
|
|
|
|
|
|
|
|
1496
|
|
|
|
|
|
|
=item NEW |
1497
|
|
|
|
|
|
|
fsdb added to the freebsd ports tree L. Maintainer: C |
1498
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
=item BUG FIX |
1500
|
|
|
|
|
|
|
properly handle trailing spaces when data must be numeric (ex. dbstats with -FS, see test dbstats_trailing_spaces). Fix from Ning Xu C. |
1501
|
|
|
|
|
|
|
|
1502
|
|
|
|
|
|
|
=item NEW |
1503
|
|
|
|
|
|
|
dbcolize error message improved (bug report from Terrence Brannon), and list format documented in the README. |
1504
|
|
|
|
|
|
|
|
1505
|
|
|
|
|
|
|
=item NEW |
1506
|
|
|
|
|
|
|
cgi_to_db converts CGI.pm-format storage to fsdb list format |
1507
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
=item BUG FIX |
1509
|
|
|
|
|
|
|
handle numeric synonyms for column names in dbcol properly |
1510
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
=item ENHANCEMENT |
1512
|
|
|
|
|
|
|
"talking about columns" section added to README. Lack of documentation pointed out by Lars Eggert. |
1513
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
=item CHANGE |
1515
|
|
|
|
|
|
|
dbformmail now defaults to using Mail ("Berkeley Mail") to send mail, rather than sendmail (sendmail is still an option, but mail doesn't require running as root) |
1516
|
|
|
|
|
|
|
|
1517
|
|
|
|
|
|
|
=item NEW |
1518
|
|
|
|
|
|
|
on platforms that support it (i.e., with perl 5.8), fsdb works fine with unicode |
1519
|
|
|
|
|
|
|
|
1520
|
|
|
|
|
|
|
=item NEW |
1521
|
|
|
|
|
|
|
dbfilevalidate: check a db file for some common errors |
1522
|
|
|
|
|
|
|
|
1523
|
|
|
|
|
|
|
=back |
1524
|
|
|
|
|
|
|
|
1525
|
|
|
|
|
|
|
|
1526
|
|
|
|
|
|
|
=head2 1.14, 24-Aug-06 |
1527
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
=over 4 |
1529
|
|
|
|
|
|
|
|
1530
|
|
|
|
|
|
|
|
1531
|
|
|
|
|
|
|
=item ENHANCEMENT |
1532
|
|
|
|
|
|
|
README cleanup |
1533
|
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1535
|
|
|
|
|
|
|
dbcolsplit renamed dbcolsplittocols |
1536
|
|
|
|
|
|
|
|
1537
|
|
|
|
|
|
|
=item NEW |
1538
|
|
|
|
|
|
|
dbcolsplittorows split one column into multiple rows |
1539
|
|
|
|
|
|
|
|
1540
|
|
|
|
|
|
|
=item NEW |
1541
|
|
|
|
|
|
|
dbcolsregression compute linear regression and correlation for two columns |
1542
|
|
|
|
|
|
|
|
1543
|
|
|
|
|
|
|
=item ENHANCEMENT |
1544
|
|
|
|
|
|
|
csv_to_db: better error handling, normalize field names, skip blank lines |
1545
|
|
|
|
|
|
|
|
1546
|
|
|
|
|
|
|
=item ENHANCEMENT |
1547
|
|
|
|
|
|
|
dbjoin now detects (and fails) if non-joined files have duplicate names |
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
=item BUG FIX |
1550
|
|
|
|
|
|
|
minor bug fixed in calculation of Student t-distributions (doesn't change any test output, but may have caused small errors) |
1551
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
=back |
1553
|
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
|
=head2 1.15, 12-Nov-07 |
1555
|
|
|
|
|
|
|
|
1556
|
|
|
|
|
|
|
=over 4 |
1557
|
|
|
|
|
|
|
|
1558
|
|
|
|
|
|
|
=item NEW |
1559
|
|
|
|
|
|
|
fsdb-1.14 added to the MacOS Fink system L. (Thanks to Lars Eggert for maintaining this port.) |
1560
|
|
|
|
|
|
|
|
1561
|
|
|
|
|
|
|
=item NEW |
1562
|
|
|
|
|
|
|
Fsdb::IO::Reader and Fsdb::IO::Writer now provide reasonably clean OO I/O interfaces to Fsdb files. Highly recommended if you use fsdb directly from perl. In the fullness of time I expect to reimplement the entire thing using these APIs to replace the current dblib.pl which is still hobbled by its roots in perl4. |
1563
|
|
|
|
|
|
|
|
1564
|
|
|
|
|
|
|
=item NEW |
1565
|
|
|
|
|
|
|
dbmapreduce now implements a Google-style map/reduce abstraction, generalizing dbmultistats. |
1566
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
=item ENHANCEMENT |
1568
|
|
|
|
|
|
|
fsdb now uses the Perl build system (Makefile.PL, etc.), instead of autoconf. This change paves the way to better perl-5-style modularization, proper manual pages, input of both listize and colize format for every program, and world peace. |
1569
|
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
|
=item ENHANCEMENT |
1571
|
|
|
|
|
|
|
dblib.pl is now moved to Fsdb::Old.pm. |
1572
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
=item BUG FIX |
1574
|
|
|
|
|
|
|
dbmultistats now propagates its format argument (-f). Bug and fix from Martin Lukac (thanks!). |
1575
|
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
|
=item ENHANCEMENT |
1577
|
|
|
|
|
|
|
dbformmail documentation now is clearer that it doesn't send the mail, you have to run the shell script it writes. (Problem observed by Unkyu Park.) |
1578
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
=item ENHANCEMENT |
1580
|
|
|
|
|
|
|
adapted to autoconf-2.61 (and then these changes were discarded in favor of The Perl Way). |
1581
|
|
|
|
|
|
|
|
1582
|
|
|
|
|
|
|
=item BUG FIX |
1583
|
|
|
|
|
|
|
dbmultistats memory usage corrected (O(# tags), not O(1)) |
1584
|
|
|
|
|
|
|
|
1585
|
|
|
|
|
|
|
=item ENHANCEMENT |
1586
|
|
|
|
|
|
|
dbmultistats can now optionally run with pre-grouped input in O(1) memory |
1587
|
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
|
=item ENHANCEMENT |
1589
|
|
|
|
|
|
|
dbroweval -N was finally implemented (eat comments) |
1590
|
|
|
|
|
|
|
|
1591
|
|
|
|
|
|
|
=back |
1592
|
|
|
|
|
|
|
|
1593
|
|
|
|
|
|
|
=head2 2.0, 25-Jan-08 |
1594
|
|
|
|
|
|
|
|
1595
|
|
|
|
|
|
|
2.0, 25-Jan-08 --- a quiet 2.0 release (gearing up towards complete) |
1596
|
|
|
|
|
|
|
|
1597
|
|
|
|
|
|
|
=over 4 |
1598
|
|
|
|
|
|
|
|
1599
|
|
|
|
|
|
|
=item ENHANCEMENT: |
1600
|
|
|
|
|
|
|
shifting old programs to Perl modules, with |
1601
|
|
|
|
|
|
|
the front-end program as just a wrapper. |
1602
|
|
|
|
|
|
|
In the short-term, this change just means programs have real man pages. |
1603
|
|
|
|
|
|
|
In the long-run, it will mean that one can run a pipeline in a single |
1604
|
|
|
|
|
|
|
Perl program. |
1605
|
|
|
|
|
|
|
So far: |
1606
|
|
|
|
|
|
|
L, |
1607
|
|
|
|
|
|
|
L, |
1608
|
|
|
|
|
|
|
the new L. |
1609
|
|
|
|
|
|
|
L |
1610
|
|
|
|
|
|
|
the new L, |
1611
|
|
|
|
|
|
|
the old C (renamed L), |
1612
|
|
|
|
|
|
|
L, |
1613
|
|
|
|
|
|
|
L, |
1614
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
=item NEW: |
1616
|
|
|
|
|
|
|
L is an internal-only module that lets one |
1617
|
|
|
|
|
|
|
use fsdb commands from within perl (via threads). |
1618
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
It also provides perl function aliases for the internal modules, |
1620
|
|
|
|
|
|
|
so a string of fsdb commands in perl is nearly as terse as in the |
1621
|
|
|
|
|
|
|
shell: |
1622
|
|
|
|
|
|
|
|
1623
|
|
|
|
|
|
|
use Fsdb::Filter::dbpipeline qw(:all); |
1624
|
|
|
|
|
|
|
dbpipeline( |
1625
|
|
|
|
|
|
|
dbrow(qw(name test1)), |
1626
|
|
|
|
|
|
|
dbroweval('_test1 += 5;') |
1627
|
|
|
|
|
|
|
); |
1628
|
|
|
|
|
|
|
|
1629
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE: |
1630
|
|
|
|
|
|
|
The old L has been renamed L. |
1631
|
|
|
|
|
|
|
The new L does the same thing as the old L. |
1632
|
|
|
|
|
|
|
This incompatibility is unfortunate but normalizes program names. |
1633
|
|
|
|
|
|
|
|
1634
|
|
|
|
|
|
|
=item CHANGE: |
1635
|
|
|
|
|
|
|
The new L program |
1636
|
|
|
|
|
|
|
always outputs C<-> (the default empty value) for |
1637
|
|
|
|
|
|
|
statistics it cannot compute (for example, standard deviation |
1638
|
|
|
|
|
|
|
if there is only one row), |
1639
|
|
|
|
|
|
|
instead of the old mix of C<-> and "na". |
1640
|
|
|
|
|
|
|
|
1641
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE: |
1642
|
|
|
|
|
|
|
The old L program, now called L, |
1643
|
|
|
|
|
|
|
also has different arguments. The C<-t mean,stddev> option is now |
1644
|
|
|
|
|
|
|
C<--tmean mean --tstddev stddev>. See L for details. |
1645
|
|
|
|
|
|
|
|
1646
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE: |
1647
|
|
|
|
|
|
|
L now assumes all new columns get the default |
1648
|
|
|
|
|
|
|
value rather than requiring each column to have an initial constant value. |
1649
|
|
|
|
|
|
|
To change the initial value, use the new C<-e> option. |
1650
|
|
|
|
|
|
|
|
1651
|
|
|
|
|
|
|
=item NEW: |
1652
|
|
|
|
|
|
|
L counts rows, an almost-subset of L's C output |
1653
|
|
|
|
|
|
|
(except without differentiating numeric/non-numeric input), |
1654
|
|
|
|
|
|
|
or the equivalent of C. |
1655
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
=item NEW: |
1657
|
|
|
|
|
|
|
L merges two sorted files. |
1658
|
|
|
|
|
|
|
This functionality was previously embedded in L. |
1659
|
|
|
|
|
|
|
|
1660
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE: |
1661
|
|
|
|
|
|
|
L's C<-i> option to include non-matches |
1662
|
|
|
|
|
|
|
is now renamed C<-a>, so as to not conflict with the new |
1663
|
|
|
|
|
|
|
standard option C<-i> for input file. |
1664
|
|
|
|
|
|
|
|
1665
|
|
|
|
|
|
|
=back |
1666
|
|
|
|
|
|
|
|
1667
|
|
|
|
|
|
|
=head2 2.1, 6-Apr-08 |
1668
|
|
|
|
|
|
|
|
1669
|
|
|
|
|
|
|
2.1, 6-Apr-08 --- another alpha 2.0, but now all converted programs understand both listize and colize format |
1670
|
|
|
|
|
|
|
|
1671
|
|
|
|
|
|
|
=over 4 |
1672
|
|
|
|
|
|
|
|
1673
|
|
|
|
|
|
|
=item ENHANCEMENT: |
1674
|
|
|
|
|
|
|
shifting more old programs to Perl modules. |
1675
|
|
|
|
|
|
|
New in 2.1: |
1676
|
|
|
|
|
|
|
L, |
1677
|
|
|
|
|
|
|
L, |
1678
|
|
|
|
|
|
|
L, |
1679
|
|
|
|
|
|
|
L, |
1680
|
|
|
|
|
|
|
L, |
1681
|
|
|
|
|
|
|
L |
1682
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
=item ENHANCEMENT |
1684
|
|
|
|
|
|
|
L now handles an arbitrary number of input files, |
1685
|
|
|
|
|
|
|
not just exactly two. |
1686
|
|
|
|
|
|
|
|
1687
|
|
|
|
|
|
|
=item NEW |
1688
|
|
|
|
|
|
|
L is an internal routine that handles merging exactly two files. |
1689
|
|
|
|
|
|
|
|
1690
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1691
|
|
|
|
|
|
|
L now specifies inputs like L, |
1692
|
|
|
|
|
|
|
rather than assuming the first two arguments were tables (as in fsdb-1). |
1693
|
|
|
|
|
|
|
|
1694
|
|
|
|
|
|
|
The old L argument C<-i> is now C<-a> or C<--type=outer>. |
1695
|
|
|
|
|
|
|
|
1696
|
|
|
|
|
|
|
A minor change: comments in the source files for |
1697
|
|
|
|
|
|
|
L are now intermixed with output |
1698
|
|
|
|
|
|
|
rather than being delayed until the end. |
1699
|
|
|
|
|
|
|
|
1700
|
|
|
|
|
|
|
=item ENHANCEMENT |
1701
|
|
|
|
|
|
|
L now no longer produces warnings when null values are |
1702
|
|
|
|
|
|
|
passed to numeric comparisons. |
1703
|
|
|
|
|
|
|
|
1704
|
|
|
|
|
|
|
=item BUG FIX |
1705
|
|
|
|
|
|
|
L now once again works with code that lacks a trailing semicolon. |
1706
|
|
|
|
|
|
|
(This fix corrects a regression from 1.15.) |
1707
|
|
|
|
|
|
|
|
1708
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1709
|
|
|
|
|
|
|
L's old C<-e> option (to avoid end-of-line spaces) is now C<-E> |
1710
|
|
|
|
|
|
|
to avoid conflicts with the standard empty field argument. |
1711
|
|
|
|
|
|
|
|
1712
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1713
|
|
|
|
|
|
|
L's old C<-e> option is now C<-E> to avoid conflicts. |
1714
|
|
|
|
|
|
|
And its C<-n>, C<-s>, and C<-w> are now |
1715
|
|
|
|
|
|
|
C<-N>, C<-S>, and C<-W> to correspond. |
1716
|
|
|
|
|
|
|
|
1717
|
|
|
|
|
|
|
=item NEW |
1718
|
|
|
|
|
|
|
L replaces L, L, and L, |
1719
|
|
|
|
|
|
|
but with different options. |
1720
|
|
|
|
|
|
|
|
1721
|
|
|
|
|
|
|
=item ENHANCEMENT |
1722
|
|
|
|
|
|
|
The library routines C now understand both list-format |
1723
|
|
|
|
|
|
|
and column-format data, so all converted programs can now |
1724
|
|
|
|
|
|
|
I<automatically> read either format. This capability was one |
1725
|
|
|
|
|
|
|
of the milestone goals for 2.0, so yea! |
1726
|
|
|
|
|
|
|
|
1727
|
|
|
|
|
|
|
=back |
1728
|
|
|
|
|
|
|
|
1729
|
|
|
|
|
|
|
=head2 2.2, 23-May-08 |
1730
|
|
|
|
|
|
|
|
1731
|
|
|
|
|
|
|
Release 2.2 is another 2.x alpha release. Now I<most> of the |
1732
|
|
|
|
|
|
|
commands are ported, but a few remain, and I plan one last |
1733
|
|
|
|
|
|
|
incompatible change (to the file header) before 2.x final. |
1734
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
=over 4 |
1736
|
|
|
|
|
|
|
|
1737
|
|
|
|
|
|
|
=item ENHANCEMENT |
1738
|
|
|
|
|
|
|
|
1739
|
|
|
|
|
|
|
shifting more old programs to Perl modules. |
1740
|
|
|
|
|
|
|
New in 2.2: |
1741
|
|
|
|
|
|
|
L, |
1742
|
|
|
|
|
|
|
L. |
1743
|
|
|
|
|
|
|
L. |
1744
|
|
|
|
|
|
|
L. |
1745
|
|
|
|
|
|
|
L. |
1746
|
|
|
|
|
|
|
L. |
1747
|
|
|
|
|
|
|
L. |
1748
|
|
|
|
|
|
|
L. |
1749
|
|
|
|
|
|
|
L. |
1750
|
|
|
|
|
|
|
L. |
1751
|
|
|
|
|
|
|
L. |
1752
|
|
|
|
|
|
|
Also |
1753
|
|
|
|
|
|
|
L |
1754
|
|
|
|
|
|
|
exists only as a front-end (command-line) program. |
1755
|
|
|
|
|
|
|
|
1756
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1757
|
|
|
|
|
|
|
|
1758
|
|
|
|
|
|
|
The following programs have been dropped from fsdb-2.x: |
1759
|
|
|
|
|
|
|
L, |
1760
|
|
|
|
|
|
|
L, |
1761
|
|
|
|
|
|
|
L, |
1762
|
|
|
|
|
|
|
L. |
1763
|
|
|
|
|
|
|
|
1764
|
|
|
|
|
|
|
=item NEW |
1765
|
|
|
|
|
|
|
|
1766
|
|
|
|
|
|
|
L to convert Apache logfiles |
1767
|
|
|
|
|
|
|
|
1768
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1769
|
|
|
|
|
|
|
|
1770
|
|
|
|
|
|
|
Options to L are now B<-B> and B<-I>, |
1771
|
|
|
|
|
|
|
not B<-a> and B<-i>. |
1772
|
|
|
|
|
|
|
|
1773
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1774
|
|
|
|
|
|
|
|
1775
|
|
|
|
|
|
|
L is now L. |
1776
|
|
|
|
|
|
|
|
1777
|
|
|
|
|
|
|
=item BUG FIXES |
1778
|
|
|
|
|
|
|
|
1779
|
|
|
|
|
|
|
L better handles empty columns; |
1780
|
|
|
|
|
|
|
L warning suppressed (actually a bug in high-bucket handling). |
1781
|
|
|
|
|
|
|
|
1782
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1783
|
|
|
|
|
|
|
|
1784
|
|
|
|
|
|
|
L now requires a C<-k> option in front of the |
1785
|
|
|
|
|
|
|
key (tag) field, or if none is given, it will group by the first field |
1786
|
|
|
|
|
|
|
(both like L). |
1787
|
|
|
|
|
|
|
|
1788
|
|
|
|
|
|
|
=item KNOWN BUG |
1789
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
L with quantile option doesn't work currently. |
1791
|
|
|
|
|
|
|
|
1792
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1793
|
|
|
|
|
|
|
|
1794
|
|
|
|
|
|
|
L is renamed L. |
1795
|
|
|
|
|
|
|
|
1796
|
|
|
|
|
|
|
=item BUG FIXES |
1797
|
|
|
|
|
|
|
|
1798
|
|
|
|
|
|
|
L was leaving its log message as a command, not a comment. |
1799
|
|
|
|
|
|
|
Oops. No longer. |
1800
|
|
|
|
|
|
|
|
1801
|
|
|
|
|
|
|
=back |
1802
|
|
|
|
|
|
|
|
1803
|
|
|
|
|
|
|
=head2 2.3, 27-May-08 (alpha) |
1804
|
|
|
|
|
|
|
|
1805
|
|
|
|
|
|
|
Another alpha release, this one just to fix the critical dbjoin bug |
1806
|
|
|
|
|
|
|
listed below (that happens to have blocked my MP3 jukebox :-). |
1807
|
|
|
|
|
|
|
|
1808
|
|
|
|
|
|
|
=over 4 |
1809
|
|
|
|
|
|
|
|
1810
|
|
|
|
|
|
|
=item BUG FIX |
1811
|
|
|
|
|
|
|
|
1812
|
|
|
|
|
|
|
Dbsort no longer hangs if given an input file with no rows. |
1813
|
|
|
|
|
|
|
|
1814
|
|
|
|
|
|
|
=item BUG FIX |
1815
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
Dbjoin now works with unsorted input coming from a pipeline (like stdin). |
1817
|
|
|
|
|
|
|
Perl-5.8.8 has a bug (?) that was making this case fail---opening |
1818
|
|
|
|
|
|
|
stdin in one thread, reading some, then reading more in a different |
1819
|
|
|
|
|
|
|
thread caused an lseek which works on files, but fails on pipes like stdin. |
1820
|
|
|
|
|
|
|
Go figure. |
1821
|
|
|
|
|
|
|
|
1822
|
|
|
|
|
|
|
=item BUG FIX / KNOWN BUG |
1823
|
|
|
|
|
|
|
|
1824
|
|
|
|
|
|
|
The dbjoin fix also fixed dbmultistats -q |
1825
|
|
|
|
|
|
|
(it now gives the right answer). |
1826
|
|
|
|
|
|
|
Although a new bug appeared, messages like: |
1827
|
|
|
|
|
|
|
Attempt to free unreferenced scalar: SV 0xa9dd0c4, Perl interpreter: 0xa8350b8 during global destruction. |
1828
|
|
|
|
|
|
|
So the dbmultistats_quartile test is still disabled. |
1829
|
|
|
|
|
|
|
|
1830
|
|
|
|
|
|
|
=back |
1831
|
|
|
|
|
|
|
|
1832
|
|
|
|
|
|
|
=head2 2.4, 18-Jun-08 |
1833
|
|
|
|
|
|
|
|
1834
|
|
|
|
|
|
|
Another alpha release, mostly to fix minor usability |
1835
|
|
|
|
|
|
|
problems in dbmapreduce and client functions. |
1836
|
|
|
|
|
|
|
|
1837
|
|
|
|
|
|
|
=over 4 |
1838
|
|
|
|
|
|
|
|
1839
|
|
|
|
|
|
|
=item ENHANCEMENT |
1840
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
L now defaults to running user supplied code without warnings |
1842
|
|
|
|
|
|
|
(as with fsdb-1.x). |
1843
|
|
|
|
|
|
|
Use C<--warnings> or C<-w> to turn them back on. |
1844
|
|
|
|
|
|
|
|
1845
|
|
|
|
|
|
|
=item ENHANCEMENT |
1846
|
|
|
|
|
|
|
|
1847
|
|
|
|
|
|
|
L can now write different format output |
1848
|
|
|
|
|
|
|
than the input, using the C<-m> option. |
1849
|
|
|
|
|
|
|
|
1850
|
|
|
|
|
|
|
=item KNOWN BUG |
1851
|
|
|
|
|
|
|
|
1852
|
|
|
|
|
|
|
L emits warnings on perl 5.10.0 |
1853
|
|
|
|
|
|
|
about "Unbalanced string table refcount" and "Scalars leaked" |
1854
|
|
|
|
|
|
|
when run with an external program as a reducer. |
1855
|
|
|
|
|
|
|
|
1856
|
|
|
|
|
|
|
L emits the warning "Attempt to free unreferenced scalar" |
1857
|
|
|
|
|
|
|
when run with quartiles. |
1858
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
In each case the output is correct. |
1860
|
|
|
|
|
|
|
I believe these can be ignored. |
1861
|
|
|
|
|
|
|
|
1862
|
|
|
|
|
|
|
=item CHANGE |
1863
|
|
|
|
|
|
|
|
1864
|
|
|
|
|
|
|
L no longer logs a line for each reducer that is invoked. |
1865
|
|
|
|
|
|
|
|
1866
|
|
|
|
|
|
|
=back |
1867
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
|
1869
|
|
|
|
|
|
|
=head2 2.5, 24-Jun-08 |
1870
|
|
|
|
|
|
|
|
1871
|
|
|
|
|
|
|
Another alpha release, fixing more minor bugs in |
1872
|
|
|
|
|
|
|
C and lossage in C. |
1873
|
|
|
|
|
|
|
|
1874
|
|
|
|
|
|
|
=over 4 |
1875
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
=item ENHANCEMENT |
1877
|
|
|
|
|
|
|
|
1878
|
|
|
|
|
|
|
L can now tolerate non-map-aware reducers |
1879
|
|
|
|
|
|
|
that pass back the key column in output. |
1880
|
|
|
|
|
|
|
It also passes the current key as the last argument to |
1881
|
|
|
|
|
|
|
external reducers. |
1882
|
|
|
|
|
|
|
|
1883
|
|
|
|
|
|
|
=item BUG FIX |
1884
|
|
|
|
|
|
|
|
1885
|
|
|
|
|
|
|
L, correctly handle C<-header> option again. |
1886
|
|
|
|
|
|
|
(Broken since fsdb-2.3.) |
1887
|
|
|
|
|
|
|
|
1888
|
|
|
|
|
|
|
=back |
1889
|
|
|
|
|
|
|
|
1890
|
|
|
|
|
|
|
|
1891
|
|
|
|
|
|
|
=head2 2.6, 11-Jul-08 |
1892
|
|
|
|
|
|
|
|
1893
|
|
|
|
|
|
|
Another alpha release, needed to fix DaGronk. |
1894
|
|
|
|
|
|
|
One new port, small bug fixes, and important fix to L. |
1895
|
|
|
|
|
|
|
|
1896
|
|
|
|
|
|
|
=over 4 |
1897
|
|
|
|
|
|
|
|
1898
|
|
|
|
|
|
|
=item ENHANCEMENT |
1899
|
|
|
|
|
|
|
|
1900
|
|
|
|
|
|
|
shifting more old programs to Perl modules. |
1901
|
|
|
|
|
|
|
New in 2.6: |
1902
|
|
|
|
|
|
|
L. |
1903
|
|
|
|
|
|
|
|
1904
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE and ENHANCEMENTS |
1905
|
|
|
|
|
|
|
L arguments changed, |
1906
|
|
|
|
|
|
|
use C<--rank> to require ranking instead of C<-r>. |
1907
|
|
|
|
|
|
|
Also, C<--ascending> and C<--descending> can now be specified separately, |
1908
|
|
|
|
|
|
|
both for C<--percentile> and C<--rank>. |
1909
|
|
|
|
|
|
|
|
1910
|
|
|
|
|
|
|
=item BUG FIX |
1911
|
|
|
|
|
|
|
|
1912
|
|
|
|
|
|
|
Sigh, the sense of the --warnings option in L was inverted. No longer. |
1913
|
|
|
|
|
|
|
|
1914
|
|
|
|
|
|
|
=item BUG FIX |
1915
|
|
|
|
|
|
|
|
1916
|
|
|
|
|
|
|
I found and fixed the string leaks (errors like "Unbalanced string |
1917
|
|
|
|
|
|
|
table refcount" and "Scalars leaked") in L and L. |
1918
|
|
|
|
|
|
|
(All Cs in threads must be manually destroyed.) |
1919
|
|
|
|
|
|
|
|
1920
|
|
|
|
|
|
|
=item BUG FIX |
1921
|
|
|
|
|
|
|
|
1922
|
|
|
|
|
|
|
The C<-C> option to specify the column separator in L |
1923
|
|
|
|
|
|
|
now works again (broken since it was ported). |
1924
|
|
|
|
|
|
|
|
1925
|
|
|
|
|
|
|
=back |
1926
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
=head2 2.7, 30-Jul-08 beta |
1928
|
|
|
|
|
|
|
|
1929
|
|
|
|
|
|
|
The beta release of fsdb-2.x. Finally, all programs are ported. |
1930
|
|
|
|
|
|
|
As statistics, the number of lines of non-library code doubled from |
1931
|
|
|
|
|
|
|
7.5k to 15.5k. The libraries are much more complete, |
1932
|
|
|
|
|
|
|
going from 866 to 5164 lines. |
1933
|
|
|
|
|
|
|
The overall number of programs is about the same, |
1934
|
|
|
|
|
|
|
although 19 were dropped and 11 were added. |
1935
|
|
|
|
|
|
|
The number of test cases has grown from 116 to 175. |
1936
|
|
|
|
|
|
|
All programs are now in perl-5, no more shell scripts or perl-4. |
1937
|
|
|
|
|
|
|
All programs now have manual pages. |
1938
|
|
|
|
|
|
|
|
1939
|
|
|
|
|
|
|
Although this is a major step forward, I still expect |
1940
|
|
|
|
|
|
|
to rename "jdb" to "fsdb". |
1941
|
|
|
|
|
|
|
|
1942
|
|
|
|
|
|
|
=over 4 |
1943
|
|
|
|
|
|
|
|
1944
|
|
|
|
|
|
|
=item ENHANCEMENT |
1945
|
|
|
|
|
|
|
|
1946
|
|
|
|
|
|
|
shifting more old programs to Perl modules. |
1947
|
|
|
|
|
|
|
New in 2.7: |
1948
|
|
|
|
|
|
|
L. |
1949
|
|
|
|
|
|
|
L. |
1950
|
|
|
|
|
|
|
L. |
1951
|
|
|
|
|
|
|
L. |
1952
|
|
|
|
|
|
|
L. |
1953
|
|
|
|
|
|
|
L, |
1954
|
|
|
|
|
|
|
L, |
1955
|
|
|
|
|
|
|
L, |
1956
|
|
|
|
|
|
|
L, |
1957
|
|
|
|
|
|
|
L, |
1958
|
|
|
|
|
|
|
L. |
1959
|
|
|
|
|
|
|
|
1960
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
1961
|
|
|
|
|
|
|
|
1962
|
|
|
|
|
|
|
The following programs have been dropped from fsdb-2.x: |
1963
|
|
|
|
|
|
|
L, |
1964
|
|
|
|
|
|
|
L, |
1965
|
|
|
|
|
|
|
L. |
1966
|
|
|
|
|
|
|
L. |
1967
|
|
|
|
|
|
|
They may come back, but seemed overly specialized. |
1968
|
|
|
|
|
|
|
The following program |
1969
|
|
|
|
|
|
|
L |
1970
|
|
|
|
|
|
|
was dropped because it is superseded by L. |
1971
|
|
|
|
|
|
|
L |
1972
|
|
|
|
|
|
|
was dropped pending test cases and examples. |
1973
|
|
|
|
|
|
|
|
1974
|
|
|
|
|
|
|
=item ENHANCEMENT |
1975
|
|
|
|
|
|
|
|
1976
|
|
|
|
|
|
|
L now has a C<-c> option to correct errors. |
1977
|
|
|
|
|
|
|
|
1978
|
|
|
|
|
|
|
=item NEW |
1979
|
|
|
|
|
|
|
|
1980
|
|
|
|
|
|
|
L provides the inverse of |
1981
|
|
|
|
|
|
|
L. |
1982
|
|
|
|
|
|
|
|
1983
|
|
|
|
|
|
|
=back |
1984
|
|
|
|
|
|
|
|
1985
|
|
|
|
|
|
|
|
1986
|
|
|
|
|
|
|
=head2 2.8, 5-Aug-08 |
1987
|
|
|
|
|
|
|
|
1988
|
|
|
|
|
|
|
Change header format, preserving forwards compatibility. |
1989
|
|
|
|
|
|
|
|
1990
|
|
|
|
|
|
|
=over 4 |
1991
|
|
|
|
|
|
|
|
1992
|
|
|
|
|
|
|
=item BUG FIX |
1993
|
|
|
|
|
|
|
|
1994
|
|
|
|
|
|
|
Complete editing pass over the manual, making sure it aligns |
1995
|
|
|
|
|
|
|
with fsdb-2.x. |
1996
|
|
|
|
|
|
|
|
1997
|
|
|
|
|
|
|
=item SEMI-COMPATIBLE CHANGE |
1998
|
|
|
|
|
|
|
|
1999
|
|
|
|
|
|
|
The header of fsdb files has changed, it is now #fsdb, not #h (or #L) |
2000
|
|
|
|
|
|
|
and parsing of -F and -R are also different. |
2001
|
|
|
|
|
|
|
See L for the new specification. |
2002
|
|
|
|
|
|
|
The v1 file format will be read, compatibly, but |
2003
|
|
|
|
|
|
|
not written. |
2004
|
|
|
|
|
|
|
|
2005
|
|
|
|
|
|
|
=item BUG FIX |
2006
|
|
|
|
|
|
|
|
2007
|
|
|
|
|
|
|
L now tolerates comments that precede the first key, |
2008
|
|
|
|
|
|
|
instead of failing with an error message. |
2009
|
|
|
|
|
|
|
|
2010
|
|
|
|
|
|
|
=back |
2011
|
|
|
|
|
|
|
|
2012
|
|
|
|
|
|
|
|
2013
|
|
|
|
|
|
|
=head2 2.9, 6-Aug-08 |
2014
|
|
|
|
|
|
|
|
2015
|
|
|
|
|
|
|
Still in beta; just a quick bug-fix for L. |
2016
|
|
|
|
|
|
|
|
2017
|
|
|
|
|
|
|
=over 4 |
2018
|
|
|
|
|
|
|
|
2019
|
|
|
|
|
|
|
=item ENHANCEMENT |
2020
|
|
|
|
|
|
|
|
2021
|
|
|
|
|
|
|
L now generates plausible output when given no rows |
2022
|
|
|
|
|
|
|
of input. |
2023
|
|
|
|
|
|
|
|
2024
|
|
|
|
|
|
|
=back |
2025
|
|
|
|
|
|
|
|
2026
|
|
|
|
|
|
|
=head2 2.10, 23-Sep-08 |
2027
|
|
|
|
|
|
|
|
2028
|
|
|
|
|
|
|
Still in beta, but picking up some bug fixes. |
2029
|
|
|
|
|
|
|
|
2030
|
|
|
|
|
|
|
=over 4 |
2031
|
|
|
|
|
|
|
|
2032
|
|
|
|
|
|
|
=item ENHANCEMENT |
2033
|
|
|
|
|
|
|
|
2034
|
|
|
|
|
|
|
L now generates plausible output when given no rows |
2035
|
|
|
|
|
|
|
of input. |
2036
|
|
|
|
|
|
|
|
2037
|
|
|
|
|
|
|
=item ENHANCEMENT |
2038
|
|
|
|
|
|
|
|
2039
|
|
|
|
|
|
|
L the warnings option was backwards; |
2040
|
|
|
|
|
|
|
now corrected. As a result, warnings in user code now default off |
2041
|
|
|
|
|
|
|
(like in fsdb-1.x). |
2042
|
|
|
|
|
|
|
|
2043
|
|
|
|
|
|
|
=item BUG FIX |
2044
|
|
|
|
|
|
|
|
2045
|
|
|
|
|
|
|
L now defaults to assuming the target column is numeric. |
2046
|
|
|
|
|
|
|
The new option C<-N> allows selection of a non-numeric target. |
2047
|
|
|
|
|
|
|
|
2048
|
|
|
|
|
|
|
=item BUG FIX |
2049
|
|
|
|
|
|
|
|
2050
|
|
|
|
|
|
|
L now includes C<--sample> and C<--nosample> options |
2051
|
|
|
|
|
|
|
to compute the sample or full population correlation coefficients. |
2052
|
|
|
|
|
|
|
Thanks to Xue Cai for finding this bug. |
2053
|
|
|
|
|
|
|
|
2054
|
|
|
|
|
|
|
=back |
2055
|
|
|
|
|
|
|
|
2056
|
|
|
|
|
|
|
|
2057
|
|
|
|
|
|
|
=head2 2.11, 14-Oct-08 |
2058
|
|
|
|
|
|
|
|
2059
|
|
|
|
|
|
|
Still in beta, but picking up some bug fixes. |
2060
|
|
|
|
|
|
|
|
2061
|
|
|
|
|
|
|
=over 4 |
2062
|
|
|
|
|
|
|
|
2063
|
|
|
|
|
|
|
=item ENHANCEMENT |
2064
|
|
|
|
|
|
|
|
2065
|
|
|
|
|
|
|
L is now more aggressive about filling in empty cells |
2066
|
|
|
|
|
|
|
with the official empty value, rather than leaving them blank or as whitespace. |
2067
|
|
|
|
|
|
|
|
2068
|
|
|
|
|
|
|
=item ENHANCEMENT |
2069
|
|
|
|
|
|
|
|
2070
|
|
|
|
|
|
|
L now catches failures during pipeline element setup |
2071
|
|
|
|
|
|
|
and exits reasonably gracefully. |
2072
|
|
|
|
|
|
|
|
2073
|
|
|
|
|
|
|
=item BUG FIX |
2074
|
|
|
|
|
|
|
|
2075
|
|
|
|
|
|
|
L now reaps child processes, thus avoiding |
2076
|
|
|
|
|
|
|
running out of processes when used a lot. |
2077
|
|
|
|
|
|
|
|
2078
|
|
|
|
|
|
|
=back |
2079
|
|
|
|
|
|
|
|
2080
|
|
|
|
|
|
|
=head2 2.12, 16-Oct-08 |
2081
|
|
|
|
|
|
|
|
2082
|
|
|
|
|
|
|
Finally, a full (non-beta) 2.x release! |
2083
|
|
|
|
|
|
|
|
2084
|
|
|
|
|
|
|
=over 4 |
2085
|
|
|
|
|
|
|
|
2086
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
2087
|
|
|
|
|
|
|
|
2088
|
|
|
|
|
|
|
Jdb has been renamed Fsdb, the flatfile-streaming database. |
2089
|
|
|
|
|
|
|
This change affects all internal Perl APIs, |
2090
|
|
|
|
|
|
|
but no shell command-level APIs. |
2091
|
|
|
|
|
|
|
While Jdb served well for more than ten years, |
2092
|
|
|
|
|
|
|
it is easily confused with the Java debugger (even though Jdb was there first!). |
2093
|
|
|
|
|
|
|
It also is too generic to work well in web search engines. |
2094
|
|
|
|
|
|
|
Finally, Jdb stands for ``John's database'', and we're a bit beyond that. |
2095
|
|
|
|
|
|
|
(However, some call me the ``file-system guy'', so |
2096
|
|
|
|
|
|
|
one could argue it retains that meaning.) |
2097
|
|
|
|
|
|
|
|
2098
|
|
|
|
|
|
|
If you just used the shell commands, this change should not affect you. |
2099
|
|
|
|
|
|
|
If you used the Perl-level libraries directly in your code, |
2100
|
|
|
|
|
|
|
you should be able to rename "Jdb" to "Fsdb" to move to 2.12. |
2101
|
|
|
|
|
|
|
|
2102
|
|
|
|
|
|
|
The jdb-announce list has not yet been renamed, but it will be shortly. |
2103
|
|
|
|
|
|
|
|
2104
|
|
|
|
|
|
|
With this release I've accomplished everything I wanted to |
2105
|
|
|
|
|
|
|
in fsdb-2.x. I therefore expect to return to boring, bugfix releases. |
2106
|
|
|
|
|
|
|
|
2107
|
|
|
|
|
|
|
=back |
2108
|
|
|
|
|
|
|
|
2109
|
|
|
|
|
|
|
=head2 2.13, 30-Oct-08 |
2110
|
|
|
|
|
|
|
|
2111
|
|
|
|
|
|
|
=over 4 |
2112
|
|
|
|
|
|
|
|
2113
|
|
|
|
|
|
|
=item BUG FIX |
2114
|
|
|
|
|
|
|
|
2115
|
|
|
|
|
|
|
L now treats non-numeric data as zero by default. |
2116
|
|
|
|
|
|
|
|
2117
|
|
|
|
|
|
|
=item BUG FIX |
2118
|
|
|
|
|
|
|
|
2119
|
|
|
|
|
|
|
Fixed a perl-5.10ism in L that |
2120
|
|
|
|
|
|
|
breaks that program under 5.8. |
2121
|
|
|
|
|
|
|
Thanks to Martin Lukac for reporting the bug. |
2122
|
|
|
|
|
|
|
|
2123
|
|
|
|
|
|
|
=back |
2124
|
|
|
|
|
|
|
|
2125
|
|
|
|
|
|
|
=head2 2.14, 26-Nov-08 |
2126
|
|
|
|
|
|
|
|
2127
|
|
|
|
|
|
|
=over 4 |
2128
|
|
|
|
|
|
|
|
2129
|
|
|
|
|
|
|
=item BUG FIX |
2130
|
|
|
|
|
|
|
|
2131
|
|
|
|
|
|
|
Improved documentation for L's C<-f> option. |
2132
|
|
|
|
|
|
|
|
2133
|
|
|
|
|
|
|
=item ENHANCEMENT |
2134
|
|
|
|
|
|
|
|
2135
|
|
|
|
|
|
|
L now computes a moving standard deviation in addition |
2136
|
|
|
|
|
|
|
to a moving mean. |
2137
|
|
|
|
|
|
|
|
2138
|
|
|
|
|
|
|
=back |
2139
|
|
|
|
|
|
|
|
2140
|
|
|
|
|
|
|
|
2141
|
|
|
|
|
|
|
=head2 2.15, 13-Apr-09 |
2142
|
|
|
|
|
|
|
|
2143
|
|
|
|
|
|
|
=over 4 |
2144
|
|
|
|
|
|
|
|
2145
|
|
|
|
|
|
|
=item BUG FIX |
2146
|
|
|
|
|
|
|
|
2147
|
|
|
|
|
|
|
Fix a F bug reported by Shalindra Fernando. |
2148
|
|
|
|
|
|
|
|
2149
|
|
|
|
|
|
|
=back |
2150
|
|
|
|
|
|
|
|
2151
|
|
|
|
|
|
|
|
2152
|
|
|
|
|
|
|
=head2 2.16, 14-Apr-09 |
2153
|
|
|
|
|
|
|
|
2154
|
|
|
|
|
|
|
=over 4 |
2155
|
|
|
|
|
|
|
|
2156
|
|
|
|
|
|
|
=item BUG FIX |
2157
|
|
|
|
|
|
|
|
2158
|
|
|
|
|
|
|
Another minor release bug: on some systems F loses |
2159
|
|
|
|
|
|
|
executable permissions. Again reported by Shalindra Fernando. |
2160
|
|
|
|
|
|
|
|
2161
|
|
|
|
|
|
|
=back |
2162
|
|
|
|
|
|
|
|
2163
|
|
|
|
|
|
|
=head2 2.17, 25-Jun-09 |
2164
|
|
|
|
|
|
|
|
2165
|
|
|
|
|
|
|
=over 4 |
2166
|
|
|
|
|
|
|
|
2167
|
|
|
|
|
|
|
=item TYPO FIXES |
2168
|
|
|
|
|
|
|
|
2169
|
|
|
|
|
|
|
Typo in the F manual fixed. |
2170
|
|
|
|
|
|
|
|
2171
|
|
|
|
|
|
|
=item IMPROVEMENT |
2172
|
|
|
|
|
|
|
|
2173
|
|
|
|
|
|
|
There is no longer a comment line to label columns |
2174
|
|
|
|
|
|
|
in F, instead the header line is tweaked to |
2175
|
|
|
|
|
|
|
line up. This change restores the Jdb-1.x behavior, and |
2176
|
|
|
|
|
|
|
means that repeated runs of dbcolneaten no longer add comment lines |
2177
|
|
|
|
|
|
|
each time. |
2178
|
|
|
|
|
|
|
|
2179
|
|
|
|
|
|
|
=item BUG FIX |
2180
|
|
|
|
|
|
|
|
2181
|
|
|
|
|
|
|
It turns out F was not correctly handling trailing spaces |
2182
|
|
|
|
|
|
|
when given the C<-E> option to suppress them. This regression is now |
2183
|
|
|
|
|
|
|
fixed. |
2184
|
|
|
|
|
|
|
|
2185
|
|
|
|
|
|
|
=item EXTENSION |
2186
|
|
|
|
|
|
|
|
2187
|
|
|
|
|
|
|
L can now handle direct references to the last row |
2188
|
|
|
|
|
|
|
via F<$lfref>, a dubious but now documented feature. |
2189
|
|
|
|
|
|
|
|
2190
|
|
|
|
|
|
|
=item BUG FIXES |
2191
|
|
|
|
|
|
|
|
2192
|
|
|
|
|
|
|
Separators set with C<-C> in F and F |
2193
|
|
|
|
|
|
|
were not properly |
2194
|
|
|
|
|
|
|
setting the heading, and null fields were not recognized. |
2195
|
|
|
|
|
|
|
The first bug was reported by Martin Lukac. |
2196
|
|
|
|
|
|
|
|
2197
|
|
|
|
|
|
|
=back |
2198
|
|
|
|
|
|
|
|
2199
|
|
|
|
|
|
|
=head2 2.18, 1-Jul-09 A minor release |
2200
|
|
|
|
|
|
|
|
2201
|
|
|
|
|
|
|
=over 4 |
2202
|
|
|
|
|
|
|
|
2203
|
|
|
|
|
|
|
=item IMPROVEMENT |
2204
|
|
|
|
|
|
|
|
2205
|
|
|
|
|
|
|
Documentation for F has been improved. |
2206
|
|
|
|
|
|
|
|
2207
|
|
|
|
|
|
|
=item IMPROVEMENT |
2208
|
|
|
|
|
|
|
|
2209
|
|
|
|
|
|
|
The package should now be PGP-signed. |
2210
|
|
|
|
|
|
|
|
2211
|
|
|
|
|
|
|
=back |
2212
|
|
|
|
|
|
|
|
2213
|
|
|
|
|
|
|
|
2214
|
|
|
|
|
|
|
=head2 2.19, 10-Jul-09 |
2215
|
|
|
|
|
|
|
|
2216
|
|
|
|
|
|
|
=over 4 |
2217
|
|
|
|
|
|
|
|
2218
|
|
|
|
|
|
|
=item BUG FIX |
2219
|
|
|
|
|
|
|
|
2220
|
|
|
|
|
|
|
Internal improvements to debugging output and robustness of |
2221
|
|
|
|
|
|
|
F and F. |
2222
|
|
|
|
|
|
|
F re-enabled. |
2223
|
|
|
|
|
|
|
|
2224
|
|
|
|
|
|
|
=back |
2225
|
|
|
|
|
|
|
|
2226
|
|
|
|
|
|
|
|
2227
|
|
|
|
|
|
|
=head2 2.20, 30-Nov-09 |
2228
|
|
|
|
|
|
|
(A collection of minor bugfixes, plus a build against Fedora 12.) |
2229
|
|
|
|
|
|
|
|
2230
|
|
|
|
|
|
|
=over 4 |
2231
|
|
|
|
|
|
|
|
2232
|
|
|
|
|
|
|
=item BUG FIX |
2233
|
|
|
|
|
|
|
|
2234
|
|
|
|
|
|
|
Logging for |
2235
|
|
|
|
|
|
|
F |
2236
|
|
|
|
|
|
|
with code refs is now stable |
2237
|
|
|
|
|
|
|
(it no longer includes a hex pointer to the code reference). |
2238
|
|
|
|
|
|
|
|
2239
|
|
|
|
|
|
|
=item BUG FIX |
2240
|
|
|
|
|
|
|
|
2241
|
|
|
|
|
|
|
Better handling of mixed blank lines in F |
2242
|
|
|
|
|
|
|
(see test case F). |
2243
|
|
|
|
|
|
|
|
2244
|
|
|
|
|
|
|
=item BUG FIX |
2245
|
|
|
|
|
|
|
|
2246
|
|
|
|
|
|
|
F now handles multi-line input better, |
2247
|
|
|
|
|
|
|
and handles tables with COLSPAN. |
2248
|
|
|
|
|
|
|
|
2249
|
|
|
|
|
|
|
=item BUG FIX |
2250
|
|
|
|
|
|
|
|
2251
|
|
|
|
|
|
|
F now cleans up threads in an C |
2252
|
|
|
|
|
|
|
to prevent "cannot detach a joined thread" errors that popped |
2253
|
|
|
|
|
|
|
up in perl-5.10. Hopefully this prevents a race condition |
2254
|
|
|
|
|
|
|
that causes the test suites to hang about 20% of the time |
2255
|
|
|
|
|
|
|
(in F). |
2256
|
|
|
|
|
|
|
|
2257
|
|
|
|
|
|
|
=item IMPROVEMENT |
2258
|
|
|
|
|
|
|
|
2259
|
|
|
|
|
|
|
F now detects and correctly fails |
2260
|
|
|
|
|
|
|
when the input and reducer have incompatible |
2261
|
|
|
|
|
|
|
field separators. |
2262
|
|
|
|
|
|
|
|
2263
|
|
|
|
|
|
|
=item IMPROVEMENT |
2264
|
|
|
|
|
|
|
|
2265
|
|
|
|
|
|
|
F, F, F, F, |
2266
|
|
|
|
|
|
|
and F |
2267
|
|
|
|
|
|
|
now all take an C<-F> option to let one specify the output field separator |
2268
|
|
|
|
|
|
|
(so they work better with F). |
2269
|
|
|
|
|
|
|
|
2270
|
|
|
|
|
|
|
=item BUG FIX |
2271
|
|
|
|
|
|
|
|
2272
|
|
|
|
|
|
|
An omitted C<-k> from the manual page of F |
2273
|
|
|
|
|
|
|
is now there. Bug reported by Unkyu Park. |
2274
|
|
|
|
|
|
|
|
2275
|
|
|
|
|
|
|
=back |
2276
|
|
|
|
|
|
|
|
2277
|
|
|
|
|
|
|
|
2278
|
|
|
|
|
|
|
=head2 2.21, 17-Apr-10 |
2279
|
|
|
|
|
|
|
bug fix release |
2280
|
|
|
|
|
|
|
|
2281
|
|
|
|
|
|
|
=over 4 |
2282
|
|
|
|
|
|
|
|
2283
|
|
|
|
|
|
|
=item BUG FIX |
2284
|
|
|
|
|
|
|
|
2285
|
|
|
|
|
|
|
F now no longer fails with -outputheader => never |
2286
|
|
|
|
|
|
|
(an obscure bug). |
2287
|
|
|
|
|
|
|
|
2288
|
|
|
|
|
|
|
=item IMPROVEMENT |
2289
|
|
|
|
|
|
|
|
2290
|
|
|
|
|
|
|
F (in the warnings section) |
2291
|
|
|
|
|
|
|
and F now more carefully document how they |
2292
|
|
|
|
|
|
|
handle (and do not handle) numerical precision problems, |
2293
|
|
|
|
|
|
|
and other general limits. Thanks to Yuri Pradkin for prompting |
2294
|
|
|
|
|
|
|
this documentation. |
2295
|
|
|
|
|
|
|
|
2296
|
|
|
|
|
|
|
=item IMPROVEMENT |
2297
|
|
|
|
|
|
|
|
2298
|
|
|
|
|
|
|
C |
2299
|
|
|
|
|
|
|
is now restored from C. |
2300
|
|
|
|
|
|
|
|
2301
|
|
|
|
|
|
|
=item IMPROVEMENT |
2302
|
|
|
|
|
|
|
|
2303
|
|
|
|
|
|
|
Documentation for multiple styles of input approaches
2304
|
|
|
|
|
|
|
(including performance description) added to L. |
2305
|
|
|
|
|
|
|
|
2306
|
|
|
|
|
|
|
=back |
2307
|
|
|
|
|
|
|
|
2308
|
|
|
|
|
|
|
=head2 2.22, 2010-10-31 |
2309
|
|
|
|
|
|
|
One new tool F and several bug fixes for Perl 5.10. |
2310
|
|
|
|
|
|
|
|
2311
|
|
|
|
|
|
|
=over 4 |
2312
|
|
|
|
|
|
|
|
2313
|
|
|
|
|
|
|
=item BUG FIX |
2314
|
|
|
|
|
|
|
|
2315
|
|
|
|
|
|
|
F now correctly handles n-way merges. |
2316
|
|
|
|
|
|
|
Bug reported by Yuri Pradkin. |
2317
|
|
|
|
|
|
|
|
2318
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE
2319
|
|
|
|
|
|
|
|
2320
|
|
|
|
|
|
|
F now defaults to I<not> padding the last column.
2321
|
|
|
|
|
|
|
|
2322
|
|
|
|
|
|
|
=item ADDITION |
2323
|
|
|
|
|
|
|
|
2324
|
|
|
|
|
|
|
F now takes B<-N NewColumn> to give the new |
2325
|
|
|
|
|
|
|
column a name other than "count". Feature requested by Mike Rouch |
2326
|
|
|
|
|
|
|
in January 2005. |
2327
|
|
|
|
|
|
|
|
2328
|
|
|
|
|
|
|
=item ADDITION |
2329
|
|
|
|
|
|
|
|
2330
|
|
|
|
|
|
|
New program F copies the last value of a column |
2331
|
|
|
|
|
|
|
into a new column copylast_column of the next row. |
2332
|
|
|
|
|
|
|
New program requested by Fabio Silva; |
2333
|
|
|
|
|
|
|
useful for converting dbmultistats output into dbrvstatdiff input. |
2334
|
|
|
|
|
|
|
|
2335
|
|
|
|
|
|
|
=item BUG FIX |
2336
|
|
|
|
|
|
|
|
2337
|
|
|
|
|
|
|
Several tools (particularly F and F) would |
2338
|
|
|
|
|
|
|
report errors like "Unbalanced string table refcount: (1) for "STDOUT" |
2339
|
|
|
|
|
|
|
during global destruction" on exit, at least on certain versions |
2340
|
|
|
|
|
|
|
of Perl (for me on 5.10.1), but similar errors have been off-and-on |
2341
|
|
|
|
|
|
|
for several Perl releases. Although I think my code looked |
2342
|
|
|
|
|
|
|
OK, I worked around this problem with a different way of handling |
2343
|
|
|
|
|
|
|
standard IO redirection. |
2344
|
|
|
|
|
|
|
|
2345
|
|
|
|
|
|
|
=back |
2346
|
|
|
|
|
|
|
|
2347
|
|
|
|
|
|
|
|
2348
|
|
|
|
|
|
|
=head2 2.23, 2011-03-10 |
2349
|
|
|
|
|
|
|
Several small portability bugfixes; improved F for large datasets |
2350
|
|
|
|
|
|
|
|
2351
|
|
|
|
|
|
|
=over 4 |
2352
|
|
|
|
|
|
|
|
2353
|
|
|
|
|
|
|
=item IMPROVEMENT |
2354
|
|
|
|
|
|
|
|
2355
|
|
|
|
|
|
|
Documentation to F was changed to use "sd" to refer to |
2356
|
|
|
|
|
|
|
standard deviation, not "ss" (which might be confused with sum-of-squares). |
2357
|
|
|
|
|
|
|
|
2358
|
|
|
|
|
|
|
=item BUG FIX |
2359
|
|
|
|
|
|
|
|
2360
|
|
|
|
|
|
|
This documentation about F was missing the F<-k> option |
2361
|
|
|
|
|
|
|
in some cases. |
2362
|
|
|
|
|
|
|
|
2363
|
|
|
|
|
|
|
=item BUG FIX |
2364
|
|
|
|
|
|
|
|
2365
|
|
|
|
|
|
|
F was failing on MacOS-10.6.3 for some tests with |
2366
|
|
|
|
|
|
|
the error |
2367
|
|
|
|
|
|
|
|
2368
|
|
|
|
|
|
|
dbmapreduce: cannot run external dbmapreduce reduce program (perl TEST/dbmapreduce_external_with_key.pl) |
2369
|
|
|
|
|
|
|
|
2370
|
|
|
|
|
|
|
The problem seemed to be only in the error, not in operation. |
2371
|
|
|
|
|
|
|
On MacOS, the error is now suppressed. |
2372
|
|
|
|
|
|
|
Thanks to Alefiya Hussain for providing access to a Mac system |
2373
|
|
|
|
|
|
|
that allowed debugging of this problem. |
2374
|
|
|
|
|
|
|
|
2375
|
|
|
|
|
|
|
=item IMPROVEMENT |
2376
|
|
|
|
|
|
|
|
2377
|
|
|
|
|
|
|
The F command requires an external |
2378
|
|
|
|
|
|
|
Perl library (F). On computers that |
2379
|
|
|
|
|
|
|
lack this optional library, previously Fsdb would configure |
2380
|
|
|
|
|
|
|
with a warning and then test cases would fail. |
2381
|
|
|
|
|
|
|
Now those test cases are skipped with an additional warning. |
2382
|
|
|
|
|
|
|
|
2383
|
|
|
|
|
|
|
=item BUG FIX |
2384
|
|
|
|
|
|
|
|
2385
|
|
|
|
|
|
|
The test suite now supports alternative valid output, as a hack |
2386
|
|
|
|
|
|
|
to account for last-digit floating point differences. |
2387
|
|
|
|
|
|
|
(Not very satisfying :-( )
2388
|
|
|
|
|
|
|
|
2389
|
|
|
|
|
|
|
=item BUG FIX |
2390
|
|
|
|
|
|
|
|
2391
|
|
|
|
|
|
|
F output for confidence intervals on very large |
2392
|
|
|
|
|
|
|
datasets has changed. Previously it failed for more than 2^31-1 |
2393
|
|
|
|
|
|
|
records, and handling of T-Distributions with thousands of rows |
2394
|
|
|
|
|
|
|
was a bit dubious. Now datasets with more than 10000 are considered |
2395
|
|
|
|
|
|
|
infinitely large and hopefully correctly handled. |
2396
|
|
|
|
|
|
|
|
2397
|
|
|
|
|
|
|
=back |
2398
|
|
|
|
|
|
|
|
2399
|
|
|
|
|
|
|
=head2 2.24, 2011-04-15 |
2400
|
|
|
|
|
|
|
Improvements to fix an old bug in dbmapreduce with different field separators |
2401
|
|
|
|
|
|
|
|
2402
|
|
|
|
|
|
|
=over 4 |
2403
|
|
|
|
|
|
|
|
2404
|
|
|
|
|
|
|
=item IMPROVEMENT |
2405
|
|
|
|
|
|
|
|
2406
|
|
|
|
|
|
|
The F command had a C<--correct> option to |
2407
|
|
|
|
|
|
|
work around incompatible field-separators,
2408
|
|
|
|
|
|
|
but it did nothing. Now it does the correct but sad, data-losing
2409
|
|
|
|
|
|
|
thing. |
2410
|
|
|
|
|
|
|
|
2411
|
|
|
|
|
|
|
=item IMPROVEMENT |
2412
|
|
|
|
|
|
|
|
2413
|
|
|
|
|
|
|
The F command |
2414
|
|
|
|
|
|
|
previously failed with an error message when invoked |
2415
|
|
|
|
|
|
|
on input with a non-default field separator. |
2416
|
|
|
|
|
|
|
The root cause was the underlying F |
2417
|
|
|
|
|
|
|
that did not handle the case of reducers that generated |
2418
|
|
|
|
|
|
|
output with a different field separator than the input. |
2419
|
|
|
|
|
|
|
We now detect and repair incompatible field separators. |
2420
|
|
|
|
|
|
|
This change corrects a problem originally documented and detected |
2421
|
|
|
|
|
|
|
in Fsdb-2.20. |
2422
|
|
|
|
|
|
|
Bug re-reported by Unkyu Park. |
2423
|
|
|
|
|
|
|
|
2424
|
|
|
|
|
|
|
=back |
2425
|
|
|
|
|
|
|
|
2426
|
|
|
|
|
|
|
=head2 2.25, 2011-08-07 |
2427
|
|
|
|
|
|
|
Two new tools, F and F, and a bugfix for two people. |
2428
|
|
|
|
|
|
|
|
2429
|
|
|
|
|
|
|
=over 4 |
2430
|
|
|
|
|
|
|
|
2431
|
|
|
|
|
|
|
=item IMPROVEMENT |
2432
|
|
|
|
|
|
|
|
2433
|
|
|
|
|
|
|
F now supports a F<--utc> option, |
2434
|
|
|
|
|
|
|
which also fixes this test case for users outside of the Pacific |
2435
|
|
|
|
|
|
|
time zone. Bug reported by David Graff, and also by Peter Desnoyers |
2436
|
|
|
|
|
|
|
(within a week of each other :-) |
2437
|
|
|
|
|
|
|
|
2438
|
|
|
|
|
|
|
=item NEW |
2439
|
|
|
|
|
|
|
|
2440
|
|
|
|
|
|
|
F can convert simple, very regular XML files into Fsdb. |
2441
|
|
|
|
|
|
|
|
2442
|
|
|
|
|
|
|
=item NEW |
2443
|
|
|
|
|
|
|
|
2444
|
|
|
|
|
|
|
F "pivots" a file, converting multiple rows |
2445
|
|
|
|
|
|
|
corresponding to the same entity into a single row with multiple columns. |
2446
|
|
|
|
|
|
|
|
2447
|
|
|
|
|
|
|
=back |
2448
|
|
|
|
|
|
|
|
2449
|
|
|
|
|
|
|
=head2 2.26, 2011-12-12 |
2450
|
|
|
|
|
|
|
Bug fixes, particularly for perl-5.14.2. |
2451
|
|
|
|
|
|
|
|
2452
|
|
|
|
|
|
|
=over 4 |
2453
|
|
|
|
|
|
|
|
2454
|
|
|
|
|
|
|
=item BUG FIX |
2455
|
|
|
|
|
|
|
|
2456
|
|
|
|
|
|
|
Bugs fixed in L manual page. |
2457
|
|
|
|
|
|
|
|
2458
|
|
|
|
|
|
|
=item BUG FIX |
2459
|
|
|
|
|
|
|
|
2460
|
|
|
|
|
|
|
Fixed problems where L was truncating floating point numbers |
2461
|
|
|
|
|
|
|
when sorting. This strange behavior happens as of perl-5.14.2 and |
2462
|
|
|
|
|
|
|
it I<seems> like a Perl bug. I've worked around it for the test suites,
2463
|
|
|
|
|
|
|
but I'm a bit nervous. |
2464
|
|
|
|
|
|
|
|
2465
|
|
|
|
|
|
|
=back |
2466
|
|
|
|
|
|
|
|
2467
|
|
|
|
|
|
|
=head2 2.27, 2012-11-15 |
2468
|
|
|
|
|
|
|
Accumulated bug fixes. |
2469
|
|
|
|
|
|
|
|
2470
|
|
|
|
|
|
|
=over 4 |
2471
|
|
|
|
|
|
|
|
2472
|
|
|
|
|
|
|
=item IMPROVEMENT |
2473
|
|
|
|
|
|
|
|
2474
|
|
|
|
|
|
|
F now reports errors in CSV input with real diagnostics.
2475
|
|
|
|
|
|
|
|
2476
|
|
|
|
|
|
|
=item IMPROVEMENT |
2477
|
|
|
|
|
|
|
|
2478
|
|
|
|
|
|
|
F can now compute median, when given the C<-m> option. |
2479
|
|
|
|
|
|
|
|
2480
|
|
|
|
|
|
|
=item BUG FIX |
2481
|
|
|
|
|
|
|
|
2482
|
|
|
|
|
|
|
F non-numeric handling (the C<-a> option) now works properly. |
2483
|
|
|
|
|
|
|
|
2484
|
|
|
|
|
|
|
=item DOCUMENTATION |
2485
|
|
|
|
|
|
|
|
2486
|
|
|
|
|
|
|
The internal |
2487
|
|
|
|
|
|
|
F test framework |
2488
|
|
|
|
|
|
|
is now documented. |
2489
|
|
|
|
|
|
|
|
2490
|
|
|
|
|
|
|
=item BUG FIX |
2491
|
|
|
|
|
|
|
|
2492
|
|
|
|
|
|
|
F now correctly handles the case where there is no input |
2493
|
|
|
|
|
|
|
(previously it output a blank line, which is a malformed fsdb file). |
2494
|
|
|
|
|
|
|
Thanks to Yuri Pradkin for reporting this bug. |
2495
|
|
|
|
|
|
|
|
2496
|
|
|
|
|
|
|
=back |
2497
|
|
|
|
|
|
|
|
2498
|
|
|
|
|
|
|
=head2 2.28, 2012-11-15 |
2499
|
|
|
|
|
|
|
A quick release to fix most rpmlint errors. |
2500
|
|
|
|
|
|
|
|
2501
|
|
|
|
|
|
|
=over 4 |
2502
|
|
|
|
|
|
|
|
2503
|
|
|
|
|
|
|
=item BUG FIX |
2504
|
|
|
|
|
|
|
|
2505
|
|
|
|
|
|
|
Fixed a number of minor release problems (wrong permissions, old FSF |
2506
|
|
|
|
|
|
|
address, etc.) found by rpmlint. |
2507
|
|
|
|
|
|
|
|
2508
|
|
|
|
|
|
|
=back |
2509
|
|
|
|
|
|
|
|
2510
|
|
|
|
|
|
|
=head2 2.29, 2012-11-20 |
2511
|
|
|
|
|
|
|
a quick release for CPAN testing |
2512
|
|
|
|
|
|
|
|
2513
|
|
|
|
|
|
|
=over 4 |
2514
|
|
|
|
|
|
|
|
2515
|
|
|
|
|
|
|
=item IMPROVEMENT |
2516
|
|
|
|
|
|
|
|
2517
|
|
|
|
|
|
|
Tweaked the RPM spec. |
2518
|
|
|
|
|
|
|
|
2519
|
|
|
|
|
|
|
=item IMPROVEMENT |
2520
|
|
|
|
|
|
|
|
2521
|
|
|
|
|
|
|
Modified F to fail gracefully on Perl installations |
2522
|
|
|
|
|
|
|
that lack threads. (Without this fix, I get massive failures |
2523
|
|
|
|
|
|
|
in the non-ithreads test system.) |
2524
|
|
|
|
|
|
|
|
2525
|
|
|
|
|
|
|
=back |
2526
|
|
|
|
|
|
|
|
2527
|
|
|
|
|
|
|
=head2 2.30, 2012-11-25 |
2528
|
|
|
|
|
|
|
improvements to perl portability |
2529
|
|
|
|
|
|
|
|
2530
|
|
|
|
|
|
|
=over 4 |
2531
|
|
|
|
|
|
|
|
2532
|
|
|
|
|
|
|
=item BUG FIX |
2533
|
|
|
|
|
|
|
|
2534
|
|
|
|
|
|
|
Removed unicode character in documentation of F
2535
|
|
|
|
|
|
|
so pod tests will pass. (Sigh, that should work :-( ) |
2536
|
|
|
|
|
|
|
|
2537
|
|
|
|
|
|
|
=item BUG FIX |
2538
|
|
|
|
|
|
|
|
2539
|
|
|
|
|
|
|
Fixed test suite failures on 5 tests (F |
2540
|
|
|
|
|
|
|
was the first) due to L's addition of a period. |
2541
|
|
|
|
|
|
|
This problem was breaking Fsdb on perl-5.17. |
2542
|
|
|
|
|
|
|
Thanks to Michael McQuaid for helping diagnose this problem. |
2543
|
|
|
|
|
|
|
|
2544
|
|
|
|
|
|
|
=item IMPROVEMENT |
2545
|
|
|
|
|
|
|
|
2546
|
|
|
|
|
|
|
The test suite now prints out the names of tests it tries. |
2547
|
|
|
|
|
|
|
|
2548
|
|
|
|
|
|
|
=back |
2549
|
|
|
|
|
|
|
|
2550
|
|
|
|
|
|
|
=head2 2.31, 2012-11-28 |
2551
|
|
|
|
|
|
|
A release with actual improvements to dbfilepivot and dbrowuniq. |
2552
|
|
|
|
|
|
|
|
2553
|
|
|
|
|
|
|
=over 4 |
2554
|
|
|
|
|
|
|
|
2555
|
|
|
|
|
|
|
=item BUG FIX |
2556
|
|
|
|
|
|
|
|
2557
|
|
|
|
|
|
|
Documentation fixes: typos in L, |
2558
|
|
|
|
|
|
|
bugs in L, |
2559
|
|
|
|
|
|
|
clarification for comment handling in L. |
2560
|
|
|
|
|
|
|
|
2561
|
|
|
|
|
|
|
=item IMPROVEMENT |
2562
|
|
|
|
|
|
|
|
2563
|
|
|
|
|
|
|
Previously L assumed the input was grouped by keys |
2564
|
|
|
|
|
|
|
and didn't verify that pre-condition.
2565
|
|
|
|
|
|
|
Now there is no pre-condition (it will sort the input by default), |
2566
|
|
|
|
|
|
|
and it checks if the invariant is violated. |
2567
|
|
|
|
|
|
|
|
2568
|
|
|
|
|
|
|
=item BUG FIX |
2569
|
|
|
|
|
|
|
|
2570
|
|
|
|
|
|
|
Previously L failed if the input had comments (oops :-); |
2571
|
|
|
|
|
|
|
no longer. |
2572
|
|
|
|
|
|
|
|
2573
|
|
|
|
|
|
|
=item IMPROVEMENT |
2574
|
|
|
|
|
|
|
|
2575
|
|
|
|
|
|
|
Now L has the C<-L> option to preserve the last |
2576
|
|
|
|
|
|
|
unique row (instead of the first), a common idiom. |
2577
|
|
|
|
|
|
|
|
2578
|
|
|
|
|
|
|
=back |
2579
|
|
|
|
|
|
|
|
2580
|
|
|
|
|
|
|
=head2 2.32, 2012-12-21 |
2581
|
|
|
|
|
|
|
Test suites should now be more numerically robust. |
2582
|
|
|
|
|
|
|
|
2583
|
|
|
|
|
|
|
=over 4 |
2584
|
|
|
|
|
|
|
|
2585
|
|
|
|
|
|
|
=item NEW |
2586
|
|
|
|
|
|
|
|
2587
|
|
|
|
|
|
|
New L does fsdb-aware file differencing. |
2588
|
|
|
|
|
|
|
It does not do smart intuition of add/removes like Unix diff(1), |
2589
|
|
|
|
|
|
|
but it does know about columns, and with C<-E>, it does |
2590
|
|
|
|
|
|
|
numeric-aware differences. |
2591
|
|
|
|
|
|
|
|
2592
|
|
|
|
|
|
|
=item IMPROVEMENT |
2593
|
|
|
|
|
|
|
|
2594
|
|
|
|
|
|
|
Test suites that are numeric now use L to do numeric-aware |
2595
|
|
|
|
|
|
|
comparisons, so the test suite should now be robust to slightly different |
2596
|
|
|
|
|
|
|
computers and operating systems and compilers than what I use.
2597
|
|
|
|
|
|
|
|
2598
|
|
|
|
|
|
|
=back |
2599
|
|
|
|
|
|
|
|
2600
|
|
|
|
|
|
|
=head2 2.33, 2012-12-23 |
2601
|
|
|
|
|
|
|
Minor fixes to some test cases. |
2602
|
|
|
|
|
|
|
|
2603
|
|
|
|
|
|
|
=over 4 |
2604
|
|
|
|
|
|
|
|
2605
|
|
|
|
|
|
|
=item IMPROVEMENT |
2606
|
|
|
|
|
|
|
|
2607
|
|
|
|
|
|
|
L and L |
2608
|
|
|
|
|
|
|
now support the C<-N> option to give the new column a
2609
|
|
|
|
|
|
|
different name. (And test cases where this duplication mattered
2610
|
|
|
|
|
|
|
have been fixed.) |
2611
|
|
|
|
|
|
|
|
2612
|
|
|
|
|
|
|
=item IMPROVEMENT |
2613
|
|
|
|
|
|
|
|
2614
|
|
|
|
|
|
|
L now shows the t-test breakpoint with a reasonable number of
2615
|
|
|
|
|
|
|
floating point digits. |
2616
|
|
|
|
|
|
|
|
2617
|
|
|
|
|
|
|
=item BUG FIX |
2618
|
|
|
|
|
|
|
|
2619
|
|
|
|
|
|
|
Fixed a numerical stability problem in the F test case. |
2620
|
|
|
|
|
|
|
|
2621
|
|
|
|
|
|
|
=back |
2622
|
|
|
|
|
|
|
|
2623
|
|
|
|
|
|
|
=head1 WHAT'S NEW |
2624
|
|
|
|
|
|
|
|
2625
|
|
|
|
|
|
|
=head2 2.34, 2013-02-10 |
2626
|
|
|
|
|
|
|
Parallelism in L. |
2627
|
|
|
|
|
|
|
|
2628
|
|
|
|
|
|
|
=over 4 |
2629
|
|
|
|
|
|
|
|
2630
|
|
|
|
|
|
|
=item IMPROVEMENT |
2631
|
|
|
|
|
|
|
|
2632
|
|
|
|
|
|
|
Documentation for L now includes resource requirements.
2633
|
|
|
|
|
|
|
|
2634
|
|
|
|
|
|
|
=item IMPROVEMENT |
2635
|
|
|
|
|
|
|
|
2636
|
|
|
|
|
|
|
Default memory usage for L is now about 256MB. |
2637
|
|
|
|
|
|
|
(The world keeps moving forward.) |
2638
|
|
|
|
|
|
|
|
2639
|
|
|
|
|
|
|
=item IMPROVEMENT |
2640
|
|
|
|
|
|
|
|
2641
|
|
|
|
|
|
|
L now does merging in parallel. |
2642
|
|
|
|
|
|
|
As a side-effect, L should be faster when |
2643
|
|
|
|
|
|
|
input overflows memory. The level of parallelism |
2644
|
|
|
|
|
|
|
can be limited with the C<--parallelism> option. |
2645
|
|
|
|
|
|
|
(There is more work to do here, but we're off to a start.) |
2646
|
|
|
|
|
|
|
|
2647
|
|
|
|
|
|
|
=back |
2648
|
|
|
|
|
|
|
|
2649
|
|
|
|
|
|
|
=head2 2.35, 2013-02-23 |
2650
|
|
|
|
|
|
|
Improvements to dbmerge parallelism |
2651
|
|
|
|
|
|
|
|
2652
|
|
|
|
|
|
|
=over 4 |
2653
|
|
|
|
|
|
|
|
2654
|
|
|
|
|
|
|
=item BUG FIX |
2655
|
|
|
|
|
|
|
|
2656
|
|
|
|
|
|
|
Fsdb temporary files are now created more securely (with File::Temp). |
2657
|
|
|
|
|
|
|
|
2658
|
|
|
|
|
|
|
=item IMPROVEMENT |
2659
|
|
|
|
|
|
|
|
2660
|
|
|
|
|
|
|
Programs that sort or merge on fields (L, L, L, |
2661
|
|
|
|
|
|
|
L) now report an error if no fields on which to join or merge |
2662
|
|
|
|
|
|
|
are given. |
2663
|
|
|
|
|
|
|
|
2664
|
|
|
|
|
|
|
=item IMPROVEMENT |
2665
|
|
|
|
|
|
|
|
2666
|
|
|
|
|
|
|
Parallelism in L should now be more consistent,
2667
|
|
|
|
|
|
|
with less starting and stopping. |
2668
|
|
|
|
|
|
|
|
2669
|
|
|
|
|
|
|
=item IMPROVEMENT |
2670
|
|
|
|
|
|
|
In L, the C<--xargs> option lets one give input filenames on |
2671
|
|
|
|
|
|
|
standard input, rather than the command line. |
2672
|
|
|
|
|
|
|
This feature paves the way for faster dbsort for large inputs |
2673
|
|
|
|
|
|
|
(by pipelining sorting and merging), expected in the next release. |
2674
|
|
|
|
|
|
|
|
2675
|
|
|
|
|
|
|
=back |
2676
|
|
|
|
|
|
|
|
2677
|
|
|
|
|
|
|
|
2678
|
|
|
|
|
|
|
=head2 2.36, 2013-02-25 |
2679
|
|
|
|
|
|
|
dbsort pipelines with dbmerge |
2680
|
|
|
|
|
|
|
|
2681
|
|
|
|
|
|
|
=over 4 |
2682
|
|
|
|
|
|
|
|
2683
|
|
|
|
|
|
|
=item IMPROVEMENT |
2684
|
|
|
|
|
|
|
For large inputs, |
2685
|
|
|
|
|
|
|
L now pipelines sorting and merging, |
2686
|
|
|
|
|
|
|
allowing earlier processing. |
2687
|
|
|
|
|
|
|
|
2688
|
|
|
|
|
|
|
=item BUG FIX |
2689
|
|
|
|
|
|
|
Since 2.35, L delayed cleanup of intermediate files, |
2690
|
|
|
|
|
|
|
thereby requiring extra disk space. |
2691
|
|
|
|
|
|
|
|
2692
|
|
|
|
|
|
|
=back |
2693
|
|
|
|
|
|
|
|
2694
|
|
|
|
|
|
|
=head2 2.37, 2013-02-26 |
2695
|
|
|
|
|
|
|
quick bugfix to support parallel sort and merge from recent releases |
2696
|
|
|
|
|
|
|
|
2697
|
|
|
|
|
|
|
=over 4 |
2698
|
|
|
|
|
|
|
|
2699
|
|
|
|
|
|
|
=item BUG FIX |
2700
|
|
|
|
|
|
|
Since 2.35, L delayed removal of input files given by |
2701
|
|
|
|
|
|
|
C<--xargs>. This problem is now fixed. |
2702
|
|
|
|
|
|
|
|
2703
|
|
|
|
|
|
|
=back |
2704
|
|
|
|
|
|
|
|
2705
|
|
|
|
|
|
|
|
2706
|
|
|
|
|
|
|
=head2 2.38, 2013-04-29 |
2707
|
|
|
|
|
|
|
minor bug fixes |
2708
|
|
|
|
|
|
|
|
2709
|
|
|
|
|
|
|
=over 4 |
2710
|
|
|
|
|
|
|
|
2711
|
|
|
|
|
|
|
=item CLARIFICATION |
2712
|
|
|
|
|
|
|
|
2713
|
|
|
|
|
|
|
Configure now rejects Windows since tests seem to hang |
2714
|
|
|
|
|
|
|
on some versions of Windows. |
2715
|
|
|
|
|
|
|
(I would love help from a Windows developer to get this problem fixed, |
2716
|
|
|
|
|
|
|
but I cannot do it.) See F. |
2717
|
|
|
|
|
|
|
|
2718
|
|
|
|
|
|
|
=item IMPROVEMENT |
2719
|
|
|
|
|
|
|
|
2720
|
|
|
|
|
|
|
All programs that use temporary files |
2721
|
|
|
|
|
|
|
(L, L, L, L) |
2722
|
|
|
|
|
|
|
now take the C<-T> option |
2723
|
|
|
|
|
|
|
and set the temporary directory consistently. |
2724
|
|
|
|
|
|
|
|
2725
|
|
|
|
|
|
|
In addition, error messages are better when the temporary directory |
2726
|
|
|
|
|
|
|
has problems. Problem reported by Liang Zhu. |
2727
|
|
|
|
|
|
|
|
2728
|
|
|
|
|
|
|
=item BUG FIX |
2729
|
|
|
|
|
|
|
|
2730
|
|
|
|
|
|
|
L was failing with external, map-reduce aware reducers |
2731
|
|
|
|
|
|
|
(when invoked with -M and an external program). |
2732
|
|
|
|
|
|
|
(Sigh, did this case ever work?) |
2733
|
|
|
|
|
|
|
This case should now work. |
2734
|
|
|
|
|
|
|
Thanks to Yuri Pradkin for reporting this bug (in 2011). |
2735
|
|
|
|
|
|
|
|
2736
|
|
|
|
|
|
|
=item BUG FIX |
2737
|
|
|
|
|
|
|
|
2738
|
|
|
|
|
|
|
Fixed perl-5.10 problem with L. |
2739
|
|
|
|
|
|
|
Thanks to Yuri Pradkin for reporting this bug (in 2013). |
2740
|
|
|
|
|
|
|
|
2741
|
|
|
|
|
|
|
=back |
2742
|
|
|
|
|
|
|
|
2743
|
|
|
|
|
|
|
=head2 2.39, 2013-05-31
2744
|
|
|
|
|
|
|
quick release for the dbrowuniq extension |
2745
|
|
|
|
|
|
|
|
2746
|
|
|
|
|
|
|
=over 4 |
2747
|
|
|
|
|
|
|
|
2748
|
|
|
|
|
|
|
=item BUG FIX |
2749
|
|
|
|
|
|
|
|
2750
|
|
|
|
|
|
|
Actually in 2.38, the Fedora F<.spec> got cleaner dependencies. |
2751
|
|
|
|
|
|
|
Suggestion from Christopher Meng via L. |
2752
|
|
|
|
|
|
|
|
2753
|
|
|
|
|
|
|
=item ENHANCEMENT |
2754
|
|
|
|
|
|
|
|
2755
|
|
|
|
|
|
|
Fsdb files are now explicitly set into UTF-8 encoding, |
2756
|
|
|
|
|
|
|
unless one specifies C<-encoding> to C. |
2757
|
|
|
|
|
|
|
|
2758
|
|
|
|
|
|
|
=item ENHANCEMENT |
2759
|
|
|
|
|
|
|
|
2760
|
|
|
|
|
|
|
L now supports C<-I> for incremental counting. |
2761
|
|
|
|
|
|
|
|
2762
|
|
|
|
|
|
|
=back |
2763
|
|
|
|
|
|
|
|
2764
|
|
|
|
|
|
|
=head2 2.40, 2013-07-13 |
2765
|
|
|
|
|
|
|
small bug fixes |
2766
|
|
|
|
|
|
|
|
2767
|
|
|
|
|
|
|
=over 4 |
2768
|
|
|
|
|
|
|
|
2769
|
|
|
|
|
|
|
=item BUG FIX |
2770
|
|
|
|
|
|
|
|
2771
|
|
|
|
|
|
|
L now has more respect for a user-given temporary directory; |
2772
|
|
|
|
|
|
|
it no longer is ignored for merging. |
2773
|
|
|
|
|
|
|
|
2774
|
|
|
|
|
|
|
=item IMPROVEMENT |
2775
|
|
|
|
|
|
|
|
2776
|
|
|
|
|
|
|
L now has options to output the first, last, and both first |
2777
|
|
|
|
|
|
|
and last rows of a run (C<-F>, C<-L>, and C<-B>). |
2778
|
|
|
|
|
|
|
|
2779
|
|
|
|
|
|
|
=item BUG FIX |
2780
|
|
|
|
|
|
|
|
2781
|
|
|
|
|
|
|
L now correctly handles C<-N>. Sigh, it didn't work before. |
2782
|
|
|
|
|
|
|
|
2783
|
|
|
|
|
|
|
=back |
2784
|
|
|
|
|
|
|
|
2785
|
|
|
|
|
|
|
=head2 2.41, 2013-07-29 |
2786
|
|
|
|
|
|
|
small bug and packaging fixes |
2787
|
|
|
|
|
|
|
|
2788
|
|
|
|
|
|
|
=over 4 |
2789
|
|
|
|
|
|
|
|
2790
|
|
|
|
|
|
|
=item ENHANCEMENT |
2791
|
|
|
|
|
|
|
|
2792
|
|
|
|
|
|
|
Documentation to L improved |
2793
|
|
|
|
|
|
|
(inspired by questions from Qian Kun). |
2794
|
|
|
|
|
|
|
|
2795
|
|
|
|
|
|
|
=item BUG FIX |
2796
|
|
|
|
|
|
|
|
2797
|
|
|
|
|
|
|
L no longer duplicates |
2798
|
|
|
|
|
|
|
singleton unique lines when outputting both (with C<-B>). |
2799
|
|
|
|
|
|
|
|
2800
|
|
|
|
|
|
|
=item BUG FIX |
2801
|
|
|
|
|
|
|
|
2802
|
|
|
|
|
|
|
Add missing C dependency to F. |
2803
|
|
|
|
|
|
|
|
2804
|
|
|
|
|
|
|
=item ENHANCEMENT |
2805
|
|
|
|
|
|
|
|
2806
|
|
|
|
|
|
|
Tests now show the diff of the failing output |
2807
|
|
|
|
|
|
|
if run with C. |
2808
|
|
|
|
|
|
|
|
2809
|
|
|
|
|
|
|
=item ENHANCEMENT |
2810
|
|
|
|
|
|
|
|
2811
|
|
|
|
|
|
|
L now includes documentation for how to output extra rows. |
2812
|
|
|
|
|
|
|
Suggestion from Yuri Pradkin. |
2813
|
|
|
|
|
|
|
|
2814
|
|
|
|
|
|
|
=item BUG FIX |
2815
|
|
|
|
|
|
|
|
2816
|
|
|
|
|
|
|
Several improvements to the Fedora package |
2817
|
|
|
|
|
|
|
from Michael Schwendt |
2818
|
|
|
|
|
|
|
via L, |
2819
|
|
|
|
|
|
|
and from the harsh master that is F. |
2820
|
|
|
|
|
|
|
(I am stymied at teaching it that "outliers" is spelled correctly. |
2821
|
|
|
|
|
|
|
Maybe I should send it Schneier's book. And an unresolvable |
2822
|
|
|
|
|
|
|
invalid-spec-name lurks in the SRPM.) |
2823
|
|
|
|
|
|
|
|
2824
|
|
|
|
|
|
|
=back |
2825
|
|
|
|
|
|
|
|
2826
|
|
|
|
|
|
|
=head2 2.42, 2013-07-31 |
2827
|
|
|
|
|
|
|
A bug fix and packaging release. |
2828
|
|
|
|
|
|
|
|
2829
|
|
|
|
|
|
|
=over 4 |
2830
|
|
|
|
|
|
|
|
2831
|
|
|
|
|
|
|
=item ENHANCEMENT |
2832
|
|
|
|
|
|
|
|
2833
|
|
|
|
|
|
|
Documentation to L improved |
2834
|
|
|
|
|
|
|
to better describe memory usage.
2835
|
|
|
|
|
|
|
(Based on problem report by Lin Quan.) |
2836
|
|
|
|
|
|
|
|
2837
|
|
|
|
|
|
|
=item BUG FIX |
2838
|
|
|
|
|
|
|
|
2839
|
|
|
|
|
|
|
The F<.spec> is now F |
2840
|
|
|
|
|
|
|
to satisfy F. |
2841
|
|
|
|
|
|
|
Thanks to Christopher Meng for a specific bug report. |
2842
|
|
|
|
|
|
|
|
2843
|
|
|
|
|
|
|
=item BUG FIX |
2844
|
|
|
|
|
|
|
|
2845
|
|
|
|
|
|
|
Test F no longer has a column |
2846
|
|
|
|
|
|
|
that caused failures because of numerical instability. |
2847
|
|
|
|
|
|
|
|
2848
|
|
|
|
|
|
|
=item BUG FIX |
2849
|
|
|
|
|
|
|
|
2850
|
|
|
|
|
|
|
Some tests now better handle bugs in old versions of perl (5.10, 5.12). |
2851
|
|
|
|
|
|
|
Thanks to Calvin Ardi for help debugging this on a Mac with perl-5.12, |
2852
|
|
|
|
|
|
|
but the fix should affect other platforms. |
2853
|
|
|
|
|
|
|
|
2854
|
|
|
|
|
|
|
=back |
2855
|
|
|
|
|
|
|
|
2856
|
|
|
|
|
|
|
=head2 2.43, 2013-08-27 |
2857
|
|
|
|
|
|
|
Adds in-file compression. |
2858
|
|
|
|
|
|
|
|
2859
|
|
|
|
|
|
|
=over 4 |
2860
|
|
|
|
|
|
|
|
2861
|
|
|
|
|
|
|
=item BUG FIX |
2862
|
|
|
|
|
|
|
|
2863
|
|
|
|
|
|
|
Changed the sort on F to strings |
2864
|
|
|
|
|
|
|
(from numerics) so we're less susceptible to false test-failures |
2865
|
|
|
|
|
|
|
due to floating point IO differences. |
2866
|
|
|
|
|
|
|
|
2867
|
|
|
|
|
|
|
=item EXPERIMENTAL ENHANCEMENT |
2868
|
|
|
|
|
|
|
|
2869
|
|
|
|
|
|
|
Yet more parallelism in L: |
2870
|
|
|
|
|
|
|
new "endgame-mode" builds a merge tree of processes at the end |
2871
|
|
|
|
|
|
|
of large merge tasks to get maximal parallelism.
2872
|
|
|
|
|
|
|
Currently this feature is off by default |
2873
|
|
|
|
|
|
|
because it can hang for some inputs. |
2874
|
|
|
|
|
|
|
Enable this experimental feature with C<--endgame>. |
2875
|
|
|
|
|
|
|
|
2876
|
|
|
|
|
|
|
=item ENHANCEMENT |
2877
|
|
|
|
|
|
|
|
2878
|
|
|
|
|
|
|
C now handles being given C objects |
2879
|
|
|
|
|
|
|
(as exercised by L). |
2880
|
|
|
|
|
|
|
|
2881
|
|
|
|
|
|
|
=item BUG FIX |
2882
|
|
|
|
|
|
|
|
2883
|
|
|
|
|
|
|
Handling of NamedTmpfiles now supports concurrency. |
2884
|
|
|
|
|
|
|
This fix will hopefully fix occasional |
2885
|
|
|
|
|
|
|
"Use of uninitialized value $_ in string ne at ...NamedTmpfile.pm line 93." |
2886
|
|
|
|
|
|
|
errors. |
2887
|
|
|
|
|
|
|
|
2888
|
|
|
|
|
|
|
=item BUG FIX |
2889
|
|
|
|
|
|
|
|
2890
|
|
|
|
|
|
|
Fsdb now requires perl 5.10. |
2891
|
|
|
|
|
|
|
This is a bug fix because some test cases used to require it, |
2892
|
|
|
|
|
|
|
but this fact was not properly documented. |
2893
|
|
|
|
|
|
|
(Back-porting to 5.008 would require removing all C<//> operators.)
2894
|
|
|
|
|
|
|
|
2895
|
|
|
|
|
|
|
=item ENHANCEMENT |
2896
|
|
|
|
|
|
|
|
2897
|
|
|
|
|
|
|
Fsdb now handles automatic compression of file contents. |
2898
|
|
|
|
|
|
|
Enable compression with C |
2899
|
|
|
|
|
|
|
(or C or C). |
2900
|
|
|
|
|
|
|
All programs should operate on compressed files |
2901
|
|
|
|
|
|
|
and leave the output with the same level of compression. |
2902
|
|
|
|
|
|
|
C is recommended as fastest and most efficient. |
2903
|
|
|
|
|
|
|
C produces unrepeatable output (and so has no
2904
|
|
|
|
|
|
|
output test), it seems to insist on adding a timestamp. |
2905
|
|
|
|
|
|
|
|
2906
|
|
|
|
|
|
|
=back |
2907
|
|
|
|
|
|
|
|
2908
|
|
|
|
|
|
|
=head2 2.44, 2013-10-02 |
2909
|
|
|
|
|
|
|
A major change--all threads are gone. |
2910
|
|
|
|
|
|
|
|
2911
|
|
|
|
|
|
|
=over 4 |
2912
|
|
|
|
|
|
|
|
2913
|
|
|
|
|
|
|
=item ENHANCEMENT |
2914
|
|
|
|
|
|
|
|
2915
|
|
|
|
|
|
|
Fsdb is now thread free and only uses processes for parallelism. |
2916
|
|
|
|
|
|
|
This change is a big change--the entire motivation for Fsdb-2 |
2917
|
|
|
|
|
|
|
was to exploit parallelism via threading. |
2918
|
|
|
|
|
|
|
Parallelism--good, but perl threading--bad for performance. |
2919
|
|
|
|
|
|
|
Horribly bad for performance. |
2920
|
|
|
|
|
|
|
About 20x worse than pipes on my box. |
2921
|
|
|
|
|
|
|
(See perl bug #119445 for the discussion.) |
2922
|
|
|
|
|
|
|
|
2923
|
|
|
|
|
|
|
=item NEW |
2924
|
|
|
|
|
|
|
|
2925
|
|
|
|
|
|
|
C provides a thread-like abstraction over forking, |
2926
|
|
|
|
|
|
|
with some nice support for callbacks in the parent upon child termination. |
2927
|
|
|
|
|
|
|
|
2928
|
|
|
|
|
|
|
=item ENHANCEMENT |
2929
|
|
|
|
|
|
|
|
2930
|
|
|
|
|
|
|
Details about removing threads: |
2931
|
|
|
|
|
|
|
C is thread free, |
2932
|
|
|
|
|
|
|
and new tests to verify each of its parts. |
2933
|
|
|
|
|
|
|
The easy cases are C, |
2934
|
|
|
|
|
|
|
C, C, C, and |
2935
|
|
|
|
|
|
|
C, each of which use it in simple ways (2013-09-09). |
2936
|
|
|
|
|
|
|
C is now thread free (2013-09-13), |
2937
|
|
|
|
|
|
|
but was a significant rewrite, |
2938
|
|
|
|
|
|
|
which brought C along. |
2939
|
|
|
|
|
|
|
C is partly thread free (2013-09-21), |
2940
|
|
|
|
|
|
|
again as a rewrite, |
2941
|
|
|
|
|
|
|
and it brings C along. |
2942
|
|
|
|
|
|
|
Full C support took much longer (2013-10-02). |
2943
|
|
|
|
|
|
|
|
2944
|
|
|
|
|
|
|
=item BUG FIX |
2945
|
|
|
|
|
|
|
|
2946
|
|
|
|
|
|
|
When running with user-only output (C<-n>), |
2947
|
|
|
|
|
|
|
L now resets the output vector C<$ofref> |
2948
|
|
|
|
|
|
|
after it has been output. |
2949
|
|
|
|
|
|
|
|
2950
|
|
|
|
|
|
|
=item NEW |
2951
|
|
|
|
|
|
|
|
2952
|
|
|
|
|
|
|
L will create all columns at the head of each row |
2953
|
|
|
|
|
|
|
with the C<--first> option. |
2954
|
|
|
|
|
|
|
|
2955
|
|
|
|
|
|
|
=item NEW |
2956
|
|
|
|
|
|
|
|
2957
|
|
|
|
|
|
|
L will concatenate two files, |
2958
|
|
|
|
|
|
|
verifying that they have the same schema. |
2959
|
|
|
|
|
|
|
|
2960
|
|
|
|
|
|
|
=item ENHANCEMENT |
2961
|
|
|
|
|
|
|
|
2962
|
|
|
|
|
|
|
L now passes comments through, |
2963
|
|
|
|
|
|
|
rather than eating them as before. |
2964
|
|
|
|
|
|
|
|
2965
|
|
|
|
|
|
|
Also, L now supports a C<--> option to prevent misinterpreting |
2966
|
|
|
|
|
|
|
sub-program parameters as for dbmapreduce. |
2967
|
|
|
|
|
|
|
|
2968
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
2969
|
|
|
|
|
|
|
|
2970
|
|
|
|
|
|
|
L no longer figures out if it needs to add the key |
2971
|
|
|
|
|
|
|
to the output. For multi-key-aware reducers, it never does |
2972
|
|
|
|
|
|
|
(and cannot). For non-multi-key-aware reducers, |
2973
|
|
|
|
|
|
|
it defaults to add the key and will now fail if the reducer adds the key |
2974
|
|
|
|
|
|
|
(with error "dbcolcreate: attempt to create pre-existing column..."). |
2975
|
|
|
|
|
|
|
In such cases, one must disable adding the key with the new |
2976
|
|
|
|
|
|
|
option C<--no-prepend-key>. |
2977
|
|
|
|
|
|
|
|
2978
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
2979
|
|
|
|
|
|
|
|
2980
|
|
|
|
|
|
|
L no longer copies the input field separator by default. |
2981
|
|
|
|
|
|
|
For multi-key-aware reducers, it never does |
2982
|
|
|
|
|
|
|
(and cannot). For non-multi-key-aware reducers, |
2983
|
|
|
|
|
|
|
it defaults to I<not> copying the field separator,
2984
|
|
|
|
|
|
|
but it will copy it (the old default) with the C<--copy-fs> option.
2985
|
|
|
|
|
|
|
|
2986
|
|
|
|
|
|
|
=back |
2987
|
|
|
|
|
|
|
|
2988
|
|
|
|
|
|
|
=head2 2.45, 2013-10-07 |
2989
|
|
|
|
|
|
|
cleanup from de-thread-ification |
2990
|
|
|
|
|
|
|
|
2991
|
|
|
|
|
|
|
=over 4 |
2992
|
|
|
|
|
|
|
|
2993
|
|
|
|
|
|
|
=item BUG FIX |
2994
|
|
|
|
|
|
|
|
2995
|
|
|
|
|
|
|
Corrected a fast busy-wait in L. |
2996
|
|
|
|
|
|
|
|
2997
|
|
|
|
|
|
|
=item ENHANCEMENT |
2998
|
|
|
|
|
|
|
|
2999
|
|
|
|
|
|
|
Endgame mode enabled in L; it (and also large cases of L) |
3000
|
|
|
|
|
|
|
should now exploit greater parallelism. |
3001
|
|
|
|
|
|
|
|
3002
|
|
|
|
|
|
|
=item BUG FIX |
3003
|
|
|
|
|
|
|
|
3004
|
|
|
|
|
|
|
Test case with C (gone since 2.44) now removed. |
3005
|
|
|
|
|
|
|
|
3006
|
|
|
|
|
|
|
=back |
3007
|
|
|
|
|
|
|
|
3008
|
|
|
|
|
|
|
=head2 2.46, 2013-10-08 |
3009
|
|
|
|
|
|
|
continuing cleanup of our no-threads version |
3010
|
|
|
|
|
|
|
|
3011
|
|
|
|
|
|
|
=over 4 |
3012
|
|
|
|
|
|
|
|
3013
|
|
|
|
|
|
|
=item BUG FIX |
3014
|
|
|
|
|
|
|
|
3015
|
|
|
|
|
|
|
Fixed some packaging details. |
3016
|
|
|
|
|
|
|
(Really, threads are no longer required, |
3017
|
|
|
|
|
|
|
missing tests in the MANIFEST.) |
3018
|
|
|
|
|
|
|
|
3019
|
|
|
|
|
|
|
=item IMPROVEMENT |
3020
|
|
|
|
|
|
|
|
3021
|
|
|
|
|
|
|
L now better communicates with the merge process to avoid |
3022
|
|
|
|
|
|
|
bursty parallelism. |
3023
|
|
|
|
|
|
|
|
3024
|
|
|
|
|
|
|
L now can take C<-autoflush => 1> |
3025
|
|
|
|
|
|
|
for line-buffered IO. |
3026
|
|
|
|
|
|
|
|
3027
|
|
|
|
|
|
|
=back |
3028
|
|
|
|
|
|
|
|
3029
|
|
|
|
|
|
|
=head2 2.47, 2013-10-12 |
3030
|
|
|
|
|
|
|
test suite cleanup for non-threaded perls |
3031
|
|
|
|
|
|
|
|
3032
|
|
|
|
|
|
|
=over 4 |
3033
|
|
|
|
|
|
|
|
3034
|
|
|
|
|
|
|
=item BUG FIX |
3035
|
|
|
|
|
|
|
|
3036
|
|
|
|
|
|
|
Removed some stray "use threads" in some test cases. |
3037
|
|
|
|
|
|
|
We didn't need them, and these were breaking non-threaded perls. |
3038
|
|
|
|
|
|
|
|
3039
|
|
|
|
|
|
|
=item BUG FIX |
3040
|
|
|
|
|
|
|
|
3041
|
|
|
|
|
|
|
Better handling of Fred cleanup; |
3042
|
|
|
|
|
|
|
should fix intermittent L failures on BSD. |
3043
|
|
|
|
|
|
|
|
3044
|
|
|
|
|
|
|
=item ENHANCEMENT |
3045
|
|
|
|
|
|
|
|
3046
|
|
|
|
|
|
|
Improved test framework to show output when tests fail. |
3047
|
|
|
|
|
|
|
(This time, for real.) |
3048
|
|
|
|
|
|
|
|
3049
|
|
|
|
|
|
|
=back |
3050
|
|
|
|
|
|
|
|
3051
|
|
|
|
|
|
|
=head2 2.48, 2014-01-03 |
3052
|
|
|
|
|
|
|
small bugfixes and improved release engineering |
3053
|
|
|
|
|
|
|
|
3054
|
|
|
|
|
|
|
=over 4 |
3055
|
|
|
|
|
|
|
|
3056
|
|
|
|
|
|
|
=item ENHANCEMENT |
3057
|
|
|
|
|
|
|
|
3058
|
|
|
|
|
|
|
Test suites now skip tests for libraries that are missing. |
3059
|
|
|
|
|
|
|
(Patch for missing C contributed by Calvin Ardi.) |
3060
|
|
|
|
|
|
|
|
3061
|
|
|
|
|
|
|
=item ENHANCEMENT |
3062
|
|
|
|
|
|
|
|
3063
|
|
|
|
|
|
|
Removed references to Jdb in the package specification. |
3064
|
|
|
|
|
|
|
Since the name was changed in 2008, there's no longer a huge |
3065
|
|
|
|
|
|
|
need for backwards compatibility.
3066
|
|
|
|
|
|
|
(Suggestion from Petr Šabata.)
3067
|
|
|
|
|
|
|
|
3068
|
|
|
|
|
|
|
=item ENHANCEMENT |
3069
|
|
|
|
|
|
|
|
3070
|
|
|
|
|
|
|
Test suites now invoke the perl using the path from C<$Config{perlpath}>. |
3071
|
|
|
|
|
|
|
Hopefully this helps testing in environments where there are multiple installed |
3072
|
|
|
|
|
|
|
perls and the default perl is not the same as the perl-under-test |
3073
|
|
|
|
|
|
|
(as happens in cpantesters.org). |
3074
|
|
|
|
|
|
|
|
3075
|
|
|
|
|
|
|
=item BUG FIX |
3076
|
|
|
|
|
|
|
|
3077
|
|
|
|
|
|
|
Added specific encoding to this manpage to account for |
3078
|
|
|
|
|
|
|
Unicode. Required to build correctly against perl-5.18. |
3079
|
|
|
|
|
|
|
|
3080
|
|
|
|
|
|
|
=back |
3081
|
|
|
|
|
|
|
|
3082
|
|
|
|
|
|
|
=head2 2.49, 2014-01-04 |
3083
|
|
|
|
|
|
|
bugfix to unicode handling in Fsdb IO (plus minor packaging fixes) |
3084
|
|
|
|
|
|
|
|
3085
|
|
|
|
|
|
|
=over 4 |
3086
|
|
|
|
|
|
|
|
3087
|
|
|
|
|
|
|
=item BUG FIX |
3088
|
|
|
|
|
|
|
|
3089
|
|
|
|
|
|
|
Restored a line in the F<.spec> to chmod g-s. |
3090
|
|
|
|
|
|
|
|
3091
|
|
|
|
|
|
|
=item BUG FIX |
3092
|
|
|
|
|
|
|
|
3093
|
|
|
|
|
|
|
Unicode decoding is now handled correctly for programs that read |
3094
|
|
|
|
|
|
|
from standard input. |
3095
|
|
|
|
|
|
|
(Also: New test scripts cover unicode input and output.) |
3096
|
|
|
|
|
|
|
|
3097
|
|
|
|
|
|
|
=item BUG FIX |
3098
|
|
|
|
|
|
|
|
3099
|
|
|
|
|
|
|
Fix to L documentation encoding line. |
3100
|
|
|
|
|
|
|
Addresses test failure in perl-5.16 and earlier. |
3101
|
|
|
|
|
|
|
(Who knew "encoding" had to be followed by a blank line.) |
3102
|
|
|
|
|
|
|
|
3103
|
|
|
|
|
|
|
=back |
3104
|
|
|
|
|
|
|
|
3105
|
|
|
|
|
|
|
=head1 WHAT'S NEW |
3106
|
|
|
|
|
|
|
|
3107
|
|
|
|
|
|
|
=head2 2.50, 2014-05-27 |
3108
|
|
|
|
|
|
|
a quick release for spec tweaks |
3109
|
|
|
|
|
|
|
|
3110
|
|
|
|
|
|
|
=over 4 |
3111
|
|
|
|
|
|
|
|
3112
|
|
|
|
|
|
|
=item ENHANCEMENT |
3113
|
|
|
|
|
|
|
|
3114
|
|
|
|
|
|
|
In L, the C<-N> (no output, even comments) option now |
3115
|
|
|
|
|
|
|
implies C<-n>, and it now suppresses the header and trailer. |
3116
|
|
|
|
|
|
|
|
3117
|
|
|
|
|
|
|
=item BUG FIX |
3118
|
|
|
|
|
|
|
|
3119
|
|
|
|
|
|
|
A few more tweaks to the F from Petr Šabata. |
3120
|
|
|
|
|
|
|
|
3121
|
|
|
|
|
|
|
=item BUG FIX |
3122
|
|
|
|
|
|
|
|
3123
|
|
|
|
|
|
|
Fixed 3 uses of C |
3124
|
|
|
|
|
|
|
failures (due to warnings, not real failures) on some platforms. |
3125
|
|
|
|
|
|
|
|
3126
|
|
|
|
|
|
|
=back |
3127
|
|
|
|
|
|
|
|
3128
|
|
|
|
|
|
|
=head2 2.51, 2014-09-05 |
3129
|
|
|
|
|
|
|
Feature enhancements to L, L, L, and new L |
3130
|
|
|
|
|
|
|
|
3131
|
|
|
|
|
|
|
=over 4 |
3132
|
|
|
|
|
|
|
|
3133
|
|
|
|
|
|
|
=item ENHANCEMENT |
3134
|
|
|
|
|
|
|
|
3135
|
|
|
|
|
|
|
L now has a C<--no-recreate-fatal> |
3136
|
|
|
|
|
|
|
that causes it to ignore creation of existing columns |
3137
|
|
|
|
|
|
|
(instead of failing). |
3138
|
|
|
|
|
|
|
|
3139
|
|
|
|
|
|
|
=item ENHANCEMENT |
3140
|
|
|
|
|
|
|
|
3141
|
|
|
|
|
|
|
L once again is robust to reducers |
3142
|
|
|
|
|
|
|
that output the key; |
3143
|
|
|
|
|
|
|
C<--no-prepend-key> is no longer mandatory. |
3144
|
|
|
|
|
|
|
|
3145
|
|
|
|
|
|
|
=item ENHANCEMENT |
3146
|
|
|
|
|
|
|
|
3147
|
|
|
|
|
|
|
L can now enumerate the output rows with C<-E>. |
3148
|
|
|
|
|
|
|
|
3149
|
|
|
|
|
|
|
=item BUG FIX |
3150
|
|
|
|
|
|
|
|
3151
|
|
|
|
|
|
|
L is more mathematically robust. |
3152
|
|
|
|
|
|
|
Previously for some inputs and some platforms, |
3153
|
|
|
|
|
|
|
floating point rounding could |
3154
|
|
|
|
|
|
|
sometimes cause square roots of negative numbers. |
3155
|
|
|
|
|
|
|
|
3156
|
|
|
|
|
|
|
=item NEW |
3157
|
|
|
|
|
|
|
|
3158
|
|
|
|
|
|
|
L converts the output of the MySQL or MariaDB |
3159
|
|
|
|
|
|
|
select command into fsdb format. |
3160
|
|
|
|
|
|
|
|
3161
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
3162
|
|
|
|
|
|
|
|
3163
|
|
|
|
|
|
|
L now outputs the I row |
3164
|
|
|
|
|
|
|
when doing sloppy numeric comparisons, |
3165
|
|
|
|
|
|
|
to better support test suites. |
3166
|
|
|
|
|
|
|
|
3167
|
|
|
|
|
|
|
=back |
3168
|
|
|
|
|
|
|
|
3169
|
|
|
|
|
|
|
=head2 2.52, 2014-11-03 |
3170
|
|
|
|
|
|
|
Fixing the test suite for line number changes. |
3171
|
|
|
|
|
|
|
|
3172
|
|
|
|
|
|
|
=over 4 |
3173
|
|
|
|
|
|
|
|
3174
|
|
|
|
|
|
|
=item ENHANCEMENT |
3175
|
|
|
|
|
|
|
|
3176
|
|
|
|
|
|
|
Test suites changes to be robust to exact line numbers of failures, |
3177
|
|
|
|
|
|
|
since different Perl releases fail on different lines. |
3178
|
|
|
|
|
|
|
L |
3179
|
|
|
|
|
|
|
|
3180
|
|
|
|
|
|
|
=back |
3181
|
|
|
|
|
|
|
|
3182
|
|
|
|
|
|
|
|
3183
|
|
|
|
|
|
|
=head2 2.53, 2014-11-26 |
3184
|
|
|
|
|
|
|
bug fixes and stability improvements to dbmapreduce |
3185
|
|
|
|
|
|
|
|
3186
|
|
|
|
|
|
|
=over 4 |
3187
|
|
|
|
|
|
|
|
3188
|
|
|
|
|
|
|
=item ENHANCEMENT |
3189
|
|
|
|
|
|
|
|
3190
|
|
|
|
|
|
|
The L now supports a C<--quiet> option. |
3191
|
|
|
|
|
|
|
|
3192
|
|
|
|
|
|
|
=item ENHANCEMENT |
3193
|
|
|
|
|
|
|
|
3194
|
|
|
|
|
|
|
Better documentation of L. |
3195
|
|
|
|
|
|
|
|
3196
|
|
|
|
|
|
|
=item BUGFIX |
3197
|
|
|
|
|
|
|
|
3198
|
|
|
|
|
|
|
Added groff-base and perl-podlators to the Fedora package spec. |
3199
|
|
|
|
|
|
|
Fixes L. |
3200
|
|
|
|
|
|
|
(Also in package 2.52-2.) |
3201
|
|
|
|
|
|
|
|
3202
|
|
|
|
|
|
|
=item BUGFIX |
3203
|
|
|
|
|
|
|
|
3204
|
|
|
|
|
|
|
An important stability improvement to L. |
3205
|
|
|
|
|
|
|
It, plus L, and L now support |
3206
|
|
|
|
|
|
|
controlled parallelism with the C<--parallelism=N> option. |
3207
|
|
|
|
|
|
|
They default to run with the number of available CPUs. |
3208
|
|
|
|
|
|
|
L also moderates its level of parallelism. |
3209
|
|
|
|
|
|
|
Previously it would create reducers as needed, |
3210
|
|
|
|
|
|
|
causing CPU thrashing if reducers ran much slower than data production. |
3211
|
|
|
|
|
|
|
|
3212
|
|
|
|
|
|
|
=item BUGFIX |
3213
|
|
|
|
|
|
|
|
3214
|
|
|
|
|
|
|
The combination of L with L now works |
3215
|
|
|
|
|
|
|
as it should. (The obscure bug was an interaction with L |
3216
|
|
|
|
|
|
|
with non-multi-key reducers that output their own key. L |
3217
|
|
|
|
|
|
|
has too many useful corner cases.) |
3218
|
|
|
|
|
|
|
|
3219
|
|
|
|
|
|
|
=back |
3220
|
|
|
|
|
|
|
|
3221
|
|
|
|
|
|
|
=head2 2.54, 2014-11-28 |
3222
|
|
|
|
|
|
|
fix for the test suite to correct failing tests on not-my-platform |
3223
|
|
|
|
|
|
|
|
3224
|
|
|
|
|
|
|
=over 4 |
3225
|
|
|
|
|
|
|
|
3226
|
|
|
|
|
|
|
=item BUGFIX |
3227
|
|
|
|
|
|
|
|
3228
|
|
|
|
|
|
|
Sigh, the test suite now has a test suite. |
3229
|
|
|
|
|
|
|
Because, yes, I broke it, causing many incorrect failures |
3230
|
|
|
|
|
|
|
at cpantesters. |
3231
|
|
|
|
|
|
|
Now fixed. |
3232
|
|
|
|
|
|
|
|
3233
|
|
|
|
|
|
|
=back |
3234
|
|
|
|
|
|
|
|
3235
|
|
|
|
|
|
|
=head2 2.55, 2015-01-05 |
3236
|
|
|
|
|
|
|
many spelling fixes and L tests are more robust to different numeric precision |
3237
|
|
|
|
|
|
|
|
3238
|
|
|
|
|
|
|
=over 4 |
3239
|
|
|
|
|
|
|
|
3240
|
|
|
|
|
|
|
=item ENHANCEMENT |
3241
|
|
|
|
|
|
|
|
3242
|
|
|
|
|
|
|
L now can be extra quiet, as I continue to try to track down |
3243
|
|
|
|
|
|
|
a numeric difference on FreeBSD AMD boxes. |
3244
|
|
|
|
|
|
|
|
3245
|
|
|
|
|
|
|
=item ENHANCEMENT |
3246
|
|
|
|
|
|
|
|
3247
|
|
|
|
|
|
|
L gave different test output |
3248
|
|
|
|
|
|
|
(just reflecting rounding error) |
3249
|
|
|
|
|
|
|
when stddev approaches zero. We now detect and handle this case. |
3250
|
|
|
|
|
|
|
See |
3251
|
|
|
|
|
|
|
and thanks to H. Merijn Brand for the bug report. |
3252
|
|
|
|
|
|
|
|
3253
|
|
|
|
|
|
|
=item BUG FIX |
3254
|
|
|
|
|
|
|
|
3255
|
|
|
|
|
|
|
Many, many spelling bugs found by |
3256
|
|
|
|
|
|
|
H. Merijn Brand; thanks for the bug report. |
3257
|
|
|
|
|
|
|
|
3258
|
|
|
|
|
|
|
=item INCOMPATIBLE CHANGE |
3259
|
|
|
|
|
|
|
|
3260
|
|
|
|
|
|
|
A number of programs had misspelled "separator" |
3261
|
|
|
|
|
|
|
in C<--fieldseparator> and C<--columnseparator> options as "seperator". |
3262
|
|
|
|
|
|
|
These are now correctly spelled. |
3263
|
|
|
|
|
|
|
|
3264
|
|
|
|
|
|
|
=back |
3265
|
|
|
|
|
|
|
|
3266
|
|
|
|
|
|
|
=head2 2.56, 2015-02-03 |
3267
|
|
|
|
|
|
|
fix against Getopt::Long-2.43's stricter error checking |
3268
|
|
|
|
|
|
|
|
3269
|
|
|
|
|
|
|
=over 4 |
3270
|
|
|
|
|
|
|
|
3271
|
|
|
|
|
|
|
=item BUG FIX |
3272
|
|
|
|
|
|
|
|
3273
|
|
|
|
|
|
|
Internal argument parsing uses Getopt::Long, but mixed pass-through and EE. |
3274
|
|
|
|
|
|
|
Bug reported by Petr Pisar at L. |
3275
|
|
|
|
|
|
|
|
3276
|
|
|
|
|
|
|
=item BUG FIX |
3277
|
|
|
|
|
|
|
|
3278
|
|
|
|
|
|
|
Added missing BuildRequires for C. |
3279
|
|
|
|
|
|
|
|
3280
|
|
|
|
|
|
|
=back |
3281
|
|
|
|
|
|
|
|
3282
|
|
|
|
|
|
|
=head2 2.57, 2015-04-29 |
3283
|
|
|
|
|
|
|
Minor changes, with better performance from L. |
3284
|
|
|
|
|
|
|
|
3285
|
|
|
|
|
|
|
=over 4 |
3286
|
|
|
|
|
|
|
|
3287
|
|
|
|
|
|
|
=item BUG FIX |
3288
|
|
|
|
|
|
|
|
3289
|
|
|
|
|
|
|
L now honors C<--remove-inputs> (previously it didn't). |
3290
|
|
|
|
|
|
|
This omission meant that L (and L) would accumulate |
3291
|
|
|
|
|
|
|
files in F when running. Bad news for inputs with 4M keys. |
3292
|
|
|
|
|
|
|
|
3293
|
|
|
|
|
|
|
=item ENHANCEMENT |
3294
|
|
|
|
|
|
|
|
3295
|
|
|
|
|
|
|
L should be faster with lots of small keys. |
3296
|
|
|
|
|
|
|
L now supports C<-k> to get some of the functionality of |
3297
|
|
|
|
|
|
|
L (if data is pre-sorted and median/quartiles are not required). |
3298
|
|
|
|
|
|
|
|
3299
|
|
|
|
|
|
|
L now honors C<--remove-inputs> (previously it didn't). |
3300
|
|
|
|
|
|
|
This omission meant that L (and L) would accumulate |
3301
|
|
|
|
|
|
|
files in F when running. Bad news for inputs with 4M keys. |
3302
|
|
|
|
|
|
|
|
3303
|
|
|
|
|
|
|
=back |
3304
|
|
|
|
|
|
|
|
3305
|
|
|
|
|
|
|
|
3306
|
|
|
|
|
|
|
=head2 2.58, 2015-04-30 |
3307
|
|
|
|
|
|
|
Bugfix in L |
3308
|
|
|
|
|
|
|
|
3309
|
|
|
|
|
|
|
=over 4 |
3310
|
|
|
|
|
|
|
|
3311
|
|
|
|
|
|
|
=item BUG FIX |
3312
|
|
|
|
|
|
|
|
3313
|
|
|
|
|
|
|
Fixed a case where L suffered mojibake in endgame mode. |
3314
|
|
|
|
|
|
|
This bug surfaced when L was applied to large files |
3315
|
|
|
|
|
|
|
(big enough to require merging) with unicode in them; |
3316
|
|
|
|
|
|
|
the symptom was something like: |
3317
|
|
|
|
|
|
|
Wide character in print at /usr/lib64/perl5/IO/Handle.pm line 420, line 111. |
3318
|
|
|
|
|
|
|
|
3319
|
|
|
|
|
|
|
=back |
3320
|
|
|
|
|
|
|
|
3321
|
|
|
|
|
|
|
|
3322
|
|
|
|
|
|
|
=head2 2.59, 2016-09-01 |
3323
|
|
|
|
|
|
|
Collect a few small bug fixes and documentation improvements. |
3324
|
|
|
|
|
|
|
|
3325
|
|
|
|
|
|
|
=over 4 |
3326
|
|
|
|
|
|
|
|
3327
|
|
|
|
|
|
|
=item BUG FIX |
3328
|
|
|
|
|
|
|
|
3329
|
|
|
|
|
|
|
More IO is explicitly marked UTF-8 to avoid Perl's tendency to |
3330
|
|
|
|
|
|
|
mojibake on otherwise valid unicode input. |
3331
|
|
|
|
|
|
|
This change helps L. |
3332
|
|
|
|
|
|
|
|
3333
|
|
|
|
|
|
|
=item ENHANCEMENT |
3334
|
|
|
|
|
|
|
|
3335
|
|
|
|
|
|
|
L now cross-references L. |
3336
|
|
|
|
|
|
|
|
3337
|
|
|
|
|
|
|
=item ENHANCEMENT |
3338
|
|
|
|
|
|
|
|
3339
|
|
|
|
|
|
|
Documentation for L now clarifies that the default is baseline mode. |
3340
|
|
|
|
|
|
|
|
3341
|
|
|
|
|
|
|
=item BUG FIX |
3342
|
|
|
|
|
|
|
|
3343
|
|
|
|
|
|
|
L now propagates C<-T> into the sorting process (if it is required). |
3344
|
|
|
|
|
|
|
Thanks to Lan Wei for reporting this bug. |
3345
|
|
|
|
|
|
|
|
3346
|
|
|
|
|
|
|
=back |
3347
|
|
|
|
|
|
|
|
3348
|
|
|
|
|
|
|
|
3349
|
|
|
|
|
|
|
=head2 2.60, 2016-09-04 |
3350
|
|
|
|
|
|
|
Adds support for hash joins. |
3351
|
|
|
|
|
|
|
|
3352
|
|
|
|
|
|
|
=over 4 |
3353
|
|
|
|
|
|
|
|
3354
|
|
|
|
|
|
|
=item ENHANCEMENT |
3355
|
|
|
|
|
|
|
|
3356
|
|
|
|
|
|
|
L now supports hash joins |
3357
|
|
|
|
|
|
|
with C<-t lefthash> and C<-t righthash>. |
3358
|
|
|
|
|
|
|
Hash joins cache a table in memory, but do not require |
3359
|
|
|
|
|
|
|
that the other table be sorted. |
3360
|
|
|
|
|
|
|
They are ideal when joining a large table against a small one. |
3361
|
|
|
|
|
|
|
|
3362
|
|
|
|
|
|
|
=back |
3363
|
|
|
|
|
|
|
|
3364
|
|
|
|
|
|
|
=head2 2.61, 2016-09-05 |
3365
|
|
|
|
|
|
|
Support left and right outer joins. |
3366
|
|
|
|
|
|
|
|
3367
|
|
|
|
|
|
|
=over 4 |
3368
|
|
|
|
|
|
|
|
3369
|
|
|
|
|
|
|
=item ENHANCEMENT |
3370
|
|
|
|
|
|
|
|
3371
|
|
|
|
|
|
|
L now handles left and right outer joins |
3372
|
|
|
|
|
|
|
with C<-t left> and C<-t right>. |
3373
|
|
|
|
|
|
|
|
3374
|
|
|
|
|
|
|
=item ENHANCEMENT |
3375
|
|
|
|
|
|
|
|
3376
|
|
|
|
|
|
|
L hash joins are now selected |
3377
|
|
|
|
|
|
|
with C<-m lefthash> and C<-m righthash> |
3378
|
|
|
|
|
|
|
(not the shortlived C<-t righthash> option). |
3379
|
|
|
|
|
|
|
(Technically this change is incompatible with Fsdb-2.60, but |
3380
|
|
|
|
|
|
|
no one but me ever used that version.) |
3381
|
|
|
|
|
|
|
|
3382
|
|
|
|
|
|
|
=back |
3383
|
|
|
|
|
|
|
|
3384
|
|
|
|
|
|
|
=head2 2.62, 2016-11-29 |
3385
|
|
|
|
|
|
|
A new L and other minor improvements. |
3386
|
|
|
|
|
|
|
|
3387
|
|
|
|
|
|
|
=over 4 |
3388
|
|
|
|
|
|
|
|
3389
|
|
|
|
|
|
|
=item ENHANCEMENT |
3390
|
|
|
|
|
|
|
|
3391
|
|
|
|
|
|
|
Documentation for L now includes sample output. |
3392
|
|
|
|
|
|
|
|
3393
|
|
|
|
|
|
|
=item NEW |
3394
|
|
|
|
|
|
|
|
3395
|
|
|
|
|
|
|
L converts a specific form of YAML to fsdb. |
3396
|
|
|
|
|
|
|
|
3397
|
|
|
|
|
|
|
=item BUG FIX |
3398
|
|
|
|
|
|
|
|
3399
|
|
|
|
|
|
|
The test suite now uses C rather than C |
3400
|
|
|
|
|
|
|
to make OpenBSD-5.9 happier, I hope. |
3401
|
|
|
|
|
|
|
|
3402
|
|
|
|
|
|
|
=item ENHANCEMENT |
3403
|
|
|
|
|
|
|
|
3404
|
|
|
|
|
|
|
Comments that log operations at the end of each file now do simple |
3405
|
|
|
|
|
|
|
quoting of spaces. (It is not guaranteed to be fully shell-compliant.) |
3406
|
|
|
|
|
|
|
|
3407
|
|
|
|
|
|
|
=item ENHANCEMENT |
3408
|
|
|
|
|
|
|
|
3409
|
|
|
|
|
|
|
There is a new standard option, C<--header>, |
3410
|
|
|
|
|
|
|
allowing one to specify an Fsdb header for inputs that lack it. |
3411
|
|
|
|
|
|
|
Currently it is supported by L, |
3412
|
|
|
|
|
|
|
L, L, L, L, |
3413
|
|
|
|
|
|
|
L. |
3414
|
|
|
|
|
|
|
|
3415
|
|
|
|
|
|
|
=item ENHANCEMENT |
3416
|
|
|
|
|
|
|
|
3417
|
|
|
|
|
|
|
L now allows the B<--possible-pivots> option, |
3418
|
|
|
|
|
|
|
and if it is provided processes the data in one pass. |
3419
|
|
|
|
|
|
|
|
3420
|
|
|
|
|
|
|
=item ENHANCEMENT |
3421
|
|
|
|
|
|
|
|
3422
|
|
|
|
|
|
|
L logs are now quoted. |
3423
|
|
|
|
|
|
|
|
3424
|
|
|
|
|
|
|
=back |
3425
|
|
|
|
|
|
|
|
3426
|
|
|
|
|
|
|
=head2 2.63, 2017-02-03 |
3427
|
|
|
|
|
|
|
Re-add some features supposedly in 2.62 but not, and add more --header options. |
3428
|
|
|
|
|
|
|
|
3429
|
|
|
|
|
|
|
=over 4 |
3430
|
|
|
|
|
|
|
|
3431
|
|
|
|
|
|
|
=item ENHANCEMENT |
3432
|
|
|
|
|
|
|
|
3433
|
|
|
|
|
|
|
The option B<-j> is now a synonym for B<--parallelism>. |
3434
|
|
|
|
|
|
|
(And several documentation bugs about this option are fixed.) |
3435
|
|
|
|
|
|
|
|
3436
|
|
|
|
|
|
|
=item ENHANCEMENT |
3437
|
|
|
|
|
|
|
|
3438
|
|
|
|
|
|
|
Additional support for C<--header> in L, L, L, |
3439
|
|
|
|
|
|
|
and L. |
3440
|
|
|
|
|
|
|
|
3441
|
|
|
|
|
|
|
=item BUG FIX |
3442
|
|
|
|
|
|
|
|
3443
|
|
|
|
|
|
|
Version 2.62 was supposed to have this improvement, but did not (and now does): |
3444
|
|
|
|
|
|
|
L now allows the B<--possible-pivots> option, |
3445
|
|
|
|
|
|
|
and if it is provided processes the data in one pass. |
3446
|
|
|
|
|
|
|
|
3447
|
|
|
|
|
|
|
=item BUG FIX |
3448
|
|
|
|
|
|
|
|
3449
|
|
|
|
|
|
|
Version 2.62 was supposed to have this improvement, but did not (and now does): |
3450
|
|
|
|
|
|
|
L logs are now quoted. |
3451
|
|
|
|
|
|
|
|
3452
|
|
|
|
|
|
|
=back |
3453
|
|
|
|
|
|
|
|
3454
|
|
|
|
|
|
|
=head1 AUTHOR |
3455
|
|
|
|
|
|
|
|
3456
|
|
|
|
|
|
|
John Heidemann, C |
3457
|
|
|
|
|
|
|
|
3458
|
|
|
|
|
|
|
See L for the many people who have contributed |
3459
|
|
|
|
|
|
|
bug reports and fixes. |
3460
|
|
|
|
|
|
|
|
3461
|
|
|
|
|
|
|
|
3462
|
|
|
|
|
|
|
=head1 COPYRIGHT |
3463
|
|
|
|
|
|
|
|
3464
|
|
|
|
|
|
|
Fsdb is Copyright (C) 1991-2016 by John Heidemann . |
3465
|
|
|
|
|
|
|
|
3466
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify |
3467
|
|
|
|
|
|
|
it under the terms of version 2 of the GNU General Public License as |
3468
|
|
|
|
|
|
|
published by the Free Software Foundation. |
3469
|
|
|
|
|
|
|
|
3470
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but |
3471
|
|
|
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of |
3472
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
3473
|
|
|
|
|
|
|
General Public License for more details. |
3474
|
|
|
|
|
|
|
|
3475
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License |
3476
|
|
|
|
|
|
|
along with this program; if not, write to the Free Software |
3477
|
|
|
|
|
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
3478
|
|
|
|
|
|
|
|
3479
|
|
|
|
|
|
|
A copy of the GNU General Public License can be found in the file |
3480
|
|
|
|
|
|
|
``COPYING''. |
3481
|
|
|
|
|
|
|
|
3482
|
|
|
|
|
|
|
|
3483
|
|
|
|
|
|
|
|
3484
|
|
|
|
|
|
|
=head1 COMMENTS and BUG REPORTS |
3485
|
|
|
|
|
|
|
|
3486
|
|
|
|
|
|
|
Any comments about these programs should be sent to John Heidemann |
3487
|
|
|
|
|
|
|
C. |
3488
|
|
|
|
|
|
|
|
3489
|
|
|
|
|
|
|
|
3490
|
|
|
|
|
|
|
=cut |
3491
|
|
|
|
|
|
|
|
3492
|
|
|
|
|
|
|
1; # End of Fsdb |
3493
|
|
|
|
|
|
|
|
3494
|
|
|
|
|
|
|
# LocalWords: Exp rdb Manis Evan Schaffer passwd uid gid fullname homedir greg |
3495
|
|
|
|
|
|
|
# LocalWords: gnuplot jgraph dbrow dbcol dbcolcreate dbcoldefine FSDB README un |
3496
|
|
|
|
|
|
|
# LocalWords: dbcolrename dbcolmerge dbcolsplit dbjoin dbsort dbcoldiff Perl bw |
3497
|
|
|
|
|
|
|
# LocalWords: dbmultistats dbrowdiff dbrowenumerate dbroweval dbstats dblistize |
3498
|
|
|
|
|
|
|
# LocalWords: dbcolneaten dbcoltighten dbstripcomments dbstripextraheaders pct |
3499
|
|
|
|
|
|
|
# LocalWords: dbstripleadingspace stddev rsd dbsetheader sprintf LIBDIR BINDIR |
3500
|
|
|
|
|
|
|
# LocalWords: LocalWords isi URL com dbpercentile dbhistogram GRADEBOOK min ss |
3501
|
|
|
|
|
|
|
# LocalWords: gradebook conf std dev dbrowaccumulate dbcolpercentile db dcliff |
3502
|
|
|
|
|
|
|
# LocalWords: dbuniq uniq dbcolize distr pl Apr autoconf Jul html printf Fx fsdb |
3503
|
|
|
|
|
|
|
# LocalWords: printfs dbrowuniq dbrecolize dbformmail kitrace geoff ns berkeley |
3504
|
|
|
|
|
|
|
# LocalWords: comp lang perl Haobo Yu outliers Jorgensen csh dbrowsplituniq crl |
3505
|
|
|
|
|
|
|
# LocalWords: dbcolmovingstats dbcolstats zscores tscores dbcolhisto columnar |
3506
|
|
|
|
|
|
|
# LocalWords: dmalloc tabdelim stats numerics datapoint CDF xgraph max txt sed |
3507
|
|
|
|
|
|
|
# LocalWords: login gecos div cmd nr hw hw assuing Kuenning Vikram Visweswariah |
3508
|
|
|
|
|
|
|
# LocalWords: Kannan Varadahan Arkadi Gelfond Pavlin Radoslavov quartile getopt |
3509
|
|
|
|
|
|
|
# LocalWords: dbcolscorrelate DbGetopt cp tmp nd Ya Xu dbfilesplit |
3510
|
|
|
|
|
|
|
# LocalWords: MERCHANTABILITY tba dbcolsplittocols dbcolsplittorows cvs johnh |
3511
|
|
|
|
|
|
|
# LocalWords: dbcolsregression datasets whitespace LaTeX FS columnname cgi pre |
3512
|
|
|
|
|
|
|
# LocalWords: columname's dbfilevalidate tcpdump http rv eq Bourne DbTDistr |
3513
|
|
|
|
|
|
|
# LocalWords: Goel Eggert Ning Strozzi NoSQL awk startup Sparcstation IPCs GHz |
3514
|
|
|
|
|
|
|
# LocalWords: SunOS Arpaci Dusseau's SOSP Scheaffer STDIN dblib iso freebsd OO |
3515
|
|
|
|
|
|
|
# LocalWords: sendmail unicode Makefile dbmapreduce dbcolmultiscale andersen |
3516
|
|
|
|
|
|
|
# LocalWords: lampson chen drovolis estrin floyd Lukac NIST SEMATECH RCS qw |
3517
|
|
|
|
|
|
|
# LocalWords: listize colize Unkyu dbpipeline ithreads dbfilealter dbrowcount |
3518
|
|
|
|
|
|
|
# LocalWords: dbrvstatdiff dbcolstatscores dbfilestripcomments csv nolog aho |
3519
|
|
|
|
|
|
|
# LocalWords: alfred david clark constantine debrorah Fsdb's colized listized |
3520
|
|
|
|
|
|
|
# LocalWords: Ashvin dbmerge na tmean tstddev wc logfiles stdin lseek SV xa |
3521
|
|
|
|
|
|
|
# LocalWords: refcount lossage DaGronk dbcolscorellate ipchain |