File Coverage

blib/lib/Fsdb.pm
Criterion Covered Total %
statement 9 9 100.0
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 12 12 100.0


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2              
3             #
4             # Fsdb.pm
5             #
6             # Copyright (C) 1991-2016 by John Heidemann
7             #
8             # This program is free software; you can redistribute it and/or
9             # modify it under the terms of the GNU General Public License,
10             # version 2, as published by the Free Software Foundation.
11             #
12             # This program is distributed in the hope that it will be useful,
13             # but WITHOUT ANY WARRANTY; without even the implied warranty of
14             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15             # GNU General Public License for more details.
16             #
17             # You should have received a copy of the GNU General Public License along
18             # with this program; if not, write to the Free Software Foundation, Inc.,
19             # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20             #
21              
22             package Fsdb;
23              
24 2     2   145848 use warnings;
  2         7  
  2         100  
25 2     2   17 use strict;
  2         5  
  2         63  
26 2     2   1070 use utf8;
  2         39  
  2         14  
27              
28             =encoding utf8
29              
30             =head1 NAME
31              
32             Fsdb - a flat-text database for shell scripting
33              
34              
35             =cut
36             our $VERSION = '2.64';
37              
38             =head1 SYNOPSIS
39              
40             Fsdb, the flatfile streaming database, is a package of commands
41             for manipulating flat-ASCII databases from
42             shell scripts. Fsdb is useful to process medium amounts of data (with
43             very little data you'd do it by hand, with megabytes you might want a
44             real database).
45             Fsdb was known as Jdb from 1991 to Oct. 2008.
46              
47             Fsdb is very good at doing things like:
48              
49             =over 4
50              
51             =item *
52              
53             extracting measurements from experimental output
54              
55             =item *
56              
57             examining data to address different hypotheses
58              
59             =item *
60              
61             joining data from different experiments
62              
63             =item *
64              
65             eliminating/detecting outliers
66              
67             =item *
68              
69             computing statistics on data
70             (mean, confidence intervals, correlations, histograms)
71              
72             =item *
73              
74             reformatting data for graphing programs
75              
76             =back
77              
78             Fsdb is built around the idea of a flat text file as a database.
79             Fsdb files (by convention, with the extension F<.fsdb>),
80             have a header documenting the schema (what the columns mean),
81             and then each line represents a database record (or row).
82              
83             For example:
84              
85             #fsdb experiment duration
86             ufs_mab_sys 37.2
87             ufs_mab_sys 37.3
88             ufs_rcp_real 264.5
89             ufs_rcp_real 277.9
90              
91             Is a simple file with four experiments (the rows),
92             each with a description and run time
93             in the first and second columns.
94              
95             Rather than hand-code scripts to do each special case, Fsdb provides
96             higher-level functions. Although it's often easy to throw together a
97             custom script to do any single task, I believe that there are several
98             advantages to using Fsdb:
99              
100             =over 4
101              
102             =item *
103              
104             these programs provide a higher level interface than plain Perl, so
105              
106             =over 4
107              
108             =item **
109              
110             Fewer lines of simpler code:
111              
112             dbrow '_experiment eq "ufs_mab_sys"' | dbcolstats duration
113              
114             Picks out just one type of experiment and computes statistics on it,
115             rather than:
116              
117             while (<>) { split; $sum+=$F[1]; $ss+=$F[1]**2; $n++; }
118             $mean = $sum / $n; $std_dev = ...
119              
120             in dozens of places.
121              
122             =back
123              
124             =item *
125              
126             the library uses names for columns, so
127              
128             =over 4
129              
130             =item **
131              
132             No more C<$F[1]>, use C<_duration>.
133              
134             =item **
135              
136             New or different order columns? No changes to your scripts!
137              
138             =back
139              
140             Thus if your experiment gets more complicated with a size parameter,
141             so your log changes to:
142              
143             #fsdb experiment size duration
144             ufs_mab_sys 1024 37.2
145             ufs_mab_sys 1024 37.3
146             ufs_rcp_real 1024 264.5
147             ufs_rcp_real 1024 277.9
148             ufs_mab_sys 2048 45.3
149             ufs_mab_sys 2048 44.2
150              
151             Then the previous scripts still work, even though duration is
152             now the third column, not the second.
153              
154             =item *
155              
156             A series of actions are self-documenting (each program records what it does).
157              
158             =over 4
159              
160             =item **
161              
162             No more wondering what hacks were used to compute the
163             final data, just look at the comments at the end
164             of the output.
165              
166             =back
167              
168             For example, the commands
169              
170             dbrow '_experiment eq "ufs_mab_sys"' | dbcolstats duration
171              
172             add to the end of the output the lines
173             # | dbrow _experiment eq "ufs_mab_sys"
174             # | dbcolstats duration
175              
176              
177             =item *
178              
179             The library is mature, supporting large datasets (more than 100GB),
180             corner cases, error handling, backed by an automated test suite.
181              
182             =over 4
183              
184             =item **
185              
186             No more puzzling about bad output because your custom script
187             skimped on error checking.
188              
189             =item **
190              
191             No more memory thrashing when you try to sort ten million records.
192              
193             =back
194              
195             =item *
196              
197             Fsdb-2.x supports Perl scripting (in addition to shell scripting),
198             with libraries to do Fsdb input and output, and easy support for pipelines.
199             The shell script
200              
201             dbcol name test1 | dbroweval '_test1 += 5;'
202              
203             can be written in perl as:
204              
205             dbpipeline(dbcol(qw(name test1)), dbroweval('_test1 += 5;'));
206              
207             =back
208              
209             (The disadvantage is that you need to learn what functions Fsdb provides.)
210              
211             Fsdb is built on flat-ASCII databases. By storing data in simple text
212             files and processing it with pipelines it is easy to experiment (in
213             the shell) and look at the output.
214             To the best of my knowledge, the original implementation of
215             this idea was C</rdb>, a commercial product described in the book
216             I<Unix Relational Database Management: Application Development in the UNIX Environment>
217             by Rod Manis, Evan Schaffer, and Robert Jorgensen (and
218             also at the web page L). Fsdb is an incompatible
219             re-implementation of their idea without any accelerated indexing or
220             forms support. (But it's free, and probably has better statistics!).
221              
222             Fsdb-2.x will exploit multiple processors or cores,
223             and provides Perl-level support for input, output, and threaded-pipelines.
224             (As of Fsdb-2.44 it no longer uses Perl threading, just processes,
225             since they are faster.)
226              
227             Installation instructions follow at the end of this document.
228             Fsdb-2.x requires Perl 5.8 to run.
229             All commands have manual pages and provide usage with the C<--help> option.
230             All commands are backed by an automated test suite.
231              
232             The most recent version of Fsdb is available on the web at
233             L.
234              
235              
236             =head1 WHAT'S NEW
237              
238             =head2 2.64, 2017-11-20
239             several small bugfixes and enhancements
240              
241             =over 4
242              
243             =item BUG FIX
244              
245             In L, the C option previously did not
246             correctly set up C<_last_fieldname>. It now does.
247              
248             =item ENHANCEMENT
249              
250             The L converter now has an optional C<-F x> option
251             to set the field separator.
252              
253             =item ENHANCEMENT
254              
255             Finally L has a C<--header> option,
256             and a new C<-N> option to give the list of resulting output columns.
257              
258             =item INCOMPATIBLE CHANGE
259              
260             Now L and L produce no output
261             (but a schema) when given no input but a schema.
262             Previously they gave a null row of output.
263             The C<--output-on-no-input> and C<--no-output-on-no-input>
264             options can control this behavior.
265              
266             =back
267              
268              
269              
270             =head1 README CONTENTS
271              
272             =over 4
273              
274             =item executive summary
275              
276             =item what's new
277              
278             =item README CONTENTS
279              
280             =item installation
281              
282             =item basic data format
283              
284             =item basic data manipulation
285              
286             =item list of commands
287              
288             =item another example
289              
290             =item a gradebook example
291              
292             =item a password example
293              
294             =item history
295              
296             =item related work
297              
298             =item release notes
299              
300             =item copyright
301              
302             =item comments
303              
304             =back
305              
306              
307             =head1 INSTALLATION
308              
309             Fsdb now uses the standard Perl build and installation from
310             ExtUtils::MakeMaker(3), so the quick answer to installation is to type:
311            
312             perl Makefile.PL
313             make
314             make test
315             make install
316              
317             Or, if you want to install it somewhere else, change the first line to
318              
319             perl Makefile.PL PREFIX=$HOME
320              
321             and it will go in your home directory's F, etc.
322             (See L for more details.)
323              
324             Fsdb requires perl 5.8 or later.
325              
326             A test-suite is available, run it with
327              
328             make test
329              
330             A FreeBSD port to Fsdb is available, see
331             L.
332              
333             A Fink (MacOS X) port is available, see
334             L.
335             (Thanks to Lars Eggert for maintaining this port.)
336              
337              
338             =head1 BASIC DATA FORMAT
339              
340             These programs are based on the idea of storing data in simple ASCII
341             files. A database is a file with one header line and then data or
342             comment lines. For example:
343              
344             #fsdb account passwd uid gid fullname homedir shell
345             johnh * 2274 134 John_Heidemann /home/johnh /bin/bash
346             greg * 2275 134 Greg_Johnson /home/greg /bin/bash
347             root * 0 0 Root /root /bin/bash
348             # this is a simple database
349              
350             The header line must be first and begins with C<#fsdb>.
351             There are rows (records) and columns (fields),
352             just like in a normal database.
353             Comment lines begin with C<#>.
354             Column names are any string not containing spaces or single quote
355             (although it is prudent to keep them alphanumeric with underscore).
356              
357             By default, columns are delimited by whitespace.
358             With this default configuration, the contents of a field
359             cannot contain whitespace.
360             However, this limitation can be relaxed by changing the field separator
361             as described below.
362              
363             The big advantage of simple flat-text databases is that
364             it is usually easy to massage data into this format,
365             and it's reasonably easy to take data out of this
366             format into other (text-based) programs, like gnuplot, jgraph, and
367             LaTeX. Think Unix. Think pipes.
368             (Or even output to Excel and HTML if you prefer.)
369              
370             Since no-whitespace in columns was a problem for some applications,
371             there's an option which relaxes this rule. You can specify the field
372             separator in the table header with C<-F x> where C<x> is
373             a code for the new field separator.
374             A full list of codes is at L,
375             but two common special values are C<-F t>
376             which is a separator of a single tab character,
377             and C<-F S>, a separator of two spaces.
378             Both allow (single) spaces in fields. An example:
379              
380             #fsdb -F S account passwd uid gid fullname homedir shell
381             johnh * 2274 134 John Heidemann /home/johnh /bin/bash
382             greg * 2275 134 Greg Johnson /home/greg /bin/bash
383             root * 0 0 Root /root /bin/bash
384             # this is a simple database
385              
386             See L for more details. Regardless of what the column
387             separator is for the body of the data, it's always whitespace in the
388             header.
389              
390             There's also a third format: a "list". Because it's often hard to see
391             what's in columns past the first two, in list format each "column" is on
392             a separate line. The programs dblistize and dbcolize convert to and
393             from this format, and all programs work with either format.
394             The command
395              
396             dbfilealter -R C < DATA/passwd.fsdb
397              
398             outputs:
399              
400             #fsdb -R C account passwd uid gid fullname homedir shell
401             account: johnh
402             passwd: *
403             uid: 2274
404             gid: 134
405             fullname: John_Heidemann
406             homedir: /home/johnh
407             shell: /bin/bash
408            
409             account: greg
410             passwd: *
411             uid: 2275
412             gid: 134
413             fullname: Greg_Johnson
414             homedir: /home/greg
415             shell: /bin/bash
416            
417             account: root
418             passwd: *
419             uid: 0
420             gid: 0
421             fullname: Root
422             homedir: /root
423             shell: /bin/bash
424            
425             # this is a simple database
426             # | dblistize
427              
428             See L for more details.
429              
430              
431             =head1 BASIC DATA MANIPULATION
432              
433             A number of programs exist to manipulate databases.
434             Complex functions can be made by stringing together commands
435             with shell pipelines. For example, to print the home
436             directories of everyone with ``john'' in their names,
437             you would do:
438              
439             cat DATA/passwd | dbrow '_fullname =~ /John/' | dbcol homedir
440              
441             The output might be:
442              
443             #fsdb homedir
444             /home/johnh
445             /home/greg
446             # this is a simple database
447             # | dbrow _fullname =~ /John/
448             # | dbcol homedir
449              
450             (Notice that comments are appended to the output listing each command,
451             providing an automatic audit log.)
452              
453             In addition to typical database functions (select, join, etc.) there
454             are also a number of statistical functions.
455              
456             The real power of Fsdb is that one can apply arbitrary code to rows
457             to do powerful things.
458              
459             cat DATA/passwd | dbroweval '_fullname =~ s/(\w+)_(\w+)/$2,_$1/'
460              
461             converts "John_Heidemann" into "Heidemann,_John".
462             Not too much more work could split fullname into firstname and lastname
463             fields.
464              
465              
466             =head1 TALKING ABOUT COLUMNS
467              
468             An advantage of Fsdb is that you can talk about columns by name
469             (symbolically) rather than simply by their positions. So in the above
470             example, C<dbcol homedir> pulled out the home directory column, and
471             C<dbrow '_fullname =~ /John/'> matched against column fullname.
472              
473             In general, you can use the name of the column listed on the C<#fsdb> line
474             to identify it in most programs, and _name to identify it in code.
475              
476             Some alternatives for flexibility:
477              
478             =over 4
479              
480             =item *
481              
482             Numeric values identify columns positionally, numbering from 0.
483             So 0 or _0 is the first column, 1 is the second, etc.
484              
485             =item *
486              
487             In code, _last_columnname gets the value from columnname's previous row.
488              
489             =back
490              
491             See L for more details about writing code.
492              
493              
494              
495             =head1 LIST OF COMMANDS
496              
497             Enough said. I'll summarize the commands, and then you can
498             experiment. For a detailed description of each command, see a summary
499             by running it with the argument C<--help> (or C<-?> if you prefer.)
500             Full manual pages can be found by running the command
501             with the argument C<--man>, or running the Unix command C
502             or whatever program you want.
503              
504             =head2 TABLE CREATION
505              
506             =over 4
507              
508             =item dbcolcreate
509              
510             add columns to a database
511              
512             =item dbcoldefine
513              
514             set the column headings for a non-Fsdb file
515              
516             =back
517              
518             =head2 TABLE MANIPULATION
519              
520             =over 4
521              
522             =item dbcol
523              
524             select columns from a table
525              
526             =item dbrow
527              
528             select rows from a table
529              
530             =item dbsort
531              
532             sort rows based on a set of columns
533              
534             =item dbjoin
535              
536             compute the natural join of two tables
537              
538             =item dbcolrename
539              
540             rename a column
541              
542             =item dbcolmerge
543              
544             merge two columns into one
545              
546             =item dbcolsplittocols
547              
548             split one column into two or more columns
549              
550             =item dbcolsplittorows
551              
552             split one column into multiple rows
553              
554             =item dbfilepivot
555              
556             "pivots" a file, converting multiple rows
557             corresponding to the same entity into a single row with multiple columns.
558              
559             =item dbfilevalidate
560              
561             check that db file doesn't have some common errors
562              
563             =back
564              
565             =head2 COMPUTATION AND STATISTICS
566              
567             =over 4
568              
569             =item dbcolstats
570              
571             compute statistics over a column (mean,etc.,optionally median)
572              
573             =item dbmultistats
574              
575             group rows by some key value, then compute stats (mean, etc.) over each group
576             (equivalent to dbmapreduce with dbcolstats as the reducer)
577              
578             =item dbmapreduce
579              
580             group rows (map) and then apply an arbitrary function to each group (reduce)
581              
582             =item dbrvstatdiff
583              
584             compare two sample distributions (mean/conf interval/T-test)
585              
586             =item dbcolmovingstats
587              
588             compute moving statistics over a column of data
589              
590             =item dbcolstatscores
591              
592             compute Z-scores and T-scores over one column of data
593              
594             =item dbcolpercentile
595              
596             compute the rank or percentile of a column
597              
598             =item dbcolhisto
599              
600             compute histograms over a column of data
601              
602             =item dbcolscorrelate
603              
604             compute the coefficient of correlation over several columns
605              
606             =item dbcolsregression
607              
608             compute linear regression and correlation for two columns
609              
610             =item dbrowaccumulate
611              
612             compute a running sum over a column of data
613              
614             =item dbrowcount
615              
616             count the number of rows (a subset of dbstats)
617              
618             =item dbrowdiff
619              
620             compute differences between a columns in each row of a table
621              
622             =item dbrowenumerate
623              
624             number each row
625              
626             =item dbroweval
627              
628             run arbitrary Perl code on each row
629              
630             =item dbrowuniq
631              
632             count/eliminate identical rows (like Unix uniq(1))
633              
634             =item dbfilediff
635              
636             compare fields on rows of a file (something like Unix diff(1))
637              
638             =back
639              
640             =head2 OUTPUT CONTROL
641              
642             =over 4
643              
644             =item dbcolneaten
645              
646             pretty-print columns
647              
648             =item dbfilealter
649              
650             convert between column or list format, or change the column separator
651              
652             =item dbfilestripcomments
653              
654             remove comments from a table
655              
656             =item dbformmail
657              
658             generate a script that sends form mail based on each row
659              
660             =back
661              
662             =head2 CONVERSIONS
663              
664             (These programs convert data into fsdb. See their web pages for details.)
665              
666             =over 4
667              
668             =item cgi_to_db
669              
670             L
671              
672             =item combined_log_format_to_db
673              
674             L
675              
676             =item html_table_to_db
677              
678             HTML tables to fsdb (assuming they're reasonably formatted).
679              
680             =item kitrace_to_db
681              
682             L
683              
684             =item ns_to_db
685              
686             L
687              
688             =item sqlselect_to_db
689              
690             the output of SQL SELECT tables to db
691              
692             =item tabdelim_to_db
693              
694             spreadsheet tab-delimited files to db
695              
696             =item tcpdump_to_db
697              
698             (see man tcpdump(8) on any reasonable system)
699              
700             =item xml_to_db
701              
702             XML input to fsdb, assuming they're very regular
703              
704              
705             =back
706              
707             (And out of fsdb:)
708              
709             =over 4
710              
711             =item db_to_csv
712              
713             Comma-separated-value format from fsdb.
714              
715             =item db_to_html_table
716              
717             simple conversion of Fsdb to html tables
718              
719             =back
720              
721             =head2 STANDARD OPTIONS
722              
723             Many programs have common options:
724              
725             =over 4
726              
727             =item B<-?> or B<--help>
728              
729             Show basic usage.
730              
731             =item B<-N> or B<--new-name>
732              
733             When a command creates a new column like L's C,
734             this option lets one override the default name of that new column.
735              
736             =item B<-T TmpDir>
737              
738             where to put tmp files.
739             Also uses environment variable TMPDIR, if -T is
740             not specified.
741             Default is /tmp.
742              
743             Show basic usage.
744              
745             =item B<-c FRACTION> or B<--confidence FRACTION>
746              
747             Specify confidence interval FRACTION (L, L, etc.)
748              
749             =item B<-C S> or C<--element-separator S>
750              
751             Specify column separator S (L, L).
752              
753             =item B<-d> or B<--debug>
754              
755             Enable debugging (may be repeated for greater effect in some cases).
756              
757             =item B<-a> or B<--include-non-numeric>
758              
759             Compute stats over all data (treating non-numbers as zeros).
760             (By default, things that can't be treated as numbers
761             are ignored for stats purposes)
762              
763             =item B<-S> or B<--pre-sorted>
764              
765             Assume the data is pre-sorted.
766             May be repeated to disable verification (saving a small amount of work).
767              
768             =item B<-e E> or B<--empty E>
769              
770             give value E as the value for empty (null) records
771              
772             =item B<-i I> or B<--input I>
773              
774             Input data from file I.
775              
776             =item B<-o O> or B<--output O>
777              
778             Write data out to file O.
779              
780             =item B<--header> H
781              
782             Use H as the full Fsdb header, rather than reading a header from
783             the input. This option is particularly useful when using Fsdb
784             under Hadoop, where split files don't have headers.
785              
786             =item B<--nolog>.
787              
788             Skip logging the program in a trailing comment.
789              
790             =back
791              
792             When giving Perl code (in L<dbrow> and L<dbroweval>)
793             column names can be embedded if preceded by underscores.
794             (Look at L<dbrow> or L<dbroweval> for examples.)
795              
796             Most programs run in constant memory and use temporary files if necessary.
797             Exceptions are L, L, L,
798             L, L.
799              
800              
801             =head1 ANOTHER EXAMPLE
802              
803             Take the raw data in C<DATA/http_bandwidth>,
804             put a header on it (C<dbcoldefine size bw>),
805             take statistics of each category (C<dbmultistats -k size bw>),
806             pick out the relevant fields (C<dbcol size mean stddev pct_rsd>), and you get:
807              
808             #fsdb size mean stddev pct_rsd
809             1024 1.4962e+06 2.8497e+05 19.047
810             10240 5.0286e+06 6.0103e+05 11.952
811             102400 4.9216e+06 3.0939e+05 6.2863
812             # | dbcoldefine size bw
813             # | /home/johnh/BIN/DB/dbmultistats -k size bw
814             # | /home/johnh/BIN/DB/dbcol size mean stddev pct_rsd
815              
816             (The whole command was:
817              
818             cat DATA/http_bandwidth |
819             dbcoldefine size bw |
820             dbmultistats -k size bw |
821             dbcol size mean stddev pct_rsd
822              
823             all on one line.)
824              
825             Then post-process them to get rid of the exponential notation
826             by adding this to the end of the pipeline:
827              
828             dbroweval '_mean = sprintf("%8.0f", _mean); _stddev = sprintf("%8.0f", _stddev);'
829              
830             (Actually, this step is no longer required since L
831             now uses a different default format.)
832              
833             giving:
834              
835             #fsdb size mean stddev pct_rsd
836             1024 1496200 284970 19.047
837             10240 5028600 601030 11.952
838             102400 4921600 309390 6.2863
839             # | dbcoldefine size bw
840             # | dbmultistats -k size bw
841             # | dbcol size mean stddev pct_rsd
842             # | dbroweval { _mean = sprintf("%8.0f", _mean); _stddev = sprintf("%8.0f", _stddev); }
843              
844             In a few lines, raw data is transformed to processed output.
845              
846              
847             Suppose you expect there is an odd distribution of results of one
848             datapoint. Fsdb can easily produce a CDF (cumulative distribution
849             function) of the data, suitable for graphing:
850              
851             cat DB/DATA/http_bandwidth | \
852             dbcoldefine size bw | \
853             dbrow '_size == 102400' | \
854             dbcol bw | \
855             dbsort -n bw | \
856             dbrowenumerate | \
857             dbcolpercentile count | \
858             dbcol bw percentile | \
859             xgraph
860              
861             The steps, roughly:
862             1. get the raw input data and turn it into fsdb format,
863             2. pick out just the relevant column (for efficiency) and sort it,
864             3. for each data point, assign a CDF percentage to it,
865             4. pick out the two columns to graph and show them
866              
867              
868             =head1 A GRADEBOOK EXAMPLE
869              
870             The first commercial program I wrote was a gradebook,
871             so here's how to do it with Fsdb.
872              
873             Format your data like DATA/grades.
874              
875             #fsdb name email id test1
876             a a@ucla.example.edu 1 80
877             b b@usc.example.edu 2 70
878             c c@isi.example.edu 3 65
879             d d@lmu.example.edu 4 90
880             e e@caltech.example.edu 5 70
881             f f@oxy.example.edu 6 90
882              
883             Or if your students have spaces in their names, use C<-F S> and two spaces
884             to separate each column:
885              
886             #fsdb -F S name email id test1
887             alfred aho a@ucla.example.edu 1 80
888             butler lampson b@usc.example.edu 2 70
889             david clark c@isi.example.edu 3 65
890             constantine drovolis d@lmu.example.edu 4 90
891             deborah estrin e@caltech.example.edu 5 70
892             sally floyd f@oxy.example.edu 6 90
893              
894             To compute statistics on an exam, do
895              
896             cat DATA/grades | dbstats test1 |dblistize
897              
898             giving
899              
900             #fsdb -R C ...
901             mean: 77.5
902             stddev: 10.84
903             pct_rsd: 13.987
904             conf_range: 11.377
905             conf_low: 66.123
906             conf_high: 88.877
907             conf_pct: 0.95
908             sum: 465
909             sum_squared: 36625
910             min: 65
911             max: 90
912             n: 6
913             ...
914              
915             To do a histogram:
916              
917             cat DATA/grades | dbcolhisto -n 5 -g test1
918              
919             giving
920              
921             #fsdb low histogram
922             65 *
923             70 **
924             75
925             80 *
926             85
927             90 **
928             # | /home/johnh/BIN/DB/dbhistogram -n 5 -g test1
929              
930             Now you want to send out grades to the students by e-mail.
931             Create a form-letter (in the file F<test1.txt>):
932              
933             To: _email (_name)
934             From: J. Random Professor
935             Subject: test1 scores
936              
937             _name, your score on test1 was _test1.
938             86+ A
939             75-85 B
940             70-74 C
941             0-69 F
942              
943             Generate the shell script that will send the mail out:
944              
945             cat DATA/grades | dbformmail test1.txt > test1.sh
946              
947             And run it:
948              
949             sh <test1.sh
950              
951             The last two steps can be combined:
952              
953             cat DATA/grades | dbformmail test1.txt | sh
954              
955             but I like to keep a copy of exactly what I send.
956              
957              
958             At the end of the semester you'll want to compute grade totals and
959             assign letter grades. Both fall out of dbroweval.
960             For example, to compute weighted total grades with a 40% midterm/60%
961             final where the midterm is 84 possible points and the final 100:
962              
963             dbcol -rv total |
964             dbcolcreate total - |
965             dbroweval '
966             _total = .40 * _midterm/84.0 + .60 * _final/100.0;
967             _total = sprintf("%4.2f", _total);
968             if (_final eq "-" || ( _name =~ /^_/)) { _total = "-"; };' |
969             dbcolneaten
970              
971              
972             If you got the data originally from a spreadsheet, save it in
973             "tab-delimited" format and convert it with tabdelim_to_db
974             (run tabdelim_to_db -? for examples).
975              
976              
977             =head1 A PASSWORD EXAMPLE
978              
979             To convert the Unix password file to db:
980              
981             cat /etc/passwd | sed 's/:/ /g'| \
982             dbcoldefine -F S login password uid gid gecos home shell \
983             >passwd.fsdb
984              
985             To convert the group file
986              
987             cat /etc/group | sed 's/:/ /g' | \
988             dbcoldefine -F S group password gid members \
989             >group.fsdb
990              
991             To show the names of the groups that div7-members are in
992             (assuming DIV7 is in the gecos field):
993              
994             cat passwd.fsdb | dbrow '_gecos =~ /DIV7/' | dbcol login gid | \
995             dbjoin -i - -i group.fsdb gid | dbcol login group
996              
997              
998             =head1 SHORT EXAMPLES
999              
1000             Which Fsdb programs are the most complicated (based on number of test cases)?
1001              
1002             ls TEST/*.cmd | \
1003             dbcoldefine test | \
1004             dbroweval '_test =~ s@^TEST/([^_]+).*$@$1@' | \
1005             dbrowuniq -c | \
1006             dbsort -nr count | \
1007             dbcolneaten
1008              
1009             (Answer: L, then L, L and L.)
1010              
1011              
1012             Stats on an exam (in C<$FILE>, where C<$COLUMN> is the name of the exam)?
1013              
1014             cat $FILE | dbcolstats -q 4 $COLUMN <$FILE | dblistize | dbstripcomments
1015              
1016             cat $FILE | dbcolhisto -g -n 20 $COLUMN | dbcolneaten | dbstripcomments
1017              
1018              
1019             Merging the hw1 column from file hw1.fsdb into grades.fsdb assuming
1020             there's a common student id in column "id":
1021              
1022             dbcol id hw1 <hw1.fsdb >t.fsdb
1023              
1024             dbjoin -a -e - grades.fsdb t.fsdb id | \
1025             dbsort name | \
1026             dbcolneaten >new_grades.fsdb
1027              
1028              
1029             Merging two fsdb files with the same rows:
1030              
1031             cat file1.fsdb file2.fsdb >output.fsdb
1032              
1033             or if you want to clean things up a bit
1034              
1035             cat file1.fsdb file2.fsdb | dbstripextraheaders >output.fsdb
1036              
1037             or if you want to know where the data came from
1038              
1039             for i in 1 2
1040             do
1041             dbcolcreate source $i < file$i.fsdb
1042             done >output.fsdb
1043              
1044             (assumes you're using a Bourne-shell compatible shell, not csh).
1045            
1046              
1047             =head1 WARNINGS
1048              
1049             As with any tool, one should (which means I) understand
1050             the limits of the tool.
1051              
1052             All Fsdb tools should run in I<constant memory>.
1053             In some cases (such as F<dbcolstats> with quartiles, where the whole input
1054             must be re-read), programs will spool data to disk if necessary.
1055              
1056             Most tools buffer one or a few lines of data, so memory
1057             will scale with the size of each line.
1058             (So lines with many columns, or when columns have lots of data,
1059             may cause large memory consumption.)
1060              
1061             All Fsdb tools should run in constant or at worst C<O(n log n)> time.
1062              
1063             All Fsdb tools use normal Perl math routines for computation.
1064             Although I make every attempt to choose numerically stable algorithms
1065             (although I also welcome feedback and suggestions for improvement),
1066             normal rounding due to computer floating point approximations
1067             can result in inaccuracies when data spans a large range of precision.
1068             (See for example the F test cases.)
1069              
1070             Any requirements and limitations of each Fsdb tool
1071             are documented on its manual page.
1072              
1073             If any Fsdb program violates these assumptions,
1074             that is a bug that should be documented
1075             on the tool's manual page or ideally fixed.
1076              
1077             Fsdb does depend on Perl's correctness, and Perl (and Fsdb) have
1078             some bugs. Fsdb should work on perl from version 5.10 onward.
1079              
1080              
1081             =head1 HISTORY
1082              
1083             There have been three versions of Fsdb;
1084             fsdb 1.0 is a complete re-write of the pre-1995 versions,
1085             and was
1086             distributed from 1995 to 2007.
1087             Fsdb 2.0 is a significant re-write of the 1.x versions
1088             for reasons described below.
1089              
1090             Fsdb (in its various forms) has been used extensively by its author
1091             since 1991. Since 1995 it's been used by two other researchers at
1092             UCLA and several at ISI. In February 1998 it was announced to the
1093             Internet. Since then it has found a few users, some outside where I
1094             work.
1095              
1096             =head2 Fsdb 2.0 Rationale
1097              
1098             I've thought about fsdb-2.0 for many years, but it was started
1099             in earnest in 2007. Fsdb-2.0 has the following goals:
1100              
1101             =over 4
1102              
1103             =item in-one-process processing
1104              
1105             While fsdb is great on the Unix command line as a pipeline between
1106             programs, it should I<also> be possible to set it up to run in a single
1107             process. And if it does so, it should be able to avoid serializing
1108             and deserializing (converting to and from text) data between each module.
1109             (Accomplished in fsdb-2.0: see L<dbpipeline>, although still needs tuning.)
1110              
1111             =item clean IO API
1112              
1113             Fsdb's roots go back to perl4 and 1991, so the fsdb-1.x library is
1114             very, very crufty. More than just being ugly (but it was that too),
1115             this made tasks like reading from one file format and writing to another
1116             the application's job, when they should be the library's.
1117             (Accomplished in fsdb-1.15 and improved in 2.0: see L<Fsdb::IO>.)
1118              
1119             =item normalized module APIs
1120              
1121             Because fsdb modules were added as needed over 10 years,
1122             sometimes the module APIs became inconsistent.
1123             (For example, the 1.x C<dbcolcreate> required an empty
1124             value following the name of the new column,
1125             but other programs specify empty values with the C<-e> argument.)
1126             We should smooth over these inconsistencies.
1127             (Accomplished as each module was ported in 2.0 through 2.7.)
1128              
1129             =item everyone handles all input formats
1130              
1131             Given a clean IO API, the distinction between "colized"
1132             and "listized" fsdb files should go away. Any program
1133             should be able to read and write files in any format.
1134             (Accomplished in fsdb-2.1.)
1135              
1136             =back
1137              
1138             Fsdb-2.0 preserves backwards compatibility where possible,
1139             but breaks it where necessary to accomplish the above goals.
1140             In August 2008, Fsdb-2.7 was declared preferred over the 1.x versions.
1141             Benchmarking in 2013 showed that threading performed much worse than
1142             just using pipes, so Fsdb-2.44 uses threading "style",
1143             but implemented with processes (via my "Freds" library).
1144              
1145             =head2 Contributors
1146              
1147             Fsdb includes code ported from Geoff Kuenning (C).
1148              
1149             Fsdb contributors:
1150             Ashvin Goel F,
1151             Geoff Kuenning F,
1152             Vikram Visweswariah F,
1153             Kannan Varadhan F,
1154             Lars Eggert F,
1155             Arkadi Gelfond F,
1156             David Graff F,
1157             Haobo Yu F,
1158             Pavlin Radoslavov F,
1159             Graham Phillips,
1160             Yuri Pradkin,
1161             Alefiya Hussain,
1162             Ya Xu,
1163             Michael Schwendt,
1164             Fabio Silva F,
1165             Jerry Zhao F,
1166             Ning Xu F,
1167             Martin Lukac F,
1168             Xue Cai,
1169             Michael McQuaid,
1170             Christopher Meng,
1171             Calvin Ardi,
1172             H. Merijn Brand,
1173             Lan Wei.
1174              
1175             Fsdb includes datasets contributed from NIST (F),
1176             from
1177             L,
1178             the NIST/SEMATECH e-Handbook of Statistical Methods, section
1179             1.4.2.8.1. Background and Data. The source is public domain, and
1180             reproduced with permission.
1181              
1182              
1183              
1184              
1185             =head1 RELATED WORK
1186              
1187             As stated in the introduction, Fsdb is an incompatible reimplementation
1188             of the ideas found in C</rdb>. By storing data in simple text files and
1189             processing it with pipelines it is easy to experiment (in the shell)
1190             and look at the output. The original implementation of this idea was
1191             /rdb, a commercial product described in the book I<UNIX relational
1192             database management: application development in the UNIX environment>
1193             by Rod Manis, Evan Schaffer, and Robert Jorgensen (and also at the web
1194             page L).
1195              
1196             While Fsdb is inspired by Rdb, it includes no code from it,
1197             and Fsdb makes several different design choices.
1198             In particular: rdb attempts to be closer to a "real" database,
1199             with provisions for locking and file indexing.
1200             Fsdb focuses on single user use and so eschews these choices.
1201             Rdb also has some support for interactive editing.
1202             Fsdb leaves editing to text editors like emacs or vi.
1203              
1204             In August, 2002 I found out Carlo Strozzi extended RDB with his
1205             package NoSQL L. According to
1206             Mr. Strozzi, he implemented NoSQL in awk to avoid the Perl start-up of
1207             RDB. Although I haven't found Perl startup overhead to be a big
1208             problem on my platforms (from old Sparcstation IPCs to 2GHz
1209             Pentium-4s), you may want to evaluate his system.
1210             The Linux Journal has a description of NoSQL
1211             at L.
1212             It seems quite similar to Fsdb.
1213             Like /rdb, NoSQL supports indexing (not present in Fsdb).
1214             Fsdb appears to have richer support for statistics,
1215             and, as of Fsdb-2.x, its support for Perl threading may support
1216             faster performance (one-process, less serialization and deserialization).
1217              
1218              
1219             =head1 RELEASE NOTES
1220              
1221             Versions prior to 1.0 were released informally on my web page
1222             but were not announced.
1223              
1224             =head2 0.0 1991
1225              
1226             started for my own research use
1227              
1228             =head2 0.1 26-May-94
1229              
1230             first check-in to RCS
1231              
1232             =head2 0.2 15-Mar-95
1233              
1234             parts now require perl5
1235              
1236             =head2 1.0, 22-Jul-97
1237              
1238             adds autoconf support and a test script.
1239              
1240             =head2 1.1, 20-Jan-98
1241              
1242             support for double space field separators, better tests
1243              
1244             =head2 1.2, 11-Feb-98
1245              
1246             minor changes and release on comp.lang.perl.announce
1247              
1248             =head2 1.3, 17-Mar-98
1249              
1250             =over 4
1251              
1252             =item *
1253             adds median and quartile options to dbstats
1254              
1255              
1256             =item *
1257              
1258             adds dmalloc_to_db converter
1259              
1260              
1261             =item *
1262              
1263             fixes some warnings
1264              
1265              
1266             =item *
1267              
1268             dbjoin now can run on unsorted input
1269              
1270              
1271             =item *
1272              
1273             fixes a dbjoin bug
1274              
1275              
1276             =item *
1277              
1278             some more tests in the test suite
1279              
1280             =back
1281              
1282             =head2 1.4, 27-Mar-98
1283              
1284             =over 4
1285              
1286             =item *
1287              
1288             improves error messages (all should now report the program that makes the error)
1289              
1290             =item *
1291              
1292             fixed a bug in dbstats output when the mean is zero
1293              
1294             =back
1295              
1296             =head2 1.5, 25-Jun-98
1297              
1298             =over 4
1299              
1300             =item BUG FIX
1301             dbcolhisto, dbcolpercentile now handle non-numeric values like dbstats
1302              
1303             =item NEW
1304             dbcolstats computes zscores and tscores over a column
1305              
1306             =item NEW
1307             dbcolscorrelate computes correlation coefficients between two columns
1308              
1309             =item INTERNAL
1310             ficus_getopt.pl has been replaced by DbGetopt.pm
1311              
1312             =item BUG FIX
1313             all tests are now ``portable'' (previously some tests ran only on my system)
1314              
1315             =item BUG FIX
1316             you no longer need to have the db programs in your path (fix arose from a discussion with Arkadi Gelfond)
1317              
1318             =item BUG FIX
1319             installation no longer uses cp -f (to work on SunOS 4)
1320              
1321             =back
1322              
1323             =head2 1.6, 24-May-99
1324              
1325             =over 4
1326              
1327             =item NEW
1328             dbsort, dbstats, dbmultistats now run in constant memory (using tmp files if necessary)
1329              
1330             =item NEW
1331             dbcolmovingstats does moving means over a series of data
1332              
1333             =item NEW
1334             dbcol has a -v option to get all columns except those listed
1335              
1336             =item NEW
1337             dbmultistats does quartiles and medians
1338              
1339             =item NEW
1340             dbstripextraheaders now also cleans up bogus comments before the first header
1341              
1342             =item BUG FIX
1343             dbcolneaten works better with double-space-separated data
1344              
1345             =back
1346              
1347             =head2 1.7, 5-Jan-00
1348              
1349             =over 4
1350              
1351             =item NEW
1352             dbcolize now detects and rejects lines that contain embedded copies of the field separator
1353              
1354             =item NEW
1355             configure tries harder to prevent people from improperly configuring/installing fsdb
1356              
1357             =item NEW
1358             tcpdump_to_db converter (incomplete)
1359              
1360             =item NEW
1361             tabdelim_to_db converter: from spreadsheet tab-delimited files to db
1362              
1363             =item NEW
1364             mailing lists for fsdb are C and C
1365              
1366             To subscribe to either, send mail to C or C with "subscribe" in the BODY of the message.
1367              
1368             =item BUG FIX
1369             dbjoin used to produce incorrect output if there were extra, unmatched values in the 2nd table. Thanks to Graham Phillips for providing a test case.
1370              
1371             =item BUG FIX
1372             the sample commands in the usage strings now all should explicitly include the source of data (typically from "cat foo.fsdb |"). Thanks to Ya Xu for pointing out this documentation deficiency.
1373              
1374             =item BUG FIX (DOCUMENTATION)
1375             dbcolmovingstats had incorrect sample output.
1376              
1377             =back
1378              
1379             =head2 1.8, 28-Jun-00
1380              
1381             =over 4
1382              
1383             =item BUG FIX
1384             header options are now preserved when writing with dblistize
1385              
1386             =item NEW
1387             dbrowuniq now optionally checks for uniqueness only on certain fields
1388              
1389             =item NEW
1390             dbrowsplituniq makes one pass through a file and splits it into separate files based on the given fields
1391              
1392             =item NEW
1393             converter for "crl" format network traces
1394              
1395             =item NEW
1396             anywhere you use arbitrary code (like dbroweval), _last_foo now maps to the last row's value for field _foo.
1397              
1398             =item OPTIMIZATION
1399             comment processing slightly changed so that dbmultistats now is much faster on files with lots of comments (for example, ~100k lines of comments and 700 lines of data!) (Thanks to Graham Phillips for pointing out this performance problem.)
1400              
1401             =item BUG FIX
1402             dbstats with median/quartiles now correctly handles singleton data points.
1403              
1404             =back
1405              
1406             =head2 1.9, 6-Nov-00
1407              
1408             =over 4
1409              
1410             =item NEW
1411             dbfilesplit, split a single input file into multiple output files (based on code contributed by Pavlin Radoslavov).
1412              
1413             =item BUG FIX
1414             dbsort now works with perl-5.6
1415              
1416             =back
1417              
1418             =head2 1.10, 10-Apr-01
1419              
1420             =over 4
1421              
1422             =item BUG FIX
1423             dbstats now handles the case where there are more n-tiles than data
1424              
1425             =item NEW
1426             dbstats now includes a -S option to optimize work on pre-sorted data (inspired by code contributed by Haobo Yu)
1427              
1428             =item BUG FIX
1429             dbsort now has a better estimate of memory usage when run on data with very short records (problem detected by Haobo Yu)
1430              
1431             =item BUG FIX
1432             cleanup of temporary files is slightly better
1433              
1434             =back
1435              
1436             =head2 1.11, 2-Nov-01
1437              
1438             =over 4
1439              
1440             =item BUG FIX
1441             dbcolneaten now runs in constant memory
1442              
1443             =item NEW
1444             dbcolneaten now supports "field specifiers" that allow some control over how wide columns should be
1445              
1446             =item OPTIMIZATION
1447             dbsort now tries hard to be filesystem cache-friendly (inspired by "Information and Control in Gray-box Systems" by the Arpaci-Dusseau's at SOSP 2001)
1448              
1449             =item INTERNAL
1450             t_distr now ported to perl5 module DbTDistr
1451              
1452             =back
1453              
1454             =head2 1.12, 30-Oct-02
1455              
1456             =over 4
1457              
1458             =item BUG FIX
1459             dbmultistats documentation typo fixed
1460              
1461             =item NEW
1462             dbcolmultiscale
1463              
1464             =item NEW
1465             dbcol has -r option for "relaxed error checking"
1466              
1467             =item NEW
1468             dbcolneaten has new -e option to strip end-of-line spaces
1469              
1470             =item NEW
1471             dbrow finally has a -v option to negate the test
1472              
1473             =item BUG FIX
1474             math bug in dbcoldiff fixed by Ashvin Goel (need to check Scheaffer test cases)
1475              
1476             =item BUG FIX
1477             some patches to run with Perl 5.8. Note: some programs (dbcolmultiscale, dbmultistats, dbrowsplituniq) generate warnings like: "Use of uninitialized value in concatenation (.)" or "string at /usr/lib/perl5/5.8.0/FileCache.pm line 98, line 2". Please ignore this until I figure out how to suppress it. (Thanks to Jerry Zhao for noticing perl-5.8 problems.)
1478              
1479             =item BUG FIX
1480             fixed an autoconf problem where configure would fail to find a reasonable prefix (thanks to Fabio Silva for reporting the problem)
1481              
1482             =item NEW
1483             db_to_html_table: simple conversion to html tables (NO fancy stuff)
1484              
1485             =item NEW
1486             dblib now has a function dblib_text2html() that will do simple conversion of iso-8859-1 to HTML
1487              
1488             =back
1489              
1490              
1491             =head2 1.13, 4-Feb-04
1492              
1493              
1494             =over 4
1495              
1496             =item NEW
1497             fsdb added to the freebsd ports tree L. Maintainer: C
1498              
1499             =item BUG FIX
1500             properly handle trailing spaces when data must be numeric (ex. dbstats with -FS, see test dbstats_trailing_spaces). Fix from Ning Xu C.
1501              
1502             =item NEW
1503             dbcolize error message improved (bug report from Terrence Brannon), and list format documented in the README.
1504              
1505             =item NEW
1506             cgi_to_db converts CGI.pm-format storage to fsdb list format
1507              
1508             =item BUG FIX
1509             handle numeric synonyms for column names in dbcol properly
1510              
1511             =item ENHANCEMENT
1512             "talking about columns" section added to README. Lack of documentation pointed out by Lars Eggert.
1513              
1514             =item CHANGE
1515             dbformmail now defaults to using Mail ("Berkeley Mail") to send mail, rather than sendmail (sendmail is still an option, but mail doesn't require running as root)
1516              
1517             =item NEW
1518             on platforms that support it (i.e., with perl 5.8), fsdb works fine with unicode
1519              
1520             =item NEW
1521             dbfilevalidate: check a db file for some common errors
1522              
1523             =back
1524              
1525              
1526             =head2 1.14, 24-Aug-06
1527              
1528             =over 4
1529              
1530              
1531             =item ENHANCEMENT
1532             README cleanup
1533              
1534             =item INCOMPATIBLE CHANGE
1535             dbcolsplit renamed dbcolsplittocols
1536              
1537             =item NEW
1538             dbcolsplittorows split one column into multiple rows
1539              
1540             =item NEW
1541             dbcolsregression compute linear regression and correlation for two columns
1542              
1543             =item ENHANCEMENT
1544             csv_to_db: better error handling, normalize field names, skip blank lines
1545              
1546             =item ENHANCEMENT
1547             dbjoin now detects (and fails) if non-joined files have duplicate names
1548              
1549             =item BUG FIX
1550             minor bug fixed in calculation of Student t-distributions (doesn't change any test output, but may have caused small errors)
1551              
1552             =back
1553              
1554             =head2 1.15, 12-Nov-07
1555              
1556             =over 4
1557              
1558             =item NEW
1559             fsdb-1.14 added to the MacOS Fink system L. (Thanks to Lars Eggert for maintaining this port.)
1560              
1561             =item NEW
1562             Fsdb::IO::Reader and Fsdb::IO::Writer now provide reasonably clean OO I/O interfaces to Fsdb files. Highly recommended if you use fsdb directly from perl. In the fullness of time I expect to reimplement the entire thing using these APIs to replace the current dblib.pl which is still hobbled by its roots in perl4.
1563              
1564             =item NEW
1565             dbmapreduce now implements a Google-style map/reduce abstraction, generalizing dbmultistats.
1566              
1567             =item ENHANCEMENT
1568             fsdb now uses the Perl build system (Makefile.PL, etc.), instead of autoconf. This change paves the way to better perl-5-style modularization, proper manual pages, input of both listize and colize format for every program, and world peace.
1569              
1570             =item ENHANCEMENT
1571             dblib.pl is now moved to Fsdb::Old.pm.
1572              
1573             =item BUG FIX
1574             dbmultistats now propagates its format argument (-f). Bug and fix from Martin Lukac (thanks!).
1575              
1576             =item ENHANCEMENT
1577             dbformmail documentation now is clearer that it doesn't send the mail, you have to run the shell script it writes. (Problem observed by Unkyu Park.)
1578              
1579             =item ENHANCEMENT
1580             adapted to autoconf-2.61 (and then these changes were discarded in favor of The Perl Way).
1581              
1582             =item BUG FIX
1583             dbmultistats memory usage corrected (O(# tags), not O(1))
1584              
1585             =item ENHANCEMENT
1586             dbmultistats can now optionally run with pre-grouped input in O(1) memory
1587              
1588             =item ENHANCEMENT
1589             dbroweval -N was finally implemented (eat comments)
1590              
1591             =back
1592              
1593             =head2 2.0, 25-Jan-08
1594              
1595             2.0, 25-Jan-08 --- a quiet 2.0 release (gearing up towards complete)
1596              
1597             =over 4
1598              
1599             =item ENHANCEMENT:
1600             shifting old programs to Perl modules, with
1601             the front-end program as just a wrapper.
1602             In the short-term, this change just means programs have real man pages.
1603             In the long-run, it will mean that one can run a pipeline in a single
1604             Perl program.
1605             So far:
1606             L,
1607             L,
1608             the new L.
1609             L
1610             the new L,
1611             the old C (renamed L),
1612             L,
1613             L,
1614              
1615             =item NEW:
1616             L is an internal-only module that lets one
1617             use fsdb commands from within perl (via threads).
1618              
1619             It also provides perl function aliases for the internal modules,
1620             so a string of fsdb commands in perl are nearly as terse as in the
1621             shell:
1622              
1623             use Fsdb::Filter::dbpipeline qw(:all);
1624             dbpipeline(
1625             dbrow(qw(name test1)),
1626             dbroweval('_test1 += 5;')
1627             );
1628              
1629             =item INCOMPATIBLE CHANGE:
1630             The old L has been renamed L.
1631             The new L does the same thing as the old L.
1632             This incompatibility is unfortunate but normalizes program names.
1633              
1634             =item CHANGE:
1635             The new L program
1636             always outputs C<-> (the default empty value) for
1637             statistics it cannot compute (for example, standard deviation
1638             if there is only one row),
1639             instead of the old mix of C<-> and "na".
1640              
1641             =item INCOMPATIBLE CHANGE:
1642             The old L program, now called L,
1643             also has different arguments. The C<-t mean,stddev> option is now
1644             C<--tmean mean --tstddev stddev>. See L for details.
1645              
1646             =item INCOMPATIBLE CHANGE:
1647             L now assumes all new columns get the default
1648             value rather than requiring each column to have an initial constant value.
1649             To change the initial value, use the new C<-e> option.
1650              
1651             =item NEW:
1652             L counts rows, an almost-subset of L's C output
1653             (except without differentiating numeric/non-numeric input),
1654             or the equivalent of C.
1655              
1656             =item NEW:
1657             L merges two sorted files.
1658             This functionality was previously embedded in L.
1659              
1660             =item INCOMPATIBLE CHANGE:
1661             L's C<-i> option to include non-matches
1662             is now renamed C<-a>, so as to not conflict with the new
1663             standard option C<-i> for input file.
1664              
1665             =back
1666              
1667             =head2 2.1, 6-Apr-08
1668              
1669             2.1, 6-Apr-08 --- another alpha 2.0, but now all converted programs understand both listize and colize format
1670              
1671             =over 4
1672              
1673             =item ENHANCEMENT:
1674             shifting more old programs to Perl modules.
1675             New in 2.1:
1676             L,
1677             L,
1678             L,
1679             L,
1680             L,
1681             L
1682              
1683             =item ENHANCEMENT
1684             L now handles an arbitrary number of input files,
1685             not just exactly two.
1686              
1687             =item NEW
1688             L is an internal routine that handles merging exactly two files.
1689              
1690             =item INCOMPATIBLE CHANGE
1691             L now specifies inputs like L,
1692             rather than assuming the first two arguments were tables (as in fsdb-1).
1693              
1694             The old L argument C<-i> is now C<-a> or C<--type=outer>.
1695              
1696             A minor change: comments in the source files for
1697             L are now intermixed with output
1698             rather than being delayed until the end.
1699              
1700             =item ENHANCEMENT
1701             L now no longer produces warnings when null values are
1702             passed to numeric comparisons.
1703              
1704             =item BUG FIX
1705             L now once again works with code that lacks a trailing semicolon.
1706             (This bug fixes a regression from 1.15.)
1707              
1708             =item INCOMPATIBLE CHANGE
1709             L's old C<-e> option (to avoid end-of-line spaces) is now C<-E>
1710             to avoid conflicts with the standard empty field argument.
1711              
1712             =item INCOMPATIBLE CHANGE
1713             L's old C<-e> option is now C<-E> to avoid conflicts.
1714             And its C<-n>, C<-s>, and C<-w> are now
1715             C<-N>, C<-S>, and C<-W> to correspond.
1716              
1717             =item NEW
1718             L replaces L, L, and L,
1719             but with different options.
1720              
1721             =item ENHANCEMENT
1722             The library routines C now understand both list-format
1723             and column-format data, so all converted programs can now
1724             I read either format. This capability was one
1725             of the milestone goals for 2.0, so yea!
1726              
1727             =back
1728              
1729             =head2 2.2, 23-May-08
1730              
1731             Release 2.2 is another 2.x alpha release. Now I<most> of the
1732             commands are ported, but a few remain, and I plan one last
1733             incompatible change (to the file header) before 2.x final.
1734              
1735             =over 4
1736              
1737             =item ENHANCEMENT
1738              
1739             shifting more old programs to Perl modules.
1740             New in 2.2:
1741             L,
1742             L.
1743             L.
1744             L.
1745             L.
1746             L.
1747             L.
1748             L.
1749             L.
1750             L.
1751             L.
1752             Also
1753             L
1754             exists only as a front-end (command-line) program.
1755              
1756             =item INCOMPATIBLE CHANGE
1757              
1758             The following programs have been dropped from fsdb-2.x:
1759             L,
1760             L,
1761             L,
1762             L.
1763              
1764             =item NEW
1765              
1766             L to convert Apache logfiles
1767              
1768             =item INCOMPATIBLE CHANGE
1769              
1770             Options to L are now B<-B> and B<-I>,
1771             not B<-a> and B<-i>.
1772              
1773             =item INCOMPATIBLE CHANGE
1774              
1775             L is now L.
1776              
1777             =item BUG FIXES
1778              
1779             L better handles empty columns;
1780             L warning suppressed (actually a bug in high-bucket handling).
1781              
1782             =item INCOMPATIBLE CHANGE
1783              
1784             L now requires a C<-k> option in front of the
1785             key (tag) field, or if none is given, it will group by the first field
1786             (both like L).
1787              
1788             =item KNOWN BUG
1789              
1790             L with quantile option doesn't work currently.
1791              
1792             =item INCOMPATIBLE CHANGE
1793              
1794             L is renamed L.
1795              
1796             =item BUG FIXES
1797              
1798             L was leaving its log message as a command, not a comment.
1799             Oops. No longer.
1800              
1801             =back
1802              
1803             =head2 2.3, 27-May-08 (alpha)
1804              
1805             Another alpha release, this one just to fix the critical dbjoin bug
1806             listed below (that happens to have blocked my MP3 jukebox :-).
1807              
1808             =over 4
1809              
1810             =item BUG FIX
1811              
1812             Dbsort no longer hangs if given an input file with no rows.
1813              
1814             =item BUG FIX
1815              
1816             Dbjoin now works with unsorted input coming from a pipeline (like stdin).
1817             Perl-5.8.8 has a bug (?) that was making this case fail---opening
1818             stdin in one thread, reading some, then reading more in a different
1819             thread caused an lseek which works on files, but fails on pipes like stdin.
1820             Go figure.
1821              
1822             =item BUG FIX / KNOWN BUG
1823              
1824             The dbjoin fix also fixed dbmultistats -q
1825             (it now gives the right answer).
1826             Although a new bug appeared, messages like:
1827             Attempt to free unreferenced scalar: SV 0xa9dd0c4, Perl interpreter: 0xa8350b8 during global destruction.
1828             So the dbmultistats_quartile test is still disabled.
1829              
1830             =back
1831              
1832             =head2 2.4, 18-Jun-08
1833              
1834             Another alpha release, mostly to fix minor usability
1835             problems in dbmapreduce and client functions.
1836              
1837             =over 4
1838              
1839             =item ENHANCEMENT
1840              
1841             L now defaults to running user supplied code without warnings
1842             (as with fsdb-1.x).
1843             Use C<--warnings> or C<-w> to turn them back on.
1844              
1845             =item ENHANCEMENT
1846              
1847             L can now write different format output
1848             than the input, using the C<-m> option.
1849              
1850             =item KNOWN BUG
1851              
1852             L emits warnings on perl 5.10.0
1853             about "Unbalanced string table refcount" and "Scalars leaked"
1854             when run with an external program as a reducer.
1855              
1856             L emits the warning "Attempt to free unreferenced scalar"
1857             when run with quartiles.
1858              
1859             In each case the output is correct.
1860             I believe these can be ignored.
1861              
1862             =item CHANGE
1863              
1864             L no longer logs a line for each reducer that is invoked.
1865              
1866             =back
1867              
1868              
1869             =head2 2.5, 24-Jun-08
1870              
1871             Another alpha release, fixing more minor bugs in
1872             C and lossage in C.
1873              
1874             =over 4
1875              
1876             =item ENHANCEMENT
1877              
1878             L can now tolerate non-map-aware reducers
1879             that pass back the key column in put.
1880             It also passes the current key as the last argument to
1881             external reducers.
1882              
1883             =item BUG FIX
1884              
1885             L, correctly handle C<-header> option again.
1886             (Broken since fsdb-2.3.)
1887              
1888             =back
1889              
1890              
1891             =head2 2.6, 11-Jul-08
1892              
1893             Another alpha release, needed to fix DaGronk.
1894             One new port, small bug fixes, and important fix to L.
1895              
1896             =over 4
1897              
1898             =item ENHANCEMENT
1899              
1900             shifting more old programs to Perl modules.
1901             New in 2.6:
1902             L.
1903              
1904             =item INCOMPATIBLE CHANGE and ENHANCEMENTS
1905             L arguments changed,
1906             use C<--rank> to require ranking instead of C<-r>.
1907             Also, C<--ascending> and C<--descending> can now be specified separately,
1908             both for C<--percentile> and C<--rank>.
1909              
1910             =item BUG FIX
1911              
1912             Sigh, the sense of the --warnings option in L was inverted. No longer.
1913              
1914             =item BUG FIX
1915              
1916             I found and fixed the string leaks (errors like "Unbalanced string
1917             table refcount" and "Scalars leaked") in L and L.
1918             (All Cs in threads must be manually destroyed.)
1919              
1920             =item BUG FIX
1921              
1922             The C<-C> option to specify the column separator in L
1923             now works again (broken since it was ported).
1924              
1925             =back
1926              
1927             =head2 2.7, 30-Jul-08 beta
1928              
1929             The beta release of fsdb-2.x. Finally, all programs are ported.
1930             As statistics, the number of lines of non-library code doubled from
1931             7.5k to 15.5k. The libraries are much more complete,
1932             going from 866 to 5164 lines.
1933             The overall number of programs is about the same,
1934             although 19 were dropped and 11 were added.
1935             The number of test cases has grown from 116 to 175.
1936             All programs are now in perl-5, no more shell scripts or perl-4.
1937             All programs now have manual pages.
1938              
1939             Although this is a major step forward, I still expect
1940             to rename "jdb" to "fsdb".
1941              
1942             =over 4
1943              
1944             =item ENHANCEMENT
1945              
1946             shifting more old programs to Perl modules.
1947             New in 2.7:
1948             L.
1949             L.
1950             L.
1951             L.
1952             L.
1953             L,
1954             L,
1955             L,
1956             L,
1957             L,
1958             L.
1959              
1960             =item INCOMPATIBLE CHANGE
1961              
1962             The following programs have been dropped from fsdb-2.x:
1963             L,
1964             L,
1965             L.
1966             L.
1967             They may come back, but seemed overly specialized.
1968             The following program
1969             L
1970             was dropped because it is superseded by L.
1971             L
1972             was dropped pending test cases and examples.
1973              
1974             =item ENHANCEMENT
1975              
1976             L now has a C<-c> option to correct errors.
1977              
1978             =item NEW
1979              
1980             L provides the inverse of
1981             L.
1982              
1983             =back
1984              
1985              
1986             =head2 2.8, 5-Aug-08
1987              
1988             Change header format, preserving forwards compatibility.
1989              
1990             =over 4
1991              
1992             =item BUG FIX
1993              
1994             Complete editing pass over the manual, making sure it aligns
1995             with fsdb-2.x.
1996              
1997             =item SEMI-COMPATIBLE CHANGE
1998              
1999             The header of fsdb files has changed, it is now #fsdb, not #h (or #L)
2000             and parsing of -F and -R are also different.
2001             See L for the new specification.
2002             The v1 file format will be read, compatibly, but
2003             not written.
2004              
2005             =item BUG FIX
2006              
2007             L now tolerates comments that precede the first key,
2008             instead of failing with an error message.
2009              
2010             =back
2011              
2012              
2013             =head2 2.9, 6-Aug-08
2014              
2015             Still in beta; just a quick bug-fix for L.
2016              
2017             =over 4
2018              
2019             =item ENHANCEMENT
2020              
2021             L now generates plausible output when given no rows
2022             of input.
2023              
2024             =back
2025              
2026             =head2 2.10, 23-Sep-08
2027              
2028             Still in beta, but picking up some bug fixes.
2029              
2030             =over 4
2031              
2032             =item ENHANCEMENT
2033              
2034             L now generates plausible output when given no rows
2035             of input.
2036              
2037             =item ENHANCEMENT
2038              
2039             L the warnings option was backwards;
2040             now corrected. As a result, warnings in user code now default off
2041             (like in fsdb-1.x).
2042              
2043             =item BUG FIX
2044              
2045             L now defaults to assuming the target column is numeric.
2046             The new option C<-N> allows selection of a non-numeric target.
2047              
2048             =item BUG FIX
2049              
2050             L now includes C<--sample> and C<--nosample> options
2051             to compute the sample or full population correlation coefficients.
2052             Thanks to Xue Cai for finding this bug.
2053              
2054             =back
2055              
2056              
2057             =head2 2.11, 14-Oct-08
2058              
2059             Still in beta, but picking up some bug fixes.
2060              
2061             =over 4
2062              
2063             =item ENHANCEMENT
2064              
2065             L is now more aggressive about filling in empty cells
2066             with the official empty value, rather than leaving them blank or as whitespace.
2067              
2068             =item ENHANCEMENT
2069              
2070             L now catches failures during pipeline element setup
2071             and exits reasonably gracefully.
2072              
2073             =item BUG FIX
2074              
2075             L now reaps child processes, thus avoiding
2076             running out of processes when used a lot.
2077              
2078             =back
2079              
2080             =head2 2.12, 16-Oct-08
2081              
2082             Finally, a full (non-beta) 2.x release!
2083              
2084             =over 4
2085              
2086             =item INCOMPATIBLE CHANGE
2087              
2088             Jdb has been renamed Fsdb, the flatfile-streaming database.
2089             This change affects all internal Perl APIs,
2090             but no shell command-level APIs.
2091             While Jdb served well for more than ten years,
2092             it is easily confused with the Java debugger (even though Jdb was there first!).
2093             It also is too generic to work well in web search engines.
2094             Finally, Jdb stands for ``John's database'', and we're a bit beyond that.
2095             (However, some call me the ``file-system guy'', so
2096             one could argue it retains that meaning.)
2097              
2098             If you just used the shell commands, this change should not affect you.
2099             If you used the Perl-level libraries directly in your code,
2100             you should be able to rename "Jdb" to "Fsdb" to move to 2.12.
2101              
2102             The jdb-announce list has not yet been renamed, but it will be shortly.
2103              
2104             With this release I've accomplished everything I wanted to
2105             in fsdb-2.x. I therefore expect to return to boring, bugfix releases.
2106              
2107             =back
2108              
2109             =head2 2.13, 30-Oct-08
2110              
2111             =over 4
2112              
2113             =item BUG FIX
2114              
2115             L now treats non-numeric data as zero by default.
2116              
2117             =item BUG FIX
2118              
2119             Fixed a perl-5.10ism in L that
2120             breaks that program under 5.8.
2121             Thanks to Martin Lukac for reporting the bug.
2122              
2123             =back
2124              
2125             =head2 2.14, 26-Nov-08
2126              
2127             =over 4
2128              
2129             =item BUG FIX
2130              
2131             Improved documentation for L's C<-f> option.
2132              
2133             =item ENHANCEMENT
2134              
2135             L now computes a moving standard deviation in addition
2136             to a moving mean.
2137              
2138             =back
2139              
2140              
2141             =head2 2.15, 13-Apr-09
2142              
2143             =over 4
2144              
2145             =item BUG FIX
2146              
2147             Fix a F bug reported by Shalindra Fernando.
2148              
2149             =back
2150              
2151              
2152             =head2 2.16, 14-Apr-09
2153              
2154             =over 4
2155              
2156             =item BUG FIX
2157              
2158             Another minor release bug: on some systems F loses
2159             executable permissions. Again reported by Shalindra Fernando.
2160              
2161             =back
2162              
2163             =head2 2.17, 25-Jun-09
2164              
2165             =over 4
2166              
2167             =item TYPO FIXES
2168              
2169             Typo in the F manual fixed.
2170              
2171             =item IMPROVEMENT
2172              
2173             There is no longer a comment line to label columns
2174             in F, instead the header line is tweaked to
2175             line up. This change restores the Jdb-1.x behavior, and
2176             means that repeated runs of dbcolneaten no longer add comment lines
2177             each time.
2178              
2179             =item BUG FIX
2180              
2181             It turns out F was not correctly handling trailing spaces
2182             when given the C<-E> option to suppress them. This regression is now
2183             fixed.
2184              
2185             =item EXTENSION
2186              
2187             L can now handle direct references to the last row
2188             via F<$lfref>, a dubious but now documented feature.
2189              
2190             =item BUG FIXES
2191              
2192             Separators set with C<-C> in F and F
2193             were not properly
2194             setting the heading, and null fields were not recognized.
2195             The first bug was reported by Martin Lukac.
2196              
2197             =back
2198              
2199             =head2 2.18, 1-Jul-09 A minor release
2200              
2201             =over 4
2202              
2203             =item IMPROVEMENT
2204              
2205             Documentation for F has been improved.
2206              
2207             =item IMPROVEMENT
2208              
2209             The package should now be PGP-signed.
2210              
2211             =back
2212              
2213              
2214             =head2 2.19, 10-Jul-09
2215              
2216             =over 4
2217              
2218             =item BUG FIX
2219              
2220             Internal improvements to debugging output and robustness of
2221             F and F.
2222             F re-enabled.
2223              
2224             =back
2225              
2226              
2227             =head2 2.20, 30-Nov-09
2228             (A collection of minor bugfixes, plus a build against Fedora 12.)
2229              
2230             =over 4
2231              
2232             =item BUG FIX
2233              
2234             Logging for
2235             F
2236             with code refs is now stable
2237             (it no longer includes a hex pointer to the code reference).
2238              
2239             =item BUG FIX
2240              
2241             Better handling of mixed blank lines in F
2242             (see test case F).
2243              
2244             =item BUG FIX
2245              
2246             F now handles multi-line input better,
2247             and handles tables with COLSPAN.
2248              
2249             =item BUG FIX
2250              
2251             F now cleans up threads in an C
2252             to prevent "cannot detach a joined thread" errors that popped
2253             up in perl-5.10. Hopefully this prevents a race condition
2254             that causes the test suites to hang about 20% of the time
2255             (in F).
2256              
2257             =item IMPROVEMENT
2258              
2259             F now detects and correctly fails
2260             when the input and reducer have incompatible
2261             field separators.
2262              
2263             =item IMPROVEMENT
2264              
2265             F, F, F, F,
2266             and F
2267             now all take an C<-F> option to let one specify the output field separator
2268             (so they work better with F).
2269              
2270             =item BUG FIX
2271              
2272             An omitted C<-k> from the manual page of F
2273             is now there. Bug reported by Unkyu Park.
2274              
2275             =back
2276              
2277              
2278             =head2 2.21, 17-Apr-10
2279             bug fix release
2280              
2281             =over 4
2282              
2283             =item BUG FIX
2284              
2285             F now no longer fails with -outputheader => never
2286             (an obscure bug).
2287              
2288             =item IMPROVEMENT
2289              
2290             F (in the warnings section)
2291             and F now more carefully document how they
2292             handle (and do not handle) numerical precision problems,
2293             and other general limits. Thanks to Yuri Pradkin for prompting
2294             this documentation.
2295              
2296             =item IMPROVEMENT
2297              
2298             C
2299             is now restored from C.
2300              
2301             =item IMPROVEMENT
2302              
2303             Documentation for multiple styles of input approaches
2304             (including performance description) added to L.
2305              
2306             =back
2307              
2308             =head2 2.22, 2010-10-31
2309             One new tool F and several bug fixes for Perl 5.10.
2310              
2311             =over 4
2312              
2313             =item BUG FIX
2314              
2315             F now correctly handles n-way merges.
2316             Bug reported by Yuri Pradkin.
2317              
2318             =item INCOMPATIBLE CHANGE
2319              
2320             F now defaults to I padding the last column.
2321              
2322             =item ADDITION
2323              
2324             F now takes B<-N NewColumn> to give the new
2325             column a name other than "count". Feature requested by Mike Rouch
2326             in January 2005.
2327              
2328             =item ADDITION
2329              
2330             New program F copies the last value of a column
2331             into a new column copylast_column of the next row.
2332             New program requested by Fabio Silva;
2333             useful for converting dbmultistats output into dbrvstatdiff input.
2334              
2335             =item BUG FIX
2336              
2337             Several tools (particularly F and F) would
2338             report errors like "Unbalanced string table refcount: (1) for "STDOUT"
2339             during global destruction" on exit, at least on certain versions
2340             of Perl (for me on 5.10.1), but similar errors have been off-and-on
2341             for several Perl releases. Although I think my code looked
2342             OK, I worked around this problem with a different way of handling
2343             standard IO redirection.
2344              
2345             =back
2346              
2347              
2348             =head2 2.23, 2011-03-10
2349             Several small portability bugfixes; improved F for large datasets
2350              
2351             =over 4
2352              
2353             =item IMPROVEMENT
2354              
2355             Documentation to F was changed to use "sd" to refer to
2356             standard deviation, not "ss" (which might be confused with sum-of-squares).
2357              
2358             =item BUG FIX
2359              
2360             This documentation about F was missing the F<-k> option
2361             in some cases.
2362              
2363             =item BUG FIX
2364              
2365             F was failing on MacOS-10.6.3 for some tests with
2366             the error
2367              
2368             dbmapreduce: cannot run external dbmapreduce reduce program (perl TEST/dbmapreduce_external_with_key.pl)
2369              
2370             The problem seemed to be only in the error, not in operation.
2371             On MacOS, the error is now suppressed.
2372             Thanks to Alefiya Hussain for providing access to a Mac system
2373             that allowed debugging of this problem.
2374              
2375             =item IMPROVEMENT
2376              
2377             The F command requires an external
2378             Perl library (F). On computers that
2379             lack this optional library, previously Fsdb would configure
2380             with a warning and then test cases would fail.
2381             Now those test cases are skipped with an additional warning.
2382              
2383             =item BUG FIX
2384              
2385             The test suite now supports alternative valid output, as a hack
2386             to account for last-digit floating point differences.
2387             (Not very satisfying :-(
2388              
2389             =item BUG FIX
2390              
2391             F output for confidence intervals on very large
2392             datasets has changed. Previously it failed for more than 2^31-1
2393             records, and handling of T-Distributions with thousands of rows
2394             was a bit dubious. Now datasets with more than 10000 records are considered
2395             infinitely large and hopefully correctly handled.
2396              
2397             =back
2398              
2399             =head2 2.24, 2011-04-15
2400             Improvements to fix an old bug in dbmapreduce with different field separators
2401              
2402             =over 4
2403              
2404             =item IMPROVEMENT
2405              
2406             The F command had a C<--correct> option to
2407             work around incompatible field-separators,
2408             but it did nothing. Now it does the correct but sad, data-losing
2409             thing.
2410              
2411             =item IMPROVEMENT
2412              
2413             The F command
2414             previously failed with an error message when invoked
2415             on input with a non-default field separator.
2416             The root cause was the underlying F
2417             that did not handle the case of reducers that generated
2418             output with a different field separator than the input.
2419             We now detect and repair incompatible field separators.
2420             This change corrects a problem originally documented and detected
2421             in Fsdb-2.20.
2422             Bug re-reported by Unkyu Park.
2423              
2424             =back
2425              
2426             =head2 2.25, 2011-08-07
2427             Two new tools, F and F, and a bugfix for two people.
2428              
2429             =over 4
2430              
2431             =item IMPROVEMENT
2432              
2433             F now supports a F<--utc> option,
2434             which also fixes this test case for users outside of the Pacific
2435             time zone. Bug reported by David Graff, and also by Peter Desnoyers
2436             (within a week of each other :-)
2437              
2438             =item NEW
2439              
2440             F can convert simple, very regular XML files into Fsdb.
2441              
2442             =item NEW
2443              
2444             F "pivots" a file, converting multiple rows
2445             corresponding to the same entity into a single row with multiple columns.
2446              
2447             =back
2448              
2449             =head2 2.26, 2011-12-12
2450             Bug fixes, particularly for perl-5.14.2.
2451              
2452             =over 4
2453              
2454             =item BUG FIX
2455              
2456             Bugs fixed in L manual page.
2457              
2458             =item BUG FIX
2459              
2460             Fixed problems where L was truncating floating point numbers
2461             when sorting. This strange behavior happens as of perl-5.14.2 and
2462             it I<seems> like a Perl bug. I've worked around it for the test suites,
2463             but I'm a bit nervous.
2464              
2465             =back
2466              
2467             =head2 2.27, 2012-11-15
2468             Accumulated bug fixes.
2469              
2470             =over 4
2471              
2472             =item IMPROVEMENT
2473              
2474             F now reports errors in CSV input with real diagnostics.
2475              
2476             =item IMPROVEMENT
2477              
2478             F can now compute median, when given the C<-m> option.
2479              
2480             =item BUG FIX
2481              
2482             F non-numeric handling (the C<-a> option) now works properly.
2483              
2484             =item DOCUMENTATION
2485              
2486             The internal
2487             F test framework
2488             is now documented.
2489              
2490             =item BUG FIX
2491              
2492             F now correctly handles the case where there is no input
2493             (previously it output a blank line, which is a malformed fsdb file).
2494             Thanks to Yuri Pradkin for reporting this bug.
2495              
2496             =back
2497              
2498             =head2 2.28, 2012-11-15
2499             A quick release to fix most rpmlint errors.
2500              
2501             =over 4
2502              
2503             =item BUG FIX
2504              
2505             Fixed a number of minor release problems (wrong permissions, old FSF
2506             address, etc.) found by rpmlint.
2507              
2508             =back
2509              
2510             =head2 2.29, 2012-11-20
2511             a quick release for CPAN testing
2512              
2513             =over 4
2514              
2515             =item IMPROVEMENT
2516              
2517             Tweaked the RPM spec.
2518              
2519             =item IMPROVEMENT
2520              
2521             Modified F to fail gracefully on Perl installations
2522             that lack threads. (Without this fix, I get massive failures
2523             in the non-ithreads test system.)
2524              
2525             =back
2526              
2527             =head2 2.30, 2012-11-25
2528             improvements to perl portability
2529              
2530             =over 4
2531              
2532             =item BUG FIX
2533              
2534             Removed unicode character in documentation of F
2535             so pod tests will pass. (Sigh, that should work :-( )
2536              
2537             =item BUG FIX
2538              
2539             Fixed test suite failures on 5 tests (F
2540             was the first) due to L's addition of a period.
2541             This problem was breaking Fsdb on perl-5.17.
2542             Thanks to Michael McQuaid for helping diagnose this problem.
2543              
2544             =item IMPROVEMENT
2545              
2546             The test suite now prints out the names of tests it tries.
2547              
2548             =back
2549              
2550             =head2 2.31, 2012-11-28
2551             A release with actual improvements to dbfilepivot and dbrowuniq.
2552              
2553             =over 4
2554              
2555             =item BUG FIX
2556              
2557             Documentation fixes: typos in L,
2558             bugs in L,
2559             clarification for comment handling in L.
2560              
2561             =item IMPROVEMENT
2562              
2563             Previously L assumed the input was grouped by keys
2564             and didn't verify that pre-condition.
2565             Now there is no pre-condition (it will sort the input by default),
2566             and it checks if the invariant is violated.
2567              
2568             =item BUG FIX
2569              
2570             Previously L failed if the input had comments (oops :-);
2571             no longer.
2572              
2573             =item IMPROVEMENT
2574              
2575             Now L has the C<-L> option to preserve the last
2576             unique row (instead of the first), a common idiom.
2577              
2578             =back
2579              
2580             =head2 2.32, 2012-12-21
2581             Test suites should now be more numerically robust.
2582              
2583             =over 4
2584              
2585             =item NEW
2586              
2587             New L does fsdb-aware file differencing.
2588             It does not do smart intuition of add/removes like Unix diff(1),
2589             but it does know about columns, and with C<-E>, it does
2590             numeric-aware differences.
2591              
2592             =item IMPROVEMENT
2593              
2594             Test suites that are numeric now use L to do numeric-aware
2595             comparisons, so the test suite should now be robust to slightly different
2596             computers and operating systems and compilers than what I use.
2597              
2598             =back
2599              
2600             =head2 2.33, 2012-12-23
2601             Minor fixes to some test cases.
2602              
2603             =over 4
2604              
2605             =item IMPROVEMENT
2606              
2607             L and L
2608             now support the C<-N> option to give the new column a
2609             different name. (And test cases where this duplication mattered
2610             have been fixed.)
2611              
2612             =item IMPROVEMENT
2613              
2614             L now shows the t-test breakpoint with a reasonable number of
2615             floating point digits.
2616              
2617             =item BUG FIX
2618              
2619             Fixed a numerical stability problem in the F test case.
2620              
2621             =back
2622              
2623             =head1 WHAT'S NEW
2624              
2625             =head2 2.34, 2013-02-10
2626             Parallelism in L.
2627              
2628             =over 4
2629              
2630             =item IMPROVEMENT
2631              
2632             Documentation for L now includes resource requirements.
2633              
2634             =item IMPROVEMENT
2635              
2636             Default memory usage for L is now about 256MB.
2637             (The world keeps moving forward.)
2638              
2639             =item IMPROVEMENT
2640              
2641             L now does merging in parallel.
2642             As a side-effect, L should be faster when
2643             input overflows memory. The level of parallelism
2644             can be limited with the C<--parallelism> option.
2645             (There is more work to do here, but we're off to a start.)
2646              
2647             =back
2648              
2649             =head2 2.35, 2013-02-23
2650             Improvements to dbmerge parallelism
2651              
2652             =over 4
2653              
2654             =item BUG FIX
2655              
2656             Fsdb temporary files are now created more securely (with File::Temp).
2657              
2658             =item IMPROVEMENT
2659              
2660             Programs that sort or merge on fields (L, L, L,
2661             L) now report an error if no fields on which to join or merge
2662             are given.
2663              
2664             =item IMPROVEMENT
2665              
2666             Parallelism in L should now be more consistent,
2667             with less starting and stopping.
2668              
2669             =item IMPROVEMENT
2670             In L, the C<--xargs> option lets one give input filenames on
2671             standard input, rather than the command line.
2672             This feature paves the way for faster dbsort for large inputs
2673             (by pipelining sorting and merging), expected in the next release.
2674              
2675             =back
2676              
2677              
2678             =head2 2.36, 2013-02-25
2679             dbsort pipelines with dbmerge
2680              
2681             =over 4
2682              
2683             =item IMPROVEMENT
2684             For large inputs,
2685             L now pipelines sorting and merging,
2686             allowing earlier processing.
2687              
2688             =item BUG FIX
2689             Since 2.35, L delayed cleanup of intermediate files,
2690             thereby requiring extra disk space.
2691              
2692             =back
2693              
2694             =head2 2.37, 2013-02-26
2695             quick bugfix to support parallel sort and merge from recent releases
2696              
2697             =over 4
2698              
2699             =item BUG FIX
2700             Since 2.35, L delayed removal of input files given by
2701             C<--xargs>. This problem is now fixed.
2702              
2703             =back
2704              
2705              
2706             =head2 2.38, 2013-04-29
2707             minor bug fixes
2708              
2709             =over 4
2710              
2711             =item CLARIFICATION
2712              
2713             Configure now rejects Windows since tests seem to hang
2714             on some versions of Windows.
2715             (I would love help from a Windows developer to get this problem fixed,
2716             but I cannot do it.) See F.
2717              
2718             =item IMPROVEMENT
2719              
2720             All programs that use temporary files
2721             (L, L, L, L)
2722             now take the C<-T> option
2723             and set the temporary directory consistently.
2724              
2725             In addition, error messages are better when the temporary directory
2726             has problems. Problem reported by Liang Zhu.
2727              
2728             =item BUG FIX
2729              
2730             L was failing with external, map-reduce aware reducers
2731             (when invoked with -M and an external program).
2732             (Sigh, did this case ever work?)
2733             This case should now work.
2734             Thanks to Yuri Pradkin for reporting this bug (in 2011).
2735              
2736             =item BUG FIX
2737              
2738             Fixed perl-5.10 problem with L.
2739             Thanks to Yuri Pradkin for reporting this bug (in 2013).
2740              
2741             =back
2742              
2743             =head2 2.39, 2013-05-31
2744             quick release for the dbrowuniq extension
2745              
2746             =over 4
2747              
2748             =item BUG FIX
2749              
2750             Actually in 2.38, the Fedora F<.spec> got cleaner dependencies.
2751             Suggestion from Christopher Meng via L.
2752              
2753             =item ENHANCEMENT
2754              
2755             Fsdb files are now explicitly set into UTF-8 encoding,
2756             unless one specifies C<-encoding> to C.
2757              
2758             =item ENHANCEMENT
2759              
2760             L now supports C<-I> for incremental counting.
2761              
2762             =back
2763              
2764             =head2 2.40, 2013-07-13
2765             small bug fixes
2766              
2767             =over 4
2768              
2769             =item BUG FIX
2770              
2771             L now has more respect for a user-given temporary directory;
2772             it no longer is ignored for merging.
2773              
2774             =item IMPROVEMENT
2775              
2776             L now has options to output the first, last, and both first
2777             and last rows of a run (C<-F>, C<-L>, and C<-B>).
2778              
2779             =item BUG FIX
2780              
2781             L now correctly handles C<-N>. Sigh, it didn't work before.
2782              
2783             =back
2784              
2785             =head2 2.41, 2013-07-29
2786             small bug and packaging fixes
2787              
2788             =over 4
2789              
2790             =item ENHANCEMENT
2791              
2792             Documentation to L improved
2793             (inspired by questions from Qian Kun).
2794              
2795             =item BUG FIX
2796              
2797             L no longer duplicates
2798             singleton unique lines when outputting both (with C<-B>).
2799              
2800             =item BUG FIX
2801              
2802             Add missing C dependency to F.
2803              
2804             =item ENHANCEMENT
2805              
2806             Tests now show the diff of the failing output
2807             if run with C.
2808              
2809             =item ENHANCEMENT
2810              
2811             L now includes documentation for how to output extra rows.
2812             Suggestion from Yuri Pradkin.
2813              
2814             =item BUG FIX
2815              
2816             Several improvements to the Fedora package
2817             from Michael Schwendt
2818             via L,
2819             and from the harsh master that is F.
2820             (I am stymied at teaching it that "outliers" is spelled correctly.
2821             Maybe I should send it Schneier's book. And an unresolvable
2822             invalid-spec-name lurks in the SRPM.)
2823              
2824             =back
2825              
2826             =head2 2.42, 2013-07-31
2827             A bug fix and packaging release.
2828              
2829             =over 4
2830              
2831             =item ENHANCEMENT
2832              
2833             Documentation to L improved
2834             to better describe memory usage.
2835             (Based on problem report by Lin Quan.)
2836              
2837             =item BUG FIX
2838              
2839             The F<.spec> is now F
2840             to satisfy F.
2841             Thanks to Christopher Meng for a specific bug report.
2842              
2843             =item BUG FIX
2844              
2845             Test F no longer has a column
2846             that caused failures because of numerical instability.
2847              
2848             =item BUG FIX
2849              
2850             Some tests now better handle bugs in old versions of perl (5.10, 5.12).
2851             Thanks to Calvin Ardi for help debugging this on a Mac with perl-5.12,
2852             but the fix should affect other platforms.
2853              
2854             =back
2855              
2856             =head2 2.43, 2013-08-27
2857             Adds in-file compression.
2858              
2859             =over 4
2860              
2861             =item BUG FIX
2862              
2863             Changed the sort on F to strings
2864             (from numerics) so we're less susceptible to false test-failures
2865             due to floating point IO differences.
2866              
2867             =item EXPERIMENTAL ENHANCEMENT
2868              
2869             Yet more parallelism in L:
2870             new "endgame-mode" builds a merge tree of processes at the end
2871             of large merge tasks to get maximal parallelism.
2872             Currently this feature is off by default
2873             because it can hang for some inputs.
2874             Enable this experimental feature with C<--endgame>.
2875              
2876             =item ENHANCEMENT
2877              
2878             C now handles being given C objects
2879             (as exercised by L).
2880              
2881             =item BUG FIX
2882              
2883             Handling of NamedTmpfiles now supports concurrency.
2884             This fix will hopefully fix occasional
2885             "Use of uninitialized value $_ in string ne at ...NamedTmpfile.pm line 93."
2886             errors.
2887              
2888             =item BUG FIX
2889              
2890             Fsdb now requires perl 5.10.
2891             This is a bug fix because some test cases used to require it,
2892             but this fact was not properly documented.
2893             (Back-porting to 5.008 would require removing all C operators.)
2894              
2895             =item ENHANCEMENT
2896              
2897             Fsdb now handles automatic compression of file contents.
2898             Enable compression with C
2899             (or C or C).
2900             All programs should operate on compressed files
2901             and leave the output with the same level of compression.
2902             C is recommended as fastest and most efficient.
2903             C produces unrepeatable output (and so has no
2904             output test), it seems to insist on adding a timestamp.
2905              
2906             =back
2907              
2908             =head2 2.44, 2013-10-02
2909             A major change--all threads are gone.
2910              
2911             =over 4
2912              
2913             =item ENHANCEMENT
2914              
2915             Fsdb is now thread free and only uses processes for parallelism.
2916             This change is a big change--the entire motivation for Fsdb-2
2917             was to exploit parallelism via threading.
2918             Parallelism--good, but perl threading--bad for performance.
2919             Horribly bad for performance.
2920             About 20x worse than pipes on my box.
2921             (See perl bug #119445 for the discussion.)
2922              
2923             =item NEW
2924              
2925             C provides a thread-like abstraction over forking,
2926             with some nice support for callbacks in the parent upon child termination.
2927              
2928             =item ENHANCEMENT
2929              
2930             Details about removing threads:
2931             C is thread free,
2932             and new tests to verify each of its parts.
2933             The easy cases are C,
2934             C, C, C, and
2935             C, each of which use it in simple ways (2013-09-09).
2936             C is now thread free (2013-09-13),
2937             but was a significant rewrite,
2938             which brought C along.
2939             C is partly thread free (2013-09-21),
2940             again as a rewrite,
2941             and it brings C along.
2942             Full C support took much longer (2013-10-02).
2943              
2944             =item BUG FIX
2945              
2946             When running with user-only output (C<-n>),
2947             L now resets the output vector C<$ofref>
2948             after it has been output.
2949              
2950             =item NEW
2951              
2952             L will create all columns at the head of each row
2953             with the C<--first> option.
2954              
2955             =item NEW
2956              
2957             L will concatenate two files,
2958             verifying that they have the same schema.
2959              
2960             =item ENHANCEMENT
2961              
2962             L now passes comments through,
2963             rather than eating them as before.
2964              
2965             Also, L now supports a C<--> option to prevent misinterpreting
2966             sub-program parameters as for dbmapreduce.
2967              
2968             =item INCOMPATIBLE CHANGE
2969              
2970             L no longer figures out if it needs to add the key
2971             to the output. For multi-key-aware reducers, it never does
2972             (and cannot). For non-multi-key-aware reducers,
2973             it defaults to add the key and will now fail if the reducer adds the key
2974             (with error "dbcolcreate: attempt to create pre-existing column...").
2975             In such cases, one must disable adding the key with the new
2976             option C<--no-prepend-key>.
2977              
2978             =item INCOMPATIBLE CHANGE
2979              
2980             L no longer copies the input field separator by default.
2981             For multi-key-aware reducers, it never does
2982             (and cannot). For non-multi-key-aware reducers,
2983             it defaults to I copying the field separator,
2984             but it will copy it (the old default) with the C<--copy-fs> option
2985              
2986             =back
2987              
2988             =head2 2.45, 2013-10-07
2989             cleanup from de-thread-ification
2990              
2991             =over 4
2992              
2993             =item BUG FIX
2994              
2995             Corrected a fast busy-wait in L.
2996              
2997             =item ENHANCEMENT
2998              
2999             Endgame mode enabled in L; it (and also large cases of L)
3000             should now exploit greater parallelism.
3001              
3002             =item BUG FIX
3003              
3004             Test case with C (gone since 2.44) now removed.
3005              
3006             =back
3007              
3008             =head2 2.46, 2013-10-08
3009             continuing cleanup of our no-threads version
3010              
3011             =over 4
3012              
3013             =item BUG FIX
3014              
3015             Fixed some packaging details.
3016             (Really, threads are no longer required,
3017             missing tests in the MANIFEST.)
3018              
3019             =item IMPROVEMENT
3020              
3021             L now better communicates with the merge process to avoid
3022             bursty parallelism.
3023              
3024             L now can take C<-autoflush => 1>
3025             for line-buffered IO.
3026              
3027             =back
3028              
3029             =head2 2.47, 2013-10-12
3030             test suite cleanup for non-threaded perls
3031              
3032             =over 4
3033              
3034             =item BUG FIX
3035              
3036             Removed some stray "use threads" in some test cases.
3037             We didn't need them, and these were breaking non-threaded perls.
3038              
3039             =item BUG FIX
3040              
3041             Better handling of Fred cleanup;
3042             should fix intermittent L failures on BSD.
3043              
3044             =item ENHANCEMENT
3045              
3046             Improved test framework to show output when tests fail.
3047             (This time, for real.)
3048              
3049             =back
3050              
3051             =head2 2.48, 2014-01-03
3052             small bugfixes and improved release engineering
3053              
3054             =over 4
3055              
3056             =item ENHANCEMENT
3057              
3058             Test suites now skip tests for libraries that are missing.
3059             (Patch for missing C contributed by Calvin Ardi.)
3060              
3061             =item ENHANCEMENT
3062              
3063             Removed references to Jdb in the package specification.
3064             Since the name was changed in 2008, there's no longer a huge
3065             need for backwards compatibility.
3066             (Suggestion from Petr Šabata.)
3067              
3068             =item ENHANCEMENT
3069              
3070             Test suites now invoke the perl using the path from C<$Config{perlpath}>.
3071             Hopefully this helps testing in environments where there are multiple installed
3072             perls and the default perl is not the same as the perl-under-test
3073             (as happens in cpantesters.org).
3074              
3075             =item BUG FIX
3076              
3077             Added specific encoding to this manpage to account for
3078             Unicode. Required to build correctly against perl-5.18.
3079              
3080             =back
3081              
3082             =head2 2.49, 2014-01-04
3083             bugfix to unicode handling in Fsdb IO (plus minor packaging fixes)
3084              
3085             =over 4
3086              
3087             =item BUG FIX
3088              
3089             Restored a line in the F<.spec> to chmod g-s.
3090              
3091             =item BUG FIX
3092              
3093             Unicode decoding is now handled correctly for programs that read
3094             from standard input.
3095             (Also: New test scripts cover unicode input and output.)
3096              
3097             =item BUG FIX
3098              
3099             Fix to L documentation encoding line.
3100             Addresses test failure in perl-5.16 and earlier.
3101             (Who knew "encoding" had to be followed by a blank line.)
3102              
3103             =back
3104              
3105             =head1 WHAT'S NEW
3106              
3107             =head2 2.50, 2014-05-27
3108             a quick release for spec tweaks
3109              
3110             =over 4
3111              
3112             =item ENHANCEMENT
3113              
3114             In L, the C<-N> (no output, even comments) option now
3115             implies C<-n>, and it now suppresses the header and trailer.
3116              
3117             =item BUG FIX
3118              
3119             A few more tweaks to the F from Petr Šabata.
3120              
3121             =item BUG FIX
3122              
3123             Fixed 3 uses of C in test suites that were causing test
3124             failures (due to warnings, not real failures) on some platforms.
3125              
3126             =back
3127              
3128             =head2 2.51, 2014-09-05
3129             Feature enhancements to L, L, L, and new L
3130              
3131             =over 4
3132              
3133             =item ENHANCEMENT
3134              
3135             L now has a C<--no-recreate-fatal>
3136             that causes it to ignore creation of existing columns
3137             (instead of failing).
3138              
3139             =item ENHANCEMENT
3140              
3141             L once again is robust to reducers
3142             that output the key;
3143             C<--no-prepend-key> is no longer mandatory.
3144              
3145             =item ENHANCEMENT
3146              
3147             L can now enumerate the output rows with C<-E>.
3148              
3149             =item BUG FIX
3150              
3151             L is more mathematically robust.
3152             Previously for some inputs and some platforms,
3153             floating point rounding could
3154             sometimes cause squareroots of negative numbers.
3155              
3156             =item NEW
3157              
3158             L converts the output of the MySQL or MariaDB
3159             select command into fsdb format.
3160              
3161             =item INCOMPATIBLE CHANGE
3162              
3163             L now outputs the I row
3164             when doing sloppy numeric comparisons,
3165             to better support test suites.
3166              
3167             =back
3168              
3169             =head2 2.52, 2014-11-03
3170             Fixing the test suite for line number changes.
3171              
3172             =over 4
3173              
3174             =item ENHANCEMENT
3175              
3176             Test suites changed to be robust to exact line numbers of failures,
3177             since different Perl releases fail on different lines.
3178             L
3179              
3180             =back
3181              
3182              
3183             =head2 2.53, 2014-11-26
3184             bug fixes and stability improvements to dbmapreduce
3185              
3186             =over 4
3187              
3188             =item ENHANCEMENT
3189              
3190             The L now supports a C<--quiet> option.
3191              
3192             =item ENHANCEMENT
3193              
3194             Better documentation of L.
3195              
3196             =item BUGFIX
3197              
3198             Added groff-base and perl-podlators to the Fedora package spec.
3199             Fixes L.
3200             (Also in package 2.52-2.)
3201              
3202             =item BUGFIX
3203              
3204             An important stability improvement to L.
3205             It, plus L, and L now support
3206             controlled parallelism with the C<--parallelism=N> option.
3207             They default to run with the number of available CPUs.
3208             L also moderates its level of parallelism.
3209             Previously it would create reducers as needed,
3210             causing CPU thrashing if reducers ran much slower than data production.
3211              
3212             =item BUGFIX
3213              
3214             The combination of L with L now works
3215             as it should. (The obscure bug was an interaction with L
3216             with non-multi-key reducers that output their own key. L
3217             has too many useful corner cases.)
3218              
3219             =back
3220              
3221             =head2 2.54, 2014-11-28
3222             fix for the test suite to correct failing tests on not-my-platform
3223              
3224             =over 4
3225              
3226             =item BUGFIX
3227              
3228             Sigh, the test suite now has a test suite.
3229             Because, yes, I broke it, causing many incorrect failures
3230             at cpantesters.
3231             Now fixed.
3232              
3233             =back
3234              
3235             =head2 2.55, 2015-01-05
3236             many spelling fixes and L tests are more robust to different numeric precision
3237              
3238             =over 4
3239              
3240             =item ENHANCEMENT
3241              
3242             L now can be extra quiet, as I continue to try to track down
3243             a numeric difference on FreeBSD AMD boxes.
3244              
3245             =item ENHANCEMENT
3246              
3247             L gave different test output
3248             (just reflecting rounding error)
3249             when stddev approaches zero. We now detect and handle this case.
3250             See
3251             and thanks to H. Merijn Brand for the bug report.
3252              
3253             =item BUG FIX
3254              
3255             Many, many spelling bugs found by
3256             H. Merijn Brand; thanks for the bug report.
3257              
3258             =item INCOMPATIBLE CHANGE
3259              
3260             A number of programs had misspelled "separator"
3261             in C<--fieldseparator> and C<--columnseparator> options as "seperator".
3262             These are now correctly spelled.
3263              
3264             =back
3265              
3266             =head2 2.56, 2015-02-03
3267             fix against Getopt::Long-2.43's stricter error checking
3268              
3269             =over 4
3270              
3271             =item BUG FIX
3272              
3273             Internal argument parsing uses Getopt::Long, but mixed pass-through and EE.
3274             Bug reported by Petr Pisar at L.
3275              
3276             =item BUG FIX
3277              
3278             Added missing BuildRequires for C.
3279              
3280             =back
3281              
3282             =head2 2.57, 2015-04-29
3283             Minor changes, with better performance from L.
3284              
3285             =over 4
3286              
3287             =item BUG FIX
3288              
3289             L now honors C<--remove-inputs> (previously it didn't).
3290             This omission meant that L (and L) would accumulate
3291             files in F when running. Bad news for inputs with 4M keys.
3292              
3293             =item ENHANCEMENT
3294              
3295             L should be faster with lots of small keys.
3296             L now supports C<-k> to get some of the functionality of
3297             L (if data is pre-sorted and median/quartiles are not required).
3298              
3299             L now honors C<--remove-inputs> (previously it didn't).
3300             This omission meant that L (and L) would accumulate
3301             files in F when running. Bad news for inputs with 4M keys.
3302              
3303             =back
3304              
3305              
3306             =head2 2.58, 2015-04-30
3307             Bugfix in L
3308              
3309             =over 4
3310              
3311             =item BUG FIX
3312              
3313             Fixed a case where L suffered mojibake in endgame mode.
3314             This bug surfaced when L was applied to large files
3315             (big enough to require merging) with unicode in them;
3316             the symptom was something like:
3317             Wide character in print at /usr/lib64/perl5/IO/Handle.pm line 420, line 111.
3318              
3319             =back
3320              
3321              
3322             =head2 2.59, 2016-09-01
3323             Collect a few small bug fixes and documentation improvements.
3324              
3325             =over 4
3326              
3327             =item BUG FIX
3328              
3329             More IO is explicitly marked UTF-8 to avoid Perl's tendency to
3330             mojibake on otherwise valid unicode input.
3331             This change helps L.
3332              
3333             =item ENHANCEMENT
3334              
3335             L now cross-references L.
3336              
3337             =item ENHANCEMENT
3338              
3339             Documentation for L now clarifies that the default is baseline mode.
3340              
3341             =item BUG FIX
3342              
3343             L now propagates C<-T> into the sorting process (if it is required).
3344             Thanks to Lan Wei for reporting this bug.
3345              
3346             =back
3347              
3348              
3349             =head2 2.60, 2016-09-04
3350             Adds support for hash joins.
3351              
3352             =over 4
3353              
3354             =item ENHANCEMENT
3355              
3356             L now supports hash joins
3357             with C<-t lefthash> and C<-t righthash>.
3358             Hash joins cache a table in memory, but do not require
3359             that the other table be sorted.
3360             They are ideal when joining a large table against a small one.
3361              
3362             =back
3363              
3364             =head2 2.61, 2016-09-05
3365             Support left and right outer joins.
3366              
3367             =over 4
3368              
3369             =item ENHANCEMENT
3370              
3371             L now handles left and right outer joins
3372             with C<-t left> and C<-t right>.
3373              
3374             =item ENHANCEMENT
3375              
3376             L hash joins are now selected
3377             with C<-m lefthash> and C<-m righthash>
3378             (not the shortlived C<-t righthash> option).
3379             (Technically this change is incompatible with Fsdb-2.60, but
3380             no one but me ever used that version.)
3381              
3382             =back
3383              
3384             =head2 2.62, 2016-11-29
3385             A new L and other minor improvements.
3386              
3387             =over 4
3388              
3389             =item ENHANCEMENT
3390              
3391             Documentation for L now includes sample output.
3392              
3393             =item NEW
3394              
3395             L converts a specific form of YAML to fsdb.
3396              
3397             =item BUG FIX
3398              
3399             The test suite now uses C rather than C
3400             to make OpenBSD-5.9 happier, I hope.
3401              
3402             =item ENHANCEMENT
3403              
3404             Comments that log operations at the end of each file now do simple
3405             quoting of spaces. (It is not guaranteed to be fully shell-compliant.)
3406              
3407             =item ENHANCEMENT
3408              
3409             There is a new standard option, C<--header>,
3410             allowing one to specify an Fsdb header for inputs that lack it.
3411             Currently it is supported by L,
3412             L, L, L, L,
3413             L.
3414              
3415             =item ENHANCEMENT
3416              
3417             L now allows the B<--possible-pivots> option,
3418             and if it is provided processes the data in one pass.
3419              
3420             =item ENHANCEMENT
3421              
3422             L logs are now quoted.
3423              
3424             =back
3425              
3426             =head2 2.63, 2017-02-03
3427             Re-add some features supposedly in 2.62 but not, and add more --header options.
3428              
3429             =over 4
3430              
3431             =item ENHANCEMENT
3432              
3433             The option B<-j> is now a synonym for B<--parallelism>.
3434             (And several documentation bugs about this option are fixed.)
3435              
3436             =item ENHANCEMENT
3437              
3438             Additional support for C<--header> in L, L, L,
3439             and L.
3440              
3441             =item BUG FIX
3442              
3443             Version 2.62 was supposed to have this improvement, but did not (and now does):
3444             L now allows the B<--possible-pivots> option,
3445             and if it is provided processes the data in one pass.
3446              
3447             =item BUG FIX
3448              
3449             Version 2.62 was supposed to have this improvement, but did not (and now does):
3450             L logs are now quoted.
3451              
3452             =back
3453              
3454             =head1 AUTHOR
3455              
3456             John Heidemann, C
3457              
3458             See L for the many people who have contributed
3459             bug reports and fixes.
3460              
3461              
3462             =head1 COPYRIGHT
3463              
3464             Fsdb is Copyright (C) 1991-2016 by John Heidemann .
3465              
3466             This program is free software; you can redistribute it and/or modify
3467             it under the terms of version 2 of the GNU General Public License as
3468             published by the Free Software Foundation.
3469              
3470             This program is distributed in the hope that it will be useful, but
3471             WITHOUT ANY WARRANTY; without even the implied warranty of
3472             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3473             General Public License for more details.
3474              
3475             You should have received a copy of the GNU General Public License
3476             along with this program; if not, write to the Free Software
3477             Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
3478              
3479             A copy of the GNU General Public License can be found in the file
3480             ``COPYING''.
3481              
3482              
3483              
3484             =head1 COMMENTS and BUG REPORTS
3485              
3486             Any comments about these programs should be sent to John Heidemann
3487             C.
3488              
3489              
3490             =cut
3491              
3492             1; # End of Fsdb
3493              
3494             # LocalWords: Exp rdb Manis Evan Schaffer passwd uid gid fullname homedir greg
3495             # LocalWords: gnuplot jgraph dbrow dbcol dbcolcreate dbcoldefine FSDB README un
3496             # LocalWords: dbcolrename dbcolmerge dbcolsplit dbjoin dbsort dbcoldiff Perl bw
3497             # LocalWords: dbmultistats dbrowdiff dbrowenumerate dbroweval dbstats dblistize
3498             # LocalWords: dbcolneaten dbcoltighten dbstripcomments dbstripextraheaders pct
3499             # LocalWords: dbstripleadingspace stddev rsd dbsetheader sprintf LIBDIR BINDIR
3500             # LocalWords: LocalWords isi URL com dbpercentile dbhistogram GRADEBOOK min ss
3501             # LocalWords: gradebook conf std dev dbrowaccumulate dbcolpercentile db dcliff
3502             # LocalWords: dbuniq uniq dbcolize distr pl Apr autoconf Jul html printf Fx fsdb
3503             # LocalWords: printfs dbrowuniq dbrecolize dbformmail kitrace geoff ns berkeley
3504             # LocalWords: comp lang perl Haobo Yu outliers Jorgensen csh dbrowsplituniq crl
3505             # LocalWords: dbcolmovingstats dbcolstats zscores tscores dbcolhisto columnar
3506             # LocalWords: dmalloc tabdelim stats numerics datapoint CDF xgraph max txt sed
3507             # LocalWords: login gecos div cmd nr hw hw assuing Kuenning Vikram Visweswariah
3508             # LocalWords: Kannan Varadahan Arkadi Gelfond Pavlin Radoslavov quartile getopt
3509             # LocalWords: dbcolscorrelate DbGetopt cp tmp nd Ya Xu dbfilesplit
3510             # LocalWords: MERCHANTABILITY tba dbcolsplittocols dbcolsplittorows cvs johnh
3511             # LocalWords: dbcolsregression datasets whitespace LaTeX FS columnname cgi pre
3512             # LocalWords: columname's dbfilevalidate tcpdump http rv eq Bourne DbTDistr
3513             # LocalWords: Goel Eggert Ning Strozzi NoSQL awk startup Sparcstation IPCs GHz
3514             # LocalWords: SunOS Arpaci Dusseau's SOSP Scheaffer STDIN dblib iso freebsd OO
3515             # LocalWords: sendmail unicode Makefile dbmapreduce dbcolmultiscale andersen
3516             # LocalWords: lampson chen drovolis estrin floyd Lukac NIST SEMATECH RCS qw
3517             # LocalWords: listize colize Unkyu dbpipeline ithreads dbfilealter dbrowcount
3518             # LocalWords: dbrvstatdiff dbcolstatscores dbfilestripcomments csv nolog aho
3519             # LocalWords: alfred david clark constantine debrorah Fsdb's colized listized
3520             # LocalWords: Ashvin dbmerge na tmean tstddev wc logfiles stdin lseek SV xa
3521             # LocalWords: refcount lossage DaGronk dbcolscorellate ipchain