line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl |
2
|
|
|
|
|
|
|
## Emacs: -*- tab-width: 4; -*- |
3
|
|
|
|
|
|
|
|
4
|
1
|
|
|
1
|
|
13071
|
use strict; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
211
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package Data::CTable; |
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
8
|
use vars qw($VERSION); $VERSION = '1.01'; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
333
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=pod |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Data::CTable - Read, write, manipulate tabular data |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 SYNOPSIS |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
## Read some data files in various tabular formats |
19
|
|
|
|
|
|
|
use Data::CTable; |
20
|
|
|
|
|
|
|
my $People = Data::CTable->new("people.merge.mac.txt"); |
21
|
|
|
|
|
|
|
my $Stats = Data::CTable->new("stats.tabs.unix.txt"); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
## Clean stray whitespace in fields |
24
|
|
|
|
|
|
|
$People->clean_ws(); |
25
|
|
|
|
|
|
|
$Stats ->clean_ws(); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
## Retrieve columns |
28
|
|
|
|
|
|
|
my $First = $People->col('FirstName'); |
29
|
|
|
|
|
|
|
my $Last = $People->col('LastName' ); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
## Calculate a new column based on two others |
32
|
|
|
|
|
|
|
my $Full = [map {"$First->[$_] $Last->[$_]"} @{$People->all()}]; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
## Add new column to the table |
35
|
|
|
|
|
|
|
$People->col(FullName => $Full); |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
## Another way to calculate a new column |
38
|
|
|
|
|
|
|
$People->col('Key'); |
39
|
|
|
|
|
|
|
$People->calc(sub {no strict 'vars'; $Key = "$Last,$First";}); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
## "Left join" records matching Stats:PersonID to People:Key |
42
|
|
|
|
|
|
|
$Stats->join($People, PersonID => 'Key'); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
## Find certain records |
45
|
|
|
|
|
|
|
$Stats->select_all(); |
46
|
|
|
|
|
|
|
$Stats->select(Department => sub {/Sale/i }); ## Sales depts |
47
|
|
|
|
|
|
|
$Stats->omit (Department => sub {/Resale/i}); ## not Resales |
48
|
|
|
|
|
|
|
$Stats->select(UsageIndex => sub {$_ > 20.0}); ## high usage |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
## Sort the found records |
51
|
|
|
|
|
|
|
$Stats->sortspec('DeptNum' , {SortType => 'Integer'}); |
52
|
|
|
|
|
|
|
$Stats->sortspec('UsageIndex', {SortType => 'Number' }); |
53
|
|
|
|
|
|
|
$Stats->sort([qw(DeptNum UsageIndex Last First)]); |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
## Make copy of table with only found/sorted data, in order |
56
|
|
|
|
|
|
|
my $Report = $Stats->snapshot(); |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
## Write an output file |
59
|
|
|
|
|
|
|
$Report->write(_FileName => "Rept.txt", _LineEnding => "mac"); |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
## Print a final progress message. |
62
|
|
|
|
|
|
|
$Stats->progress("Done!"); |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
## Dozens more methods and parameters available... |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head1 OVERVIEW |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Data::CTable is a comprehensive utility for reading, writing, |
69
|
|
|
|
|
|
|
manipulating, cleaning and otherwise transforming tabular data. The |
70
|
|
|
|
|
|
|
distribution includes several illustrative subclasses and utility |
71
|
|
|
|
|
|
|
scripts. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
A Columnar Table represents a table as a hash of data columns, making |
74
|
|
|
|
|
|
|
it easy to do data cleanup, formatting, searching, calculations, |
75
|
|
|
|
|
|
|
joins, or other complex operations. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
The object's hash keys are the field names and the hash values hold |
78
|
|
|
|
|
|
|
the data columns (as array references). |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
Tables also store a "selection" -- a list of selected / sorted record |
81
|
|
|
|
|
|
|
numbers, and a "field list" -- an ordered list of all or some fields |
82
|
|
|
|
|
|
|
to be operated on. Select() and sort() methods manipulate the |
83
|
|
|
|
|
|
|
selection list. Later, you can optionally rewrite the table in memory |
84
|
|
|
|
|
|
|
or on disk to reflect changes in the selection list or field list. |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
Data::CTable reads and writes any tabular text file format including |
87
|
|
|
|
|
|
|
Merge, CSV, Tab-delimited, and variants. It transparently detects, |
88
|
|
|
|
|
|
|
reads, and preserves Unix, Mac, and/or DOS line endings and tab or |
89
|
|
|
|
|
|
|
comma field delimiters -- regardless of the runtime platform. |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
In addition to reading data files, CTable is a good way to gather, |
92
|
|
|
|
|
|
|
store, and operate on tabular data in memory, and to export data to |
93
|
|
|
|
|
|
|
delimited text files to be read by other programs or interactive |
94
|
|
|
|
|
|
|
productivity applications. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
To achieve extremely fast data loading, CTable caches data file |
97
|
|
|
|
|
|
|
contents using the Storable module. This can be helpful in CGI |
98
|
|
|
|
|
|
|
environments or when operating on very large data files. CTable can |
99
|
|
|
|
|
|
|
read an entire cached table of about 120 megabytes into memory in |
100
|
|
|
|
|
|
|
about 10 seconds on an average mid-range computer. |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
For simple data-driven applications needing to store and quickly |
103
|
|
|
|
|
|
|
retrieve simple tabular data sets, CTable provides a credible |
104
|
|
|
|
|
|
|
alternative to DBM files or SQL. |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
For data hygiene applications, CTable forms the foundation for writing |
107
|
|
|
|
|
|
|
utility scripts or compilers to transfer data from external sources, |
108
|
|
|
|
|
|
|
such as FileMaker, Excel, Access, personal organizers, etc. into |
109
|
|
|
|
|
|
|
compiled or validated formats -- or even as a gateway to loading data |
110
|
|
|
|
|
|
|
into SQL databases or other destinations. You can easily write short, |
111
|
|
|
|
|
|
|
repeatable scripts in Perl to do reporting, error checking, analysis, |
112
|
|
|
|
|
|
|
or validation that would be hard to duplicate in less-flexible |
113
|
|
|
|
|
|
|
application environments. |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
The data representation is simple and open so you can directly access |
116
|
|
|
|
|
|
|
the data in the object if you feel like it -- or you can use accessors |
117
|
|
|
|
|
|
|
to request "clean" structures containing only the data or copies of |
118
|
|
|
|
|
|
|
it. Or you can build your own columns in memory and then when you're |
119
|
|
|
|
|
|
|
ready, turn them into a table object using the very flexible new() |
120
|
|
|
|
|
|
|
method. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
The highly factored interface and implementation allow fine-grained |
123
|
|
|
|
|
|
|
subclassing so you can easily create useful lightweight subclasses. |
124
|
|
|
|
|
|
|
Several subclasses are included with the distribution. |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Most defaults and parameters can be customized by subclassing, |
127
|
|
|
|
|
|
|
overridden at the instance level (avoiding the need to subclass too |
128
|
|
|
|
|
|
|
often), and further overridden via optional named-parameter arguments |
129
|
|
|
|
|
|
|
to most major method calls. |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=head2 Similar / related modules on CPAN |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
The Data::Table module by Yingyao Zhou & Guangzhou Zou offers similar |
134
|
|
|
|
|
|
|
functionality, but uses a different underlying data representation |
135
|
|
|
|
|
|
|
(2-dimensional array), and has a somewhat different feature set. |
136
|
|
|
|
|
|
|
Check it out. Maybe you will prefer it for your application. |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
http://search.cpan.org/search?mode=module&query=Data::Table |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
The Data::ShowTable module renders tables in various viewable formats. |
141
|
|
|
|
|
|
|
CTable relies on ShowTable's ShowBoxTable method to implement its own |
142
|
|
|
|
|
|
|
format() and out() methods. |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
http://search.cpan.org/search?mode=module&query=Data::ShowTable |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head2 Prerequisites |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
The CTable documentation, source code, and examples assume familiarity |
150
|
|
|
|
|
|
|
with large nested data structures, object-oriented syntax and |
151
|
|
|
|
|
|
|
terminology, and comfort working with array and hash references and |
152
|
|
|
|
|
|
|
array and hash slice syntax. |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
See the perlref man page for more on these topics. |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=head2 How to learn more |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Dozens more methods, parameters, and examples are described below. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
See the full source code in CTable.pm. |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Or, after installing, read the man page using: |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
man Data::CTable |
166
|
|
|
|
|
|
|
perldoc Data::CTable |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
See the eg/ (examples) folder in the Data::CTable distribution and the |
169
|
|
|
|
|
|
|
test.pl script for scripts demonstrating every CTable method. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
For latest version and other news, check the Data::CTable home page: |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
http://christhorman.com/projects/perl/Data-CTable/ |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Or search CPAN: |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
http://search.cpan.org/search?mode=module&query=Data::CTable |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head1 INSTALLATION |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
Using CPAN module: |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
perl -MCPAN -e 'install Data::CTable' |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Or manually: |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
tar xzvf Data-CTable*gz |
188
|
|
|
|
|
|
|
cd Data-CTable-?.?? |
189
|
|
|
|
|
|
|
perl Makefile.PL |
190
|
|
|
|
|
|
|
make |
191
|
|
|
|
|
|
|
make test |
192
|
|
|
|
|
|
|
make install |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=head1 INCLUDED SUBCLASSES AND UTILITIES |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
In addition to the module itself, there are a number of subclasses and |
197
|
|
|
|
|
|
|
simple utilities included with the Data::CTable distribution. |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=head2 Subclases |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
The Data::CTable distribution includes these example subclasses. Each |
202
|
|
|
|
|
|
|
is installed in your Perl environment along with the main module, and |
203
|
|
|
|
|
|
|
so may be used by your scripts. Each has its own man/perldoc page |
204
|
|
|
|
|
|
|
containing more detail. |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
B is a subclass that logs all progress |
207
|
|
|
|
|
|
|
messages to a list within the object itself rather than (merely) |
208
|
|
|
|
|
|
|
echoing them to STDERR. Later, you may retrieve and examine the list. |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
B is a virtual subclass that includes class and |
211
|
|
|
|
|
|
|
object methods that make it easy to write a simple interactive |
212
|
|
|
|
|
|
|
command-line program that parses options and outputs a table. |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
B is a very useful subclass of |
215
|
|
|
|
|
|
|
Data::CTable::Script that implements a souped-up Unix-like "ls" (file |
216
|
|
|
|
|
|
|
listing) command -- it first gets an optionally-recursive listing of |
217
|
|
|
|
|
|
|
any number of files and/or directories, builds a list of their full |
218
|
|
|
|
|
|
|
absolute or relative paths, then build a Data::CTable::Listing object |
219
|
|
|
|
|
|
|
that contains all the paths, plus about 25+ other pieces of useful |
220
|
|
|
|
|
|
|
information about each file or directory. |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
The "tls" utility, below, is simply a command-line cover for this |
223
|
|
|
|
|
|
|
class, but you could use this class in your own scripts in order to |
224
|
|
|
|
|
|
|
get detailed file listings. |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=head2 Utilities |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
Each of these utilities is provided mainly so you can see mature |
229
|
|
|
|
|
|
|
examples of how to use Data::CTable in real-world scripts. |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
But each is also genuinely useful, too, and you may enjoy adding them |
232
|
|
|
|
|
|
|
to your regular bag of tricks or using them as an easily-modifiable |
233
|
|
|
|
|
|
|
basis for scripts of your own. |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
On most systems, these will be installed in an appropriate directory |
236
|
|
|
|
|
|
|
in your path when you install CTable, and hence will be executable |
237
|
|
|
|
|
|
|
just by typing their file name. (On Windows, they'll be wrapped by a |
238
|
|
|
|
|
|
|
.bat file and installed in C:\Perl\bin or equivalent. On *nix, |
239
|
|
|
|
|
|
|
they'll be in /usr/bin/ or equivalent.) |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
B is a command-line utility that wraps Data::CTable::Listing to |
242
|
|
|
|
|
|
|
implement a variant on the classic Unix "ls" command in which an |
243
|
|
|
|
|
|
|
internal CTable object is used to hold and calculate a very large |
244
|
|
|
|
|
|
|
amount of meta-data about each file and directory and then output that |
245
|
|
|
|
|
|
|
data in formatted tables, or as any kind of delimited text file, and |
246
|
|
|
|
|
|
|
with much more flexibility and control over included data, and sort |
247
|
|
|
|
|
|
|
and sub-sort order than with ls. |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
B is a command-line utility that reads each of its arguments as |
250
|
|
|
|
|
|
|
a Data::CTable file and then calls the out() method to display its |
251
|
|
|
|
|
|
|
entire contents. (Warning: out() is slow with very large data sets.) |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
B is a command-line utility that takes a US zip code, |
254
|
|
|
|
|
|
|
grabs the local weather report from a popular weather web site and |
255
|
|
|
|
|
|
|
uses a CTable object to store, process, and clean, and present the |
256
|
|
|
|
|
|
|
table of weather data that results in a simple text format. |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=cut |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
## Required dependencies |
261
|
|
|
|
|
|
|
|
262
|
1
|
|
|
1
|
|
1022
|
use IO::File; |
|
1
|
|
|
|
|
12860
|
|
|
1
|
|
|
|
|
134
|
|
263
|
1
|
|
|
1
|
|
10
|
use Config; qw(%Config); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
49
|
|
264
|
|
|
|
|
|
|
|
265
|
1
|
|
|
1
|
|
4
|
use Carp qw(croak carp confess cluck); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
64
|
|
266
|
1
|
|
|
1
|
|
1141
|
use Storable qw(store nstore retrieve dclone); |
|
1
|
|
|
|
|
3435
|
|
|
1
|
|
|
|
|
111
|
|
267
|
1
|
|
|
1
|
|
8
|
use File::Basename qw(fileparse); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
137
|
|
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
## Optional dependencies |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
my $HaveDumper; |
273
|
|
|
|
|
|
|
my $HaveShowTable; |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
BEGIN |
276
|
|
|
|
|
|
|
{ |
277
|
1
|
|
|
1
|
|
70
|
eval " |
|
1
|
|
|
1
|
|
1657
|
|
|
1
|
|
|
1
|
|
10438
|
|
|
1
|
|
|
|
|
83
|
|
|
1
|
|
|
|
|
311
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
278
|
|
|
|
|
|
|
use Data::Dumper qw(Dumper); |
279
|
|
|
|
|
|
|
use Data::ShowTable qw(ShowBoxTable); |
280
|
|
|
|
|
|
|
"; |
281
|
|
|
|
|
|
|
|
282
|
1
|
|
|
|
|
3
|
$HaveDumper = $Data::Dumper::VERSION; |
283
|
1
|
|
|
|
|
21
|
$HaveShowTable = exists($Data::ShowTable::{ShowBoxTable}); |
284
|
|
|
|
|
|
|
}; |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
## We optionally export a few general-purpose utility routines. |
288
|
|
|
|
|
|
|
|
289
|
1
|
|
|
1
|
|
4
|
use Exporter; use vars qw(@ISA @EXPORT_OK); @ISA=qw(Exporter); |
|
1
|
|
|
1
|
|
1
|
|
|
1
|
|
|
|
|
33
|
|
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
3376
|
|
290
|
|
|
|
|
|
|
@EXPORT_OK = qw( |
291
|
|
|
|
|
|
|
&ISORoman8859_1ToMacRoman |
292
|
|
|
|
|
|
|
&MacRomanToISORoman8859_1 |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
&ISORoman8859_1ToMacRoman_clean |
295
|
|
|
|
|
|
|
&MacRomanToISORoman8859_1_clean |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
&guess_endings |
298
|
|
|
|
|
|
|
&guess_delimiter |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
&path_info |
301
|
|
|
|
|
|
|
&path_is_absolute |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
&min |
304
|
|
|
|
|
|
|
&max |
305
|
|
|
|
|
|
|
); |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=pod |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=head1 CREATING TABLE OBJECTS |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
## Create an object / read file(s) / override params |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
use Data::CTable; |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
$t = Data::CTable->new() |
316
|
|
|
|
|
|
|
$t = Data::CTable->new($File) |
317
|
|
|
|
|
|
|
$t = Data::CTable->new($File1, $File2....) |
318
|
|
|
|
|
|
|
$t = Data::CTable->new($Params) |
319
|
|
|
|
|
|
|
$t = Data::CTable->new($Params, $File1) |
320
|
|
|
|
|
|
|
$t = Data::CTable->new($Params, $File1, $File2....) |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
## Internal initializer (subclassable): called for you by new() |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
$t->initialize() |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
If the first argument to new() is a hash ref, it is the $Params hash |
327
|
|
|
|
|
|
|
of initial parameters and/or data columns which, if supplied will form |
328
|
|
|
|
|
|
|
the starting point for the object being created. Any non-hash and/or |
329
|
|
|
|
|
|
|
further arguments to new() are treated as file names to be opened. |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
If supplied, data in the $Params hash will be shallowly copied -- the |
332
|
|
|
|
|
|
|
original hash object passed will not be used, but any sub-structures |
333
|
|
|
|
|
|
|
within it will now "belong" to the resulting new object which will |
334
|
|
|
|
|
|
|
feel free to manipulate them or discard them. |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
Then, any parameters not supplied (usually most of them) will be |
337
|
|
|
|
|
|
|
defaulted for you because new() will call the internal method |
338
|
|
|
|
|
|
|
initialize() before the object is finished and returned. |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
See the PARAMETER REFERENCE section below for the parameters you can |
341
|
|
|
|
|
|
|
choose to supply or have defaulted for you. |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
Any file name arguments will be read and appended into a single |
344
|
|
|
|
|
|
|
object. If any of the files fails to be read, then new() will fail |
345
|
|
|
|
|
|
|
(return a false value) and no object will be created. |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
initialize() makes sure there is a legal and consistent value for |
348
|
|
|
|
|
|
|
every internal parameter in the object. Generally, initialize() |
349
|
|
|
|
|
|
|
leaves alone any parameters you supplied to new(), and simply sets |
350
|
|
|
|
|
|
|
default values for any that were not yet supplied. |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
You should never need to call initialize() directly. |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
After calling initialize(), new() then calls the append_files_new() |
355
|
|
|
|
|
|
|
method to process the filename arguments. This method then calls |
356
|
|
|
|
|
|
|
read() on the first filename, and then append_file on all subsequent |
357
|
|
|
|
|
|
|
file names, appending them to the first, in sequence. |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
See append() for an explanation of how the data from multiple files is |
360
|
|
|
|
|
|
|
combined into a single table. |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=head2 Advanced: Using a template object |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
## Calling new() with a template object |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
$v = $t->new() |
367
|
|
|
|
|
|
|
$v = $t->new($File) |
368
|
|
|
|
|
|
|
$v = $t->new($File1, $File2....) |
369
|
|
|
|
|
|
|
$v = $t->new($Params) |
370
|
|
|
|
|
|
|
$v = $t->new($Params, $File1) |
371
|
|
|
|
|
|
|
$v = $t->new($Params, $File1, $File2....) |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
You can also call $t->new() to use an existing object, $t, as a |
374
|
|
|
|
|
|
|
template for the new object. $t->new() will create a new object of |
375
|
|
|
|
|
|
|
the same class or subclass as the template object. Furthermore, the |
376
|
|
|
|
|
|
|
template object, if provided, will be used as a starting point for the |
377
|
|
|
|
|
|
|
resulting object -- in fact, it will initially share shallow copies of |
378
|
|
|
|
|
|
|
all data columns, if any, and all internal parameters and data |
379
|
|
|
|
|
|
|
structures, if any. |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
This advanced shared-data technique could be used to create two |
382
|
|
|
|
|
|
|
separate table objects that share and operate on the same underlying |
383
|
|
|
|
|
|
|
data columns in memory but have different custom field lists, custom |
384
|
|
|
|
|
|
|
selections, sort behavior, etc. But don't do this unless you're sure |
385
|
|
|
|
|
|
|
you understand what you're doing, because changing data in one table |
386
|
|
|
|
|
|
|
would change it in the other. |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=head1 PARAMETER REFERENCE |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
The parameters listed here are recognized by new(), initialize(), and |
391
|
|
|
|
|
|
|
by many functions that use the named-parameter calling convention, |
392
|
|
|
|
|
|
|
such as read(), write(), sort(), and others. |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
Any parameter listed may be specified when using new(). |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
Most parameters should not be directly accessed or manipulated once |
397
|
|
|
|
|
|
|
the object has been created, except those that have appropriate |
398
|
|
|
|
|
|
|
accessor methods described throughout in this documentation. |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
Each parameter in the lists below is listed along with its defaulting |
401
|
|
|
|
|
|
|
logic as performed by new() via initialize(). |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=head2 Custom field list and custom selection (record list) |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=over 4 |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=item _FieldList ||= undef; |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
This is the ordered list (array reference) of columns / fields present |
410
|
|
|
|
|
|
|
in the object. It is set by read() to reflect the names and order of |
411
|
|
|
|
|
|
|
the fields encountered or actually read from in the incoming data |
412
|
|
|
|
|
|
|
file, if any. Initially this list is undefined, and if removed or |
413
|
|
|
|
|
|
|
left undefined, then the de-facto field list will be a list of all |
414
|
|
|
|
|
|
|
columns present in the table object, in alphabetical order (see |
415
|
|
|
|
|
|
|
fieldlist() and fieldlist_all()). |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
Normally, all fields present in the table object would be listed in |
418
|
|
|
|
|
|
|
the field list. |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
However, this parameter may be set in the object (or overridden in |
421
|
|
|
|
|
|
|
named-parameter function calls like read(), write(), etc.) to cause a |
422
|
|
|
|
|
|
|
subset of fields to be read, written or otherwise used. If a subset |
423
|
|
|
|
|
|
|
field list is specified before reading a data file, then ONLY fields |
424
|
|
|
|
|
|
|
listed will be read -- this is a way to read just certain fields from |
425
|
|
|
|
|
|
|
a very large file, but may not always be what you want. |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Specifying the field list before calling read() is required if the |
428
|
|
|
|
|
|
|
data file has no header row giving names to its fields -- the names |
429
|
|
|
|
|
|
|
you specify in the field list, in order, will be applied to the data |
430
|
|
|
|
|
|
|
file being read. See the _HeaderRow parameter, below. |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
If a subset field list is used after columns are already loaded in |
433
|
|
|
|
|
|
|
memory, the columns not listed in the field list will still be present |
434
|
|
|
|
|
|
|
in the object (and can be listed by calling fieldlist_all()), but they |
435
|
|
|
|
|
|
|
will be omitted from most operations that iterate over fields. |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=item _Selection ||= undef; |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
This is a list of the record numbers of "selected" records in the |
440
|
|
|
|
|
|
|
table, possibly indicating sorted order. |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
If absent, then all records are considered to be selected, in |
443
|
|
|
|
|
|
|
"natural" order -- i.e. the order they occur in the file. |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
You can create and set your own selection list or get and modify an |
446
|
|
|
|
|
|
|
existing one. Deleting it resets the selection (for example, by |
447
|
|
|
|
|
|
|
calling select_all()). |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
Calling sort() will create a _Selection if none existed. Otherwise it |
450
|
|
|
|
|
|
|
operates by modifying the existing _Selection, which may be a subset |
451
|
|
|
|
|
|
|
of all record numbers. |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
=back |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=head2 Cache behavior controls |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
See sections related to Cacheing, below. |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=over 4 |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=item _CacheOnRead = 1 unless exists |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Boolean: whether data files read by the read() method should be cached |
464
|
|
|
|
|
|
|
after reading. Once cached, the data will be read from the cache |
465
|
|
|
|
|
|
|
instead of the original file the NEXT TIME READ() IS CALLED, but only |
466
|
|
|
|
|
|
|
if: 1) the cache file is found, and 2) its date is later than the |
467
|
|
|
|
|
|
|
original. Otherwise, the cache file is ignored or re-written. |
468
|
|
|
|
|
|
|
Cacheing can be up to 10x faster than parsing the file, so it's almost |
469
|
|
|
|
|
|
|
always worth doing in any situation where you'll be reading a data |
470
|
|
|
|
|
|
|
file more often than writing it. |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
This parameter defaults to true. |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=item _CacheOnWrite = 0 unless exists |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
Boolean: whether tables written by the write() method should be cached |
477
|
|
|
|
|
|
|
after writing. This defaults to false on the assumption that the |
478
|
|
|
|
|
|
|
program won't need to re-read a file it just wrote. However, this |
479
|
|
|
|
|
|
|
behavior would be useful if a later step in your program or another |
480
|
|
|
|
|
|
|
program will be reading the file that was written and would benefit |
481
|
|
|
|
|
|
|
from having the cacheing already done. Cacheing a file after writing |
482
|
|
|
|
|
|
|
is quite fast since the data is already in memory and of course it |
483
|
|
|
|
|
|
|
speeds up subsequent read() operations by up to 10x. |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=item _CacheExtension = ".cache" unless exists |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
This is the file name extension that is added to a file's name to |
488
|
|
|
|
|
|
|
determine the name of its corresponding cache file. (First, any |
489
|
|
|
|
|
|
|
existing extension, if any, is removed.) If this extension is empty, |
490
|
|
|
|
|
|
|
then the cache file will be named the same as the original assuming |
491
|
|
|
|
|
|
|
the cache file is being stored in a different directory. (See next |
492
|
|
|
|
|
|
|
setting.) |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=item _CacheSubDir = "cache" unless exists |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
This is the absolute or relative path to the subdirectory that should |
497
|
|
|
|
|
|
|
be used to store the cache files. The default value is the relative |
498
|
|
|
|
|
|
|
path to a directory called "cache". Relative paths will be appended |
499
|
|
|
|
|
|
|
to the directory path containing the original file being read. |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
Absolute cache paths (such as /tmp or c:\temp\) can also be used. |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
Override _CacheExtension and _CacheSubdir in a subclass, in each |
504
|
|
|
|
|
|
|
object, or in each call to read() or write() in order to have the |
505
|
|
|
|
|
|
|
cache files stored elsewhere. But remember: unless you use the same |
506
|
|
|
|
|
|
|
cache settings next time you read the same file, the cache files will |
507
|
|
|
|
|
|
|
be orphaned. |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
=back |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
=head2 Progress routine / setting |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
=over 4 |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
=item _Progress = undef unless exists |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
The _Progress setting controls the routing of diagnostic messages. |
518
|
|
|
|
|
|
|
Four possible settings are recognized: |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
undef (default) The class's progress settings are used. |
521
|
|
|
|
|
|
|
subroutine reference Your own custom progress routine. |
522
|
|
|
|
|
|
|
true Built-in progress_default() method used. |
523
|
|
|
|
|
|
|
0/false No progress messages for this object. |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
See the PROGRESS section for a description of the interface of custom |
526
|
|
|
|
|
|
|
progress routines and for details on how the builtin one works. |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=back |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=head2 File format settings |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=over 4 |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=item _LineEnding ||= undef; |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
_LineEnding indicates the line ending string or setting to be used to |
537
|
|
|
|
|
|
|
read a file, the setting that actually I used to read a file, |
538
|
|
|
|
|
|
|
and/or the line ending that will be used to write a file. |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
Set this parameter to force a particular encoding to be used. |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
Otherwise, leave it undef. The program will Do What You Mean. |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
If _LineEnding is undef when read() is called, read() will try to |
545
|
|
|
|
|
|
|
guess the line ending type by inspecting the first file it reads. |
546
|
|
|
|
|
|
|
Then it will set this setting for you. It can detect DOS, Unix, and |
547
|
|
|
|
|
|
|
Mac line endings. |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
If _LineEnding is undef when write() is called, write() will use |
550
|
|
|
|
|
|
|
C<"\n">, which yields different strings depending on the current |
551
|
|
|
|
|
|
|
runtime platform: \x0A on Unix; \x0D in MacPerl, \x0D\x0A on DOS. |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
Otherwise, write() uses the value defined in _LineEnding, which would |
554
|
|
|
|
|
|
|
match the value filled in by read() if this object's data originally |
555
|
|
|
|
|
|
|
had been read from a file. So if you read a file and then later write |
556
|
|
|
|
|
|
|
it out, the line endings in the written file will match the format of |
557
|
|
|
|
|
|
|
original unless you override _LineEnding specifically. |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
Since Data::CTable supports reading and writing all common endings, |
560
|
|
|
|
|
|
|
base your decision on line ending format during write() on the needs |
561
|
|
|
|
|
|
|
of other programs you might be using. |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
For example: FileMaker Pro and Excel crash / hang if Unix line endings |
564
|
|
|
|
|
|
|
are used, so be sure to use the ending format that matches the needs |
565
|
|
|
|
|
|
|
of the other programs you plan to use. |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
As a convenience, you may specify and retrieve the _LineEnding setting |
568
|
|
|
|
|
|
|
using the mnemonic symbols "mac", "dos" and "unix." These special |
569
|
|
|
|
|
|
|
values are converted to the string values shown in this chart: |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
symbol string value chars decimal octal control |
572
|
|
|
|
|
|
|
------------------------------------------------------------- |
573
|
|
|
|
|
|
|
dos "\x0D\x0A" CR/LF 13,10 "\015\012" ^M^J |
574
|
|
|
|
|
|
|
mac "\x0D" CR 13 "\015" ^M |
575
|
|
|
|
|
|
|
unix "\x0A" LF 10 "\012" ^J |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
See the section LINE ENDINGS, below, for accessor methods and |
578
|
|
|
|
|
|
|
conversion utilities that help you get/set this parameter in either |
579
|
|
|
|
|
|
|
symbolic format or string format as you prefer. |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
=item _FDelimiter ||= undef; |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
_FDelimiter is the field delimiter between field names in the header |
584
|
|
|
|
|
|
|
row (if any) and also between fields in the body of the file. If |
585
|
|
|
|
|
|
|
undef, read() will try to guess whether it is tab C<"\t"> or comma |
586
|
|
|
|
|
|
|
<",">, and set this parameter accordingly. If there is only one field |
587
|
|
|
|
|
|
|
in the file, then comma is assumed by read() and will be used by |
588
|
|
|
|
|
|
|
write(). |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
To guess the delimiter, the program looks for the first comma or tab |
591
|
|
|
|
|
|
|
character in the header row (if present) or in the first record. |
592
|
|
|
|
|
|
|
Whichever character is found first is assumed to be the delimiter. |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
If you don't want the program to guess, or you have a data file format |
595
|
|
|
|
|
|
|
that uses a custom delimiter, specify the delimiter explicitly in the |
596
|
|
|
|
|
|
|
object or when calling read() or make a subclass that initializes this |
597
|
|
|
|
|
|
|
value differently. On write(), this will default to comma if it is |
598
|
|
|
|
|
|
|
empty or undef. |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
=item _QuoteFields = undef unless exists |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
_QuoteFields controls how field values are quoted by write() when |
603
|
|
|
|
|
|
|
writing the table to a delimited text file. |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
An undef value (the default) means "auto" -- each field is checked |
606
|
|
|
|
|
|
|
individually and if it contains either the _FDelimiter character or a |
607
|
|
|
|
|
|
|
double-quote character, the field value will be surrounded by |
608
|
|
|
|
|
|
|
double-quotes as it is written to the file. This method is slower to |
609
|
|
|
|
|
|
|
write but faster to read, and may make the output easier for humans to |
610
|
|
|
|
|
|
|
read. |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
A true value means always put double-quotes around every field value. |
613
|
|
|
|
|
|
|
This mode is faster to write but slower to read. |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
A zero value means never to use double-quotes around field values and |
616
|
|
|
|
|
|
|
not to check for the need to use them. This method is the fastest to |
617
|
|
|
|
|
|
|
read and write. You may use it when you are certain that your data |
618
|
|
|
|
|
|
|
can't contain any special characters. However, if you're wrong, this |
619
|
|
|
|
|
|
|
mode will produce a corrupted file in the event that one of the fields |
620
|
|
|
|
|
|
|
does contain the active delimiter (such as comma or tab) or a quote. |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
=item _HeaderRow = 1 unless exists |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
_HeaderRow is a boolean that says whether to expect a header row in |
625
|
|
|
|
|
|
|
data files. The default is true: a header row is required. If false, |
626
|
|
|
|
|
|
|
_FieldList MUST be present before calling read() or an error will be |
627
|
|
|
|
|
|
|
generated. In this latter case, _FieldList will be assumed to give |
628
|
|
|
|
|
|
|
the correct names of the fields in the file, in order, before the file |
629
|
|
|
|
|
|
|
is read. In other words, the object expects that either a) it can get |
630
|
|
|
|
|
|
|
the field names from the file's header row or b) you will supply them |
631
|
|
|
|
|
|
|
before read() opens the file. |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
=back |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
=head2 Encoding of return characters within fields |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
=over 4 |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
=item _ReturnMap = 1 unless exists |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
_ReturnMap says that returns embedded in fields should be decoded on |
642
|
|
|
|
|
|
|
read() and encoded again on write(). The industry-standard encoding |
643
|
|
|
|
|
|
|
for embedded returns is ^K (ascii 11 -- but see next setting to change |
644
|
|
|
|
|
|
|
it). This defaults to true but can be turned off if you want data |
645
|
|
|
|
|
|
|
untouched by read(). This setting has no effect on data files where |
646
|
|
|
|
|
|
|
no fields contain embedded returns. However, it is vital to leave |
647
|
|
|
|
|
|
|
this option ON when writing any data file whose fields could contain |
648
|
|
|
|
|
|
|
embedded returns -- if you have such data and call write() with |
649
|
|
|
|
|
|
|
_ReturnMap turned off, the resulting file will be an invalid Merge/CSV |
650
|
|
|
|
|
|
|
file and might not be re-readable. |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
When these fields are decoded on read(), encoded returns are converted |
653
|
|
|
|
|
|
|
to C<"\n"> in memory, whatever its interpretation may be on the current |
654
|
|
|
|
|
|
|
platform (\x0A on Unix or DOS; \x0D on MacPerl). |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
IMPORTANT NOTE: When these fields are encoded by write(), any |
657
|
|
|
|
|
|
|
occurrence of the current _LineEnding being used to write the file is |
658
|
|
|
|
|
|
|
searched and encoded FIRST, and THEN, any occurrence of "\n" is also |
659
|
|
|
|
|
|
|
searched and encoded. For example, if using mac line endings (^M) to |
660
|
|
|
|
|
|
|
write a file on a Unix machine, any ^M characters in fields will be |
661
|
|
|
|
|
|
|
encoded, and then any "\n" (^J) characters will ALSO be encoded. This |
662
|
|
|
|
|
|
|
may not be what you want, so be sure you know how your data is encoded |
663
|
|
|
|
|
|
|
in cases where your field values might contain any ^J and/or ^M |
664
|
|
|
|
|
|
|
characters. |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
IMPORTANT NOTE: If you turn _ReturnMap off, fields with returns in |
667
|
|
|
|
|
|
|
them will still be double-quoted correctly. Some parsers of tab- or |
668
|
|
|
|
|
|
|
comma-delimited files are able to support reading such files. |
669
|
|
|
|
|
|
|
HOWEVER, the parser in this module's read() method DOES NOT currently |
670
|
|
|
|
|
|
|
support reading files in which a single field value appears to span |
671
|
|
|
|
|
|
|
multiple lines in the file. If you have a need to read such a file, |
672
|
|
|
|
|
|
|
you may need to write your own parser as a subclass of this module. |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
=item _ReturnEncoding ||= "\x0B"; |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
This is the default encoding to assume when embedding return |
677
|
|
|
|
|
|
|
characters within fields. The industry standard is "\x0B" (ascii 11 / |
678
|
|
|
|
|
|
|
octal \013 / ^K) so you should probably not ever change this setting. |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
When fields are encoded on write(), C<"\n"> is converted to this |
681
|
|
|
|
|
|
|
value. Note that different platforms use different ascii values for |
682
|
|
|
|
|
|
|
C<"\n">, which is another good reason to leave the ReturnEncoding |
683
|
|
|
|
|
|
|
feature enabled when calling write(). |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
To summarize: this module likes to assume, and you should too, that |
686
|
|
|
|
|
|
|
returns in data files on disk are encoded as "\x0B", but once loaded |
687
|
|
|
|
|
|
|
into memory, they are encoded as the current platform's value of |
688
|
|
|
|
|
|
|
C<"\n">. |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
=item _MacRomanMap = undef unless exists |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
Data::CTable assumes by default that you want field data in memory to |
693
|
|
|
|
|
|
|
be in the ISO 8859-1 character set (the standard for Latin 1 Roman |
694
|
|
|
|
|
|
|
characters on Unix and Windows in the English and Western European |
695
|
|
|
|
|
|
|
languages -- and also the default encoding for HTML Web pages). |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
_MacRomanMap controls the module's optional mapping of Roman |
698
|
|
|
|
|
|
|
characters from Mac format on disk to ISO format in memory when |
699
|
|
|
|
|
|
|
reading and writing data files. These settings are recognized: |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
undef ## Auto: Read/write Mac chars if using Mac line endings |
702
|
|
|
|
|
|
|
1 ## On: Assume Mac char set in all fields |
703
|
|
|
|
|
|
|
0 ## Off: Don't do any character mapping at all |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
The default setting is undef, which enables "Auto" mode: files found |
706
|
|
|
|
|
|
|
to contain Mac line endings will be assumed to contain Mac upper-ASCII |
707
|
|
|
|
|
|
|
characters and will be mapped to ISO on read(); and files to be |
708
|
|
|
|
|
|
|
written with Mac line endings will mapped back from ISO to Mac format |
709
|
|
|
|
|
|
|
on write(). |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
If your data uses any non-Latin-1 character sets, or binary data, or |
712
|
|
|
|
|
|
|
you really want Mac upper-ASCII characters in memory, or you just |
713
|
|
|
|
|
|
|
don't want this module messing with your encodings, set this option to |
714
|
|
|
|
|
|
|
0 (Off) or make a subclass that always sets it to 0. |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
See also the clean() methods that can help you translate just the |
717
|
|
|
|
|
|
|
columns you want after reading a file or before writing it, which may |
718
|
|
|
|
|
|
|
be faster for you if only a few fields might contain high-ASCII |
719
|
|
|
|
|
|
|
characters. |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
=item _FileName ||= undef; |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
This is the name of the file that should be read from or WAS read |
724
|
|
|
|
|
|
|
from. (read() will set _FileName to the value it used to read the |
725
|
|
|
|
|
|
|
file, even if _FileName was only supplied as a named parameter.) |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
This name will also be used, unless overridden, to re-write the file |
728
|
|
|
|
|
|
|
again, but with an optional extension added. (See next setting.) |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
=item _WriteExtension = ".out" unless exists |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
The _WriteExtension is provided so that CTable won't overwrite your |
733
|
|
|
|
|
|
|
input data file unless you tell it to. |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
_WriteExtension will be added to the object's _FileName setting to |
736
|
|
|
|
|
|
|
create a new, related file name, before writing.... UNLESS _FileName |
737
|
|
|
|
|
|
|
is supplied as an direct or named parameter when calling write(). |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
In the latter case, write() uses the file name you supply and adds no |
740
|
|
|
|
|
|
|
extension, even if this would mean overwriting the original data file. |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
To add _WriteExtension, write() places it prior to any existing final |
743
|
|
|
|
|
|
|
extension in the _FileName: |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
_FileName default file name used by write() |
746
|
|
|
|
|
|
|
-------------------------------------------------------------- |
747
|
|
|
|
|
|
|
People.merge.txt People.merge.out.txt |
748
|
|
|
|
|
|
|
People People.out |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
If you want to always overwrite the original file without having to |
751
|
|
|
|
|
|
|
supply _FileName each time, simply set _WriteExtension to undef in a |
752
|
|
|
|
|
|
|
subclass or in each instance. |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
If _CacheOnWrite is true, then the _WriteExtension logic is applied |
755
|
|
|
|
|
|
|
first to arrive at the actual name of the file to be written, and then |
756
|
|
|
|
|
|
|
the _CacheExtension logic is applied to that name to arrive at the |
757
|
|
|
|
|
|
|
name of the cache file to be written. |
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
=back |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
=head2 Sorting-related parameters |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
=over 4 |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
=item _SortOrder ||= undef; |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
_SortOrder is the list of fields which should be used as primary, |
768
|
|
|
|
|
|
|
secondary, etc. sort keys when sort() is called. Like other |
769
|
|
|
|
|
|
|
parameters, it may be initialized by a subclass, stored in the object, |
770
|
|
|
|
|
|
|
or provided as a named parameter on each call to sort(). |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
If _SortOrder is empty or undefined, then sort() sorts the records by |
773
|
|
|
|
|
|
|
record number (i.e. they are returned to their "natural" order). |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
=item _SortSpecs ||= {}; |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
_SortSpecs is a hash of specifications for the SortType and |
778
|
|
|
|
|
|
|
SortDirection of fields on which sorting may be done. For any field |
779
|
|
|
|
|
|
|
missing a sort spec or the SortType or SortDirection components of its |
780
|
|
|
|
|
|
|
sort spec, the _DefaultSortType and _DefaultSortDirection settings |
781
|
|
|
|
|
|
|
will be used. So, for example, if all fields are of type String and |
782
|
|
|
|
|
|
|
you want them to sort Ascending, then you don't need to worry about |
783
|
|
|
|
|
|
|
_SortSpecs. You only need to provide specs for fields that don't take |
784
|
|
|
|
|
|
|
the default settings. |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
_SortSpecs might look like this: |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
{Age => {SortType => 'Integer'}, |
789
|
|
|
|
|
|
|
NameKey => {SortType => 'Text', SortDirection => -1}} |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
=item _SRoutines ||= {}; |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
_SRoutines is a hash mapping any new SortTypes invented by you to your |
794
|
|
|
|
|
|
|
custom subroutines for sorting that type of data. (See the section on |
795
|
|
|
|
|
|
|
sort routines, below, for a full discussion.) |
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
=back |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
=head2 Sorting defaults |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
=over 4 |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
=item _DefaultSortType ||= 'String'; |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
If you sort using a field with no sort spec supplied, or whose sort |
806
|
|
|
|
|
|
|
spec omits the SortType, it will get its SortType from this parameter. |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
See the sections below on SORT TYPES and SORT ROUTINES. |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
=item _DefaultSortDirection ||= 1; |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
If you sort using a field with no sort spec supplied, or whose sort |
813
|
|
|
|
|
|
|
spec omits the SortDirection, it will get its SortDirection from this |
814
|
|
|
|
|
|
|
parameter. |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
Legal sort directions are: 1 (Ascending) or -1 (Descending). |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
See the section below on DEFAULT SORT DIRECTION. |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
=back |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
=head2 Miscellaneous parameters |
823
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
=over 4 |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
=item _ErrorMsg ||= ""; |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
This parameter is set by read() or write() methods that encounter an |
829
|
|
|
|
|
|
|
error (usually a parameter error or file-system error) that prevents |
830
|
|
|
|
|
|
|
them from completing. If those methods or any methods that call them |
831
|
|
|
|
|
|
|
return a false value indicating failure, then _ErrorMsg will contain a |
832
|
|
|
|
|
|
|
string explaining the problem. The message will also have been passed |
833
|
|
|
|
|
|
|
to the progress() method for possible console feedback. |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
=item _Subset |
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
This parameter is set to 1 (true) by read() if the last call to read() |
838
|
|
|
|
|
|
|
brought in a subset of the fields available in the file; 0 otherwise. |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
The object uses this field internally so it knows to abandon any cache |
841
|
|
|
|
|
|
|
files that might not contain all requested fields upon read(). |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
=back |
844
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
=head1 SUBCLASSING |
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
Most subclasses will override initialize() to set default values for |
848
|
|
|
|
|
|
|
the parameters of the parent class and then they may provide default |
849
|
|
|
|
|
|
|
values for other subclass-specific parameters. Then, the subclass's |
850
|
|
|
|
|
|
|
initialize() should call SUPER::initialize() to let the parent |
851
|
|
|
|
|
|
|
class(es) take care of the remaining ones. |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
Every initialize() method should always allow for parameters to have |
854
|
|
|
|
|
|
|
already been provided by the $Params hash or template object. It |
855
|
|
|
|
|
|
|
should not overwrite any valid values that already exist. |
856
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
The following sample subclass changes the default setting of the |
858
|
|
|
|
|
|
|
_Progress parameter from undef to 1 and then overrides the |
859
|
|
|
|
|
|
|
progress_default() method to log all progress messages into a new |
860
|
|
|
|
|
|
|
"_ProgrLog" (progress log) parameter stored in the object. |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
BEGIN |
863
|
|
|
|
|
|
|
{ ## Data::CTable::ProgressLogger: store messages in the object |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
package Data::CTable::ProgressLogger; |
866
|
|
|
|
|
|
|
use vars qw(@ISA); @ISA=qw(Data::CTable); |
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
sub initialize ## Add a new param; change one default |
869
|
|
|
|
|
|
|
{ |
870
|
|
|
|
|
|
|
my $this = shift; |
871
|
|
|
|
|
|
|
$this->{_Progress} = 1 unless exists($this->{_Progress}); |
872
|
|
|
|
|
|
|
$this->{_ProgrLog} ||= []; |
873
|
|
|
|
|
|
|
$this->SUPER::initialize(); |
874
|
|
|
|
|
|
|
} |
875
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
sub progress_default ## Log message to object's ProgMsgs list |
877
|
|
|
|
|
|
|
{ |
878
|
|
|
|
|
|
|
my $this = shift; |
879
|
|
|
|
|
|
|
my ($msg) = @_; |
880
|
|
|
|
|
|
|
chomp $msg; |
881
|
|
|
|
|
|
|
push @{$this->{_ProgrLog}}, localtime() . " $msg"; |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
return(1); |
884
|
|
|
|
|
|
|
} |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
sub show_log ## Use Dumper to spit out the log list |
887
|
|
|
|
|
|
|
{ |
888
|
|
|
|
|
|
|
my $this = shift; |
889
|
|
|
|
|
|
|
$this->dump($this->{_ProgrLog}); |
890
|
|
|
|
|
|
|
} |
891
|
|
|
|
|
|
|
} |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
## Later... |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
my $Table = Data::CTable::ProgressLogger->new("mydata.txt"); |
896
|
|
|
|
|
|
|
# ... do stuff... |
897
|
|
|
|
|
|
|
$Table->write(); |
898
|
|
|
|
|
|
|
$Table->show_log(); |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
=cut |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
sub new |
905
|
|
|
|
|
|
|
{ |
906
|
|
|
|
|
|
|
## First arg to new is always either class name or a template |
907
|
|
|
|
|
|
|
## object. This allows $obj->new() or CLASS->new(). |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
## Second argument (if and only if it is a hash ref or an object |
910
|
|
|
|
|
|
|
## whose underlying representation is a hash ref) is an optional |
911
|
|
|
|
|
|
|
## anonymous hash of parameters which if supplied, will override |
912
|
|
|
|
|
|
|
## any parameters already found in the template object, if any. |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
## See the initialize method, below, for a list of parameters that |
915
|
|
|
|
|
|
|
## can be supplied (and will be defaulted for you if not |
916
|
|
|
|
|
|
|
## supplied). |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
## Note that the template object and the params hash will be |
919
|
|
|
|
|
|
|
## SHALLOWLY copied -- the original hash objects passed will not |
920
|
|
|
|
|
|
|
## be used, but any sub-structures within them will now "belong" |
921
|
|
|
|
|
|
|
## to the resulting new object which will feel free to manipulate |
922
|
|
|
|
|
|
|
## them, possibly invalidating the integrity of the original |
923
|
|
|
|
|
|
|
## template object. |
924
|
|
|
|
|
|
|
|
925
|
109
|
|
|
109
|
0
|
3853
|
my $ClassOrObj = shift; |
926
|
109
|
100
|
|
|
|
531
|
my ($Params) = {%{shift()}} if UNIVERSAL::isa($_[0], 'HASH'); |
|
82
|
|
|
|
|
855
|
|
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
## Shallow-copy all params from template object and/or optional |
929
|
|
|
|
|
|
|
## $Params hash into new hash. DON'T re-use caller's obj or hash. |
930
|
|
|
|
|
|
|
|
931
|
109
|
50
|
|
|
|
673
|
my $this = |
932
|
109
|
100
|
|
|
|
862
|
{%{(UNIVERSAL::isa($ClassOrObj, 'HASH') ? $ClassOrObj : {})}, |
933
|
109
|
|
|
|
|
212
|
%{(UNIVERSAL::isa($Params, 'HASH') ? $Params : {})}}; |
934
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
## Bless the new object into the class |
936
|
|
|
|
|
|
|
|
937
|
109
|
|
33
|
|
|
545
|
my $class = ref($ClassOrObj) || $ClassOrObj; |
938
|
109
|
|
|
|
|
273
|
bless $this, $class; |
939
|
|
|
|
|
|
|
|
940
|
109
|
|
|
|
|
133
|
my $Success; |
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
## Run the subclassable initialize() method to create default |
943
|
|
|
|
|
|
|
## settings for any private parameters. |
944
|
|
|
|
|
|
|
|
945
|
109
|
50
|
|
|
|
268
|
goto done unless $this->initialize(); |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
## Finally, process any (other) arguments to new(), if any. |
948
|
|
|
|
|
|
|
|
949
|
109
|
|
|
|
|
211
|
my $RemainingArgs = [@_]; |
950
|
|
|
|
|
|
|
|
951
|
109
|
50
|
|
|
|
305
|
goto done unless $this->process_new_args($RemainingArgs, $Params); |
952
|
|
|
|
|
|
|
|
953
|
109
|
|
|
|
|
125
|
$Success = 1; |
954
|
109
|
50
|
|
|
|
556
|
done: |
955
|
|
|
|
|
|
|
return ($Success ? $this : undef); |
956
|
|
|
|
|
|
|
} |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
### process_new_args |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
### Any optional remaining (non-HASH ref) arguments to new() are |
961
|
|
|
|
|
|
|
### treated as file names of files to open and append to the in-memory |
962
|
|
|
|
|
|
|
### table, creating new columns as necessary. We call the |
963
|
|
|
|
|
|
|
### subclassable append_files_new() method to process these. |
964
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
sub process_new_args |
966
|
|
|
|
|
|
|
{ |
967
|
109
|
|
|
109
|
0
|
155
|
my $this = shift; |
968
|
109
|
|
|
|
|
148
|
my ($RemainingArgs, $Params) = @_; |
969
|
|
|
|
|
|
|
|
970
|
109
|
|
|
|
|
169
|
my $Success; |
971
|
|
|
|
|
|
|
|
972
|
109
|
50
|
|
|
|
301
|
$Success = $this->append_files_new($RemainingArgs, $Params) or goto done; |
973
|
|
|
|
|
|
|
|
974
|
109
|
|
|
|
|
132
|
$Success = 1; |
975
|
109
|
|
|
|
|
263
|
done: |
976
|
|
|
|
|
|
|
return ($Success); |
977
|
|
|
|
|
|
|
} |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
### initialize |
980
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
### Assumptions made by initialize() (and all other methods, too): |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
### The blessed object is a hash ref. |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
### All hash keys beginning with _ are reserved for non-data columns. |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
### Hash keys beginning with a single _ are reserved for future |
988
|
|
|
|
|
|
|
### versions of this parent class implementation. Subclasses might |
989
|
|
|
|
|
|
|
### want to use double-underscore for additional slots. |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
### All other hash keys are field names; their values are data |
992
|
|
|
|
|
|
|
### columns (array references). |
993
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
### initialize() sets / validates initial settings for all parameters |
995
|
|
|
|
|
|
|
### recognized by this parent class. It exercises caution to not |
996
|
|
|
|
|
|
|
### override any legal values previously set by the |
997
|
|
|
|
|
|
|
### subclass::initialize() or by new(). |
998
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
sub initialize |
1000
|
|
|
|
|
|
|
{ |
1001
|
109
|
50
|
|
109
|
0
|
306
|
my $this = shift or goto done; |
1002
|
|
|
|
|
|
|
|
1003
|
109
|
|
|
|
|
121
|
my $Success; |
1004
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
## Reading / writing |
1006
|
|
|
|
|
|
|
|
1007
|
109
|
|
100
|
|
|
361
|
$this->{_FileName} ||= undef; ## Path of file that was read |
1008
|
|
|
|
|
|
|
|
1009
|
109
|
100
|
|
|
|
329
|
$this->{_WriteExtension} = ".out" unless exists($this->{_WriteExtension}); |
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
## Cache settings |
1012
|
|
|
|
|
|
|
|
1013
|
109
|
100
|
|
|
|
297
|
$this->{_CacheOnRead} = 1 unless exists($this->{_CacheOnRead}); |
1014
|
109
|
100
|
|
|
|
298
|
$this->{_CacheOnWrite} = 0 unless exists($this->{_CacheOnWrite}); |
1015
|
109
|
100
|
|
|
|
288
|
$this->{_CacheExtension} = ".cache" unless exists($this->{_CacheExtension}); |
1016
|
109
|
100
|
|
|
|
323
|
$this->{_CacheSubDir} = "cache" unless exists($this->{_CacheSubDir}); |
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
## File format settings |
1019
|
|
|
|
|
|
|
|
1020
|
109
|
|
100
|
|
|
374
|
$this->{_LineEnding} ||= undef; |
1021
|
109
|
|
100
|
|
|
361
|
$this->{_FDelimiter} ||= undef; |
1022
|
109
|
100
|
|
|
|
284
|
$this->{_QuoteFields} = undef unless exists ($this->{_QuoteFields}); |
1023
|
109
|
100
|
|
|
|
281
|
$this->{_HeaderRow} = 1 unless exists ($this->{_HeaderRow}); |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
## Return encodings |
1026
|
|
|
|
|
|
|
|
1027
|
109
|
100
|
|
|
|
261
|
$this->{_ReturnMap} = 1 unless exists ($this->{_ReturnMap}); |
1028
|
109
|
|
100
|
|
|
356
|
$this->{_ReturnEncoding} ||= "\x0B"; ## Char to use for return chars |
1029
|
109
|
100
|
|
|
|
288
|
$this->{_MacRomanMap} = undef unless exists ($this->{_MacRomanMap}); |
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
## Sorting defaults |
1032
|
|
|
|
|
|
|
|
1033
|
109
|
|
100
|
|
|
334
|
$this->{_DefaultSortType} ||= 'String'; |
1034
|
109
|
|
100
|
|
|
330
|
$this->{_DefaultSortDirection} ||= 1; ## Ascending (-1 = desc) |
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
## Progress routine / setting |
1037
|
|
|
|
|
|
|
|
1038
|
109
|
100
|
|
|
|
373
|
$this->{_Progress} = undef unless exists ($this->{_Progress}); |
1039
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
## Internal meta-structures |
1041
|
|
|
|
|
|
|
|
1042
|
109
|
|
100
|
|
|
346
|
$this->{_FieldList} ||= undef; ## List of fields; undef means all fields, alpha order |
1043
|
109
|
|
100
|
|
|
487
|
$this->{_Selection} ||= undef; ## List of rec #s; undef means all records, natural order |
1044
|
109
|
|
50
|
|
|
422
|
$this->{_SortOrder} ||= undef; ## List of fields; undef/empty means sort by record number |
1045
|
|
|
|
|
|
|
|
1046
|
109
|
|
100
|
|
|
359
|
$this->{_SortSpecs} ||= {}; ## Hash: FieldName => Sortspec |
1047
|
109
|
|
100
|
|
|
348
|
$this->{_SRoutines} ||= {}; ## Hash: SortType => custom sort routine for type |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
## Miscellaneous |
1050
|
|
|
|
|
|
|
|
1051
|
109
|
|
50
|
|
|
371
|
$this->{_ErrorMsg} ||= ""; ## Explains last read/write failure |
1052
|
109
|
|
50
|
|
|
394
|
$this->{_Subset} ||= 0; ## Flag indicating subset of available fields were read |
1053
|
|
|
|
|
|
|
|
1054
|
109
|
|
|
|
|
141
|
$Success = 1; |
1055
|
109
|
|
|
|
|
291
|
done: |
1056
|
|
|
|
|
|
|
return($Success); |
1057
|
|
|
|
|
|
|
} |
1058
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
=pod |
1060
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
=head1 FIELD LIST |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
## Getting / setting the object's _FieldList |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
$t->fieldlist() ## Get _FieldList or fieldlist_all() |
1066
|
|
|
|
|
|
|
$t->fieldlist_get() |
1067
|
|
|
|
|
|
|
$t->fieldlist_hash() ## Get fieldlist() as keys in a hash |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
$t->fieldlist_all() ## Get all fields (ignore _FieldList) |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
$t->fieldlist($MyList) ## Set field list (_FieldList param) |
1072
|
|
|
|
|
|
|
$t->fieldlist_set($MyList) |
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
$t->fieldlist(0) ## Remove field list (use default) |
1075
|
|
|
|
|
|
|
$t->fieldlist_set() |
1076
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
$t->fieldlist_force($MyList)## Set list; remove non-matching cols |
1078
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
$t->fieldlist_truncate() ## Just remove nonmatching cols |
1080
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
$t->fieldlist_default() ## Default field list (alpha-sorted) |
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
$t->fieldlist_add($MyName) ## Append new name to custom list. |
1084
|
|
|
|
|
|
|
$t->fieldlist_delete($MyName) ## Delete name from custom list. |
1085
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
A CTable object can optionally have a custom field list. The custom |
1087
|
|
|
|
|
|
|
field list can store both the ORDER of the fields (which otherwise |
1088
|
|
|
|
|
|
|
would be unordered since they are stored as keys in a hash), and also |
1089
|
|
|
|
|
|
|
can be a subset of the fields actually in the object, allowing you to |
1090
|
|
|
|
|
|
|
temporarily ignore certain effectively-hidden fields for the benefit |
1091
|
|
|
|
|
|
|
of certain operations. The custom field list can be changed or |
1092
|
|
|
|
|
|
|
removed at any time. |
1093
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
The custom field list is stored in the private _FieldList parameter. |
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
fieldlist() always returns a list (reference). The list is either the |
1097
|
|
|
|
|
|
|
same list as _FieldList, if present, or it is the result of calling |
1098
|
|
|
|
|
|
|
fieldlist_default(). In CTable, fieldlist_default() in turn calls |
1099
|
|
|
|
|
|
|
fieldlist_all() -- hence fieldlist() would yield an auto-generated |
1100
|
|
|
|
|
|
|
list of all fields in alphabetical order. |
1101
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
fieldlist_all() can be called directly to get a list of all fields |
1103
|
|
|
|
|
|
|
present regardless of the presence of a _FieldList parameter. The |
1104
|
|
|
|
|
|
|
list is an alphabetical case-insensitively sorted list of all hash |
1105
|
|
|
|
|
|
|
keys whose names do not begin with an underscore. |
1106
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
You could override this method if you want a different behavior. Or, |
1108
|
|
|
|
|
|
|
you could create your own custom field list by calling fieldlist_all() |
1109
|
|
|
|
|
|
|
and removing fields or ordering them differently. |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
To set a custom field list (in _FieldList), call fieldlist() or |
1112
|
|
|
|
|
|
|
fieldlist_set() with a list (reference). The list must be a list of |
1113
|
|
|
|
|
|
|
strings (field names) that do not begin with underscore. The object |
1114
|
|
|
|
|
|
|
owns the list you supply. |
1115
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
To remove a custom field list (and let the default be used), call |
1117
|
|
|
|
|
|
|
fieldlist(0) or fieldlist_set() with no arguments (these will return |
1118
|
|
|
|
|
|
|
the fieldlist that was deleted, if any). |
1119
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
fieldlist_freeze() "freezes" the fieldlist in its current state. This |
1121
|
|
|
|
|
|
|
is equivalent to the following: |
1122
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
$t->fieldlist_set($t->fieldlist()); |
1124
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
... which would force the fieldlist to $t->fieldlist_all() if and only |
1126
|
|
|
|
|
|
|
if there is not already a custom _FieldList present. |
1127
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
IMPORTANT NOTE ABOUT PARTIAL FIELD LISTS: When setting a field list, |
1129
|
|
|
|
|
|
|
the object ensures that all fields (columns) mentioned in the list are |
1130
|
|
|
|
|
|
|
present in the object -- it creates empty columns of the correct |
1131
|
|
|
|
|
|
|
length as necessary. However, it does NOT delete any fields not |
1132
|
|
|
|
|
|
|
mentioned in the field list. This allows you to manipulate the field |
1133
|
|
|
|
|
|
|
list in order to have certain fields be temporarily ignored by all |
1134
|
|
|
|
|
|
|
other methods, then alter, restore, or remove it (allow it to revert |
1135
|
|
|
|
|
|
|
to default) and they will be effectively unhidden again. Some methods |
1136
|
|
|
|
|
|
|
(such as cols(), write(), etc.) also allow you to specify a custom |
1137
|
|
|
|
|
|
|
field list that will override any other list just during the execution |
1138
|
|
|
|
|
|
|
of that method call but will not modify the object itself. |
1139
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
Call fieldlist_force() to set the list AND have any non-listed fields |
1141
|
|
|
|
|
|
|
also deleted at the same time (by calling fieldlist_truncate() |
1142
|
|
|
|
|
|
|
internally). You can also just delete individual columns one-by-one, |
1143
|
|
|
|
|
|
|
of course, using the column-manipulation methods and the custom |
1144
|
|
|
|
|
|
|
fieldlist, if any, will be appropriately updated for you. |
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
fieldlist_truncate() deletes any fields found in the table but not |
1147
|
|
|
|
|
|
|
currently present in _FieldList. A hash of the deleted columns is |
1148
|
|
|
|
|
|
|
returned to the caller. If there is no _FieldList, then this method |
1149
|
|
|
|
|
|
|
does nothing. |
1150
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
fieldlist_default() just calls fieldlist_all() in this implementation, |
1152
|
|
|
|
|
|
|
but could be changed in subclasses. |
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
fieldlist_add() is the internal method that adds a new field name to |
1155
|
|
|
|
|
|
|
the custom field list (if present) and if the field name was not |
1156
|
|
|
|
|
|
|
already on the list. It is called by other methods any time a new |
1157
|
|
|
|
|
|
|
column is added to the table. Don't call it directly unless you know |
1158
|
|
|
|
|
|
|
what you're doing because the corresponding column won't be created. |
1159
|
|
|
|
|
|
|
(Instead, use col().) The field name is appended to the end of the |
1160
|
|
|
|
|
|
|
existing custom field list. If there is no custom field list, nothing |
1161
|
|
|
|
|
|
|
is done. |
1162
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
fieldlist_delete() is the internal method that deletes a field name |
1164
|
|
|
|
|
|
|
from the custom field list (if present). It is called by other |
1165
|
|
|
|
|
|
|
methods when columns are deleted, but it does not actually delete the |
1166
|
|
|
|
|
|
|
columns themselves, so use with caution: deleting a field from the |
1167
|
|
|
|
|
|
|
custom field list effectively hides the field. This method has no |
1168
|
|
|
|
|
|
|
effect, however, if there is no custom field list present. So don't |
1169
|
|
|
|
|
|
|
call this method directly unless you know what you're doing. |
1170
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
=cut |
1172
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
sub fieldlist_all |
1174
|
|
|
|
|
|
|
{ |
1175
|
223
|
|
|
223
|
0
|
298
|
my $this = shift; |
1176
|
223
|
|
|
|
|
1020
|
my $FieldList = [sort {lc($a) cmp lc($b)} grep {!/^_/} keys %$this]; |
|
883
|
|
|
|
|
1558
|
|
|
5726
|
|
|
|
|
12123
|
|
1177
|
|
|
|
|
|
|
|
1178
|
223
|
|
|
|
|
899
|
return($FieldList); |
1179
|
|
|
|
|
|
|
} |
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
sub fieldlist_default ## Same as fieldlist_all() in this class. |
1182
|
|
|
|
|
|
|
{ |
1183
|
40
|
|
|
40
|
0
|
51
|
my $this = shift; |
1184
|
40
|
|
|
|
|
73
|
my $FieldList = $this->fieldlist_all(); |
1185
|
|
|
|
|
|
|
|
1186
|
40
|
|
|
|
|
136
|
return($FieldList); |
1187
|
|
|
|
|
|
|
} |
1188
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
sub fieldlist |
1190
|
|
|
|
|
|
|
{ |
1191
|
805
|
|
|
805
|
0
|
2485
|
my $this = shift; |
1192
|
805
|
|
|
|
|
927
|
my ($FieldList) = @_; |
1193
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
## Set if specified. |
1195
|
805
|
100
|
|
|
|
1560
|
$this->fieldlist_set($FieldList) if defined($FieldList); |
1196
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
## Get and return. |
1198
|
805
|
|
|
|
|
1336
|
$FieldList = $this->fieldlist_get(); |
1199
|
|
|
|
|
|
|
|
1200
|
805
|
|
|
|
|
1857
|
return($FieldList); |
1201
|
|
|
|
|
|
|
} |
1202
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
sub fieldlist_get |
1204
|
|
|
|
|
|
|
{ |
1205
|
805
|
|
|
805
|
0
|
1291
|
my $this = shift; |
1206
|
805
|
|
66
|
|
|
1821
|
my $FieldList = $this->{_FieldList} || $this->fieldlist_default(); |
1207
|
|
|
|
|
|
|
|
1208
|
805
|
|
|
|
|
1250
|
return($FieldList); |
1209
|
|
|
|
|
|
|
} |
1210
|
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
|
sub fieldlist_hash ## ([$FieldList]) |
1212
|
|
|
|
|
|
|
{ |
1213
|
96
|
|
|
96
|
0
|
124
|
my $this = shift; |
1214
|
96
|
|
|
|
|
123
|
my ($FieldList) = @_; |
1215
|
96
|
|
66
|
|
|
280
|
$FieldList ||= $this->fieldlist(); |
1216
|
96
|
|
|
|
|
170
|
my $FieldHash = {}; @$FieldHash{@$FieldList} = undef; |
|
96
|
|
|
|
|
388
|
|
1217
|
|
|
|
|
|
|
|
1218
|
96
|
|
|
|
|
189
|
return($FieldHash); |
1219
|
|
|
|
|
|
|
} |
1220
|
|
|
|
|
|
|
|
1221
|
|
|
|
|
|
|
sub fieldlist_set |
1222
|
|
|
|
|
|
|
{ |
1223
|
36
|
|
|
36
|
0
|
62
|
my $this = shift; |
1224
|
36
|
|
|
|
|
50
|
my ($FieldList) = @_; |
1225
|
|
|
|
|
|
|
|
1226
|
36
|
|
|
|
|
118
|
return($this->fieldlist_set_internal($FieldList, 0)); |
1227
|
|
|
|
|
|
|
} |
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
sub fieldlist_freeze |
1230
|
|
|
|
|
|
|
{ |
1231
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
1232
|
0
|
|
|
|
|
0
|
return($this->fieldlist_set($this->fieldlist())); |
1233
|
|
|
|
|
|
|
} |
1234
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
sub fieldlist_force |
1236
|
|
|
|
|
|
|
{ |
1237
|
3
|
|
|
3
|
0
|
7
|
my $this = shift; |
1238
|
3
|
|
|
|
|
5
|
my ($FieldList) = @_; |
1239
|
|
|
|
|
|
|
|
1240
|
3
|
|
|
|
|
9
|
return($this->fieldlist_set_internal($FieldList, 1)); |
1241
|
|
|
|
|
|
|
} |
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
sub fieldlist_set_internal |
1244
|
|
|
|
|
|
|
{ |
1245
|
39
|
|
|
39
|
0
|
49
|
my $this = shift; |
1246
|
39
|
|
|
|
|
50
|
my ($FieldList, $Force) = @_; |
1247
|
|
|
|
|
|
|
|
1248
|
39
|
100
|
|
|
|
106
|
if (ref($FieldList) eq 'ARRAY') |
1249
|
|
|
|
|
|
|
{ |
1250
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
## Whether forcing or not, ensure all fields mentioned in the |
1252
|
|
|
|
|
|
|
## list actually exist and are the correct length. |
1253
|
33
|
|
|
|
|
88
|
$this->fieldlist_check($FieldList); |
1254
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
## Set the custom list |
1256
|
33
|
|
|
|
|
68
|
$this->{_FieldList} = $FieldList; |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
## In "force" mode, remove any non-listed columns. |
1259
|
33
|
100
|
|
|
|
92
|
$this->fieldlist_truncate() if ($Force); |
1260
|
|
|
|
|
|
|
} |
1261
|
|
|
|
|
|
|
else |
1262
|
|
|
|
|
|
|
{ |
1263
|
|
|
|
|
|
|
## Remove the custom field list. |
1264
|
6
|
|
|
|
|
17
|
$FieldList = delete $this->{_FieldList}; |
1265
|
|
|
|
|
|
|
} |
1266
|
|
|
|
|
|
|
|
1267
|
39
|
|
|
|
|
73
|
return($FieldList); ## Return the one that was set or deleted. |
1268
|
|
|
|
|
|
|
} |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
sub fieldlist_check |
1271
|
|
|
|
|
|
|
{ |
1272
|
33
|
|
|
33
|
0
|
39
|
my $this = shift; |
1273
|
33
|
|
|
|
|
65
|
my ($FieldList) = @_; |
1274
|
|
|
|
|
|
|
|
1275
|
33
|
|
33
|
|
|
81
|
$FieldList ||= $this->fieldlist(); |
1276
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
## Visit each field name in the current list. Make sure it is |
1278
|
|
|
|
|
|
|
## present. |
1279
|
|
|
|
|
|
|
|
1280
|
33
|
|
|
|
|
60
|
foreach my $FieldName (@$FieldList) |
1281
|
|
|
|
|
|
|
{ |
1282
|
|
|
|
|
|
|
## The col method will the column exist if not present. |
1283
|
100
|
|
|
|
|
214
|
$this->col($FieldName); |
1284
|
|
|
|
|
|
|
} |
1285
|
|
|
|
|
|
|
} |
1286
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
sub fieldlist_truncate |
1288
|
|
|
|
|
|
|
{ |
1289
|
3
|
|
|
3
|
0
|
5
|
my $this = shift; |
1290
|
|
|
|
|
|
|
|
1291
|
3
|
|
|
|
|
8
|
my $FieldList = $this->fieldlist(); |
1292
|
3
|
|
|
|
|
9
|
my $AllFields = $this->fieldlist_all(); |
1293
|
3
|
|
|
|
|
6
|
my $FieldHash = {}; @$FieldHash{@$FieldList} = undef; |
|
3
|
|
|
|
|
11
|
|
1294
|
|
|
|
|
|
|
|
1295
|
3
|
|
|
|
|
6
|
my $DeletedCols = {}; |
1296
|
|
|
|
|
|
|
|
1297
|
3
|
|
|
|
|
7
|
foreach my $FieldName (@$AllFields) |
1298
|
|
|
|
|
|
|
{ |
1299
|
8
|
100
|
|
|
|
23
|
if (!exists($FieldHash->{$FieldName})) |
1300
|
|
|
|
|
|
|
{ |
1301
|
3
|
|
|
|
|
9
|
$DeletedCols->{$FieldName} = delete $this->{$FieldName}; |
1302
|
|
|
|
|
|
|
} |
1303
|
|
|
|
|
|
|
} |
1304
|
|
|
|
|
|
|
|
1305
|
3
|
|
|
|
|
12
|
return($DeletedCols); |
1306
|
|
|
|
|
|
|
} |
1307
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
sub fieldlist_add |
1309
|
|
|
|
|
|
|
{ |
1310
|
43
|
|
|
43
|
0
|
53
|
my $this = shift; |
1311
|
43
|
|
|
|
|
56
|
my ($FieldName) = @_; |
1312
|
|
|
|
|
|
|
|
1313
|
43
|
100
|
|
|
|
124
|
if (ref($this->{_FieldList}) eq 'ARRAY') |
1314
|
|
|
|
|
|
|
{ |
1315
|
34
|
|
|
|
|
47
|
my $FieldList = $this->{_FieldList}; |
1316
|
34
|
|
|
|
|
52
|
my $FieldHash = {}; @$FieldHash{@$FieldList} = undef; |
|
34
|
|
|
|
|
112
|
|
1317
|
|
|
|
|
|
|
|
1318
|
34
|
50
|
|
|
|
85
|
if (!exists($FieldHash->{$FieldName})) |
1319
|
|
|
|
|
|
|
{ |
1320
|
34
|
|
|
|
|
117
|
push @$FieldList, $FieldName; |
1321
|
|
|
|
|
|
|
} |
1322
|
|
|
|
|
|
|
} |
1323
|
|
|
|
|
|
|
} |
1324
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
sub fieldlist_delete |
1326
|
|
|
|
|
|
|
{ |
1327
|
3
|
|
|
3
|
0
|
10
|
my $this = shift; |
1328
|
3
|
|
|
|
|
8
|
my ($FieldName) = @_; |
1329
|
|
|
|
|
|
|
|
1330
|
3
|
50
|
|
|
|
14
|
if (ref($this->{_FieldList}) eq 'ARRAY') |
1331
|
|
|
|
|
|
|
{ |
1332
|
3
|
|
|
|
|
6
|
$this->{_FieldList} = [grep {$_ ne $FieldName} @{$this->{_FieldList}}]; |
|
11
|
|
|
|
|
33
|
|
|
3
|
|
|
|
|
11
|
|
1333
|
|
|
|
|
|
|
} |
1334
|
|
|
|
|
|
|
} |
1335
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
=pod |
1337
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
=head1 DATA COLUMNS (FIELD DATA) |
1339
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
## Getting or setting data in entire columns |
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
$t->{$ColName} ## Get a column you know exists |
1343
|
|
|
|
|
|
|
$t->col($ColName) ## Get a column or make empty one. |
1344
|
|
|
|
|
|
|
$t->col_get($ColName) |
1345
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
$t->col($ColName, $ListRef) ## Set all of a column all at once. |
1347
|
|
|
|
|
|
|
$t->col_set($ColName, $ListRef) |
1348
|
|
|
|
|
|
|
$t->col_force($ColName, $ListRef) ## Add but don't check size or |
1349
|
|
|
|
|
|
|
## add to custom field list |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
$t->col_set($ColName, undef) ## Delete a column completely |
1352
|
|
|
|
|
|
|
$t->col_delete($ColName) |
1353
|
|
|
|
|
|
|
|
1354
|
|
|
|
|
|
|
$t->col_empty() ## An empty col presized for table |
1355
|
|
|
|
|
|
|
$t->col_empty(22) ## An empty col of another length |
1356
|
|
|
|
|
|
|
$t->col_empty($Col) ## An empty col sized to match another |
1357
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
$t->col_default() ## Default if req. column not found. |
1359
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
$t->col_exists($Field) ## Check existence of column |
1361
|
|
|
|
|
|
|
$t->col_active($Field) ## Restrict check to fieldlist() |
1362
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
$t->cols($ColList) ## Get list of multiple named columns |
1364
|
|
|
|
|
|
|
$t->cols_hash($ColList) ## Get hash " " " |
1365
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
$t->col_rename($Old => $New) ## Change name of columns |
1367
|
|
|
|
|
|
|
$t->col_rename($Old1 => $New1, $Old2 => $New2) ## Change several |
1368
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
A "column" is a field in the table and all its data. The column's |
1370
|
|
|
|
|
|
|
field name is a key in the object itself, and may also optionally be |
1371
|
|
|
|
|
|
|
listed in a custom field list if present. The column's data is the |
1372
|
|
|
|
|
|
|
key's value in the hash and is an array ref of values presumed to be |
1373
|
|
|
|
|
|
|
of the same data type (e. g. string, integer, etc.) |
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
Sometimes the terms "column" and "field" are used interchangeably in |
1376
|
|
|
|
|
|
|
this documentation. |
1377
|
|
|
|
|
|
|
|
1378
|
|
|
|
|
|
|
If you already know that a column exists (because you got it from the |
1379
|
|
|
|
|
|
|
fieldlist() method and you've not previously manipulated _FieldList |
1380
|
|
|
|
|
|
|
directly but instead carefully used the method calls available for |
1381
|
|
|
|
|
|
|
that), then you can safely get the column by just looking it up in the |
1382
|
|
|
|
|
|
|
object itself. |
1383
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
The col() method does the same thing, but forces the column to spring |
1385
|
|
|
|
|
|
|
into existence if it did not already (which can also have the |
1386
|
|
|
|
|
|
|
potentially unwanted side-effect of hiding coding errors in which you |
1387
|
|
|
|
|
|
|
retreive mis-named columns: so beware). Columns brought into |
1388
|
|
|
|
|
|
|
existence this way will automatically be pre-sized (i.e. they will |
1389
|
|
|
|
|
|
|
will be created and set to whatever col_default() returns). |
1390
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
The col() or col_set() methods can also be used to set a column. When |
1392
|
|
|
|
|
|
|
the column is set, the list you pass is automatically sized |
1393
|
|
|
|
|
|
|
(lengthened or truncated) to match the current length of the table. |
1394
|
|
|
|
|
|
|
If this is not what you want, then call col_force() which will not |
1395
|
|
|
|
|
|
|
check whether the new column matches the size of the others. |
1396
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
No matter how you set it, the object now "owns" the list you gave it. |
1398
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
As a convenience, col(), col_set() and col_force() return the column |
1400
|
|
|
|
|
|
|
that was set. They silently discard any previous column. |
1401
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
All three methods of column setting will append the column to the |
1403
|
|
|
|
|
|
|
custom field list if one is present and the column name is not already |
1404
|
|
|
|
|
|
|
listed there (by calling fieldlist_add()). They will also call the |
1405
|
|
|
|
|
|
|
extend() method to ensure all columns have the same length (either |
1406
|
|
|
|
|
|
|
others will be extended to match the length of the new one, or the new |
1407
|
|
|
|
|
|
|
one will be extended to match the length of the others). |
1408
|
|
|
|
|
|
|
|
1409
|
|
|
|
|
|
|
col_delete() deletes a column. |
1410
|
|
|
|
|
|
|
|
1411
|
|
|
|
|
|
|
col_empty() returns an anonymous list reference that is pre-sized to |
1412
|
|
|
|
|
|
|
the length of the table (by default). You could use it to get an |
1413
|
|
|
|
|
|
|
empty column that you intend to fill up and then later insert into the |
1414
|
|
|
|
|
|
|
table or use to hold the results of an operation on other columns. If |
1415
|
|
|
|
|
|
|
you want a different length, specify it as a number or as an array ref |
1416
|
|
|
|
|
|
|
whose length should be matched. |
1417
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
col_default() is the internal method that implements the "springing |
1419
|
|
|
|
|
|
|
into existence" of missing columns. Currently it just calls |
1420
|
|
|
|
|
|
|
col_empty(). Other subclasses might want to have it return undef or a |
1421
|
|
|
|
|
|
|
string like "NO_SUCH_COLUMN" in order to help track programming errors |
1422
|
|
|
|
|
|
|
where nonexistent columns are requested. |
1423
|
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
|
cols($FieldList) returns an ordered list of the requested column |
1425
|
|
|
|
|
|
|
names. If no list is given, then fieldlist() is used. |
1426
|
|
|
|
|
|
|
|
1427
|
|
|
|
|
|
|
cols_hash($FieldList) does the same as cols(), but the result is a |
1428
|
|
|
|
|
|
|
hash whose keys are the field names and whose values are the columns |
1429
|
|
|
|
|
|
|
-- much like the original object itself, but not blessed into the |
1430
|
|
|
|
|
|
|
class. The resulting hash, however, could be used as the prototype |
1431
|
|
|
|
|
|
|
for a new Data::CTable object (by calling the new() method). However, |
1432
|
|
|
|
|
|
|
be warned that both objects will think they "own" the resulting shared |
1433
|
|
|
|
|
|
|
so be careful what you do..... which brings us to this: |
1434
|
|
|
|
|
|
|
|
1435
|
|
|
|
|
|
|
IMPORTANT NOTE ABOUT GETTING COLUMNS: The columns you retrieve from a |
1436
|
|
|
|
|
|
|
table are still "owned" by the table object as long as it lives. If |
1437
|
|
|
|
|
|
|
you modify them, you are modifying the table's data. If you change |
1438
|
|
|
|
|
|
|
their length, then you may be invalidating the table's own |
1439
|
|
|
|
|
|
|
expectations that all its columns have the same length. So beware. |
1440
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
Just make yourself a copy of the data if that isn't what you want. |
1442
|
|
|
|
|
|
|
For example, instead of this: |
1443
|
|
|
|
|
|
|
|
1444
|
|
|
|
|
|
|
my $Foo = $Table->col('Foo'); ## Reference to actual column |
1445
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
Do this: |
1447
|
|
|
|
|
|
|
|
1448
|
|
|
|
|
|
|
my $Foo = [@{$Table->col('Foo')}]; ## Shallow copy of the column |
1449
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
=cut |
1451
|
|
|
|
|
|
|
|
1452
|
|
|
|
|
|
|
sub col ## ($ColName, [$Vector]) |
1453
|
|
|
|
|
|
|
{ |
1454
|
1472
|
|
|
1472
|
0
|
2828
|
my $this = shift; |
1455
|
1472
|
|
|
|
|
1831
|
my ($ColName, $Vector) = @_; |
1456
|
|
|
|
|
|
|
|
1457
|
|
|
|
|
|
|
## Set if specified. |
1458
|
1472
|
100
|
|
|
|
3406
|
my $FoundVector = $this->col_set($ColName, $Vector) if defined($Vector); |
1459
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
## Get and return. |
1461
|
|
|
|
|
|
|
## If not specified, create it with col_default() |
1462
|
1472
|
|
|
|
|
2428
|
my $Col = $this->col_get($ColName); |
1463
|
|
|
|
|
|
|
|
1464
|
1472
|
|
|
|
|
3961
|
return($Col); |
1465
|
|
|
|
|
|
|
} |
1466
|
|
|
|
|
|
|
|
1467
|
|
|
|
|
|
|
sub col_get |
1468
|
|
|
|
|
|
|
{ |
1469
|
1475
|
|
|
1475
|
0
|
1730
|
my $this = shift; |
1470
|
1475
|
|
|
|
|
2103
|
my ($ColName) = @_; |
1471
|
|
|
|
|
|
|
|
1472
|
1475
|
|
66
|
|
|
3413
|
my $Col = ($this->{$ColName} || $this->col_add($ColName)); |
1473
|
|
|
|
|
|
|
|
1474
|
1475
|
|
|
|
|
2320
|
return($Col); |
1475
|
|
|
|
|
|
|
} |
1476
|
|
|
|
|
|
|
|
1477
|
|
|
|
|
|
|
sub col_add |
1478
|
|
|
|
|
|
|
{ |
1479
|
28
|
|
|
28
|
0
|
38
|
my $this = shift; |
1480
|
28
|
|
|
|
|
43
|
my ($ColName) = @_; |
1481
|
28
|
|
|
|
|
61
|
my $Col = $this->{$ColName} = $this->col_empty(); |
1482
|
|
|
|
|
|
|
|
1483
|
28
|
|
|
|
|
67
|
$this->fieldlist_add($ColName); |
1484
|
28
|
|
|
|
|
80
|
return($Col); |
1485
|
|
|
|
|
|
|
} |
1486
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
sub col_set_internal ## ($ColName, [$Vector], [$Force]) |
1488
|
|
|
|
|
|
|
{ |
1489
|
16
|
|
|
16
|
0
|
18
|
my $this = shift; |
1490
|
16
|
|
|
|
|
26
|
my ($ColName, $Vector, $Force) = @_; |
1491
|
|
|
|
|
|
|
|
1492
|
16
|
|
|
|
|
28
|
my $Valid = (ref($Vector) eq 'ARRAY'); |
1493
|
16
|
|
|
|
|
26
|
my $Existing = (ref($this->{$ColName}) eq 'ARRAY'); |
1494
|
|
|
|
|
|
|
|
1495
|
|
|
|
|
|
|
## Delete existing vector by this name... |
1496
|
16
|
100
|
66
|
|
|
99
|
if (!$Valid && $Existing) |
|
|
100
|
66
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1497
|
|
|
|
|
|
|
{ |
1498
|
1
|
|
|
|
|
3
|
$Vector = delete $this->{$ColName}; ## Delete and save to return to caller. |
1499
|
1
|
|
|
|
|
6
|
$this->fieldlist_delete($ColName); ## Delete from field list if needed. |
1500
|
|
|
|
|
|
|
} |
1501
|
|
|
|
|
|
|
## ...or add one... |
1502
|
|
|
|
|
|
|
elsif ($Valid && !$Existing) |
1503
|
|
|
|
|
|
|
{ |
1504
|
11
|
|
|
|
|
24
|
$this->{$ColName} = $Vector; |
1505
|
|
|
|
|
|
|
|
1506
|
11
|
50
|
|
|
|
20
|
if (!$Force) |
1507
|
|
|
|
|
|
|
{ |
1508
|
11
|
|
|
|
|
23
|
$this->extend(); ## Extend all vectors as needed to ensure same length. |
1509
|
11
|
|
|
|
|
25
|
$this->fieldlist_add($ColName); ## Add to custom field list if needed. |
1510
|
|
|
|
|
|
|
} |
1511
|
|
|
|
|
|
|
} |
1512
|
|
|
|
|
|
|
## ...otherwise replace. |
1513
|
|
|
|
|
|
|
elsif ($Valid) |
1514
|
|
|
|
|
|
|
{ |
1515
|
4
|
|
|
|
|
9
|
$this->{$ColName} = $Vector; |
1516
|
|
|
|
|
|
|
|
1517
|
4
|
50
|
|
|
|
18
|
if (!$Force) |
1518
|
|
|
|
|
|
|
{ |
1519
|
4
|
|
|
|
|
11
|
$this->extend(); ## Extend all vectors as needed to ensure same length. |
1520
|
|
|
|
|
|
|
} |
1521
|
|
|
|
|
|
|
} |
1522
|
|
|
|
|
|
|
|
1523
|
16
|
|
|
|
|
44
|
return($Vector); ## Return added or deleted vector for convenience. |
1524
|
|
|
|
|
|
|
} |
1525
|
|
|
|
|
|
|
|
1526
|
|
|
|
|
|
|
sub col_delete ## ($ColName) |
1527
|
|
|
|
|
|
|
{ |
1528
|
1
|
|
|
1
|
0
|
7
|
my $this = shift; |
1529
|
1
|
|
|
|
|
3
|
my ($ColName) = @_; |
1530
|
|
|
|
|
|
|
|
1531
|
1
|
|
|
|
|
4
|
return($this->col_set_internal($ColName)); |
1532
|
|
|
|
|
|
|
} |
1533
|
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
|
sub col_set ## ($ColName, $Vector) |
1535
|
|
|
|
|
|
|
{ |
1536
|
15
|
|
|
15
|
0
|
27
|
my $this = shift; |
1537
|
15
|
|
|
|
|
25
|
my ($ColName, $Vector) = @_; |
1538
|
|
|
|
|
|
|
|
1539
|
15
|
|
|
|
|
37
|
return($this->col_set_internal($ColName, $Vector)); |
1540
|
|
|
|
|
|
|
} |
1541
|
|
|
|
|
|
|
|
1542
|
|
|
|
|
|
|
sub col_force ## ($ColName, $Vector) |
1543
|
|
|
|
|
|
|
{ |
1544
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
1545
|
0
|
|
|
|
|
0
|
my ($ColName, $Vector) = @_; |
1546
|
|
|
|
|
|
|
|
1547
|
0
|
|
|
|
|
0
|
return($this->col_set_internal($ColName, $Vector, 1)); |
1548
|
|
|
|
|
|
|
} |
1549
|
|
|
|
|
|
|
|
1550
|
|
|
|
|
|
|
sub col_empty |
1551
|
|
|
|
|
|
|
{ |
1552
|
37
|
|
|
37
|
0
|
56
|
my $this = shift; |
1553
|
37
|
|
|
|
|
46
|
my ($Length) = @_; |
1554
|
|
|
|
|
|
|
|
1555
|
|
|
|
|
|
|
## Default to table length. Or get length from sample column. |
1556
|
37
|
100
|
|
|
|
114
|
$Length = $this->length() unless defined($Length); |
1557
|
37
|
100
|
|
|
|
89
|
$Length = @$Length if ref($Length) eq 'ARRAY'; |
1558
|
|
|
|
|
|
|
|
1559
|
37
|
|
|
|
|
54
|
my $Col = []; |
1560
|
37
|
|
|
|
|
119
|
$#$Col = $Length - 1; |
1561
|
|
|
|
|
|
|
|
1562
|
37
|
|
|
|
|
93
|
return($Col); |
1563
|
|
|
|
|
|
|
} |
1564
|
|
|
|
|
|
|
|
1565
|
|
|
|
|
|
|
sub col_default |
1566
|
|
|
|
|
|
|
{ |
1567
|
1
|
|
|
1
|
0
|
2
|
my $this = shift; |
1568
|
1
|
|
|
|
|
4
|
my $Col = $this->col_empty(); |
1569
|
|
|
|
|
|
|
|
1570
|
1
|
|
|
|
|
6
|
return($Col); |
1571
|
|
|
|
|
|
|
} |
1572
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
sub cols ## ($ColNames) |
1574
|
|
|
|
|
|
|
{ |
1575
|
33
|
|
|
33
|
0
|
68
|
my $this = shift; |
1576
|
33
|
|
|
|
|
49
|
my ($ColNames) = @_; |
1577
|
33
|
|
66
|
|
|
89
|
$ColNames ||= $this->fieldlist(); |
1578
|
33
|
|
|
|
|
62
|
my $Cols = [map {$this->col($_)} @$ColNames]; |
|
106
|
|
|
|
|
214
|
|
1579
|
|
|
|
|
|
|
|
1580
|
33
|
|
|
|
|
106
|
return($Cols); |
1581
|
|
|
|
|
|
|
} |
1582
|
|
|
|
|
|
|
|
1583
|
|
|
|
|
|
|
sub cols_hash ## ($ColNames) |
1584
|
|
|
|
|
|
|
{ |
1585
|
25
|
|
|
25
|
0
|
60
|
my $this = shift; |
1586
|
25
|
|
|
|
|
44
|
my ($ColNames) = @_; |
1587
|
25
|
|
66
|
|
|
88
|
$ColNames ||= $this->fieldlist(); |
1588
|
25
|
|
|
|
|
73
|
my $Cols = $this->cols($ColNames); |
1589
|
25
|
|
|
|
|
56
|
my $ColsHash = {}; @$ColsHash{@$ColNames} = @$Cols; |
|
25
|
|
|
|
|
93
|
|
1590
|
|
|
|
|
|
|
|
1591
|
25
|
|
|
|
|
225
|
return($ColsHash); |
1592
|
|
|
|
|
|
|
} |
1593
|
|
|
|
|
|
|
|
1594
|
|
|
|
|
|
|
sub col_exists ## ($ColName, [$FieldList]) |
1595
|
|
|
|
|
|
|
{ |
1596
|
24
|
|
|
24
|
0
|
93
|
my $this = shift; |
1597
|
24
|
|
|
|
|
40
|
my ($ColName, $FieldList) = @_; |
1598
|
|
|
|
|
|
|
|
1599
|
|
|
|
|
|
|
## Default list to search is ALL fields in object. |
1600
|
24
|
|
66
|
|
|
78
|
$FieldList ||= $this->fieldlist_all(); |
1601
|
|
|
|
|
|
|
|
1602
|
|
|
|
|
|
|
## Disallow column names starting with underscore. |
1603
|
24
|
50
|
|
|
|
60
|
return(0) if $ColName =~ /^_/; |
1604
|
|
|
|
|
|
|
|
1605
|
24
|
|
|
|
|
55
|
my $FieldHash = $this->fieldlist_hash($FieldList); |
1606
|
24
|
|
|
|
|
49
|
my $Exists = exists($FieldHash->{$ColName}); |
1607
|
|
|
|
|
|
|
|
1608
|
24
|
|
|
|
|
99
|
return($Exists); |
1609
|
|
|
|
|
|
|
} |
1610
|
|
|
|
|
|
|
|
1611
|
|
|
|
|
|
|
sub col_active ## ($ColName, [$FieldList]) |
1612
|
|
|
|
|
|
|
{ |
1613
|
8
|
|
|
8
|
0
|
42
|
my $this = shift; |
1614
|
8
|
|
|
|
|
14
|
my ($ColName, $FieldList) = @_; |
1615
|
|
|
|
|
|
|
|
1616
|
|
|
|
|
|
|
## Default list to search is only ACTIVE fields in object. |
1617
|
8
|
|
33
|
|
|
30
|
$FieldList ||= $this->fieldlist(); |
1618
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
## Disallow column names starting with underscore. |
1620
|
8
|
50
|
|
|
|
22
|
return(0) if $ColName =~ /^_/; |
1621
|
|
|
|
|
|
|
|
1622
|
8
|
|
|
|
|
20
|
my $FieldHash = $this->fieldlist_hash($FieldList); |
1623
|
8
|
|
|
|
|
17
|
my $Exists = exists($FieldHash->{$ColName}); |
1624
|
|
|
|
|
|
|
|
1625
|
8
|
|
|
|
|
30
|
return($Exists); |
1626
|
|
|
|
|
|
|
} |
1627
|
|
|
|
|
|
|
|
1628
|
|
|
|
|
|
|
sub col_rename ## ($Old => $New, [$Old => New...]) |
1629
|
|
|
|
|
|
|
{ |
1630
|
3
|
|
|
3
|
0
|
19
|
my $this = shift; |
1631
|
|
|
|
|
|
|
|
1632
|
3
|
|
|
|
|
4
|
my $Success; |
1633
|
|
|
|
|
|
|
|
1634
|
3
|
|
|
|
|
8
|
my $Fields = $this->fieldlist_all(); |
1635
|
|
|
|
|
|
|
|
1636
|
3
|
|
|
|
|
5
|
my ($Old, $New); |
1637
|
3
|
|
|
|
|
18
|
while (($Old, $New) = splice(@_, 0, 2)) |
1638
|
|
|
|
|
|
|
{ |
1639
|
3
|
50
|
|
|
|
17
|
$this->warn("Invalid column name: $New"), next |
1640
|
|
|
|
|
|
|
unless ($New =~ /^[^_]+/); |
1641
|
|
|
|
|
|
|
|
1642
|
3
|
50
|
|
|
|
10
|
$this->warn("Column to be renamed does not exist: $Old"), next |
1643
|
|
|
|
|
|
|
unless $this->col_exists($Old, $Fields); |
1644
|
|
|
|
|
|
|
|
1645
|
3
|
50
|
0
|
|
|
8
|
(($Old ne $New) && $this->warn("Failed to rename column $Old to $New: $New exists.")), next |
1646
|
|
|
|
|
|
|
if $this->col_exists($New, $Fields); |
1647
|
|
|
|
|
|
|
|
1648
|
3
|
|
|
|
|
9
|
my $Col = $this->col($Old); ## Creates if not present. |
1649
|
|
|
|
|
|
|
|
1650
|
|
|
|
|
|
|
## Rename the column... |
1651
|
3
|
|
|
|
|
11
|
$this->{$New} = delete $this->{$Old}; |
1652
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
## Then make the same change to _FieldList, _SortOrder, _SortSpecs |
1654
|
|
|
|
|
|
|
|
1655
|
3
|
100
|
|
|
|
10
|
$this->{_FieldList} = [map {$_ = $New if $_ eq $Old; $_} @{$this->{_FieldList}}] if (defined($this->{_FieldList})); |
|
10
|
50
|
|
|
|
21
|
|
|
10
|
|
|
|
|
25
|
|
|
3
|
|
|
|
|
7
|
|
1656
|
3
|
0
|
|
|
|
12
|
$this->{_SortOrder} = [map {$_ = $New if $_ eq $Old; $_} @{$this->{_SortOrder}}] if (defined($this->{_SortOrder})); |
|
0
|
50
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1657
|
3
|
50
|
33
|
|
|
29
|
$this->{_SortSpecs}->{$New} = delete $this->{_SortSpecs}->{$Old} if (defined($this->{_SortSpecs}) && |
1658
|
|
|
|
|
|
|
( $this->{_SortSpecs}->{$Old})); |
1659
|
|
|
|
|
|
|
} |
1660
|
|
|
|
|
|
|
|
1661
|
3
|
|
|
|
|
5
|
$Success = 1; |
1662
|
3
|
|
|
|
|
9
|
done: |
1663
|
|
|
|
|
|
|
return($Success); |
1664
|
|
|
|
|
|
|
} |
1665
|
|
|
|
|
|
|
|
1666
|
|
|
|
|
|
|
=pod |
1667
|
|
|
|
|
|
|
|
1668
|
|
|
|
|
|
|
=head1 CLEANUP AND VALIDATION |
1669
|
|
|
|
|
|
|
|
1670
|
|
|
|
|
|
|
## Performing your own cleanups or validations |
1671
|
|
|
|
|
|
|
|
1672
|
|
|
|
|
|
|
$t->clean($Sub) ## Clean with custom subroutine |
1673
|
|
|
|
|
|
|
$t->clean($Sub, $Fields) ## Clean specified columns only |
1674
|
|
|
|
|
|
|
|
1675
|
|
|
|
|
|
|
## Cleaning whitespace |
1676
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
$t->clean_ws() ## Clean whitespace in fieldlist() cols |
1678
|
|
|
|
|
|
|
$t->clean_ws($Fields) ## Clean whitespace in specified cols |
1679
|
|
|
|
|
|
|
|
1680
|
|
|
|
|
|
|
## Cleaning methods that map character sets |
1681
|
|
|
|
|
|
|
|
1682
|
|
|
|
|
|
|
$t->clean_mac_to_iso8859() |
1683
|
|
|
|
|
|
|
$t->clean_mac_to_iso8859($Fields) |
1684
|
|
|
|
|
|
|
|
1685
|
|
|
|
|
|
|
$t->clean_iso8859_to_mac() |
1686
|
|
|
|
|
|
|
$t->clean_iso8859_to_mac($Fields) |
1687
|
|
|
|
|
|
|
|
1688
|
|
|
|
|
|
|
## Character mapping utilities (not methods) |
1689
|
|
|
|
|
|
|
|
1690
|
|
|
|
|
|
|
use Data::CTable qw( |
1691
|
|
|
|
|
|
|
ISORoman8859_1ToMacRoman |
1692
|
|
|
|
|
|
|
MacRomanToISORoman8859_1 |
1693
|
|
|
|
|
|
|
|
1694
|
|
|
|
|
|
|
ISORoman8859_1ToMacRoman_clean |
1695
|
|
|
|
|
|
|
MacRomanToISORoman8859_1_clean |
1696
|
|
|
|
|
|
|
); |
1697
|
|
|
|
|
|
|
|
1698
|
|
|
|
|
|
|
&ISORoman8859_1ToMacRoman(\ $Str) ## Pass pointer to buffer |
1699
|
|
|
|
|
|
|
&MacRomanToISORoman8859_1(\ $Str) ## Pass pointer to buffer |
1700
|
|
|
|
|
|
|
|
1701
|
|
|
|
|
|
|
&ISORoman8859_1ToMacRoman_clean() ## Operates on $_ |
1702
|
|
|
|
|
|
|
&MacRomanToISORoman8859_1_clean() ## Operates on $_ |
1703
|
|
|
|
|
|
|
|
1704
|
|
|
|
|
|
|
One of the most important things you can do with your data once it's |
1705
|
|
|
|
|
|
|
been placed in a table in Perl is to use the power of Perl to scrub it |
1706
|
|
|
|
|
|
|
like crazy. |
1707
|
|
|
|
|
|
|
|
1708
|
|
|
|
|
|
|
The built-in clean_ws() method applies a standard white-space cleanup |
1709
|
|
|
|
|
|
|
to selected records in every field in the fieldlist() or other list of |
1710
|
|
|
|
|
|
|
fields you optionally supply (such as fieldlist_all()). |
1711
|
|
|
|
|
|
|
|
1712
|
|
|
|
|
|
|
It does the following cleanups that are deemed correct for the |
1713
|
|
|
|
|
|
|
majority of data out there: |
1714
|
|
|
|
|
|
|
|
1715
|
|
|
|
|
|
|
- Remove all leading whitespace, including returns (\n) |
1716
|
|
|
|
|
|
|
- Remove all trailing whitespace, including returns (\n) |
1717
|
|
|
|
|
|
|
- Convert runs of spaces to a single space |
1718
|
|
|
|
|
|
|
- Convert empty string values back to undef to save space |
1719
|
|
|
|
|
|
|
|
1720
|
|
|
|
|
|
|
Of course, depending on your data, clean_ws() might just be the first |
1721
|
|
|
|
|
|
|
thing you do in your cleanup pass. There might be many more cleanups |
1722
|
|
|
|
|
|
|
you'd like to apply. |
1723
|
|
|
|
|
|
|
|
1724
|
|
|
|
|
|
|
clean() is like clean_ws() except you supply as the first argument |
1725
|
|
|
|
|
|
|
your own cleaning subroutine (code reference). It should do its work |
1726
|
|
|
|
|
|
|
by modifying $_. |
1727
|
|
|
|
|
|
|
|
1728
|
|
|
|
|
|
|
Both clean_ws() and clean() apply cleaning ONLY to selected records. |
1729
|
|
|
|
|
|
|
If this isn't what you want, then select_all() before cleaning. |
1730
|
|
|
|
|
|
|
|
1731
|
|
|
|
|
|
|
Since a cleanup subroutine can do ANY modifications to a field that it |
1732
|
|
|
|
|
|
|
likes, you can imagine some cleanup routines that also supply default |
1733
|
|
|
|
|
|
|
values and do other validations. |
1734
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
For example, a cleanup routine could convert every value in each field |
1736
|
|
|
|
|
|
|
to an integer, or apply minimum or maximum numerical limits: |
1737
|
|
|
|
|
|
|
|
1738
|
|
|
|
|
|
|
sub {$_ = int($_) } |
1739
|
|
|
|
|
|
|
sub {$_ = max(int($_), 0) } |
1740
|
|
|
|
|
|
|
sub {$_ = min(int($_), 200)} |
1741
|
|
|
|
|
|
|
|
1742
|
|
|
|
|
|
|
Or your cleanup routine could use regular expressions to do |
1743
|
|
|
|
|
|
|
capitalizations or other regularizations of data: |
1744
|
|
|
|
|
|
|
|
1745
|
|
|
|
|
|
|
sub Capitalize {/\b([a-z])([a-z]+)\b)/\U$1\E$2/g} |
1746
|
|
|
|
|
|
|
|
1747
|
|
|
|
|
|
|
$t->clean(\ &Capitalize , ['FirstName', 'LastName']); |
1748
|
|
|
|
|
|
|
$t->clean(\ &PhoneFormat, ['Phone', 'Fax' ]); |
1749
|
|
|
|
|
|
|
$t->clean(\ &LegalUSZip, ['HomeZip', 'WorkZip' ]); |
1750
|
|
|
|
|
|
|
|
1751
|
|
|
|
|
|
|
... and so on. Cleanups are easy to write and quick and easy to apply |
1752
|
|
|
|
|
|
|
with Data::CTable. Do them early! Do them often! |
1753
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
=head2 Hints for writing cleanup routines |
1755
|
|
|
|
|
|
|
|
1756
|
|
|
|
|
|
|
If your cleanup routine may be used to clean up fields that could be |
1757
|
|
|
|
|
|
|
empty/undef and empty/undef is a legal value, it should not touch any |
1758
|
|
|
|
|
|
|
undef values (unintentionally converting them to strings). |
1759
|
|
|
|
|
|
|
|
1760
|
|
|
|
|
|
|
Finally, instead of setting any values to the empty string, it should |
1761
|
|
|
|
|
|
|
set them to undef instead. This includes any values it might have |
1762
|
|
|
|
|
|
|
left empty during cleanup. (Using undef instead of empty string to |
1763
|
|
|
|
|
|
|
represent empty values is one way that Data::CTable likes to save |
1764
|
|
|
|
|
|
|
memory in tables that may have lots of those.) |
1765
|
|
|
|
|
|
|
|
1766
|
|
|
|
|
|
|
For an example of a well-behaved cleanup routine, consider the |
1767
|
|
|
|
|
|
|
following implementation of the builtin CleanWhitespace behavior: |
1768
|
|
|
|
|
|
|
|
1769
|
|
|
|
|
|
|
sub CleanWhitespace |
1770
|
|
|
|
|
|
|
{ |
1771
|
|
|
|
|
|
|
return unless defined; ## Empty/undef values stay that way |
1772
|
|
|
|
|
|
|
s/ \s+$//sx; ## Remove trailing whitespace |
1773
|
|
|
|
|
|
|
s/^\s+ //sx; ## Remove leading whitespace |
1774
|
|
|
|
|
|
|
s/ +/ /g; ## Runs of spaces to single space |
1775
|
|
|
|
|
|
|
$_ = undef unless length; ## (Newly?) empty strings to undef |
1776
|
|
|
|
|
|
|
} |
1777
|
|
|
|
|
|
|
|
1778
|
|
|
|
|
|
|
=head2 Roman character set mapping |
1779
|
|
|
|
|
|
|
|
1780
|
|
|
|
|
|
|
The character set mapping cleanup routines can be used to convert |
1781
|
|
|
|
|
|
|
upper-ASCII characters bidirectionally between two popular Roman |
1782
|
|
|
|
|
|
|
Character sets -- Mac Roman 1 and ISO 8859-1 (also sometimes called |
1783
|
|
|
|
|
|
|
ISO Latin 1) -- i.e. the Western European Roman character sets. |
1784
|
|
|
|
|
|
|
|
1785
|
|
|
|
|
|
|
By default, read() converts all incoming data fields in data files |
1786
|
|
|
|
|
|
|
with Mac line endings to ISO format when reading in. Conversely, |
1787
|
|
|
|
|
|
|
write() does the reverse mapping (ISO to Mac) when writing a file with |
1788
|
|
|
|
|
|
|
Mac line endings. |
1789
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
However, you may wish to turn off these default behaviors and instead |
1791
|
|
|
|
|
|
|
apply the mappings manually, possibly just to certain fields. |
1792
|
|
|
|
|
|
|
|
1793
|
|
|
|
|
|
|
For example, if a table contains fields with non-Roman character sets, |
1794
|
|
|
|
|
|
|
you would definitely not want to apply these mappings, and instead |
1795
|
|
|
|
|
|
|
might want to apply some different ones that you create yourself. |
1796
|
|
|
|
|
|
|
|
1797
|
|
|
|
|
|
|
=head2 Utility routines for character mapping |
1798
|
|
|
|
|
|
|
|
1799
|
|
|
|
|
|
|
This module can optionally export four utility routines for mapping |
1800
|
|
|
|
|
|
|
character Latin 1 character sets. Always be sure to map the correct |
1801
|
|
|
|
|
|
|
direction -- otherwise you'll end up with garbage! Be careful to only |
1802
|
|
|
|
|
|
|
pass Western Roman strings -- not double-byte strings or strings |
1803
|
|
|
|
|
|
|
encoded in any single-byte Eastern European Roman or non-Roman |
1804
|
|
|
|
|
|
|
character set. |
1805
|
|
|
|
|
|
|
|
1806
|
|
|
|
|
|
|
&ISORoman8859_1ToMacRoman(\ $Str) ## Pass pointer to buffer |
1807
|
|
|
|
|
|
|
&MacRomanToISORoman8859_1(\ $Str) ## Pass pointer to buffer |
1808
|
|
|
|
|
|
|
|
1809
|
|
|
|
|
|
|
These routines translate characters whose values are 128-255 from one |
1810
|
|
|
|
|
|
|
Western Roman encoding to another. The argument is a string buffer of |
1811
|
|
|
|
|
|
|
any size passed by reference. |
1812
|
|
|
|
|
|
|
|
1813
|
|
|
|
|
|
|
The functions return a count of the number of characters that were |
1814
|
|
|
|
|
|
|
mapped (zero or undef if none were). |
1815
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
&ISORoman8859_1ToMacRoman_clean() ## Operates on $_ |
1817
|
|
|
|
|
|
|
&MacRomanToISORoman8859_1_clean() ## Operates on $_ |
1818
|
|
|
|
|
|
|
|
1819
|
|
|
|
|
|
|
These routines are variants of the above, but they're versions that |
1820
|
|
|
|
|
|
|
are compatible with clean() -- they operate on $_ and will take care |
1821
|
|
|
|
|
|
|
to leave undefined values undefined. They do not have return values. |
1822
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
=head2 More advanced cleaning and validation |
1824
|
|
|
|
|
|
|
|
1825
|
|
|
|
|
|
|
Unfortunately, clean() only lets you operate on a single field value |
1826
|
|
|
|
|
|
|
at a time -- and there's no way to know the record number or other |
1827
|
|
|
|
|
|
|
useful information inside the cleaning routine. |
1828
|
|
|
|
|
|
|
|
1829
|
|
|
|
|
|
|
For really powerful cleaning and validation involving access to all |
1830
|
|
|
|
|
|
|
fields of a record as well as record numbers, see the discussion of |
1831
|
|
|
|
|
|
|
the calc() method and other methods for doing complex field |
1832
|
|
|
|
|
|
|
calculations in the next section. |
1833
|
|
|
|
|
|
|
|
1834
|
|
|
|
|
|
|
=cut |
1835
|
|
|
|
|
|
|
|
1836
|
|
|
|
|
|
|
sub clean |
1837
|
|
|
|
|
|
|
{ |
1838
|
8
|
|
|
8
|
0
|
20
|
my $this = shift; |
1839
|
8
|
|
|
|
|
14
|
my ($Sub, $Fields) = @_; |
1840
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
## Default is fields in the list. |
1842
|
8
|
|
33
|
|
|
36
|
$Fields ||= $this->fieldlist(); |
1843
|
|
|
|
|
|
|
|
1844
|
8
|
|
|
|
|
22
|
my $Sel = $this->selection(); |
1845
|
|
|
|
|
|
|
|
1846
|
8
|
|
|
|
|
18
|
foreach (@$Fields) {foreach (@{$this->col($_)}[@$Sel]) {&$Sub()}}; |
|
38
|
|
|
|
|
76
|
|
|
38
|
|
|
|
|
81
|
|
|
114
|
|
|
|
|
237
|
|
1847
|
|
|
|
|
|
|
} |
1848
|
|
|
|
|
|
|
|
1849
|
|
|
|
|
|
|
sub clean_ws |
1850
|
|
|
|
|
|
|
{ |
1851
|
6
|
|
|
6
|
0
|
32
|
my $this = shift; |
1852
|
6
|
|
|
|
|
26
|
return($this->clean(\ &CleanWhitespace, @_)); |
1853
|
|
|
|
|
|
|
} |
1854
|
|
|
|
|
|
|
|
1855
|
|
|
|
|
|
|
sub CleanWhitespace |
1856
|
|
|
|
|
|
|
{ |
1857
|
84
|
50
|
|
84
|
0
|
156
|
return unless defined; ## Empty/undef values stay that way |
1858
|
84
|
|
|
|
|
180
|
s/ \s+$//sx; ## Remove trailing whitespace |
1859
|
84
|
|
|
|
|
153
|
s/^\s+ //sx; ## Remove leading whitespace |
1860
|
84
|
|
|
|
|
169
|
s/ +/ /g; ## Runs of spaces to single space |
1861
|
84
|
50
|
|
|
|
257
|
$_ = undef unless length; ## (Newly?) empty strings to undef |
1862
|
|
|
|
|
|
|
} |
1863
|
|
|
|
|
|
|
|
1864
|
|
|
|
|
|
|
sub clean_mac_to_iso8859 |
1865
|
|
|
|
|
|
|
{ |
1866
|
1
|
|
|
1
|
0
|
8
|
my $this = shift; |
1867
|
1
|
|
|
|
|
5
|
return($this->clean(\ &MacRomanToISORoman8859_1_clean, @_)); |
1868
|
|
|
|
|
|
|
} |
1869
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
sub clean_iso8859_to_mac |
1871
|
|
|
|
|
|
|
{ |
1872
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
1873
|
0
|
|
|
|
|
0
|
return($this->clean(\ &ISORoman8859_1ToMacRoman_clean, @_)); |
1874
|
|
|
|
|
|
|
} |
1875
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
sub ISORoman8859_1ToMacRoman |
1877
|
|
|
|
|
|
|
{ |
1878
|
20
|
|
|
20
|
0
|
32
|
return($ {$_[0]} =~ |
|
20
|
|
|
|
|
49
|
|
1879
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
tr/\x80-\xFF/\xDE\xDF\xE2\xC4\xE3\xC9\xA0\xE0\xF6\xE4\xBA\xDC\xCE\xAD\xB3\xB2\xB0\xD4\xD5\xD2\xD3\xA5\xF8\xD1\xF7\xAA\xF9\xDD\xCF\xF0\xDA\xD9\xCA\xC1\xA2\xA3\xDB\xB4\xF5\xA4\xAC\xA9\xBB\xC7\xC2\xD0\xA8\xC3\xA1\xB1\xFA\xFE\xAB\xB5\xA6\xE1\xFC\xFF\xBC\xC8\xC5\xFD\xFB\xC0\xCB\xE7\xE5\xCC\x80\x81\xAE\x82\xE9\x83\xE6\xE8\xED\xEA\xEB\xEC\xC6\x84\xF1\xEE\xEF\xCD\x85\xD7\xAF\xF4\xF2\xF3\x86\xB7\xB8\xA7\x88\x87\x89\x8B\x8A\x8C\xBE\x8D\x8F\x8E\x90\x91\x93\x92\x94\x95\xB6\x96\x98\x97\x99\x9B\x9A\xD6\xBF\x9D\x9C\x9E\x9F\xBD\xB9\xD8/); |
1881
|
|
|
|
|
|
|
|
1882
|
|
|
|
|
|
|
} |
1883
|
|
|
|
|
|
|
|
1884
|
|
|
|
|
|
|
sub ISORoman8859_1ToMacRoman_clean |
1885
|
|
|
|
|
|
|
{ |
1886
|
0
|
0
|
|
0
|
0
|
0
|
return unless defined; ## Empty/undef values stay that way |
1887
|
|
|
|
|
|
|
|
1888
|
0
|
|
|
|
|
0
|
tr/\x80-\xFF/\xDE\xDF\xE2\xC4\xE3\xC9\xA0\xE0\xF6\xE4\xBA\xDC\xCE\xAD\xB3\xB2\xB0\xD4\xD5\xD2\xD3\xA5\xF8\xD1\xF7\xAA\xF9\xDD\xCF\xF0\xDA\xD9\xCA\xC1\xA2\xA3\xDB\xB4\xF5\xA4\xAC\xA9\xBB\xC7\xC2\xD0\xA8\xC3\xA1\xB1\xFA\xFE\xAB\xB5\xA6\xE1\xFC\xFF\xBC\xC8\xC5\xFD\xFB\xC0\xCB\xE7\xE5\xCC\x80\x81\xAE\x82\xE9\x83\xE6\xE8\xED\xEA\xEB\xEC\xC6\x84\xF1\xEE\xEF\xCD\x85\xD7\xAF\xF4\xF2\xF3\x86\xB7\xB8\xA7\x88\x87\x89\x8B\x8A\x8C\xBE\x8D\x8F\x8E\x90\x91\x93\x92\x94\x95\xB6\x96\x98\x97\x99\x9B\x9A\xD6\xBF\x9D\x9C\x9E\x9F\xBD\xB9\xD8/; |
1889
|
|
|
|
|
|
|
} |
1890
|
|
|
|
|
|
|
|
1891
|
|
|
|
|
|
|
sub MacRomanToISORoman8859_1 |
1892
|
|
|
|
|
|
|
{ |
1893
|
61
|
|
|
61
|
0
|
73
|
return($ {$_[0]} =~ |
|
61
|
|
|
|
|
142
|
|
1894
|
|
|
|
|
|
|
|
1895
|
|
|
|
|
|
|
tr/\x80-\xFF/\xC4\xC5\xC7\xC9\xD1\xD6\xDC\xE1\xE0\xE2\xE4\xE3\xE5\xE7\xE9\xE8\xEA\xEB\xED\xEC\xEE\xEF\xF1\xF3\xF2\xF4\xF6\xF5\xFA\xF9\xFB\xFC\x86\xB0\xA2\xA3\xA7\x95\xB6\xDF\xAE\xA9\x99\xB4\xA8\x8D\xC6\xD8\x90\xB1\x8F\x8E\xA5\xB5\xF0\xDD\xDE\xFE\x8A\xAA\xBA\xFD\xE6\xF8\xBF\xA1\xAC\xAF\x83\xBC\xD0\xAB\xBB\x85\xA0\xC0\xC3\xD5\x8C\x9C\xAD\x97\x93\x94\x91\x92\xF7\xD7\xFF\x9F\x9E\xA4\x8B\x9B\x80\x81\x87\xB7\x82\x84\x89\xC2\xCA\xC1\xCB\xC8\xCD\xCE\xCF\xCC\xD3\xD4\x9D\xD2\xDA\xDB\xD9\xA6\x88\x98\x96\x9A\xB2\xBE\xB8\xBD\xB3\xB9/); |
1896
|
|
|
|
|
|
|
|
1897
|
|
|
|
|
|
|
} |
1898
|
|
|
|
|
|
|
|
1899
|
|
|
|
|
|
|
sub MacRomanToISORoman8859_1_clean |
1900
|
|
|
|
|
|
|
{ |
1901
|
15
|
50
|
|
15
|
0
|
27
|
return unless defined; ## Empty/undef values stay that way |
1902
|
|
|
|
|
|
|
|
1903
|
15
|
|
|
|
|
33
|
tr/\x80-\xFF/\xC4\xC5\xC7\xC9\xD1\xD6\xDC\xE1\xE0\xE2\xE4\xE3\xE5\xE7\xE9\xE8\xEA\xEB\xED\xEC\xEE\xEF\xF1\xF3\xF2\xF4\xF6\xF5\xFA\xF9\xFB\xFC\x86\xB0\xA2\xA3\xA7\x95\xB6\xDF\xAE\xA9\x99\xB4\xA8\x8D\xC6\xD8\x90\xB1\x8F\x8E\xA5\xB5\xF0\xDD\xDE\xFE\x8A\xAA\xBA\xFD\xE6\xF8\xBF\xA1\xAC\xAF\x83\xBC\xD0\xAB\xBB\x85\xA0\xC0\xC3\xD5\x8C\x9C\xAD\x97\x93\x94\x91\x92\xF7\xD7\xFF\x9F\x9E\xA4\x8B\x9B\x80\x81\x87\xB7\x82\x84\x89\xC2\xCA\xC1\xCB\xC8\xCD\xCE\xCF\xCC\xD3\xD4\x9D\xD2\xDA\xDB\xD9\xA6\x88\x98\x96\x9A\xB2\xBE\xB8\xBD\xB3\xB9/; |
1904
|
|
|
|
|
|
|
|
1905
|
|
|
|
|
|
|
} |
1906
|
|
|
|
|
|
|
|
1907
|
|
|
|
|
|
|
=pod |
1908
|
|
|
|
|
|
|
|
1909
|
|
|
|
|
|
|
=head1 CALCULATIONS USING calc() |
1910
|
|
|
|
|
|
|
|
1911
|
|
|
|
|
|
|
## Calculate a new field's values based on two others |
1912
|
|
|
|
|
|
|
|
1913
|
|
|
|
|
|
|
$t->calc($Sub) ## Run $Sub for each row, with |
1914
|
|
|
|
|
|
|
## fields bound to local vars |
1915
|
|
|
|
|
|
|
|
1916
|
|
|
|
|
|
|
$t->calc($Sub, $Sel) ## Use these row nums |
1917
|
|
|
|
|
|
|
$t->calc($Sub, undef, $Fields) ## Use only these fields |
1918
|
|
|
|
|
|
|
$t->calc($Sub, $Sel, $Fields) ## Use custom rows, fields |
1919
|
|
|
|
|
|
|
|
1920
|
|
|
|
|
|
|
my $Col = $t->calc($Sub) ## Gather return vals in vector |
1921
|
|
|
|
|
|
|
|
1922
|
|
|
|
|
|
|
|
1923
|
|
|
|
|
|
|
## Example 1: Overwrite values in an existing column. |
1924
|
|
|
|
|
|
|
|
1925
|
|
|
|
|
|
|
$t->calc(sub{no strict 'vars'; $Size = (stat($Path))[7]}); |
1926
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
|
1928
|
|
|
|
|
|
|
## Example 2: Create empty column; fill fields 1 by 1 |
1929
|
|
|
|
|
|
|
|
1930
|
|
|
|
|
|
|
$t->col('PersonID'); |
1931
|
|
|
|
|
|
|
$t->calc(sub{no strict 'vars'; $PersonID = "$Last$First"}); |
1932
|
|
|
|
|
|
|
|
1933
|
|
|
|
|
|
|
|
1934
|
|
|
|
|
|
|
## Example 3: Calculate values; put into to table if desired |
1935
|
|
|
|
|
|
|
|
1936
|
|
|
|
|
|
|
$PersonID = $t->calc(sub{no strict 'vars'; "$Last$First"}); |
1937
|
|
|
|
|
|
|
$t->sel('PersonID', $PersonID); |
1938
|
|
|
|
|
|
|
|
1939
|
|
|
|
|
|
|
|
1940
|
|
|
|
|
|
|
## Example 4: Using fully-qualified variable names |
1941
|
|
|
|
|
|
|
|
1942
|
|
|
|
|
|
|
$t->calc(sub{$main::PersonID = "$main::Last$main::First"}); |
1943
|
|
|
|
|
|
|
|
1944
|
|
|
|
|
|
|
calc() runs your custom calculation subroutine $Sub once for every row |
1945
|
|
|
|
|
|
|
in the current selection() or other list of row numbers that you |
1946
|
|
|
|
|
|
|
specify in the optional $Sel argument. |
1947
|
|
|
|
|
|
|
|
1948
|
|
|
|
|
|
|
This lets you apply a complex calculation to every record in a table |
1949
|
|
|
|
|
|
|
in a single statement, storing the results in one or more columns, or |
1950
|
|
|
|
|
|
|
retrieving them as a list. |
1951
|
|
|
|
|
|
|
|
1952
|
|
|
|
|
|
|
Your custom subroutine may refer to the value in any field in the |
1953
|
|
|
|
|
|
|
current row by using a global variable with the field's name: |
1954
|
|
|
|
|
|
|
|
1955
|
|
|
|
|
|
|
For example, if the table has fields First, Last, and Age, then $Sub |
1956
|
|
|
|
|
|
|
may use, modify, and set the variables $First, $Last, $Age. (Also |
1957
|
|
|
|
|
|
|
known as $main::First, $main::Last, $main::Age). |
1958
|
|
|
|
|
|
|
|
1959
|
|
|
|
|
|
|
Modifying any of these specially-bound variables actually modifies the |
1960
|
|
|
|
|
|
|
data in the correct record and field within the table. |
1961
|
|
|
|
|
|
|
|
1962
|
|
|
|
|
|
|
By default, the fields available to $Sub are all fields in the table. |
1963
|
|
|
|
|
|
|
calc() must bind all the field names for you for each row, which can |
1964
|
|
|
|
|
|
|
be time-consuming for tables with very large numbers of fields. |
1965
|
|
|
|
|
|
|
|
1966
|
|
|
|
|
|
|
You can speed up the operation of calc() by listing only the fields |
1967
|
|
|
|
|
|
|
your $Sub needs in the optional parameter $Fields. Any field names |
1968
|
|
|
|
|
|
|
you don't mention won't be available to $Sub. Conversely, calc() will |
1969
|
|
|
|
|
|
|
run faster because it can bind only the fields you actually need. |
1970
|
|
|
|
|
|
|
|
1971
|
|
|
|
|
|
|
If you include non-existent fields in your custom $Fields list, calc() |
1972
|
|
|
|
|
|
|
creates them for you before $Sub runs the first time. Then your $Sub |
1973
|
|
|
|
|
|
|
can store field values into the new column, referring to it by name. |
1974
|
|
|
|
|
|
|
|
1975
|
|
|
|
|
|
|
Variables in $Sub are in the "main" package. So you should set $Sub |
1976
|
|
|
|
|
|
|
to use pacakge "main" in case the rest of your code is not in "main". |
1977
|
|
|
|
|
|
|
|
1978
|
|
|
|
|
|
|
Similarly, if you "use strict", Perl will complain about global |
1979
|
|
|
|
|
|
|
variables in $Sub. So you may need to assert "no strict 'vars'". |
1980
|
|
|
|
|
|
|
|
1981
|
|
|
|
|
|
|
{ package Foo; use strict; |
1982
|
|
|
|
|
|
|
|
1983
|
|
|
|
|
|
|
$t = ...; |
1984
|
|
|
|
|
|
|
$t->calc(sub {package main; no strict 'vars'; |
1985
|
|
|
|
|
|
|
$Age = int($Age)}); |
1986
|
|
|
|
|
|
|
} |
1987
|
|
|
|
|
|
|
|
1988
|
|
|
|
|
|
|
## Or this: |
1989
|
|
|
|
|
|
|
|
1990
|
|
|
|
|
|
|
{ package Foo; use strict; |
1991
|
|
|
|
|
|
|
|
1992
|
|
|
|
|
|
|
$t = ...; |
1993
|
|
|
|
|
|
|
{ package main; no strict 'vars'; |
1994
|
|
|
|
|
|
|
my $Sub = sub {$Age = int($Age)}; |
1995
|
|
|
|
|
|
|
} |
1996
|
|
|
|
|
|
|
$t->calc($Sub); |
1997
|
|
|
|
|
|
|
} |
1998
|
|
|
|
|
|
|
|
1999
|
|
|
|
|
|
|
You may be able to get around both problems more easily by prefixing |
2000
|
|
|
|
|
|
|
each variable reference in $Sub with "main::". This takes care of the |
2001
|
|
|
|
|
|
|
package name issue and bypasses "use strict" at the same time, at the |
2002
|
|
|
|
|
|
|
slight cost of making the calculation itself a bit harder to read. |
2003
|
|
|
|
|
|
|
|
2004
|
|
|
|
|
|
|
$t->calc(sub {$main::Age = int($main::Age)}); ## OK in any package |
2005
|
|
|
|
|
|
|
|
2006
|
|
|
|
|
|
|
In addition to the field names, the following three values are defined |
2007
|
|
|
|
|
|
|
during each invocation of $Sub: |
2008
|
|
|
|
|
|
|
|
2009
|
|
|
|
|
|
|
$_r ($main::_r) -- the row number in the entire table |
2010
|
|
|
|
|
|
|
$_s ($main::_s) -- the item number in selection or $Recs |
2011
|
|
|
|
|
|
|
$_t ($main::_t) -- the table object itself |
2012
|
|
|
|
|
|
|
|
2013
|
|
|
|
|
|
|
You could use these values to print diagnostic information or to |
2014
|
|
|
|
|
|
|
access any of the data, parameters, or methods of the table itself |
2015
|
|
|
|
|
|
|
from within $Sub. Or you could even calculate field values using $_r |
2016
|
|
|
|
|
|
|
or $_s. |
2017
|
|
|
|
|
|
|
|
2018
|
|
|
|
|
|
|
For example, after searching & sorting, you could make a field which |
2019
|
|
|
|
|
|
|
preserves the resulting sort order for future reference: |
2020
|
|
|
|
|
|
|
|
2021
|
|
|
|
|
|
|
$t->col('Ranking'); ## Create the empty column first. |
2022
|
|
|
|
|
|
|
$t->calc(sub{$main::Ranking = $main::_s}); |
2023
|
|
|
|
|
|
|
|
2024
|
|
|
|
|
|
|
This last example is equivalent to: |
2025
|
|
|
|
|
|
|
|
2026
|
|
|
|
|
|
|
$t->sel(Ranking => [0 .. $#{$t->selection()}]); ## See sel() below |
2027
|
|
|
|
|
|
|
|
2028
|
|
|
|
|
|
|
|
2029
|
|
|
|
|
|
|
=head1 "MANUAL" CALCULATIONS |
2030
|
|
|
|
|
|
|
|
2031
|
|
|
|
|
|
|
calc() (see previous secion) is the briefer, more elegant way to do |
2032
|
|
|
|
|
|
|
batch calculations on entire columns in a table, but it can be |
2033
|
|
|
|
|
|
|
slightly slower than doing the calculations yourself. |
2034
|
|
|
|
|
|
|
|
2035
|
|
|
|
|
|
|
If you have extremely large tables, and you notice the processing time |
2036
|
|
|
|
|
|
|
for your calculations taking more than a second, you might want to |
2037
|
|
|
|
|
|
|
rewrite your calculations to use the more efficient techniques shown |
2038
|
|
|
|
|
|
|
here. |
2039
|
|
|
|
|
|
|
|
2040
|
|
|
|
|
|
|
You will often need to create new calculated columns based on one or |
2041
|
|
|
|
|
|
|
more existing ones, and then either insert the columns back in the |
2042
|
|
|
|
|
|
|
tables or use them for further calculations or indexing. |
2043
|
|
|
|
|
|
|
|
2044
|
|
|
|
|
|
|
Examples 1a and 1b create a new field 'NameOK' containing either the |
2045
|
|
|
|
|
|
|
string "OK" or undef (empty) depending on whether the field 'Name' is |
2046
|
|
|
|
|
|
|
empty. Just use map() to iterate over the existing values in the |
2047
|
|
|
|
|
|
|
other column, binding $_ to each value in turn. |
2048
|
|
|
|
|
|
|
|
2049
|
|
|
|
|
|
|
### Example 1a: Calculation based on one other column |
2050
|
|
|
|
|
|
|
|
2051
|
|
|
|
|
|
|
## Retrieve column |
2052
|
|
|
|
|
|
|
my $Name = $t->col('Name'); |
2053
|
|
|
|
|
|
|
|
2054
|
|
|
|
|
|
|
## Make new column |
2055
|
|
|
|
|
|
|
my $NameOK = [map {!!length && 'OK'} @$Name]; |
2056
|
|
|
|
|
|
|
|
2057
|
|
|
|
|
|
|
## Insert column back into table: |
2058
|
|
|
|
|
|
|
$t->col(NameOK => $NameOK); |
2059
|
|
|
|
|
|
|
|
2060
|
|
|
|
|
|
|
### Example 1b: Same calculation, in a single statement: |
2061
|
|
|
|
|
|
|
|
2062
|
|
|
|
|
|
|
$t->col(NameOK => [map {!!length && 'OK'} @{$t->col('Name')}]); |
2063
|
|
|
|
|
|
|
|
2064
|
|
|
|
|
|
|
In order to iterate over MULTIPLE columns at once, you need a list of |
2065
|
|
|
|
|
|
|
the row numbers generated by $t->all() so you can index the two |
2066
|
|
|
|
|
|
|
columns in tandem. Then, you use map to bind $_ to each row number, |
2067
|
|
|
|
|
|
|
and then use the expression $t->col($ColName)->[$_] to retreive each |
2068
|
|
|
|
|
|
|
value. |
2069
|
|
|
|
|
|
|
|
2070
|
|
|
|
|
|
|
Examples 2a and 2b demonstrate this method. They create a new field |
2071
|
|
|
|
|
|
|
'FullName' which is a string joining the values in the 'First' and |
2072
|
|
|
|
|
|
|
'Last' columns with a space between. |
2073
|
|
|
|
|
|
|
|
2074
|
|
|
|
|
|
|
### Example 2a: Calculation based on multiple columns |
2075
|
|
|
|
|
|
|
|
2076
|
|
|
|
|
|
|
## Retrieve columns |
2077
|
|
|
|
|
|
|
my $First = $t->col('First'); |
2078
|
|
|
|
|
|
|
my $Last = $t->col('Last' ); |
2079
|
|
|
|
|
|
|
|
2080
|
|
|
|
|
|
|
## Retreive row nums |
2081
|
|
|
|
|
|
|
my $Nums = $t->all(); |
2082
|
|
|
|
|
|
|
|
2083
|
|
|
|
|
|
|
## Calculate a new column based on two others |
2084
|
|
|
|
|
|
|
my $Full = [map {"$First->[$_] $Last->[$_]"} @$Nums]; |
2085
|
|
|
|
|
|
|
|
2086
|
|
|
|
|
|
|
## Add new column to the table |
2087
|
|
|
|
|
|
|
$t->col(FullName => $Full); |
2088
|
|
|
|
|
|
|
|
2089
|
|
|
|
|
|
|
### Example 2b: Same calculation, in a single statement: |
2090
|
|
|
|
|
|
|
|
2091
|
|
|
|
|
|
|
$t->col(FullName => |
2092
|
|
|
|
|
|
|
[map {"$t->col('First')->[$_] t->col('Last')->[$_]"} |
2093
|
|
|
|
|
|
|
@{$t->all()}]); |
2094
|
|
|
|
|
|
|
|
2095
|
|
|
|
|
|
|
In examples 1 and 2, you create entirely new columns and then add or |
2096
|
|
|
|
|
|
|
replace them in the table. |
2097
|
|
|
|
|
|
|
|
2098
|
|
|
|
|
|
|
Using the approach in Examples 3a and 3b, you can assign calculated |
2099
|
|
|
|
|
|
|
results directly into each value of an existing column as you go. |
2100
|
|
|
|
|
|
|
|
2101
|
|
|
|
|
|
|
## Example 3a: Calculate by assigning directly into fields... |
2102
|
|
|
|
|
|
|
|
2103
|
|
|
|
|
|
|
my $A = $t->col->('A'); ## This column will be modified |
2104
|
|
|
|
|
|
|
my $B = $t->col->('B'); |
2105
|
|
|
|
|
|
|
my $C = $t->col->('C'); |
2106
|
|
|
|
|
|
|
|
2107
|
|
|
|
|
|
|
foreach @($t->all()) {$A->[$_] = $B->[$_] + $C->[$_];} |
2108
|
|
|
|
|
|
|
|
2109
|
|
|
|
|
|
|
## Example 3b: Same calculation, in a single statement: |
2110
|
|
|
|
|
|
|
|
2111
|
|
|
|
|
|
|
foreach @($t->all()) {($t->col('A')->[$_] = |
2112
|
|
|
|
|
|
|
$t->col('B')->[$_] + |
2113
|
|
|
|
|
|
|
$t->col('C')->[$_])}; |
2114
|
|
|
|
|
|
|
|
2115
|
|
|
|
|
|
|
Before writing your code, think about which calculation paradigms best |
2116
|
|
|
|
|
|
|
suit your needs and your data set. |
2117
|
|
|
|
|
|
|
|
2118
|
|
|
|
|
|
|
Just as Perl Hackers know: There's More Than One Way To Do It! |
2119
|
|
|
|
|
|
|
|
2120
|
|
|
|
|
|
|
=cut |
2121
|
|
|
|
|
|
|
|
2122
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
2123
|
|
|
|
|
|
|
|
2124
|
|
|
|
|
|
|
sub calc |
2125
|
|
|
|
|
|
|
{ |
2126
|
|
|
|
|
|
|
## We operate in package main for this subroutine so the local |
2127
|
|
|
|
|
|
|
## object and row number can be available to the caller's $Sub. |
2128
|
|
|
|
|
|
|
|
2129
|
|
|
|
|
|
|
## The following local vars will be available to $Sub |
2130
|
|
|
|
|
|
|
## $_r ($main::_r) -- the row number in the entire table |
2131
|
|
|
|
|
|
|
## $_s ($main::_s) -- the row number in selection or $Recs |
2132
|
|
|
|
|
|
|
## $_t ($main::_t) -- the table object itself |
2133
|
|
|
|
|
|
|
|
2134
|
|
|
|
|
|
|
package main; |
2135
|
1
|
|
|
1
|
|
13
|
use vars qw($_r $_s $_t); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
139
|
|
2136
|
3
|
|
|
3
|
0
|
37
|
local ($_r, $_s, $_t); |
2137
|
|
|
|
|
|
|
|
2138
|
3
|
|
|
|
|
6
|
$_t = shift; |
2139
|
3
|
|
|
|
|
7
|
my ($Sub, $Recs, $Fields) = @_; |
2140
|
|
|
|
|
|
|
|
2141
|
|
|
|
|
|
|
## These optional params default to current field and current sel |
2142
|
|
|
|
|
|
|
|
2143
|
3
|
|
33
|
|
|
21
|
$Recs ||= $_t->selection(); |
2144
|
3
|
|
33
|
|
|
16
|
$Fields ||= $_t->fieldlist_all(); |
2145
|
|
|
|
|
|
|
|
2146
|
|
|
|
|
|
|
## Local copy of symbol table. Didn't seem to help. Odd. |
2147
|
|
|
|
|
|
|
## local %main::; |
2148
|
|
|
|
|
|
|
|
2149
|
|
|
|
|
|
|
## We'll build a column of return values from $Sub if needed. |
2150
|
3
|
|
|
|
|
5
|
my $WantVals = defined(wantarray); |
2151
|
3
|
50
|
|
|
|
10
|
my $Vals = $_t->col_empty() if $WantVals; |
2152
|
|
|
|
|
|
|
|
2153
|
|
|
|
|
|
|
## Call col() on each field in list to make sure it exists. |
2154
|
3
|
|
|
|
|
9
|
foreach (@$Fields) {$_t->col($_)}; |
|
15
|
|
|
|
|
31
|
|
2155
|
|
|
|
|
|
|
|
2156
|
3
|
|
|
|
|
12
|
foreach $_s (0..$#$Recs) |
2157
|
|
|
|
|
|
|
{ |
2158
|
9
|
|
|
|
|
28
|
$_r = $Recs->[$_s]; |
2159
|
|
|
|
|
|
|
|
2160
|
|
|
|
|
|
|
## Bind $FieldName1, $FieldName2, (etc. for each field name in |
2161
|
|
|
|
|
|
|
## $Fields) point to address of the current value for that |
2162
|
|
|
|
|
|
|
## field in this record. |
2163
|
|
|
|
|
|
|
|
2164
|
1
|
|
|
1
|
|
5
|
no strict 'refs'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
4377
|
|
2165
|
9
|
|
|
|
|
18
|
foreach my $F (@$Fields) {*{$F} = \ $_t->{$F}->[$_r]}; |
|
45
|
|
|
|
|
64
|
|
|
45
|
|
|
|
|
113
|
|
2166
|
|
|
|
|
|
|
|
2167
|
|
|
|
|
|
|
## Now $Sub may refer to $_r, $_s, $_t, and ${any field name} |
2168
|
|
|
|
|
|
|
|
2169
|
|
|
|
|
|
|
## Call $Sub and capture return values iff caller wants them |
2170
|
9
|
50
|
|
|
|
30
|
($WantVals ? $Vals->[$_r] = &$Sub() : &$Sub()); |
2171
|
|
|
|
|
|
|
} |
2172
|
|
|
|
|
|
|
|
2173
|
|
|
|
|
|
|
## Return scalar column ref unless return context is undef |
2174
|
3
|
50
|
|
|
|
22
|
return($WantVals ? $Vals : ()); |
2175
|
|
|
|
|
|
|
} |
2176
|
|
|
|
|
|
|
|
2177
|
|
|
|
|
|
|
=pod |
2178
|
|
|
|
|
|
|
|
2179
|
|
|
|
|
|
|
=head1 INDEXES |
2180
|
|
|
|
|
|
|
|
2181
|
|
|
|
|
|
|
## Make indexes of columns or just selected data in columns |
2182
|
|
|
|
|
|
|
|
2183
|
|
|
|
|
|
|
my $Index1 = $t->index_all($Key); ## entire column |
2184
|
|
|
|
|
|
|
my $Index2 = $t->index_sel($Key); ## selected data only |
2185
|
|
|
|
|
|
|
|
2186
|
|
|
|
|
|
|
## Make hashes of 2 columns or just selected data in columns |
2187
|
|
|
|
|
|
|
|
2188
|
|
|
|
|
|
|
my $Index1 = $t->hash_all($KeyFld, $ValFld); ## entire column |
2189
|
|
|
|
|
|
|
my $Index2 = $t->hash_sel($KeyFld, $ValFld); ## selected data |
2190
|
|
|
|
|
|
|
|
2191
|
|
|
|
|
|
|
index_all() creates and returns a hash (reference) that maps keys |
2192
|
|
|
|
|
|
|
found in the column called $Key to corresponding record numbers. |
2193
|
|
|
|
|
|
|
|
2194
|
|
|
|
|
|
|
Ideally, values in $Key would be unique (that's up to you). |
2195
|
|
|
|
|
|
|
|
2196
|
|
|
|
|
|
|
If any values in $Key are NOT unique, then later values (higher record |
2197
|
|
|
|
|
|
|
numbers) will be ignored. |
2198
|
|
|
|
|
|
|
|
2199
|
|
|
|
|
|
|
index_sel() creates and returns a hash (ref) that maps keys found in |
2200
|
|
|
|
|
|
|
the SELECTED RECORDS of column $Key to corresponding record numbers. |
2201
|
|
|
|
|
|
|
|
2202
|
|
|
|
|
|
|
Any keys in unselected records are ignored. Otherwise, the behavior |
2203
|
|
|
|
|
|
|
is equivalent to index_all(). |
2204
|
|
|
|
|
|
|
|
2205
|
|
|
|
|
|
|
hash_all() and hash_sel() are similar, except they create and return |
2206
|
|
|
|
|
|
|
hashes whose keys are taken from column $KeyFld, but whose values are |
2207
|
|
|
|
|
|
|
from $ValFld in the corresponding records. |
2208
|
|
|
|
|
|
|
|
2209
|
|
|
|
|
|
|
So, for example, imagine you have a tab-delimited file on disk with |
2210
|
|
|
|
|
|
|
just a single tab per line (2 fields) and no header row. The entries |
2211
|
|
|
|
|
|
|
on the left side of the tab on each line are keys and the right side |
2212
|
|
|
|
|
|
|
are values. You could convert that file into a hash in memory like |
2213
|
|
|
|
|
|
|
this: |
2214
|
|
|
|
|
|
|
|
2215
|
|
|
|
|
|
|
my $t = Data::CTable->new({_HeaderRow=>0, _FieldList=>[qw(F1 F2)]}, |
2216
|
|
|
|
|
|
|
"DeptLookup.txt"); |
2217
|
|
|
|
|
|
|
|
2218
|
|
|
|
|
|
|
my $DeptLookup = $t->hash_all(qw(F1 F2)); |
2219
|
|
|
|
|
|
|
|
2220
|
|
|
|
|
|
|
=head2 Reverse indexes |
2221
|
|
|
|
|
|
|
|
2222
|
|
|
|
|
|
|
If you'd like an index mapping record number to key, just get |
2223
|
|
|
|
|
|
|
$t->col($Key). That's what the data columns in Data::CTable are. |
2224
|
|
|
|
|
|
|
|
2225
|
|
|
|
|
|
|
=cut |
2226
|
|
|
|
|
|
|
|
2227
|
|
|
|
|
|
|
sub index_all |
2228
|
|
|
|
|
|
|
{ |
2229
|
2
|
|
|
2
|
0
|
8
|
my $this = shift; |
2230
|
2
|
|
|
|
|
4
|
my ($Key) = @_; |
2231
|
|
|
|
|
|
|
|
2232
|
2
|
|
|
|
|
4
|
my $Index = {}; @$Index{reverse @{$this->col($Key)}} = reverse @{$this->all()}; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
5
|
|
2233
|
|
|
|
|
|
|
|
2234
|
2
|
|
|
|
|
16
|
return($Index); |
2235
|
|
|
|
|
|
|
} |
2236
|
|
|
|
|
|
|
|
2237
|
|
|
|
|
|
|
sub index_sel |
2238
|
|
|
|
|
|
|
{ |
2239
|
2
|
|
|
2
|
0
|
6
|
my $this = shift; |
2240
|
2
|
|
|
|
|
3
|
my ($Key) = @_; |
2241
|
|
|
|
|
|
|
|
2242
|
2
|
|
|
|
|
5
|
my $Index = {}; @$Index{reverse @{$this->sel($Key)}} = reverse @{$this->selection()}; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
6
|
|
2243
|
|
|
|
|
|
|
|
2244
|
2
|
|
|
|
|
15
|
return($Index); |
2245
|
|
|
|
|
|
|
} |
2246
|
|
|
|
|
|
|
|
2247
|
|
|
|
|
|
|
|
2248
|
|
|
|
|
|
|
sub hash_all |
2249
|
|
|
|
|
|
|
{ |
2250
|
2
|
|
|
2
|
0
|
3
|
my $this = shift; |
2251
|
2
|
|
|
|
|
5
|
my ($Key, $Val) = @_; |
2252
|
|
|
|
|
|
|
|
2253
|
2
|
|
|
|
|
3
|
my $Hash = {}; @$Hash{reverse @{$this->col($Key)}} = reverse @{$this->col($Val)}; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
6
|
|
2254
|
|
|
|
|
|
|
|
2255
|
2
|
|
|
|
|
15
|
return($Hash); |
2256
|
|
|
|
|
|
|
} |
2257
|
|
|
|
|
|
|
|
2258
|
|
|
|
|
|
|
sub hash_sel |
2259
|
|
|
|
|
|
|
{ |
2260
|
2
|
|
|
2
|
0
|
4
|
my $this = shift; |
2261
|
2
|
|
|
|
|
4
|
my ($Key, $Val) = @_; |
2262
|
|
|
|
|
|
|
|
2263
|
2
|
|
|
|
|
5
|
my $Hash = {}; @$Hash{reverse @{$this->sel($Key)}} = reverse @{$this->sel($Val)}; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
7
|
|
|
2
|
|
|
|
|
6
|
|
2264
|
|
|
|
|
|
|
|
2265
|
2
|
|
|
|
|
15
|
return($Hash); |
2266
|
|
|
|
|
|
|
} |
2267
|
|
|
|
|
|
|
|
2268
|
|
|
|
|
|
|
|
2269
|
|
|
|
|
|
|
=pod |
2270
|
|
|
|
|
|
|
|
2271
|
|
|
|
|
|
|
=head1 DATA ROWS (RECORDS) |
2272
|
|
|
|
|
|
|
|
2273
|
|
|
|
|
|
|
## Getting or setting rows / records |
2274
|
|
|
|
|
|
|
|
2275
|
|
|
|
|
|
|
$t->row($Num) ## Get a row or make empty one. |
2276
|
|
|
|
|
|
|
$t->row_get($Num) |
2277
|
|
|
|
|
|
|
|
2278
|
|
|
|
|
|
|
$t->row($Num, $HashRef) ## Set all of a row all at once. |
2279
|
|
|
|
|
|
|
$t->row_set($Num, $HashRef) |
2280
|
|
|
|
|
|
|
|
2281
|
|
|
|
|
|
|
$t->row_set($Num, undef) ## Delete a row completely |
2282
|
|
|
|
|
|
|
$t->row_delete($Num) |
2283
|
|
|
|
|
|
|
$t->row_delete($Beg, $End)## Delete a range of rows |
2284
|
|
|
|
|
|
|
|
2285
|
|
|
|
|
|
|
$t->row_move($Old, $New) ## Move a row to before $New |
2286
|
|
|
|
|
|
|
|
2287
|
|
|
|
|
|
|
$t->row_empty() ## An empty hash |
2288
|
|
|
|
|
|
|
$t->row_exists($Num) ## True if $Num < $t->length() |
2289
|
|
|
|
|
|
|
|
2290
|
|
|
|
|
|
|
$t->rows($RowList) ## Get list of multiple row nums |
2291
|
|
|
|
|
|
|
|
2292
|
|
|
|
|
|
|
$t->row_list($Num) ## Get row vals as a list |
2293
|
|
|
|
|
|
|
$t->row_list($Num, $Fields) ## Get row vals: specified fields |
2294
|
|
|
|
|
|
|
|
2295
|
|
|
|
|
|
|
$t->row_list_set($Num, undef, $Vals) ## Set row vals as a list |
2296
|
|
|
|
|
|
|
$t->row_list_set($Num, $Fields, $Vals) ## Set row vals as a list |
2297
|
|
|
|
|
|
|
$t->row_list_set($Num, $Fields) ## Set vals to empty/undef |
2298
|
|
|
|
|
|
|
|
2299
|
|
|
|
|
|
|
Usually, when working with Data::CTable objects, you are operating on |
2300
|
|
|
|
|
|
|
entire columns or tables at a time (after all: any transformation you |
2301
|
|
|
|
|
|
|
do on one record you almost always want to do on all records or all |
2302
|
|
|
|
|
|
|
selected ones). |
2303
|
|
|
|
|
|
|
|
2304
|
|
|
|
|
|
|
You should very rarely need to access data by retrieving rows or |
2305
|
|
|
|
|
|
|
setting rows, moving them around individually, and so on. (It's much |
2306
|
|
|
|
|
|
|
cleaner, and much more efficient to manipulate the selection() (list |
2307
|
|
|
|
|
|
|
of selected row numbers) instead -- just delete a row number from the |
2308
|
|
|
|
|
|
|
selection, for example, and then for most operations it's almost as if |
2309
|
|
|
|
|
|
|
the row is gone from the table, except the data is really still |
2310
|
|
|
|
|
|
|
there.) |
2311
|
|
|
|
|
|
|
|
2312
|
|
|
|
|
|
|
However, if on rare occasions you really do need direct row |
2313
|
|
|
|
|
|
|
operations, you're reading the right section. |
2314
|
|
|
|
|
|
|
|
2315
|
|
|
|
|
|
|
A row is generally accessed as a hash. The hash you provide or get |
2316
|
|
|
|
|
|
|
back is not saved by the object in any way. Data values are always |
2317
|
|
|
|
|
|
|
copied in or out of it, so you always "own" the hash. |
2318
|
|
|
|
|
|
|
|
2319
|
|
|
|
|
|
|
Rows are specified by $Num -- the row number with in the unsorted |
2320
|
|
|
|
|
|
|
columns (the raw data in the table). These numbers are just array |
2321
|
|
|
|
|
|
|
indices into the data columns, and so their legal range is: |
2322
|
|
|
|
|
|
|
|
2323
|
|
|
|
|
|
|
[0 .. ($t->length() - 1)] ## (Zero-based row numbering.) |
2324
|
|
|
|
|
|
|
|
2325
|
|
|
|
|
|
|
The row hash (or "record") has keys that are field names and values |
2326
|
|
|
|
|
|
|
that are copies of the scalar values stored in the data columns within |
2327
|
|
|
|
|
|
|
the table. |
2328
|
|
|
|
|
|
|
|
2329
|
|
|
|
|
|
|
row() always copies only the fields in fieldlist(), except for |
2330
|
|
|
|
|
|
|
row_list() which allows you to specify an optional $Fields parameter |
2331
|
|
|
|
|
|
|
which can override the current fieldlist(). |
2332
|
|
|
|
|
|
|
|
2333
|
|
|
|
|
|
|
If the fieldlist happens to be a subset of all fields, but you really |
2334
|
|
|
|
|
|
|
want to get all fields in your record, then call fieldlist_set(0) |
2335
|
|
|
|
|
|
|
first to permanently or temporarily delete it. |
2336
|
|
|
|
|
|
|
|
2337
|
|
|
|
|
|
|
row() and row_get() always return a hash. |
2338
|
|
|
|
|
|
|
|
2339
|
|
|
|
|
|
|
row($Num, $Hash), row_set() take a hash and set just the fields you |
2340
|
|
|
|
|
|
|
specify in the hash (in the given row of course). Any non-existent |
2341
|
|
|
|
|
|
|
field names in the hash are created, so be careful. |
2342
|
|
|
|
|
|
|
|
2343
|
|
|
|
|
|
|
In fact, in general with either getting or setting rows, any |
2344
|
|
|
|
|
|
|
non-existent fields mentioned will be created for you (by internally |
2345
|
|
|
|
|
|
|
calling col()). So you could build a whole table of 100 rows by |
2346
|
|
|
|
|
|
|
starting with an empty, new, table and setting row 99 from a hash that |
2347
|
|
|
|
|
|
|
gives the field names. |
2348
|
|
|
|
|
|
|
|
2349
|
|
|
|
|
|
|
Setting a row number higher than any existing row number with row(), |
2350
|
|
|
|
|
|
|
row_set() or row_force() will automatically set the new length of the |
2351
|
|
|
|
|
|
|
entire table to match (extending all the columns with empty rows as |
2352
|
|
|
|
|
|
|
necessary). |
2353
|
|
|
|
|
|
|
|
2354
|
|
|
|
|
|
|
IMPORTANT: IF YOU SIMPLY MUST ADD ROWS SEQUENTIALLY, do not let the |
2355
|
|
|
|
|
|
|
table auto-extend by one with each row you set. This is slow and gets |
2356
|
|
|
|
|
|
|
rapidly slower if there's lots of data because the arrays holding the |
2357
|
|
|
|
|
|
|
data columns will keep getting reallocated on every insert. Instead, |
2358
|
|
|
|
|
|
|
first pre-extend the table to your highest row number by calling |
2359
|
|
|
|
|
|
|
length($Len), and then set your rows. Or easier: if convenient just |
2360
|
|
|
|
|
|
|
set your rows starting with the highest-numbered one first. If you |
2361
|
|
|
|
|
|
|
don't know how many you'll have, guess or estimate and pre-extend to |
2362
|
|
|
|
|
|
|
the estimated number and then cut back later. This will be faster |
2363
|
|
|
|
|
|
|
than extending all columns by one each time. |
2364
|
|
|
|
|
|
|
|
2365
|
|
|
|
|
|
|
row_delete() removes a row or range of rows completely from the table. |
2366
|
|
|
|
|
|
|
Any rows above the deleted ones will move down and the table's |
2367
|
|
|
|
|
|
|
length() will decrease. If the data columns are very large, this |
2368
|
|
|
|
|
|
|
could be a bit slow because a lot of data could be moved around. The |
2369
|
|
|
|
|
|
|
low and high row numbers will be limited for you to 0 and length() - |
2370
|
|
|
|
|
|
|
1, respectively. Null ranges are OK and are silently ignored. The |
2371
|
|
|
|
|
|
|
range is inclusive, so to delete just row 99, call row_delete(99) or |
2372
|
|
|
|
|
|
|
row_delete(99,99). |
2373
|
|
|
|
|
|
|
|
2374
|
|
|
|
|
|
|
EFFICIENCY NOTE: Don't call row_delete() to remove lots of individual |
2375
|
|
|
|
|
|
|
rows. Instead, select those row numbers by setting the selection (if |
2376
|
|
|
|
|
|
|
not already selected), and then invert the selection using |
2377
|
|
|
|
|
|
|
selection_invert(), so the undesired rows are deselected, and then use |
2378
|
|
|
|
|
|
|
the cull() method to rewrite the entire table at once. The deselected |
2379
|
|
|
|
|
|
|
rows will be omitted very efficiently this way. |
2380
|
|
|
|
|
|
|
|
2381
|
|
|
|
|
|
|
row_move() moves a row from its $Old row number to the position before |
2382
|
|
|
|
|
|
|
the row currently in row $New (specify $New = length() to move the row |
2383
|
|
|
|
|
|
|
to the end). Again, in shuffling data in columns, lots of data could |
2384
|
|
|
|
|
|
|
get moved around by this operation, so expect it to be slow. If as |
2385
|
|
|
|
|
|
|
with row_delete(), if you will be doing several moves, consider |
2386
|
|
|
|
|
|
|
building an appropriate selection() first, and then using cull() |
2387
|
|
|
|
|
|
|
instead. |
2388
|
|
|
|
|
|
|
|
2389
|
|
|
|
|
|
|
Using row_delete() and row_move() to shift records around changes the |
2390
|
|
|
|
|
|
|
record numbers of the affected records and many others in the table. |
2391
|
|
|
|
|
|
|
The record numbers in the custom selection, if any, are updated to |
2392
|
|
|
|
|
|
|
reflect these changes, so the records that were selected before will |
2393
|
|
|
|
|
|
|
still be selected after the move (except those that were deleted of |
2394
|
|
|
|
|
|
|
course). If you had a private copy of the selection, your copy will |
2395
|
|
|
|
|
|
|
likely become outdated after these operations. You should get it |
2396
|
|
|
|
|
|
|
again by calling selection(). |
2397
|
|
|
|
|
|
|
|
2398
|
|
|
|
|
|
|
row_empty() returns a hash whose keys are the entries in fieldlist() |
2399
|
|
|
|
|
|
|
and whose values are undef. (You could use it to fill in values |
2400
|
|
|
|
|
|
|
before calling row_set()). Note: in this class, row_empty() does |
2401
|
|
|
|
|
|
|
exactly the same thing as fieldlist_hash() when the latter is called |
2402
|
|
|
|
|
|
|
with no arguments. |
2403
|
|
|
|
|
|
|
|
2404
|
|
|
|
|
|
|
row_exists() returns true if C<(($Num E= 0) && ($Num E $t-Elength()))>. |
2405
|
|
|
|
|
|
|
|
2406
|
|
|
|
|
|
|
rows() calls row() for each row num in a list and returns a list of |
2407
|
|
|
|
|
|
|
the resulting hashes. |
2408
|
|
|
|
|
|
|
|
2409
|
|
|
|
|
|
|
row_list() gets row values as a list instead of a hash. They appear |
2410
|
|
|
|
|
|
|
in the order specified in fieldlist() unless you supply an optional |
2411
|
|
|
|
|
|
|
$Fields parameter listing the fields you want to get. |
2412
|
|
|
|
|
|
|
|
2413
|
|
|
|
|
|
|
row_list_set() sets row values as a list instead of a hash. Pass your |
2414
|
|
|
|
|
|
|
own $Fields list or undef and fieldlist() will be used. $Values |
2415
|
|
|
|
|
|
|
should be a list with the same number of values as fields expected; |
2416
|
|
|
|
|
|
|
any shortage will result in undef/empty values being set. |
2417
|
|
|
|
|
|
|
|
2418
|
|
|
|
|
|
|
=cut |
2419
|
|
|
|
|
|
|
|
2420
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
2421
|
|
|
|
|
|
|
|
2422
|
|
|
|
|
|
|
sub row |
2423
|
|
|
|
|
|
|
{ |
2424
|
69
|
|
|
69
|
0
|
139
|
my $this = shift; |
2425
|
69
|
|
|
|
|
95
|
my ($Num, $Row) = @_; |
2426
|
|
|
|
|
|
|
|
2427
|
|
|
|
|
|
|
## Set if specified. |
2428
|
69
|
50
|
|
|
|
141
|
return($this->row_set($Num, $Row)) if defined($Row); |
2429
|
|
|
|
|
|
|
|
2430
|
|
|
|
|
|
|
## Else get. |
2431
|
69
|
|
|
|
|
132
|
return($this->row_get($Num)); |
2432
|
|
|
|
|
|
|
} |
2433
|
|
|
|
|
|
|
|
2434
|
|
|
|
|
|
|
sub row_get |
2435
|
|
|
|
|
|
|
{ |
2436
|
69
|
|
|
69
|
0
|
81
|
my $this = shift; |
2437
|
69
|
|
|
|
|
94
|
my ($Num) = @_; |
2438
|
|
|
|
|
|
|
|
2439
|
69
|
|
|
|
|
123
|
my $Fields = $this->fieldlist(); |
2440
|
69
|
|
|
|
|
100
|
my $Row = {}; @$Row{@$Fields} = map {$this->col($_)->[$Num]} @$Fields; |
|
69
|
|
|
|
|
115
|
|
|
270
|
|
|
|
|
532
|
|
2441
|
|
|
|
|
|
|
|
2442
|
69
|
|
|
|
|
615
|
return($Row); |
2443
|
|
|
|
|
|
|
} |
2444
|
|
|
|
|
|
|
|
2445
|
|
|
|
|
|
|
sub row_set |
2446
|
|
|
|
|
|
|
{ |
2447
|
3
|
|
|
3
|
0
|
18
|
my $this = shift; |
2448
|
3
|
|
|
|
|
6
|
my ($Num, $Row) = @_; |
2449
|
|
|
|
|
|
|
|
2450
|
|
|
|
|
|
|
## We thoughtfully sort the keys in case columns will get created |
2451
|
|
|
|
|
|
|
## in this order. |
2452
|
|
|
|
|
|
|
|
2453
|
3
|
|
|
|
|
14
|
my $Fields = [sort keys %$Row]; |
2454
|
|
|
|
|
|
|
|
2455
|
|
|
|
|
|
|
## Pre-extend the table to accommodate row $Num if necessary. |
2456
|
|
|
|
|
|
|
## This will do nothing if there are not yet any fields in the |
2457
|
|
|
|
|
|
|
## table (and the length will still be effectively zero). |
2458
|
|
|
|
|
|
|
|
2459
|
3
|
100
|
|
|
|
10
|
$this->length($Num + 1) unless $this->length() >= $Num + 1; |
2460
|
|
|
|
|
|
|
|
2461
|
|
|
|
|
|
|
## Insert into columns, creating them if necessary. |
2462
|
3
|
|
|
|
|
8
|
foreach (@$Fields) {$this->col($_)->[$Num] = $Row->{$_}}; |
|
10
|
|
|
|
|
27
|
|
2463
|
|
|
|
|
|
|
|
2464
|
3
|
|
|
|
|
12
|
return($Row); ## Why not? |
2465
|
|
|
|
|
|
|
} |
2466
|
|
|
|
|
|
|
|
2467
|
|
|
|
|
|
|
sub row_delete |
2468
|
|
|
|
|
|
|
{ |
2469
|
10
|
|
|
10
|
0
|
92
|
my $this = shift; |
2470
|
10
|
|
|
|
|
18
|
my ($First, $Last) = @_; |
2471
|
|
|
|
|
|
|
|
2472
|
|
|
|
|
|
|
## Nothing to do if $First not specified. |
2473
|
10
|
100
|
|
|
|
23
|
return() unless defined($First); |
2474
|
|
|
|
|
|
|
|
2475
|
|
|
|
|
|
|
## Default is $Last is same as first (remove one row only) |
2476
|
9
|
100
|
|
|
|
23
|
$Last = $First unless defined($Last); |
2477
|
|
|
|
|
|
|
|
2478
|
9
|
|
|
|
|
18
|
my $LastIndex = $this->length() - 1; |
2479
|
|
|
|
|
|
|
|
2480
|
|
|
|
|
|
|
## Restrict the range to meaningful values. |
2481
|
9
|
|
|
|
|
23
|
$First = max($First, 0 ); ## First could be very high, to indicate a null range. |
2482
|
9
|
|
|
|
|
23
|
$Last = min($Last, $LastIndex); ## Last could be negative, like -1, to indicate null range. |
2483
|
|
|
|
|
|
|
|
2484
|
|
|
|
|
|
|
## Nothing to do if the range is empty. |
2485
|
9
|
100
|
|
|
|
23
|
return() if $Last < $First; |
2486
|
|
|
|
|
|
|
|
2487
|
8
|
|
|
|
|
17
|
my $Fields = $this->fieldlist(); |
2488
|
|
|
|
|
|
|
|
2489
|
8
|
|
|
|
|
13
|
my $RangeSize = ($Last - $First + 1); |
2490
|
|
|
|
|
|
|
|
2491
|
8
|
|
|
|
|
18
|
foreach (@$Fields) {splice @{$this->col($_)}, $First, $RangeSize}; |
|
32
|
|
|
|
|
39
|
|
|
32
|
|
|
|
|
58
|
|
2492
|
|
|
|
|
|
|
|
2493
|
|
|
|
|
|
|
## Here we could have trapped all the list segments we spliced out |
2494
|
|
|
|
|
|
|
## and return them in nice CTable-ish hash. Maybe we will some |
2495
|
|
|
|
|
|
|
## day. This would be a way to split a range of rows out of a |
2496
|
|
|
|
|
|
|
## table object to create another table object. We could even |
2497
|
|
|
|
|
|
|
## call it the "split" method... |
2498
|
|
|
|
|
|
|
|
2499
|
|
|
|
|
|
|
## After deleting the rows, we need to adjust the _Selection if |
2500
|
|
|
|
|
|
|
## present. Deleted row numbers in the selection need to be |
2501
|
|
|
|
|
|
|
## omitted; row numbers greater than the range of deleted ones |
2502
|
|
|
|
|
|
|
## need to be decreased by the size of the range reduction, and |
2503
|
|
|
|
|
|
|
## others need to be left untouched. |
2504
|
|
|
|
|
|
|
|
2505
|
0
|
0
|
|
|
|
0
|
$this->{_Selection} = |
|
|
0
|
|
|
|
|
|
2506
|
|
|
|
|
|
|
[map |
2507
|
|
|
|
|
|
|
{ |
2508
|
0
|
|
|
|
|
0
|
($_ < $First ? $_ : ## Before range: pass through. |
2509
|
|
|
|
|
|
|
($_ <= $Last ? () : ## In range: omit. |
2510
|
|
|
|
|
|
|
$_ - $RangeSize)) ## After range: reduce by range size |
2511
|
8
|
50
|
|
|
|
33
|
} @{ $this->{_Selection}}] |
2512
|
|
|
|
|
|
|
if $this->{_Selection}; |
2513
|
|
|
|
|
|
|
} |
2514
|
|
|
|
|
|
|
|
2515
|
|
|
|
|
|
|
sub row_move |
2516
|
|
|
|
|
|
|
{ |
2517
|
8
|
|
|
8
|
0
|
13
|
my $this = shift; |
2518
|
8
|
|
|
|
|
12
|
my ($Old, $New) = @_; |
2519
|
|
|
|
|
|
|
|
2520
|
|
|
|
|
|
|
## $Old and $New are required params and must not be undef. |
2521
|
8
|
50
|
33
|
|
|
41
|
goto done unless defined($Old) && defined($New); |
2522
|
|
|
|
|
|
|
|
2523
|
|
|
|
|
|
|
## If $Old and $New are the same or one apart, there's nothing to do. |
2524
|
8
|
100
|
|
|
|
23
|
goto done if ($New == $Old); ## This would mean a no-op. |
2525
|
7
|
100
|
|
|
|
20
|
goto done if ($New == $Old + 1); ## This would mean a no-op. |
2526
|
|
|
|
|
|
|
|
2527
|
5
|
|
|
|
|
11
|
my $Length = $this->length(); |
2528
|
|
|
|
|
|
|
|
2529
|
|
|
|
|
|
|
## Ensure both $Old and $New are legal indices. |
2530
|
5
|
50
|
33
|
|
|
28
|
goto done if (($Old < 0) || ($Old > $Length - 1)); |
2531
|
5
|
50
|
33
|
|
|
26
|
goto done if (($New < 0) || ($New > $Length )); ## New has a range up to $Length, meaning move to end. |
2532
|
|
|
|
|
|
|
|
2533
|
5
|
|
|
|
|
11
|
my $Fields = $this->fieldlist_all(); |
2534
|
|
|
|
|
|
|
|
2535
|
5
|
100
|
|
|
|
14
|
if ($Old < $New) ## Move forward (to higher / later row num) |
2536
|
|
|
|
|
|
|
{ |
2537
|
|
|
|
|
|
|
## Delete from the lower position and insert into higher -- MINUS ONE to account for prior shortening. |
2538
|
2
|
|
|
|
|
3
|
my $Row = {}; |
2539
|
2
|
|
|
|
|
4
|
foreach (@$Fields) {$Row->{$_} = splice(@{$this->col($_)}, $Old , 1 )}; |
|
8
|
|
|
|
|
95
|
|
|
8
|
|
|
|
|
19
|
|
2540
|
2
|
|
|
|
|
6
|
foreach (@$Fields) { splice(@{$this->col($_)}, $New - 1, 0, $Row->{$_})}; |
|
8
|
|
|
|
|
11
|
|
|
8
|
|
|
|
|
16
|
|
2541
|
|
|
|
|
|
|
|
2542
|
|
|
|
|
|
|
} |
2543
|
|
|
|
|
|
|
else ## Move backward (to lower / earlier row num) |
2544
|
|
|
|
|
|
|
{ |
2545
|
|
|
|
|
|
|
## Delete from the higher position and insert into lower. |
2546
|
3
|
|
|
|
|
5
|
my $Row = {}; |
2547
|
3
|
|
|
|
|
7
|
foreach (@$Fields) {$Row->{$_} = splice(@{$this->col($_)}, $Old , 1 )}; |
|
12
|
|
|
|
|
12
|
|
|
12
|
|
|
|
|
26
|
|
2548
|
3
|
|
|
|
|
6
|
foreach (@$Fields) { splice(@{$this->col($_)}, $New , 0, $Row->{$_})}; |
|
12
|
|
|
|
|
14
|
|
|
12
|
|
|
|
|
24
|
|
2549
|
|
|
|
|
|
|
|
2550
|
|
|
|
|
|
|
} |
2551
|
|
|
|
|
|
|
|
2552
|
|
|
|
|
|
|
## After moving the rows, we need to adjust the _Selection if |
2553
|
|
|
|
|
|
|
## present. Row numbers outside the shuffled range stay the same; |
2554
|
|
|
|
|
|
|
## the moved row number(s) change; others (inside the range) get |
2555
|
|
|
|
|
|
|
## shifted down or up by 1. |
2556
|
|
|
|
|
|
|
|
2557
|
5
|
100
|
|
|
|
17
|
if ($Old < $New) ## Move forward / higher / later |
2558
|
|
|
|
|
|
|
{ |
2559
|
6
|
100
|
|
|
|
25
|
$this->{_Selection} = |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
2560
|
|
|
|
|
|
|
[map |
2561
|
|
|
|
|
|
|
{ |
2562
|
2
|
|
|
|
|
5
|
($_ == $Old ? $New - 1 : ## Moved row: change num to new - 1 |
2563
|
|
|
|
|
|
|
($_ < $Old ? $_ : ## Less than $Old: no change |
2564
|
|
|
|
|
|
|
($_ >= $New ? $_ : ## Grtr= than $New: no change |
2565
|
|
|
|
|
|
|
$_ - 1))) ## In range: shift down by 1. |
2566
|
2
|
50
|
|
|
|
7
|
} @{ $this->{_Selection}}] |
2567
|
|
|
|
|
|
|
if $this->{_Selection}; |
2568
|
|
|
|
|
|
|
|
2569
|
|
|
|
|
|
|
} |
2570
|
|
|
|
|
|
|
else ## Move backward / lower / earlier |
2571
|
|
|
|
|
|
|
{ |
2572
|
9
|
100
|
|
|
|
34
|
$this->{_Selection} = |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
2573
|
|
|
|
|
|
|
[map |
2574
|
|
|
|
|
|
|
{ |
2575
|
3
|
|
|
|
|
7
|
($_ == $Old ? $New : ## Moved row: change num to new |
2576
|
|
|
|
|
|
|
($_ >= $Old ? $_ : ## Grtr= than $Old: no change |
2577
|
|
|
|
|
|
|
($_ < $New ? $_ : ## Less than $New: no change |
2578
|
|
|
|
|
|
|
$_ + 1))) ## In range: shift up by 1. |
2579
|
3
|
50
|
|
|
|
10
|
} @{ $this->{_Selection}}] |
2580
|
|
|
|
|
|
|
if $this->{_Selection}; |
2581
|
|
|
|
|
|
|
} |
2582
|
|
|
|
|
|
|
|
2583
|
8
|
|
|
|
|
25
|
done: |
2584
|
|
|
|
|
|
|
return; |
2585
|
|
|
|
|
|
|
} |
2586
|
|
|
|
|
|
|
|
2587
|
|
|
|
|
|
|
sub row_empty |
2588
|
|
|
|
|
|
|
{ |
2589
|
2
|
|
|
2
|
0
|
9
|
my $this = shift; |
2590
|
|
|
|
|
|
|
|
2591
|
2
|
|
|
|
|
5
|
my $Fields = $this->fieldlist(); |
2592
|
2
|
|
|
|
|
4
|
my $Row = {}; @$Row{@$Fields} = undef; |
|
2
|
|
|
|
|
10
|
|
2593
|
|
|
|
|
|
|
|
2594
|
2
|
|
|
|
|
12
|
return($Row); |
2595
|
|
|
|
|
|
|
} |
2596
|
|
|
|
|
|
|
|
2597
|
|
|
|
|
|
|
sub row_exists |
2598
|
|
|
|
|
|
|
{ |
2599
|
14
|
|
|
14
|
0
|
38
|
my $this = shift; |
2600
|
14
|
|
|
|
|
16
|
my ($Num) = @_; |
2601
|
|
|
|
|
|
|
|
2602
|
14
|
|
100
|
|
|
46
|
return(($Num >= 0) && ($Num < $this->length())); |
2603
|
|
|
|
|
|
|
} |
2604
|
|
|
|
|
|
|
|
2605
|
|
|
|
|
|
|
sub rows |
2606
|
|
|
|
|
|
|
{ |
2607
|
4
|
|
|
4
|
0
|
13
|
my $this = shift; |
2608
|
4
|
|
|
|
|
6
|
my ($Nums) = @_; |
2609
|
|
|
|
|
|
|
|
2610
|
4
|
|
|
|
|
7
|
return([map {$this->row($_)} @$Nums]); |
|
8
|
|
|
|
|
20
|
|
2611
|
|
|
|
|
|
|
} |
2612
|
|
|
|
|
|
|
|
2613
|
|
|
|
|
|
|
sub row_list |
2614
|
|
|
|
|
|
|
{ |
2615
|
19
|
|
|
19
|
0
|
33
|
my $this = shift; |
2616
|
19
|
|
|
|
|
26
|
my ($Num, $Fields) = @_; |
2617
|
|
|
|
|
|
|
|
2618
|
|
|
|
|
|
|
## $Fields argument is optional and defaults to fieldlist(); |
2619
|
19
|
|
66
|
|
|
54
|
$Fields ||= $this->fieldlist(); |
2620
|
|
|
|
|
|
|
|
2621
|
19
|
|
|
|
|
32
|
my $Row = [map {$this->col($_)->[$Num]} @$Fields]; |
|
105
|
|
|
|
|
193
|
|
2622
|
19
|
|
|
|
|
193
|
return($Row); |
2623
|
|
|
|
|
|
|
} |
2624
|
|
|
|
|
|
|
|
2625
|
|
|
|
|
|
|
sub row_list_set |
2626
|
|
|
|
|
|
|
{ |
2627
|
6
|
|
|
6
|
0
|
39
|
my $this = shift; |
2628
|
6
|
|
|
|
|
9
|
my ($Num, $Fields, $Vals) = @_; |
2629
|
|
|
|
|
|
|
|
2630
|
|
|
|
|
|
|
## $Fields argument is optional and defaults to fieldlist(); |
2631
|
6
|
|
66
|
|
|
19
|
$Fields ||= $this->fieldlist(); |
2632
|
|
|
|
|
|
|
|
2633
|
|
|
|
|
|
|
## $Vals is optional and defaults to []. |
2634
|
6
|
|
50
|
|
|
10
|
$Vals ||= []; |
2635
|
|
|
|
|
|
|
|
2636
|
|
|
|
|
|
|
## Pre-extend the table to accommodate row $Num if necessary. |
2637
|
6
|
50
|
|
|
|
14
|
$this->length($Num + 1) unless $this->length() >= $Num + 1; |
2638
|
|
|
|
|
|
|
|
2639
|
|
|
|
|
|
|
## Set the $Vals in row $Num in the order given by $Fields. |
2640
|
6
|
|
|
|
|
15
|
foreach (0..$#$Fields) {$this->col($Fields->[$_])->[$Num] = $Vals->[$_]} |
|
18
|
|
|
|
|
35
|
|
2641
|
|
|
|
|
|
|
} |
2642
|
|
|
|
|
|
|
|
2643
|
|
|
|
|
|
|
=pod |
2644
|
|
|
|
|
|
|
|
2645
|
|
|
|
|
|
|
=head1 ROW / RECORD COUNT (TABLE LENGTH) |
2646
|
|
|
|
|
|
|
|
2647
|
|
|
|
|
|
|
## Getting or setting table length |
2648
|
|
|
|
|
|
|
|
2649
|
|
|
|
|
|
|
$t->length() ## Get length |
2650
|
|
|
|
|
|
|
$t->length_get() |
2651
|
|
|
|
|
|
|
|
2652
|
|
|
|
|
|
|
$t->length(22) ## Set length (truncate or pre-extend) |
2653
|
|
|
|
|
|
|
$t->length_set(22) |
2654
|
|
|
|
|
|
|
|
2655
|
|
|
|
|
|
|
$t->extend() ## Set length of all columns to match longest |
2656
|
|
|
|
|
|
|
|
2657
|
|
|
|
|
|
|
The length* methods assume the table already has columns of equal |
2658
|
|
|
|
|
|
|
length. So the length of the table is the length of any field taken |
2659
|
|
|
|
|
|
|
at random. We choose the first one in the field list. |
2660
|
|
|
|
|
|
|
|
2661
|
|
|
|
|
|
|
Setting the length will truncate or pre-extend every column in the |
2662
|
|
|
|
|
|
|
table to a given length as required. |
2663
|
|
|
|
|
|
|
|
2664
|
|
|
|
|
|
|
(Pre-extending means setting each column's length via $# so that it |
2665
|
|
|
|
|
|
|
has the correct number of entries already allocated (and filled with |
2666
|
|
|
|
|
|
|
undef) so that operations that fill up the table can be done much more |
2667
|
|
|
|
|
|
|
quickly than with push(). |
2668
|
|
|
|
|
|
|
|
2669
|
|
|
|
|
|
|
However, if a new column has been added directly, or a table has been |
2670
|
|
|
|
|
|
|
constructed out of columns whose length may not initially match, the |
2671
|
|
|
|
|
|
|
extend() method may be (should be) called to inspect all columns and |
2672
|
|
|
|
|
|
|
extend them all to match the longest one. Note that extend() operates |
2673
|
|
|
|
|
|
|
on all fields in the object, ignoring the custom _FieldList if any. |
2674
|
|
|
|
|
|
|
|
2675
|
|
|
|
|
|
|
The length of a table with no columns is zero. |
2676
|
|
|
|
|
|
|
|
2677
|
|
|
|
|
|
|
=cut |
2678
|
|
|
|
|
|
|
|
2679
|
|
|
|
|
|
|
sub length |
2680
|
|
|
|
|
|
|
{ |
2681
|
470
|
|
|
470
|
0
|
743
|
my $this = shift; |
2682
|
470
|
|
|
|
|
560
|
my ($Length) = @_; |
2683
|
|
|
|
|
|
|
|
2684
|
470
|
100
|
|
|
|
1172
|
return(defined($Length) ? |
2685
|
|
|
|
|
|
|
$this->length_set($Length) : |
2686
|
|
|
|
|
|
|
$this->length_get()); |
2687
|
|
|
|
|
|
|
} |
2688
|
|
|
|
|
|
|
|
2689
|
|
|
|
|
|
|
sub length_get |
2690
|
|
|
|
|
|
|
{ |
2691
|
468
|
|
|
468
|
0
|
581
|
my $this = shift; |
2692
|
|
|
|
|
|
|
|
2693
|
468
|
|
|
|
|
784
|
my $FieldList = $this->fieldlist(); |
2694
|
468
|
|
|
|
|
722
|
my $FirstField = $FieldList->[0]; |
2695
|
468
|
|
|
|
|
869
|
my $Col = $this->{$FirstField}; |
2696
|
468
|
100
|
|
|
|
1119
|
my $Length = (ref($Col) eq 'ARRAY' ? @$Col+0 : 0); |
2697
|
|
|
|
|
|
|
|
2698
|
468
|
|
|
|
|
1835
|
return($Length); |
2699
|
|
|
|
|
|
|
} |
2700
|
|
|
|
|
|
|
|
2701
|
|
|
|
|
|
|
sub length_set |
2702
|
|
|
|
|
|
|
{ |
2703
|
26
|
|
|
26
|
0
|
36
|
my $this = shift; |
2704
|
26
|
|
|
|
|
35
|
my ($Length) = @_; |
2705
|
|
|
|
|
|
|
|
2706
|
|
|
|
|
|
|
## Apply the length-setting logic to any field found in the hash |
2707
|
|
|
|
|
|
|
## OR listed in the field list. They will all be created if not |
2708
|
|
|
|
|
|
|
## already present. |
2709
|
|
|
|
|
|
|
|
2710
|
26
|
|
|
|
|
27
|
my $FieldList = [@{$this->fieldlist_all()}, @{$this->fieldlist()}]; |
|
26
|
|
|
|
|
49
|
|
|
26
|
|
|
|
|
51
|
|
2711
|
|
|
|
|
|
|
|
2712
|
26
|
|
|
|
|
60
|
foreach my $FieldName (@$FieldList) |
2713
|
|
|
|
|
|
|
{ |
2714
|
223
|
|
|
|
|
254
|
$#{$this->col($FieldName)} = ($Length - 1); ## $Length = 0 => $# = -1 => empty list. |
|
223
|
|
|
|
|
357
|
|
2715
|
|
|
|
|
|
|
}; |
2716
|
|
|
|
|
|
|
|
2717
|
|
|
|
|
|
|
## Since records might have been deleted, re-validate the |
2718
|
|
|
|
|
|
|
## _Selection, if it is present. |
2719
|
|
|
|
|
|
|
|
2720
|
26
|
|
|
|
|
62
|
$this->selection_validate(); |
2721
|
|
|
|
|
|
|
|
2722
|
26
|
|
|
|
|
65
|
return($Length); |
2723
|
|
|
|
|
|
|
} |
2724
|
|
|
|
|
|
|
|
2725
|
|
|
|
|
|
|
sub extend |
2726
|
|
|
|
|
|
|
{ |
2727
|
22
|
|
|
22
|
0
|
32
|
my $this = shift; |
2728
|
22
|
|
|
|
|
29
|
my $Length = 0; |
2729
|
|
|
|
|
|
|
|
2730
|
|
|
|
|
|
|
## Find the length of the longest vector... |
2731
|
|
|
|
|
|
|
|
2732
|
22
|
|
|
|
|
28
|
foreach (@{$this->fieldlist_all()}) {$Length = max($Length, $#{$this->{$_}} + 1)}; |
|
22
|
|
|
|
|
41
|
|
|
114
|
|
|
|
|
132
|
|
|
114
|
|
|
|
|
265
|
|
2733
|
|
|
|
|
|
|
|
2734
|
|
|
|
|
|
|
## ...and set them all to be that length. |
2735
|
|
|
|
|
|
|
|
2736
|
22
|
|
|
|
|
64
|
$this->length_set($Length); |
2737
|
|
|
|
|
|
|
} |
2738
|
|
|
|
|
|
|
|
2739
|
|
|
|
|
|
|
=pod |
2740
|
|
|
|
|
|
|
|
2741
|
|
|
|
|
|
|
=head1 SELECTIONS |
2742
|
|
|
|
|
|
|
|
2743
|
|
|
|
|
|
|
## Getting or setting the custom selection list itself (_Selection) |
2744
|
|
|
|
|
|
|
|
2745
|
|
|
|
|
|
|
$t->selection() ## Get sel if any; else all() |
2746
|
|
|
|
|
|
|
$t->selection_get() |
2747
|
|
|
|
|
|
|
|
2748
|
|
|
|
|
|
|
$t->selection($List) ## Set sel (list of rec nums) |
2749
|
|
|
|
|
|
|
$t->selection_set($List) |
2750
|
|
|
|
|
|
|
|
2751
|
|
|
|
|
|
|
$t->selection(0) ## Remove sel (select all) |
2752
|
|
|
|
|
|
|
$t->selection_set(undef) |
2753
|
|
|
|
|
|
|
$t->selection_delete() |
2754
|
|
|
|
|
|
|
$t->select_all() |
2755
|
|
|
|
|
|
|
|
2756
|
|
|
|
|
|
|
$t->selection_inverse() ## Get inverse copy of selection |
2757
|
|
|
|
|
|
|
$t->select_inverse() ## Invert the selection |
2758
|
|
|
|
|
|
|
|
2759
|
|
|
|
|
|
|
$t->selection_validate() ## Remove invalid #s from sel |
2760
|
|
|
|
|
|
|
|
2761
|
|
|
|
|
|
|
## List of all rec nums present (regardless of selection) |
2762
|
|
|
|
|
|
|
|
2763
|
|
|
|
|
|
|
$t->all() |
2764
|
|
|
|
|
|
|
|
2765
|
|
|
|
|
|
|
## Getting or setting just selected fields in columns |
2766
|
|
|
|
|
|
|
## (as contrasted with col() and friends). |
2767
|
|
|
|
|
|
|
|
2768
|
|
|
|
|
|
|
$t->sel($ColName) ## Get col but only records in sel |
2769
|
|
|
|
|
|
|
$t->sel_get($ColName) |
2770
|
|
|
|
|
|
|
|
2771
|
|
|
|
|
|
|
$t->sel($ColName, $ListRef) ## Set selected fields in col... |
2772
|
|
|
|
|
|
|
$t->sel_set($ColName, $ListRef) ##... in selection order |
2773
|
|
|
|
|
|
|
|
2774
|
|
|
|
|
|
|
$t->sel_set($ColName) ## Set selected fields to undef |
2775
|
|
|
|
|
|
|
$t->sel_clear($ColName) |
2776
|
|
|
|
|
|
|
|
2777
|
|
|
|
|
|
|
$t->sels($ColList) ## Like cols, but selected fields |
2778
|
|
|
|
|
|
|
$t->sels_hash($ColList) ## " " cols_hash()... " " " " |
2779
|
|
|
|
|
|
|
|
2780
|
|
|
|
|
|
|
## Finding out size of selection (number of rows) |
2781
|
|
|
|
|
|
|
|
2782
|
|
|
|
|
|
|
$t->sel_len() ## Get number of selected rows. |
2783
|
|
|
|
|
|
|
|
2784
|
|
|
|
|
|
|
A selection is an ordered list of record numbers. The record numbers |
2785
|
|
|
|
|
|
|
in the selection may be a subset of available records. Furthermore, |
2786
|
|
|
|
|
|
|
they may be in non-record-number order, indicating that the records |
2787
|
|
|
|
|
|
|
have been sorted. |
2788
|
|
|
|
|
|
|
|
2789
|
|
|
|
|
|
|
Record numbers are numeric array indices into the columns in the |
2790
|
|
|
|
|
|
|
table. It is an error for any selection list to contain an index less |
2791
|
|
|
|
|
|
|
than zero or greater than (length() - 1), so if you set a selection |
2792
|
|
|
|
|
|
|
explicitly, be careful. |
2793
|
|
|
|
|
|
|
|
2794
|
|
|
|
|
|
|
Any selection list you get or set belongs to the object. Be careful |
2795
|
|
|
|
|
|
|
of modifying its contents. |
2796
|
|
|
|
|
|
|
|
2797
|
|
|
|
|
|
|
The custom selection, if any, is stored internally in the _Selection |
2798
|
|
|
|
|
|
|
parameter. If this parameter is absent, the selection defaults to |
2799
|
|
|
|
|
|
|
all() -- i.e. a list of all record numbers, in order: |
2800
|
|
|
|
|
|
|
[0..($this->length() - 1)] (which becomes [] if length() is 0). |
2801
|
|
|
|
|
|
|
|
2802
|
|
|
|
|
|
|
REMEMBER: length() is one-based, but record numbers are zero-based. |
2803
|
|
|
|
|
|
|
|
2804
|
|
|
|
|
|
|
Removing the selection (that is, removing the LIST itself of which |
2805
|
|
|
|
|
|
|
records are selected), is the same as selecting all records. |
2806
|
|
|
|
|
|
|
consequently, selection(0), selection_delete(), and select_all() are |
2807
|
|
|
|
|
|
|
all synonymous. |
2808
|
|
|
|
|
|
|
|
2809
|
|
|
|
|
|
|
selection_validate() removes any entries from the current _Selection |
2810
|
|
|
|
|
|
|
list (if any) that are not valid record numbers -- i.e. it removes any |
2811
|
|
|
|
|
|
|
record whose integer value is < 0 or greater than length() - 1. This |
2812
|
|
|
|
|
|
|
routine is mainly used by other methods that might delete records, |
2813
|
|
|
|
|
|
|
such as length_set(). |
2814
|
|
|
|
|
|
|
|
2815
|
|
|
|
|
|
|
Getting or setting just selected data from columns |
2816
|
|
|
|
|
|
|
|
2817
|
|
|
|
|
|
|
Sometimes, you don't want to get/set entire columns, you instead want |
2818
|
|
|
|
|
|
|
to get or set data in just the selected fields in a column. |
2819
|
|
|
|
|
|
|
|
2820
|
|
|
|
|
|
|
The sel(), sel_get(), sel_set(), sels() and sels_hash() methods are |
2821
|
|
|
|
|
|
|
analagous to the corresponding col(), ... cols_hash() methods except |
2822
|
|
|
|
|
|
|
in these two ways: |
2823
|
|
|
|
|
|
|
|
2824
|
|
|
|
|
|
|
- the 'sels' variants get or set just selected data, as determined by |
2825
|
|
|
|
|
|
|
the current selection(), which gives an ordered list of the selected / |
2826
|
|
|
|
|
|
|
sorted records. |
2827
|
|
|
|
|
|
|
|
2828
|
|
|
|
|
|
|
- the 'sels' variants all make COPIES of the data you request or |
2829
|
|
|
|
|
|
|
supply -- the data is copied out of or into the correspnding column. |
2830
|
|
|
|
|
|
|
So, you "own" any vector you pass or receive in reply. |
2831
|
|
|
|
|
|
|
|
2832
|
|
|
|
|
|
|
So, for example, imagine you have just set selection() to only list |
2833
|
|
|
|
|
|
|
record numbers where the LastName field is not empty. Then you have |
2834
|
|
|
|
|
|
|
called sort() to sort those record numbers by the LastName field. You |
2835
|
|
|
|
|
|
|
could then call $t->sel('LastName') to get a sorted list of all |
2836
|
|
|
|
|
|
|
non-empty last names. |
2837
|
|
|
|
|
|
|
|
2838
|
|
|
|
|
|
|
It might be helpful to think of "sel" as short for "selected". So |
2839
|
|
|
|
|
|
|
$t->sel('LastName') would mean "get the selected field values from the |
2840
|
|
|
|
|
|
|
LastName field". |
2841
|
|
|
|
|
|
|
|
2842
|
|
|
|
|
|
|
=cut |
2843
|
|
|
|
|
|
|
|
2844
|
|
|
|
|
|
|
sub selection |
2845
|
|
|
|
|
|
|
{ |
2846
|
477
|
|
|
477
|
1
|
673
|
my $this = shift; |
2847
|
477
|
|
|
|
|
597
|
my ($Selection) = @_; |
2848
|
|
|
|
|
|
|
|
2849
|
|
|
|
|
|
|
## Set if specified. |
2850
|
477
|
100
|
|
|
|
842
|
$this->selection_set($Selection) if defined($Selection); |
2851
|
|
|
|
|
|
|
|
2852
|
|
|
|
|
|
|
## Get and return. |
2853
|
477
|
|
|
|
|
770
|
$Selection = $this->selection_get(); |
2854
|
|
|
|
|
|
|
|
2855
|
477
|
|
|
|
|
1178
|
return($Selection); |
2856
|
|
|
|
|
|
|
} |
2857
|
|
|
|
|
|
|
|
2858
|
|
|
|
|
|
|
sub selection_get |
2859
|
|
|
|
|
|
|
{ |
2860
|
478
|
|
|
478
|
0
|
546
|
my $this = shift; |
2861
|
|
|
|
|
|
|
|
2862
|
478
|
|
66
|
|
|
1353
|
my $Selection = $this->{_Selection} || $this->selection_default(); |
2863
|
|
|
|
|
|
|
|
2864
|
478
|
|
|
|
|
784
|
return($Selection); |
2865
|
|
|
|
|
|
|
} |
2866
|
|
|
|
|
|
|
|
2867
|
|
|
|
|
|
|
sub selection_set |
2868
|
|
|
|
|
|
|
{ |
2869
|
159
|
|
|
159
|
0
|
186
|
my $this = shift; |
2870
|
159
|
|
|
|
|
185
|
my ($Selection) = @_; |
2871
|
|
|
|
|
|
|
|
2872
|
159
|
100
|
|
|
|
295
|
if (ref($Selection) eq 'ARRAY') |
2873
|
|
|
|
|
|
|
{ |
2874
|
|
|
|
|
|
|
## Set if specified... |
2875
|
36
|
|
|
|
|
69
|
$this->{_Selection} = $Selection; |
2876
|
|
|
|
|
|
|
} |
2877
|
|
|
|
|
|
|
else |
2878
|
|
|
|
|
|
|
{ |
2879
|
|
|
|
|
|
|
## Otherwise, delete and return original if any. |
2880
|
123
|
|
|
|
|
246
|
$Selection = delete $this->{_Selection}; |
2881
|
|
|
|
|
|
|
} |
2882
|
|
|
|
|
|
|
|
2883
|
159
|
|
|
|
|
288
|
return($Selection); |
2884
|
|
|
|
|
|
|
} |
2885
|
|
|
|
|
|
|
|
2886
|
|
|
|
|
|
|
sub selection_delete |
2887
|
|
|
|
|
|
|
{ |
2888
|
2
|
|
|
2
|
0
|
5
|
my $this = shift; |
2889
|
2
|
|
|
|
|
6
|
$this->selection_set(); |
2890
|
|
|
|
|
|
|
} |
2891
|
|
|
|
|
|
|
|
2892
|
|
|
|
|
|
|
sub select_all |
2893
|
|
|
|
|
|
|
{ |
2894
|
70
|
|
|
70
|
0
|
115
|
my $this = shift; |
2895
|
70
|
|
|
|
|
167
|
$this->selection_set(); |
2896
|
|
|
|
|
|
|
} |
2897
|
|
|
|
|
|
|
|
2898
|
|
|
|
|
|
|
sub select_none |
2899
|
|
|
|
|
|
|
{ |
2900
|
4
|
|
|
4
|
0
|
22
|
my $this = shift; |
2901
|
4
|
|
|
|
|
10
|
$this->selection_set([]); |
2902
|
|
|
|
|
|
|
} |
2903
|
|
|
|
|
|
|
|
2904
|
|
|
|
|
|
|
sub selection_default |
2905
|
|
|
|
|
|
|
{ |
2906
|
322
|
|
|
322
|
0
|
344
|
my $this = shift; |
2907
|
|
|
|
|
|
|
|
2908
|
322
|
|
|
|
|
8686
|
my $Selection = $this->all(); |
2909
|
|
|
|
|
|
|
|
2910
|
322
|
|
|
|
|
885
|
return($Selection); |
2911
|
|
|
|
|
|
|
} |
2912
|
|
|
|
|
|
|
|
2913
|
|
|
|
|
|
|
sub all |
2914
|
|
|
|
|
|
|
{ |
2915
|
343
|
|
|
343
|
0
|
384
|
my $this = shift; |
2916
|
343
|
|
|
|
|
580
|
my $RowNums = [0..($this->length() - 1)]; |
2917
|
|
|
|
|
|
|
|
2918
|
343
|
|
|
|
|
587
|
return($RowNums); |
2919
|
|
|
|
|
|
|
} |
2920
|
|
|
|
|
|
|
|
2921
|
|
|
|
|
|
|
sub selection_inverse |
2922
|
|
|
|
|
|
|
{ |
2923
|
9
|
|
|
9
|
0
|
14
|
my $this = shift; |
2924
|
9
|
|
|
|
|
15
|
my $Sel = $this->selection(); |
2925
|
9
|
|
|
|
|
18
|
my $All = $this->all(); |
2926
|
|
|
|
|
|
|
|
2927
|
9
|
|
|
|
|
18
|
@$All[@$Sel] = undef; |
2928
|
9
|
|
|
|
|
17
|
$All = [grep {defined} @$All]; |
|
27
|
|
|
|
|
54
|
|
2929
|
|
|
|
|
|
|
|
2930
|
9
|
|
|
|
|
34
|
return($All); |
2931
|
|
|
|
|
|
|
} |
2932
|
|
|
|
|
|
|
|
2933
|
|
|
|
|
|
|
sub select_inverse |
2934
|
|
|
|
|
|
|
{ |
2935
|
1
|
|
|
1
|
0
|
2
|
my $this = shift; |
2936
|
|
|
|
|
|
|
|
2937
|
1
|
|
|
|
|
4
|
return($this->{_Selection} = $this->selection_inverse()); |
2938
|
|
|
|
|
|
|
} |
2939
|
|
|
|
|
|
|
|
2940
|
|
|
|
|
|
|
sub selection_validate |
2941
|
|
|
|
|
|
|
{ |
2942
|
27
|
|
|
27
|
0
|
29
|
my $this = shift; |
2943
|
|
|
|
|
|
|
|
2944
|
27
|
100
|
|
|
|
84
|
if (ref($this->{_Selection}) eq 'ARRAY') |
2945
|
|
|
|
|
|
|
{ |
2946
|
1
|
|
|
|
|
4
|
$this->{_Selection} = $this->selection_validate_internal($this->{_Selection}); |
2947
|
|
|
|
|
|
|
} |
2948
|
|
|
|
|
|
|
} |
2949
|
|
|
|
|
|
|
|
2950
|
|
|
|
|
|
|
sub selection_validate_internal |
2951
|
|
|
|
|
|
|
{ |
2952
|
19
|
|
|
19
|
0
|
31
|
my $this = shift; |
2953
|
19
|
|
|
|
|
29
|
my ($Selection) = @_; |
2954
|
|
|
|
|
|
|
|
2955
|
19
|
|
|
|
|
37
|
my $Length = $this->length(); |
2956
|
|
|
|
|
|
|
|
2957
|
19
|
100
|
|
|
|
39
|
$Selection = [grep {(($_ >= 0) && ($_ < $Length))} @$Selection]; |
|
71
|
|
|
|
|
299
|
|
2958
|
|
|
|
|
|
|
|
2959
|
19
|
|
|
|
|
43
|
return($Selection); |
2960
|
|
|
|
|
|
|
} |
2961
|
|
|
|
|
|
|
|
2962
|
|
|
|
|
|
|
sub sel ## ($ColName, [$Vector]) |
2963
|
|
|
|
|
|
|
{ |
2964
|
284
|
|
|
284
|
0
|
474
|
my $this = shift; |
2965
|
284
|
|
|
|
|
380
|
my ($ColName, $Vector) = @_; |
2966
|
|
|
|
|
|
|
|
2967
|
|
|
|
|
|
|
## Set if specified. |
2968
|
284
|
50
|
|
|
|
447
|
if (defined($Vector)) |
2969
|
|
|
|
|
|
|
{ |
2970
|
0
|
|
|
|
|
0
|
$this->sel_set($ColName, $Vector); |
2971
|
|
|
|
|
|
|
## Nothing to return. |
2972
|
|
|
|
|
|
|
} |
2973
|
|
|
|
|
|
|
## Get and return. |
2974
|
|
|
|
|
|
|
else |
2975
|
|
|
|
|
|
|
{ |
2976
|
284
|
|
|
|
|
521
|
my $Sel = $this->sel_get($ColName); |
2977
|
284
|
|
|
|
|
798
|
return($Sel); |
2978
|
|
|
|
|
|
|
} |
2979
|
|
|
|
|
|
|
} |
2980
|
|
|
|
|
|
|
|
2981
|
|
|
|
|
|
|
sub sel_get |
2982
|
|
|
|
|
|
|
{ |
2983
|
318
|
|
|
318
|
0
|
500
|
my $this = shift; |
2984
|
318
|
|
|
|
|
473
|
my ($ColName, $Selection) = @_; |
2985
|
|
|
|
|
|
|
|
2986
|
318
|
|
|
|
|
646
|
my $Col = $this->col($ColName); |
2987
|
318
|
|
66
|
|
|
1098
|
$Selection ||= $this->selection(); |
2988
|
318
|
|
|
|
|
1083
|
my $Sel = [@$Col[@$Selection]]; |
2989
|
|
|
|
|
|
|
|
2990
|
318
|
|
|
|
|
601
|
return($Sel); |
2991
|
|
|
|
|
|
|
} |
2992
|
|
|
|
|
|
|
|
2993
|
|
|
|
|
|
|
sub sel_set ## ($ColName, [$Vector]) |
2994
|
|
|
|
|
|
|
{ |
2995
|
5
|
|
|
5
|
0
|
18
|
my $this = shift; |
2996
|
5
|
|
|
|
|
7
|
my ($ColName, $Vector) = @_; |
2997
|
|
|
|
|
|
|
|
2998
|
5
|
|
|
|
|
11
|
my $Col = $this->col($ColName); |
2999
|
5
|
|
|
|
|
11
|
my $Selection = $this->selection(); |
3000
|
|
|
|
|
|
|
|
3001
|
5
|
100
|
66
|
|
|
24
|
if (defined($Vector) && (ref($Vector) eq 'ARRAY')) |
3002
|
|
|
|
|
|
|
{ |
3003
|
3
|
|
|
|
|
13
|
@$Col[@$Selection] = @$Vector; |
3004
|
|
|
|
|
|
|
} |
3005
|
|
|
|
|
|
|
else |
3006
|
|
|
|
|
|
|
{ |
3007
|
2
|
|
|
|
|
6
|
@$Col[@$Selection] = undef; |
3008
|
|
|
|
|
|
|
} |
3009
|
|
|
|
|
|
|
} |
3010
|
|
|
|
|
|
|
|
3011
|
|
|
|
|
|
|
sub sel_clear ## ($ColName) |
3012
|
|
|
|
|
|
|
{ |
3013
|
2
|
|
|
2
|
0
|
8
|
my $this = shift; |
3014
|
2
|
|
|
|
|
3
|
my ($ColName) = @_; |
3015
|
|
|
|
|
|
|
|
3016
|
2
|
|
|
|
|
6
|
$this->sel_set($ColName); |
3017
|
|
|
|
|
|
|
} |
3018
|
|
|
|
|
|
|
|
3019
|
|
|
|
|
|
|
sub sel_len |
3020
|
|
|
|
|
|
|
{ |
3021
|
20
|
|
|
20
|
0
|
86
|
my $this = shift; |
3022
|
|
|
|
|
|
|
|
3023
|
16
|
|
|
|
|
50
|
return(ref($this->{_Selection}) eq 'ARRAY' ? |
3024
|
20
|
100
|
|
|
|
51
|
@{$this->{_Selection}}+0 : |
3025
|
|
|
|
|
|
|
$this->length()); |
3026
|
|
|
|
|
|
|
} |
3027
|
|
|
|
|
|
|
|
3028
|
|
|
|
|
|
|
sub sels ## ($ColNames) |
3029
|
|
|
|
|
|
|
{ |
3030
|
56
|
|
|
56
|
0
|
71
|
my $this = shift; |
3031
|
56
|
|
|
|
|
76
|
my ($ColNames) = @_; |
3032
|
56
|
|
66
|
|
|
128
|
$ColNames ||= $this->fieldlist(); |
3033
|
56
|
|
|
|
|
196
|
my $Sels = [map {$this->sel($_)} @$ColNames]; |
|
216
|
|
|
|
|
427
|
|
3034
|
|
|
|
|
|
|
|
3035
|
56
|
|
|
|
|
268
|
return($Sels); |
3036
|
|
|
|
|
|
|
} |
3037
|
|
|
|
|
|
|
|
3038
|
|
|
|
|
|
|
sub sels_hash ## ($ColNames) |
3039
|
|
|
|
|
|
|
{ |
3040
|
4
|
|
|
4
|
0
|
8
|
my $this = shift; |
3041
|
4
|
|
|
|
|
7
|
my ($ColNames) = @_; |
3042
|
4
|
|
66
|
|
|
18
|
$ColNames ||= $this->fieldlist(); |
3043
|
4
|
|
|
|
|
9
|
my $Sels = $this->sels($ColNames); |
3044
|
4
|
|
|
|
|
8
|
my $SelsHash = {}; @$SelsHash{@$ColNames} = @$Sels; |
|
4
|
|
|
|
|
16
|
|
3045
|
|
|
|
|
|
|
|
3046
|
4
|
|
|
|
|
35
|
return($SelsHash); |
3047
|
|
|
|
|
|
|
} |
3048
|
|
|
|
|
|
|
|
3049
|
|
|
|
|
|
|
=pod |
3050
|
|
|
|
|
|
|
|
3051
|
|
|
|
|
|
|
=head1 SEARCHING / SELECTING RECORDS |
3052
|
|
|
|
|
|
|
|
3053
|
|
|
|
|
|
|
## Modifying the table's custom selection (_Selection) |
3054
|
|
|
|
|
|
|
|
3055
|
|
|
|
|
|
|
$t->select_all() ## Set _Selection = $t->all() or undef |
3056
|
|
|
|
|
|
|
$t->select_none() ## Set _Selection = [] |
3057
|
|
|
|
|
|
|
$t->select_inverse() ## Invert the curr. sel. (and get it) |
3058
|
|
|
|
|
|
|
|
3059
|
|
|
|
|
|
|
## Specific searches: "the select() methods" |
3060
|
|
|
|
|
|
|
|
3061
|
|
|
|
|
|
|
$t->select($Field1=>$Sub1, ## Del nonmatching recs from sel. |
3062
|
|
|
|
|
|
|
$Field2=>$Sub2, ## i.e. narrow sel. to match |
3063
|
|
|
|
|
|
|
...); |
3064
|
|
|
|
|
|
|
|
3065
|
|
|
|
|
|
|
$t->omit ($Field1=>$Sub1, ## Del matching recs from sel. |
3066
|
|
|
|
|
|
|
$Field2=>$Sub2, |
3067
|
|
|
|
|
|
|
...); |
3068
|
|
|
|
|
|
|
|
3069
|
|
|
|
|
|
|
$t->add ($Field1=>$Sub1, ## Add matching recs to sel. |
3070
|
|
|
|
|
|
|
$Field2=>$Sub2, |
3071
|
|
|
|
|
|
|
...); |
3072
|
|
|
|
|
|
|
|
3073
|
|
|
|
|
|
|
$t->but ($Field1=>$Sub1, ## Add nonmatching recs to sel. |
3074
|
|
|
|
|
|
|
$Field2=>$Sub2, |
3075
|
|
|
|
|
|
|
...); |
3076
|
|
|
|
|
|
|
|
3077
|
|
|
|
|
|
|
## Getting useful lists of record numbers... |
3078
|
|
|
|
|
|
|
|
3079
|
|
|
|
|
|
|
$t->all() ## Get "full" sel. (all record #s) |
3080
|
|
|
|
|
|
|
$t->selection() ## Get current selection |
3081
|
|
|
|
|
|
|
$t->selection_inverse() ## Get inverse copy of curr. sel. |
3082
|
|
|
|
|
|
|
|
3083
|
|
|
|
|
|
|
## Example 1: Refine a selection by narrowing down... |
3084
|
|
|
|
|
|
|
|
3085
|
|
|
|
|
|
|
$t->select_all() |
3086
|
|
|
|
|
|
|
$t->select(Field1 => sub {$_}); |
3087
|
|
|
|
|
|
|
$t->select(Field2 => sub {$_}); |
3088
|
|
|
|
|
|
|
$t->select(Field3 => sub {$_}); |
3089
|
|
|
|
|
|
|
|
3090
|
|
|
|
|
|
|
## Example 2: Manually refine and set the selection... |
3091
|
|
|
|
|
|
|
|
3092
|
|
|
|
|
|
|
$Sel = [grep {$t->col($Field1)->[$_]} @{$t->all ()}]; |
3093
|
|
|
|
|
|
|
$Sel = [grep {$t->col($Field2)->[$_]} @$Sel]; |
3094
|
|
|
|
|
|
|
$Sel = [grep {$t->col($Field3)->[$_]} @$Sel]; |
3095
|
|
|
|
|
|
|
$t->selection($Sel); ## Set the selection when done. |
3096
|
|
|
|
|
|
|
|
3097
|
|
|
|
|
|
|
## Example 3: Complex manual search using calculated value |
3098
|
|
|
|
|
|
|
|
3099
|
|
|
|
|
|
|
my $A = $t->col('A'); |
3100
|
|
|
|
|
|
|
my $B = $t->col('B'); |
3101
|
|
|
|
|
|
|
my $S = [grep |
3102
|
|
|
|
|
|
|
{my $X = $A->[$_] + $B->[$_]; ($X > 100 && $X < 200);} |
3103
|
|
|
|
|
|
|
@{$t->all()}]; ## Or could start with $t->selection(). |
3104
|
|
|
|
|
|
|
$t->selection($S); ## Set the selection when done. |
3105
|
|
|
|
|
|
|
|
3106
|
|
|
|
|
|
|
## Example 4: Refine a selection by building up... |
3107
|
|
|
|
|
|
|
|
3108
|
|
|
|
|
|
|
$t->select_none() |
3109
|
|
|
|
|
|
|
$t->add(Field1 => sub {$_}); |
3110
|
|
|
|
|
|
|
$t->add(Field2 => sub {$_}); |
3111
|
|
|
|
|
|
|
$t->add(Field3 => sub {$_}); |
3112
|
|
|
|
|
|
|
|
3113
|
|
|
|
|
|
|
## Example 5: Combining the select() methods to build a query... |
3114
|
|
|
|
|
|
|
|
3115
|
|
|
|
|
|
|
$t->select_all() |
3116
|
|
|
|
|
|
|
$t->select(Status => sub {/prime/i }); |
3117
|
|
|
|
|
|
|
$t->omit (DueDate => sub {$_ > $Today}); |
3118
|
|
|
|
|
|
|
$t->add (Force => sub {$_ }); |
3119
|
|
|
|
|
|
|
|
3120
|
|
|
|
|
|
|
select() and its friends omit(), add(), and but(), known collectively |
3121
|
|
|
|
|
|
|
as "the select() methods," all work similarly: they take a series of |
3122
|
|
|
|
|
|
|
one or more pairs indicating matches to be done, where each match is |
3123
|
|
|
|
|
|
|
specified as (FieldName => Subroutine). |
3124
|
|
|
|
|
|
|
|
3125
|
|
|
|
|
|
|
In addition to the field names already present in the table, the |
3126
|
|
|
|
|
|
|
FieldName in any Spec may also be one of these two special |
3127
|
|
|
|
|
|
|
pseudo-fields: |
3128
|
|
|
|
|
|
|
|
3129
|
|
|
|
|
|
|
=over 4 |
3130
|
|
|
|
|
|
|
|
3131
|
|
|
|
|
|
|
=item _RecNum |
3132
|
|
|
|
|
|
|
|
3133
|
|
|
|
|
|
|
the record number of the record being compared |
3134
|
|
|
|
|
|
|
|
3135
|
|
|
|
|
|
|
=item _SelNum |
3136
|
|
|
|
|
|
|
|
3137
|
|
|
|
|
|
|
the numerical position of the record being compared within the |
3138
|
|
|
|
|
|
|
previous selection (only usable with select() and omit() since add() |
3139
|
|
|
|
|
|
|
and but() by definition operate on non-selected records). |
3140
|
|
|
|
|
|
|
|
3141
|
|
|
|
|
|
|
=back |
3142
|
|
|
|
|
|
|
|
3143
|
|
|
|
|
|
|
For example: |
3144
|
|
|
|
|
|
|
|
3145
|
|
|
|
|
|
|
## Match 2nd 100 rec numbers |
3146
|
|
|
|
|
|
|
$t->select(_RecNum => sub {$_ >= 100 && $_ <= 199}); |
3147
|
|
|
|
|
|
|
|
3148
|
|
|
|
|
|
|
## Match 2nd 100 currently selected/sorted items |
3149
|
|
|
|
|
|
|
$t->select(_SelNum => sub {$_ >= 100 && $_ <= 199}); |
3150
|
|
|
|
|
|
|
|
3151
|
|
|
|
|
|
|
Be careful when using _SelNum in a search. In the above _SelNum search |
3152
|
|
|
|
|
|
|
example, since the selection itself will be modified by select(), the |
3153
|
|
|
|
|
|
|
items that were formerly selection items 100 - 199 will now be _SelNum |
3154
|
|
|
|
|
|
|
0 - 99 in the new selection. |
3155
|
|
|
|
|
|
|
|
3156
|
|
|
|
|
|
|
The Subroutine is an anonymous grep-style predicate that operates on |
3157
|
|
|
|
|
|
|
$_ and should return true/false to indicate a match with an element of |
3158
|
|
|
|
|
|
|
the field FieldName. |
3159
|
|
|
|
|
|
|
|
3160
|
|
|
|
|
|
|
The order of multiple matches in a single method call is significant |
3161
|
|
|
|
|
|
|
only in that the searches can be faster if the field that will match |
3162
|
|
|
|
|
|
|
the fewest records is listed first. |
3163
|
|
|
|
|
|
|
|
3164
|
|
|
|
|
|
|
A given FieldName may be listed in the specs more than once if it has |
3165
|
|
|
|
|
|
|
multiple search criteria that you prefer to execute as multiple |
3166
|
|
|
|
|
|
|
subroutines (though it would be more efficient on very large tables to |
3167
|
|
|
|
|
|
|
combine their logic into one subroutine joined with "&&"). |
3168
|
|
|
|
|
|
|
|
3169
|
|
|
|
|
|
|
Each field match will be applied (with an implied AND joining them) to |
3170
|
|
|
|
|
|
|
determine whether the record itself matches. Then, based on whether |
3171
|
|
|
|
|
|
|
the record itself matches, it will either be added or deleted from the |
3172
|
|
|
|
|
|
|
selection based on which method is being called: |
3173
|
|
|
|
|
|
|
|
3174
|
|
|
|
|
|
|
method... operates on... action.... |
3175
|
|
|
|
|
|
|
------------------------------------------------------------------ |
3176
|
|
|
|
|
|
|
select() selected records Keep only recs that DO match |
3177
|
|
|
|
|
|
|
omit() selected records Keep only recs that DO NOT match |
3178
|
|
|
|
|
|
|
add() non-selected recs Add recs that DO match |
3179
|
|
|
|
|
|
|
but() non-selected recs Add recs that DO NOT match |
3180
|
|
|
|
|
|
|
|
3181
|
|
|
|
|
|
|
Here's how to think about what's going on: |
3182
|
|
|
|
|
|
|
|
3183
|
|
|
|
|
|
|
methods... think... |
3184
|
|
|
|
|
|
|
------------------------------------------------------------------ |
3185
|
|
|
|
|
|
|
select() "SELECT things matching this"... |
3186
|
|
|
|
|
|
|
omit() "... then OMIT those matching this." |
3187
|
|
|
|
|
|
|
|
3188
|
|
|
|
|
|
|
select() "SELECT things matching this"... |
3189
|
|
|
|
|
|
|
add() "... and ADD any others matching this." |
3190
|
|
|
|
|
|
|
|
3191
|
|
|
|
|
|
|
select() "SELECT things matching this"... |
3192
|
|
|
|
|
|
|
but() "... and add any others BUT those matching this." |
3193
|
|
|
|
|
|
|
|
3194
|
|
|
|
|
|
|
select() and omit() both NARROW the selection. |
3195
|
|
|
|
|
|
|
|
3196
|
|
|
|
|
|
|
add() and but() both INCREASE the selection. |
3197
|
|
|
|
|
|
|
|
3198
|
|
|
|
|
|
|
IMPORTANT: You DO NOT need to use these select() routines to work with |
3199
|
|
|
|
|
|
|
selections. It may be much easier for you to clarify your logic, or |
3200
|
|
|
|
|
|
|
more efficient to express your search, using a single grep or series |
3201
|
|
|
|
|
|
|
of grep operations as in Examples 2 or 3 above. |
3202
|
|
|
|
|
|
|
|
3203
|
|
|
|
|
|
|
Building the selection manually is required if you want to filter |
3204
|
|
|
|
|
|
|
based on any COMPLEX RELATIONSHIPS BETWEEN FIELDS. For example, if |
3205
|
|
|
|
|
|
|
you want to add two fields and match or reject the record based on the |
3206
|
|
|
|
|
|
|
sum of the fields. |
3207
|
|
|
|
|
|
|
|
3208
|
|
|
|
|
|
|
In Example 3 above, we add the values in fields "A" and "B" and then |
3209
|
|
|
|
|
|
|
match the record only if the SUM is between 100 and 199. By grepping |
3210
|
|
|
|
|
|
|
to produce a subset of @{$t->all()}, you end up with a Selection -- a |
3211
|
|
|
|
|
|
|
list of record numbers you want "selected". Then you call |
3212
|
|
|
|
|
|
|
$t->selection() to put the selection you built into the object. |
3213
|
|
|
|
|
|
|
|
3214
|
|
|
|
|
|
|
If you had instead wanted to narrow an existing selection in the above |
3215
|
|
|
|
|
|
|
example, you would start with $t->selection() (which defaults to |
3216
|
|
|
|
|
|
|
$t->all()) instead of starting with $t->all(). |
3217
|
|
|
|
|
|
|
|
3218
|
|
|
|
|
|
|
Each of the select() methods returns $this->selection() as a |
3219
|
|
|
|
|
|
|
convenience. |
3220
|
|
|
|
|
|
|
|
3221
|
|
|
|
|
|
|
=head2 The effects of modifying a sorted selection |
3222
|
|
|
|
|
|
|
|
3223
|
|
|
|
|
|
|
Generally, you should sort AFTER finding, and you should not generally |
3224
|
|
|
|
|
|
|
rely on sort order after doing a find. But in case you want to know, |
3225
|
|
|
|
|
|
|
the following chart explains what happens to the sort order after the |
3226
|
|
|
|
|
|
|
various select() commands are called (at least in the current |
3227
|
|
|
|
|
|
|
implementation, which may change without notice): |
3228
|
|
|
|
|
|
|
|
3229
|
|
|
|
|
|
|
method... effect on an existing sort order... |
3230
|
|
|
|
|
|
|
------------------------------------------------------------------ |
3231
|
|
|
|
|
|
|
select() relative sort order is preserved (stay sorted) |
3232
|
|
|
|
|
|
|
omit() all selected recs restored to "natural" order (unsorted) |
3233
|
|
|
|
|
|
|
add() orig. recs preserved; new recs appended: "natural" order |
3234
|
|
|
|
|
|
|
but() orig. recs preserved; new recs appended: "natural" order |
3235
|
|
|
|
|
|
|
|
3236
|
|
|
|
|
|
|
In other words, you could sort() first and then call select() to |
3237
|
|
|
|
|
|
|
narrow down the selection repeatedly without disrupting the sort |
3238
|
|
|
|
|
|
|
order. However, any of the other methods will disrupt the sort order |
3239
|
|
|
|
|
|
|
and you would need to re-sort. The preservation of order when using |
3240
|
|
|
|
|
|
|
select(), and other sort order effects, are likely but not guaranteed |
3241
|
|
|
|
|
|
|
to be preserved in future implementations. |
3242
|
|
|
|
|
|
|
|
3243
|
|
|
|
|
|
|
=head2 Hints about Boolean logic |
3244
|
|
|
|
|
|
|
|
3245
|
|
|
|
|
|
|
Consider the following example and the alternative below it. You |
3246
|
|
|
|
|
|
|
might initially think these are equivalent, but they're not: |
3247
|
|
|
|
|
|
|
|
3248
|
|
|
|
|
|
|
## Case 1: |
3249
|
|
|
|
|
|
|
|
3250
|
|
|
|
|
|
|
my $Sel = $t->add(Force => sub {$_ == 1 }); |
3251
|
|
|
|
|
|
|
my $Sel = $t->add(Status => sub {$_ eq 'Prime'}); |
3252
|
|
|
|
|
|
|
|
3253
|
|
|
|
|
|
|
## Case 2: |
3254
|
|
|
|
|
|
|
|
3255
|
|
|
|
|
|
|
my $Sel = $t->add(Force => sub {$_ == 1 }, |
3256
|
|
|
|
|
|
|
Status => sub {$_ eq 'Prime'}); |
3257
|
|
|
|
|
|
|
|
3258
|
|
|
|
|
|
|
Case 1 extends the selection by adding all records where Force == 1, |
3259
|
|
|
|
|
|
|
and then extends it again by adding all additional records where |
3260
|
|
|
|
|
|
|
Status eq 'Prime'. |
3261
|
|
|
|
|
|
|
|
3262
|
|
|
|
|
|
|
Case 2 adds only those records where: Force == 1 AND ALSO, IN THE SAME |
3263
|
|
|
|
|
|
|
RECORD, Status eq 'Prime'. |
3264
|
|
|
|
|
|
|
|
3265
|
|
|
|
|
|
|
One final note about logic. This is not SQL and these select() etc. |
3266
|
|
|
|
|
|
|
routines are not meant to replace the full power of a programming |
3267
|
|
|
|
|
|
|
language. |
3268
|
|
|
|
|
|
|
|
3269
|
|
|
|
|
|
|
If you want full Boolean expressions, use the power of Perl to form |
3270
|
|
|
|
|
|
|
your own arbitrarily complex query using grep as in Example 3 above. |
3271
|
|
|
|
|
|
|
|
3272
|
|
|
|
|
|
|
Writing your own grep is also almost always faster than chaining the |
3273
|
|
|
|
|
|
|
builtin select() methods or using multiple Field / Sub specifications, |
3274
|
|
|
|
|
|
|
so keep that in mind when working with extremely large data sets. |
3275
|
|
|
|
|
|
|
|
3276
|
|
|
|
|
|
|
With tables of only a few thousand records or so, you probably won't |
3277
|
|
|
|
|
|
|
notice the difference in efficiency. |
3278
|
|
|
|
|
|
|
|
3279
|
|
|
|
|
|
|
=cut |
3280
|
|
|
|
|
|
|
|
3281
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
3282
|
|
|
|
|
|
|
|
3283
|
4
|
|
|
4
|
0
|
35
|
sub select {my $this = shift; return($this->select_internal(!'Add', !'Not', @_))} |
|
4
|
|
|
|
|
13
|
|
3284
|
5
|
|
|
5
|
0
|
29
|
sub omit {my $this = shift; return($this->select_internal(!'Add', 'Not', @_))} |
|
5
|
|
|
|
|
19
|
|
3285
|
3
|
|
|
3
|
0
|
23
|
sub add {my $this = shift; return($this->select_internal( 'Add', !'Not', @_))} |
|
3
|
|
|
|
|
8
|
|
3286
|
1
|
|
|
1
|
0
|
8
|
sub but {my $this = shift; return($this->select_internal( 'Add', 'Not', @_))} |
|
1
|
|
|
|
|
4
|
|
3287
|
|
|
|
|
|
|
|
3288
|
|
|
|
|
|
|
sub select_internal ## Implements all 4 "select() methods" |
3289
|
|
|
|
|
|
|
{ |
3290
|
13
|
|
|
13
|
0
|
23
|
my $this = shift; |
3291
|
13
|
|
|
|
|
34
|
my ($Add, $Not, @Specs) = @_; |
3292
|
|
|
|
|
|
|
|
3293
|
|
|
|
|
|
|
## In "Add" mode, we only operate on not-yet-selected records. |
3294
|
|
|
|
|
|
|
## Otherwise, we operate on the current selection. |
3295
|
|
|
|
|
|
|
|
3296
|
|
|
|
|
|
|
## Either way, start out with all of one or the other. |
3297
|
|
|
|
|
|
|
|
3298
|
13
|
100
|
|
|
|
40
|
my $Start = ($Add ? $this->selection_inverse() : $this->selection()); |
3299
|
|
|
|
|
|
|
|
3300
|
|
|
|
|
|
|
## Then grep repeatedly for each Field/Sub spec we were given, in |
3301
|
|
|
|
|
|
|
## order. For a record to match, ALL specs must match -- i.e. the |
3302
|
|
|
|
|
|
|
## record number must make it through the grep gauntlet once for |
3303
|
|
|
|
|
|
|
## each Field/Sub in the Specs. |
3304
|
|
|
|
|
|
|
|
3305
|
13
|
|
|
|
|
22
|
my $Pseudo = {}; ## hold pseudo-columns _RecNum, _SelNum if requested. |
3306
|
|
|
|
|
|
|
|
3307
|
13
|
|
|
|
|
19
|
my $Matches = $Start; |
3308
|
|
|
|
|
|
|
|
3309
|
13
|
|
|
|
|
18
|
my $i = 0; |
3310
|
13
|
|
|
|
|
36
|
while ($i < (@Specs - 1)) |
3311
|
|
|
|
|
|
|
{ |
3312
|
13
|
|
|
|
|
31
|
my ($Field, $Sub) = @Specs[$i++, $i++]; |
3313
|
|
|
|
|
|
|
|
3314
|
13
|
50
|
33
|
|
|
87
|
next unless ((length($Field)) && (ref($Sub) eq 'CODE')); |
3315
|
|
|
|
|
|
|
|
3316
|
|
|
|
|
|
|
## Create pseudo-fields _RecNum / _SelNum if needed, but at |
3317
|
|
|
|
|
|
|
## most once per invocation. |
3318
|
|
|
|
|
|
|
|
3319
|
13
|
50
|
0
|
|
|
33
|
$Pseudo->{_RecNum} ||= $this->all() if ($Field eq '_RecNum'); |
3320
|
13
|
50
|
0
|
|
|
28
|
$Pseudo->{_SelNum} ||= $this->selnum_mask() if ($Field eq '_SelNum'); |
3321
|
|
|
|
|
|
|
|
3322
|
|
|
|
|
|
|
## Narrow down $Matches using this Field/Spec, then move on to next. |
3323
|
36
|
50
|
33
|
|
|
142
|
$Matches = |
3324
|
|
|
|
|
|
|
[grep |
3325
|
|
|
|
|
|
|
{ |
3326
|
|
|
|
|
|
|
## Locally bind $_ to value of field in this column / this record. |
3327
|
13
|
|
|
|
|
23
|
local $_ = $ {($this ->{$Field} || ## 98% of time: good field name |
|
36
|
|
|
|
|
178
|
|
3328
|
|
|
|
|
|
|
$Pseudo->{$Field} || ## 1% of time: _RecNum or _SelNum |
3329
|
|
|
|
|
|
|
($this ->warn("Bad field name: $Field"), |
3330
|
|
|
|
|
|
|
$this ->col_empty()) |
3331
|
|
|
|
|
|
|
)}[$_]; ## look up value in record $_ of column |
3332
|
|
|
|
|
|
|
|
3333
|
|
|
|
|
|
|
## Call the sub & let it yield the Boolean value. |
3334
|
36
|
|
|
|
|
73
|
&$Sub(); |
3335
|
|
|
|
|
|
|
} |
3336
|
|
|
|
|
|
|
@$Matches]; |
3337
|
|
|
|
|
|
|
} |
3338
|
|
|
|
|
|
|
|
3339
|
|
|
|
|
|
|
## IMPLEMENTATION NOTE: |
3340
|
|
|
|
|
|
|
|
3341
|
|
|
|
|
|
|
## The logic below to support "Not" looks complicated, and indeed |
3342
|
|
|
|
|
|
|
## it could be made cleaner if we were to process the "Not" logic |
3343
|
|
|
|
|
|
|
## during the previous step. However, doing that would make the |
3344
|
|
|
|
|
|
|
## above nested loop(s) above much less efficient because we'd |
3345
|
|
|
|
|
|
|
## have to move the while() loop inside the grep -- repeating that |
3346
|
|
|
|
|
|
|
## loop up to several times for each record instead of just a few |
3347
|
|
|
|
|
|
|
## times total. So the logic below will actually save execution |
3348
|
|
|
|
|
|
|
## time. Besides, using array-slicing to achieve the selection |
3349
|
|
|
|
|
|
|
## masking is quite fast. |
3350
|
|
|
|
|
|
|
|
3351
|
|
|
|
|
|
|
## "Add" means append the matching record numbers to the existing |
3352
|
|
|
|
|
|
|
## selection, if any. |
3353
|
|
|
|
|
|
|
|
3354
|
|
|
|
|
|
|
## sub select() |
3355
|
13
|
100
|
100
|
|
|
173
|
if (!$Not && !$Add) ## ... i.e. remove unfound (i.e. keep (only) the ones we found)... |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
3356
|
|
|
|
|
|
|
{ |
3357
|
4
|
|
|
|
|
11
|
$this->{_Selection} = $Matches; ## sort order is preserved... |
3358
|
|
|
|
|
|
|
} |
3359
|
|
|
|
|
|
|
|
3360
|
|
|
|
|
|
|
## sub add() ## ... i.e. add the ones we found... |
3361
|
|
|
|
|
|
|
elsif (!$Not && $Add) |
3362
|
|
|
|
|
|
|
{ |
3363
|
3
|
|
|
|
|
5
|
push @{$this->selection()}, @$Matches; ## order preserved in first part only |
|
3
|
|
|
|
|
8
|
|
3364
|
|
|
|
|
|
|
} |
3365
|
|
|
|
|
|
|
|
3366
|
|
|
|
|
|
|
## ! "Add" means replace the existing selection with those that |
3367
|
|
|
|
|
|
|
## matched (effectively removing any non-matching ones). |
3368
|
|
|
|
|
|
|
|
3369
|
|
|
|
|
|
|
## "Not" means select the opposite of the set of records we just |
3370
|
|
|
|
|
|
|
## matched. |
3371
|
|
|
|
|
|
|
|
3372
|
|
|
|
|
|
|
## sub omit() |
3373
|
|
|
|
|
|
|
elsif ( $Not && !$Add) ## ... i.e. remove the opposite of the ones we found... |
3374
|
|
|
|
|
|
|
{ |
3375
|
5
|
|
|
|
|
20
|
my $Sel = $this->col_empty(); ## Start with empty mask (all entries undef). |
3376
|
5
|
|
|
|
|
17
|
@$Sel[@$Start] = @$Start; ## Mask in those in the original selection. |
3377
|
5
|
|
|
|
|
14
|
@$Sel[@$Matches] = undef; ## Mask out those we found. |
3378
|
5
|
|
|
|
|
13
|
my $NonMatches = [grep {defined} @$Sel]; ## The remaining ones are the non-matches. |
|
15
|
|
|
|
|
32
|
|
3379
|
|
|
|
|
|
|
|
3380
|
5
|
|
|
|
|
19
|
$this->{_Selection} = $NonMatches; ## The new selection IS the non-matches. |
3381
|
|
|
|
|
|
|
|
3382
|
|
|
|
|
|
|
## selection order not preserved |
3383
|
|
|
|
|
|
|
} |
3384
|
|
|
|
|
|
|
|
3385
|
|
|
|
|
|
|
## sub but() |
3386
|
|
|
|
|
|
|
elsif ( $Not && $Add) ## ... i.e. add the opposite of the ones we found... |
3387
|
|
|
|
|
|
|
{ |
3388
|
1
|
|
|
|
|
5
|
my $Sel = $this->all(); ## Start with a full selection mask. |
3389
|
1
|
|
|
|
|
2
|
@$Sel[@{$this->selection()}] = undef; ## Mask out those in original selection. |
|
1
|
|
|
|
|
5
|
|
3390
|
1
|
|
|
|
|
4
|
@$Sel[@$Matches ] = undef; ## Mask out those we found. |
3391
|
1
|
|
|
|
|
3
|
my $NonMatches = [grep {defined} @$Sel]; ## The remaining ones are the non-matches. |
|
3
|
|
|
|
|
8
|
|
3392
|
|
|
|
|
|
|
|
3393
|
1
|
|
|
|
|
3
|
push @{$this->selection()}, @$NonMatches; ## Add the non-matches to the selection. |
|
1
|
|
|
|
|
4
|
|
3394
|
|
|
|
|
|
|
|
3395
|
|
|
|
|
|
|
## order preserved in first part only |
3396
|
|
|
|
|
|
|
} |
3397
|
|
|
|
|
|
|
|
3398
|
13
|
|
|
|
|
28
|
return($this->selection()); |
3399
|
|
|
|
|
|
|
} |
3400
|
|
|
|
|
|
|
|
3401
|
|
|
|
|
|
|
sub selnum_mask ## Create mask mapping RecNum -> selected item num or undef if not selected |
3402
|
|
|
|
|
|
|
{ |
3403
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
3404
|
0
|
|
|
|
|
0
|
my $Mask = $this->col_empty(); |
3405
|
0
|
|
|
|
|
0
|
my $Sel = $this->selection(); |
3406
|
0
|
|
|
|
|
0
|
@$Mask[@$Sel] = [0..$#$Sel]; |
3407
|
|
|
|
|
|
|
|
3408
|
0
|
|
|
|
|
0
|
return($Mask); |
3409
|
|
|
|
|
|
|
} |
3410
|
|
|
|
|
|
|
|
3411
|
|
|
|
|
|
|
=pod |
3412
|
|
|
|
|
|
|
|
3413
|
|
|
|
|
|
|
=head1 SORTING |
3414
|
|
|
|
|
|
|
|
3415
|
|
|
|
|
|
|
## Sort the current table's _Selection |
3416
|
|
|
|
|
|
|
|
3417
|
|
|
|
|
|
|
$t->sort() ## Use existing/default params |
3418
|
|
|
|
|
|
|
$t->sort([qw(Last First Phone)]) ## Specify _SortOrder (fields) |
3419
|
|
|
|
|
|
|
$t->sort( ## Named-parameter call: |
3420
|
|
|
|
|
|
|
_SortOrder => [...], ## override sort-related params. |
3421
|
|
|
|
|
|
|
_Selection => [...], ## (See param lists above). |
3422
|
|
|
|
|
|
|
_SortSpecs => {...}, |
3423
|
|
|
|
|
|
|
_SRoutines => {...}, |
3424
|
|
|
|
|
|
|
_DefaultSortType=>'Integer', |
3425
|
|
|
|
|
|
|
_DefaultSortDirection=>-1, |
3426
|
|
|
|
|
|
|
); |
3427
|
|
|
|
|
|
|
|
3428
|
|
|
|
|
|
|
The sort() method modifies the _Selection (creating one with all |
3429
|
|
|
|
|
|
|
records if it was missing, undef, or not supplied by caller) so that |
3430
|
|
|
|
|
|
|
the record numbers listed there are sorted according to the criteria |
3431
|
|
|
|
|
|
|
implied by _SortOrder, _SortSpecs, _SRoutines, etc. |
3432
|
|
|
|
|
|
|
|
3433
|
|
|
|
|
|
|
For example, before sorting, a table's "natural" order might be: |
3434
|
|
|
|
|
|
|
|
3435
|
|
|
|
|
|
|
Rec# First Last Age State |
3436
|
|
|
|
|
|
|
0 Chris Zack 43 CA |
3437
|
|
|
|
|
|
|
1 Marco Bart 22 NV |
3438
|
|
|
|
|
|
|
2 Pearl Muth 15 HI |
3439
|
|
|
|
|
|
|
|
3440
|
|
|
|
|
|
|
... and its selection() method would yield: [0, 1, 2] -- which is a |
3441
|
|
|
|
|
|
|
list of all the records, in order. |
3442
|
|
|
|
|
|
|
|
3443
|
|
|
|
|
|
|
After calling $t->sort([Last]), selection() would yield [1, 2, 0]. So |
3444
|
|
|
|
|
|
|
displaying the table in "selection" order would yield: |
3445
|
|
|
|
|
|
|
|
3446
|
|
|
|
|
|
|
Rec# First Last Age State |
3447
|
|
|
|
|
|
|
1 Marco Bart 22 NV |
3448
|
|
|
|
|
|
|
2 Pearl Muth 15 HI |
3449
|
|
|
|
|
|
|
0 Chris Zack 43 CA |
3450
|
|
|
|
|
|
|
|
3451
|
|
|
|
|
|
|
IMPORTANT: sorting does not alter any data in the table. It merely |
3452
|
|
|
|
|
|
|
alters the _Selection parameter (which you can then get and set using |
3453
|
|
|
|
|
|
|
the selection() methods described above). |
3454
|
|
|
|
|
|
|
|
3455
|
|
|
|
|
|
|
If you want to permanently alter the table's data in memory so that |
3456
|
|
|
|
|
|
|
the new sorted order becomes the "natural" order, you can use the |
3457
|
|
|
|
|
|
|
cull() method to modify the original object, the snapshot() method to |
3458
|
|
|
|
|
|
|
make a new object, or use the write() method to write the data to disk |
3459
|
|
|
|
|
|
|
in selected/sorted order and then read() it back again. |
3460
|
|
|
|
|
|
|
|
3461
|
|
|
|
|
|
|
=head2 Using the Named-parameter calling convention with sort() |
3462
|
|
|
|
|
|
|
|
3463
|
|
|
|
|
|
|
You may specify any combination of the parameters listed above when |
3464
|
|
|
|
|
|
|
calling sort(). Any you specify will be used IN PLACE OF the |
3465
|
|
|
|
|
|
|
corresponding parameters already found in the object. |
3466
|
|
|
|
|
|
|
|
3467
|
|
|
|
|
|
|
If you specify _Selection using the named-parameter calling, the |
3468
|
|
|
|
|
|
|
sort() method reserves the right to "own" the list you provide, and |
3469
|
|
|
|
|
|
|
use it as the object's new _Selection, possibly discarding the |
3470
|
|
|
|
|
|
|
previous _Selection, if any and modifying the one you provided. So |
3471
|
|
|
|
|
|
|
don't make any assumptions about ownership of that list object after |
3472
|
|
|
|
|
|
|
calling sort(). Luckily, you will rarely need to provide _Selection |
3473
|
|
|
|
|
|
|
explicitly since generally you'll want to be sorting the selection() |
3474
|
|
|
|
|
|
|
already inherent in the object. |
3475
|
|
|
|
|
|
|
|
3476
|
|
|
|
|
|
|
sort() returns the _Selection list owned by the object (the same list |
3477
|
|
|
|
|
|
|
that would be returned if you called the selection() method |
3478
|
|
|
|
|
|
|
immediately after calling sort()). |
3479
|
|
|
|
|
|
|
|
3480
|
|
|
|
|
|
|
See the next sections for complete descriptions of _SortOrder and |
3481
|
|
|
|
|
|
|
other sorting parameters. |
3482
|
|
|
|
|
|
|
|
3483
|
|
|
|
|
|
|
=cut |
3484
|
|
|
|
|
|
|
|
3485
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
3486
|
|
|
|
|
|
|
|
3487
|
|
|
|
|
|
|
sub sort |
3488
|
|
|
|
|
|
|
{ |
3489
|
10
|
|
|
10
|
1
|
64
|
my $this = shift; |
3490
|
10
|
100
|
|
|
|
43
|
my $Params = (@_ == 1 ? {_SortOrder => $_[0]} : {@_}); |
3491
|
|
|
|
|
|
|
|
3492
|
60
|
|
|
|
|
111
|
my($SortOrder, $Selection, $SortSpecs, $SRoutines, $DefaultSortType, $DefaultSortDirection) |
3493
|
10
|
|
|
|
|
23
|
= map {$this->getparam($Params, $_)} |
3494
|
|
|
|
|
|
|
qw(_SortOrder _Selection _SortSpecs _SRoutines _DefaultSortType _DefaultSortDirection); |
3495
|
|
|
|
|
|
|
|
3496
|
|
|
|
|
|
|
## Validate / rectify all parameters... |
3497
|
|
|
|
|
|
|
|
3498
|
|
|
|
|
|
|
## Default sort order is Record Number |
3499
|
|
|
|
|
|
|
|
3500
|
10
|
50
|
33
|
|
|
69
|
$SortOrder = [qw(_RecNum)] unless ((ref($SortOrder) eq 'ARRAY') && @$SortOrder); |
3501
|
|
|
|
|
|
|
|
3502
|
|
|
|
|
|
|
## Note if we're going to sort on _RecNum ( requires extra work). |
3503
|
|
|
|
|
|
|
|
3504
|
10
|
|
|
|
|
18
|
my $NeedRecNum = grep {$_ eq '_RecNum'} @$SortOrder; |
|
14
|
|
|
|
|
38
|
|
3505
|
|
|
|
|
|
|
|
3506
|
|
|
|
|
|
|
## Default list of record numbers is all of them. |
3507
|
|
|
|
|
|
|
|
3508
|
10
|
100
|
|
|
|
34
|
$Selection = $this->selection() unless (ref($Selection) eq 'ARRAY'); |
3509
|
10
|
|
|
|
|
29
|
$Selection = $this->selection_validate_internal($Selection); |
3510
|
|
|
|
|
|
|
|
3511
|
|
|
|
|
|
|
## Our private copy of SortSpecs includes a spec for _RecNum |
3512
|
|
|
|
|
|
|
|
3513
|
10
|
50
|
|
|
|
32
|
$SortSpecs = {} unless (ref($SortSpecs) eq 'HASH'); |
3514
|
10
|
|
|
|
|
24
|
$SortSpecs = {%$SortSpecs}; |
3515
|
10
|
50
|
0
|
|
|
24
|
$SortSpecs ->{_RecNum} ||= {SortType => '_RecNum', SortDirection => 1} if $NeedRecNum; |
3516
|
|
|
|
|
|
|
|
3517
|
|
|
|
|
|
|
## Our private copy of SRoutines also has the builtin entries |
3518
|
|
|
|
|
|
|
## added in (including one for _RecNum) |
3519
|
|
|
|
|
|
|
|
3520
|
10
|
50
|
|
|
|
30
|
$SRoutines = {} unless (ref($SRoutines) eq 'HASH'); |
3521
|
10
|
|
|
|
|
20
|
$SRoutines = {%$SRoutines, %{$this->sortroutines_builtin()}}; |
|
10
|
|
|
|
|
26
|
|
3522
|
|
|
|
|
|
|
|
3523
|
|
|
|
|
|
|
## Ensure that DefaultSortType has a reasonable value for which we |
3524
|
|
|
|
|
|
|
## have a sort routine. |
3525
|
|
|
|
|
|
|
|
3526
|
10
|
50
|
33
|
|
|
77
|
$DefaultSortType = 'String' unless (length($DefaultSortType) && |
3527
|
|
|
|
|
|
|
exists($SRoutines->{$DefaultSortType})); |
3528
|
|
|
|
|
|
|
|
3529
|
|
|
|
|
|
|
## Ensure that DefaultSortDirection has a legal value (1 or -1; |
3530
|
|
|
|
|
|
|
## undef/0 will be treated as -1 (descending)) |
3531
|
|
|
|
|
|
|
|
3532
|
10
|
|
50
|
|
|
37
|
$DefaultSortDirection = (max(min(int($DefaultSortDirection), 1), -1) || -1); |
3533
|
|
|
|
|
|
|
|
3534
|
|
|
|
|
|
|
## Make some optimized lists of things to speed sorting. |
3535
|
|
|
|
|
|
|
|
3536
|
|
|
|
|
|
|
## Get a hash of all data columns in $this plus a temporary one |
3537
|
|
|
|
|
|
|
## for _RecNum if needed. |
3538
|
10
|
50
|
|
|
|
13
|
my $Cols = {%{$this->cols_hash($this->fieldlist_all())}, |
|
10
|
|
|
|
|
26
|
|
3539
|
|
|
|
|
|
|
($NeedRecNum ? (_RecNum => $this->all()) : ())}; |
3540
|
|
|
|
|
|
|
|
3541
|
|
|
|
|
|
|
## Get a list mapping field numbers in $SortOrder to data columns |
3542
|
|
|
|
|
|
|
## Get a list mapping field numbers in $SortOrder to sort directions |
3543
|
|
|
|
|
|
|
## Get a list mapping field numbers in $SortOrder to sort types |
3544
|
|
|
|
|
|
|
## Get a list mapping field numbers in $SortOrder to sort routines |
3545
|
|
|
|
|
|
|
## Get a list of the field numbers in $SortOrder |
3546
|
|
|
|
|
|
|
|
3547
|
10
|
50
|
|
|
|
43
|
my $SortCols = [map { $Cols->{$_} || $this->col($_) } @$SortOrder]; |
|
14
|
|
|
|
|
53
|
|
3548
|
10
|
100
|
|
|
|
21
|
my $SortDirs = [map {$ {$SortSpecs->{$_}||{}}{SortDirection} || $DefaultSortDirection} @$SortOrder]; |
|
14
|
100
|
|
|
|
17
|
|
|
14
|
|
|
|
|
108
|
|
3549
|
10
|
100
|
|
|
|
18
|
my $SortTypes = [map {$ {$SortSpecs->{$_}||{}}{SortType } || $DefaultSortType } @$SortOrder]; |
|
14
|
100
|
|
|
|
20
|
|
|
14
|
|
|
|
|
104
|
|
3550
|
10
|
50
|
33
|
0
|
|
17
|
my $SortSubs = [map { $SRoutines->{$_} || $SRoutines->{'String'} || sub {0} } @$SortTypes]; |
|
14
|
|
|
|
|
58
|
|
|
0
|
|
|
|
|
0
|
|
3551
|
10
|
|
|
|
|
29
|
my $FieldNums = [0 .. $#$SortOrder]; |
3552
|
|
|
|
|
|
|
|
3553
|
|
|
|
|
|
|
## Construct a sort subroutine that sorts record numbers by |
3554
|
|
|
|
|
|
|
## examining values in given fields in the table, in the order |
3555
|
|
|
|
|
|
|
## specified in $SortOrder. |
3556
|
|
|
|
|
|
|
|
3557
|
|
|
|
|
|
|
## If a given field's sort routine produces a zero $CmpVal, it |
3558
|
|
|
|
|
|
|
## means that the values are considered equal, and so to |
3559
|
|
|
|
|
|
|
## disambiguate, we keep trying the next fields in the sort order, |
3560
|
|
|
|
|
|
|
## until we've found one that compares non-zero or exhausted all |
3561
|
|
|
|
|
|
|
## the fields. If we get through all the specified sort fields |
3562
|
|
|
|
|
|
|
## and still get zeroes, the values must be equal in all the |
3563
|
|
|
|
|
|
|
## fields, and so the records are considered equal, so return 0. |
3564
|
|
|
|
|
|
|
|
3565
|
10
|
|
|
|
|
18
|
my $ProgCount; |
3566
|
|
|
|
|
|
|
my $ShowedProgress; |
3567
|
58
|
|
|
|
|
60
|
$Selection = |
3568
|
|
|
|
|
|
|
[sort |
3569
|
|
|
|
|
|
|
{ |
3570
|
10
|
|
|
|
|
30
|
my $CmpVal; |
3571
|
58
|
|
|
|
|
89
|
foreach (@$FieldNums) |
3572
|
|
|
|
|
|
|
{ |
3573
|
|
|
|
|
|
|
## $a and $b are record numbers to be compared. |
3574
|
|
|
|
|
|
|
## $_ is the number of a field in the above lists. |
3575
|
|
|
|
|
|
|
|
3576
|
62
|
|
|
|
|
107
|
$CmpVal = (&{ $SortSubs->[$_] } ## Call the sort routine for field $_ with... |
|
62
|
|
|
|
|
114
|
|
3577
|
|
|
|
|
|
|
(\ $SortCols->[$_]->[$a], ## 1st arg: ref to value in field $_, record $a |
3578
|
|
|
|
|
|
|
\ $SortCols->[$_]->[$b]) ## 2nd arg: ref to value in field $_, record $b. |
3579
|
|
|
|
|
|
|
* $SortDirs->[$_] ); ## Then invert cmp value if descending (-1) |
3580
|
|
|
|
|
|
|
|
3581
|
|
|
|
|
|
|
# print "($_, $SortCols->[$_]->[$a], $SortCols->[$_]->[$b]) ==> $CmpVal\n"; |
3582
|
|
|
|
|
|
|
|
3583
|
62
|
100
|
|
|
|
143
|
last if $CmpVal; ## Keep going if $CmpVal == 0 (same) |
3584
|
|
|
|
|
|
|
} |
3585
|
|
|
|
|
|
|
|
3586
|
|
|
|
|
|
|
## Maybe show timed progress (only after 2 seconds have elapsed) |
3587
|
58
|
100
|
|
|
|
142
|
my $Did = $this->progress_timed("Sorting", $ProgCount, undef, undef, 1) |
3588
|
|
|
|
|
|
|
if ((($ProgCount++) % 200) == 0); |
3589
|
58
|
|
33
|
|
|
165
|
$ShowedProgress ||= $Did; |
3590
|
|
|
|
|
|
|
|
3591
|
58
|
|
|
|
|
87
|
$CmpVal; |
3592
|
|
|
|
|
|
|
} |
3593
|
|
|
|
|
|
|
@$Selection]; |
3594
|
|
|
|
|
|
|
|
3595
|
|
|
|
|
|
|
## If no progress shown yet (sort took less than 2 seconds or 200 |
3596
|
|
|
|
|
|
|
## operations), show a message now. |
3597
|
|
|
|
|
|
|
|
3598
|
10
|
50
|
|
|
|
47
|
$this->progress("Sorted.") unless $ShowedProgress; |
3599
|
|
|
|
|
|
|
|
3600
|
|
|
|
|
|
|
## Replace any existing selection with the new, sorted, one. |
3601
|
10
|
|
|
|
|
17
|
$this->{_Selection} = $Selection; |
3602
|
|
|
|
|
|
|
|
3603
|
10
|
|
|
|
|
70
|
done: |
3604
|
|
|
|
|
|
|
return($Selection); |
3605
|
|
|
|
|
|
|
} |
3606
|
|
|
|
|
|
|
|
3607
|
|
|
|
|
|
|
=pod |
3608
|
|
|
|
|
|
|
|
3609
|
|
|
|
|
|
|
=head1 SORT ORDER |
3610
|
|
|
|
|
|
|
|
3611
|
|
|
|
|
|
|
## Getting / Setting table's default _SortOrder |
3612
|
|
|
|
|
|
|
|
3613
|
|
|
|
|
|
|
$t->sortorder() ## Get sortorder (default is []) |
3614
|
|
|
|
|
|
|
|
3615
|
|
|
|
|
|
|
my $Order = [qw(Last First State Zip)]; |
3616
|
|
|
|
|
|
|
$t->sortorder($Order) ## Set sortorder (use [] for none) |
3617
|
|
|
|
|
|
|
|
3618
|
|
|
|
|
|
|
$t->sortorder_default() ## Get the object's default sort order ([]) |
3619
|
|
|
|
|
|
|
|
3620
|
|
|
|
|
|
|
The sort order is an optional list of field names on which to sort and |
3621
|
|
|
|
|
|
|
sub-sort the data when sorting is requested. The field names must be |
3622
|
|
|
|
|
|
|
the names of actual columns in the table. The names in the sort order |
3623
|
|
|
|
|
|
|
do not necessarily need to coincide with the custom fieldlist if any. |
3624
|
|
|
|
|
|
|
|
3625
|
|
|
|
|
|
|
There is one special value that can be included: _RecNum. This sorts |
3626
|
|
|
|
|
|
|
on the imaginary "record number" field. So for example, you could |
3627
|
|
|
|
|
|
|
specify a sort order this way: |
3628
|
|
|
|
|
|
|
|
3629
|
|
|
|
|
|
|
[qw(Last First _RecNum)] |
3630
|
|
|
|
|
|
|
|
3631
|
|
|
|
|
|
|
(There is no point in putting _RecNum anywhere except at the end of |
3632
|
|
|
|
|
|
|
the sort order because no two records will ever have the same record |
3633
|
|
|
|
|
|
|
number so there will be no further need to disambiguate by referring |
3634
|
|
|
|
|
|
|
to additional fields.) |
3635
|
|
|
|
|
|
|
|
3636
|
|
|
|
|
|
|
Sorting by _RecNum adds a bit of computational overhead because sort() |
3637
|
|
|
|
|
|
|
first builds a record number vector for use in sorting, so for very |
3638
|
|
|
|
|
|
|
large tables, don't do it unless you really need it. |
3639
|
|
|
|
|
|
|
|
3640
|
|
|
|
|
|
|
A sort order can be specified each time the object is sorted (see the |
3641
|
|
|
|
|
|
|
sort() method for details). |
3642
|
|
|
|
|
|
|
|
3643
|
|
|
|
|
|
|
Or, the object's sort order can be set once, and then sort() will use |
3644
|
|
|
|
|
|
|
that sort order when no other sort order is specified. |
3645
|
|
|
|
|
|
|
|
3646
|
|
|
|
|
|
|
If sorting is done when there is no sort order present in the object |
3647
|
|
|
|
|
|
|
or specifed for the sort() method, the selection is sorted by record |
3648
|
|
|
|
|
|
|
number (i.e. it is "unsorted" or returned to its "natural" order). |
3649
|
|
|
|
|
|
|
|
3650
|
|
|
|
|
|
|
In order words, a sortorder that is undef or [] is considered the same |
3651
|
|
|
|
|
|
|
as: [qw(_RecNum)]. This is sometimes called "unsorting". |
3652
|
|
|
|
|
|
|
|
3653
|
|
|
|
|
|
|
In order to decide how values in each field should be compared, sort() |
3654
|
|
|
|
|
|
|
is informed by SortSpecs (specifying SortType and SortDirection for |
3655
|
|
|
|
|
|
|
each field) and by SortRoutines, each of which may similarly either be |
3656
|
|
|
|
|
|
|
pre-set for the object or specified when calling sort() -- see below |
3657
|
|
|
|
|
|
|
for further details. |
3658
|
|
|
|
|
|
|
|
3659
|
|
|
|
|
|
|
=cut |
3660
|
|
|
|
|
|
|
|
3661
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
3662
|
|
|
|
|
|
|
|
3663
|
|
|
|
|
|
|
sub sortorder |
3664
|
|
|
|
|
|
|
{ |
3665
|
8
|
|
|
8
|
0
|
21
|
my $this = shift; |
3666
|
8
|
|
|
|
|
13
|
my ($SortOrder) = @_; |
3667
|
|
|
|
|
|
|
|
3668
|
8
|
|
100
|
|
|
32
|
my $Valid = ((ref($SortOrder) eq 'ARRAY') && (@$SortOrder > 0)); |
3669
|
|
|
|
|
|
|
|
3670
|
|
|
|
|
|
|
## Set if specified. |
3671
|
8
|
100
|
100
|
|
|
37
|
if (defined($SortOrder) && $Valid) |
|
|
100
|
|
|
|
|
|
3672
|
|
|
|
|
|
|
{ |
3673
|
2
|
|
|
|
|
4
|
$this->{_SortOrder} = $SortOrder; |
3674
|
|
|
|
|
|
|
} |
3675
|
|
|
|
|
|
|
elsif (defined($SortOrder)) |
3676
|
|
|
|
|
|
|
{ |
3677
|
1
|
|
|
|
|
4
|
$this->{_SortOrder} = undef; ## Store undef instead of [] |
3678
|
|
|
|
|
|
|
} |
3679
|
|
|
|
|
|
|
|
3680
|
|
|
|
|
|
|
## Get and return. |
3681
|
8
|
|
66
|
|
|
33
|
$SortOrder = $this->{_SortOrder} || $this->sortorder_default(); |
3682
|
|
|
|
|
|
|
|
3683
|
8
|
|
|
|
|
31
|
return($SortOrder); |
3684
|
|
|
|
|
|
|
} |
3685
|
|
|
|
|
|
|
|
3686
|
|
|
|
|
|
|
sub sortorder_default |
3687
|
|
|
|
|
|
|
{ |
3688
|
5
|
|
|
5
|
0
|
13
|
my $this = shift; |
3689
|
|
|
|
|
|
|
|
3690
|
5
|
|
|
|
|
8
|
my $SortOrder = []; |
3691
|
|
|
|
|
|
|
|
3692
|
5
|
|
|
|
|
19
|
return($SortOrder); |
3693
|
|
|
|
|
|
|
} |
3694
|
|
|
|
|
|
|
|
3695
|
|
|
|
|
|
|
sub sortorder_check |
3696
|
|
|
|
|
|
|
{ |
3697
|
63
|
|
|
63
|
0
|
80
|
my $this = shift; |
3698
|
63
|
|
|
|
|
149
|
my $FieldsHash = $this->fieldlist_hash(); |
3699
|
|
|
|
|
|
|
|
3700
|
|
|
|
|
|
|
## Remove any bogus field names from the sort order, if any. |
3701
|
|
|
|
|
|
|
|
3702
|
4
|
|
|
|
|
13
|
$this->{_SortOrder} = [grep {exists($FieldsHash->{$_})} |
|
1
|
|
|
|
|
3
|
|
3703
|
63
|
100
|
|
|
|
244
|
@{$this->{_SortOrder}}] if defined $this->{_SortOrder}; |
3704
|
|
|
|
|
|
|
} |
3705
|
|
|
|
|
|
|
|
3706
|
|
|
|
|
|
|
=pod |
3707
|
|
|
|
|
|
|
|
3708
|
|
|
|
|
|
|
=head1 SORT SPECIFICATIONS |
3709
|
|
|
|
|
|
|
|
3710
|
|
|
|
|
|
|
## Getting / Setting table's default _SortSpecs |
3711
|
|
|
|
|
|
|
|
3712
|
|
|
|
|
|
|
$t->sortspecs() ## Get sortspecs (default is {} -- none) |
3713
|
|
|
|
|
|
|
|
3714
|
|
|
|
|
|
|
my $Specs = {Last => {SortType => 'String' , |
3715
|
|
|
|
|
|
|
SortDirection => -1 }, |
3716
|
|
|
|
|
|
|
Zip => {SortType => 'Integer' }}; |
3717
|
|
|
|
|
|
|
|
3718
|
|
|
|
|
|
|
$t->sortspecs($Specs) ## Set sortspecs |
3719
|
|
|
|
|
|
|
|
3720
|
|
|
|
|
|
|
$t->sortspecs_default() ## Get the object's default sort specs ({}) |
3721
|
|
|
|
|
|
|
|
3722
|
|
|
|
|
|
|
The sortspecs are an optional hash mapping field names to "sort |
3723
|
|
|
|
|
|
|
specifications". |
3724
|
|
|
|
|
|
|
|
3725
|
|
|
|
|
|
|
Each field's sort specification may specify zero or more of these |
3726
|
|
|
|
|
|
|
values: |
3727
|
|
|
|
|
|
|
|
3728
|
|
|
|
|
|
|
=over 4 |
3729
|
|
|
|
|
|
|
|
3730
|
|
|
|
|
|
|
=item SortType |
3731
|
|
|
|
|
|
|
|
3732
|
|
|
|
|
|
|
the sort type to use (For example: String, Integer) |
3733
|
|
|
|
|
|
|
|
3734
|
|
|
|
|
|
|
=item SortDirection |
3735
|
|
|
|
|
|
|
|
3736
|
|
|
|
|
|
|
the sort direction (1: ascending, -1: descending) |
3737
|
|
|
|
|
|
|
|
3738
|
|
|
|
|
|
|
=back |
3739
|
|
|
|
|
|
|
|
3740
|
|
|
|
|
|
|
Sortspecs can be specified when calling the sort() routine, or, a set |
3741
|
|
|
|
|
|
|
of specs can be placed beforehand into the object itself and those |
3742
|
|
|
|
|
|
|
will be used by sort() if no other specs are given. |
3743
|
|
|
|
|
|
|
|
3744
|
|
|
|
|
|
|
For any field listed in the sort order at the time of sorting, but |
3745
|
|
|
|
|
|
|
lacking a sort spec or any component of the sort spec, the object's |
3746
|
|
|
|
|
|
|
default sort type (see sorttype_default()) and default sort direction |
3747
|
|
|
|
|
|
|
(see sortdirection_default()) will be used. |
3748
|
|
|
|
|
|
|
|
3749
|
|
|
|
|
|
|
In addition to getting/setting sort specs as a whole, they may be |
3750
|
|
|
|
|
|
|
gotten/set on a per-field basis, too: |
3751
|
|
|
|
|
|
|
|
3752
|
|
|
|
|
|
|
sortspec($Field) ## Get sortspec for $Field or default spec |
3753
|
|
|
|
|
|
|
|
3754
|
|
|
|
|
|
|
my $Spec = {SortType => 'Integer', SortDirection => -1}; |
3755
|
|
|
|
|
|
|
sortspec('Zip', $Spec) ## Set sortspec |
3756
|
|
|
|
|
|
|
|
3757
|
|
|
|
|
|
|
sortspec_default() ## Get a sortspec with all defaults filled in |
3758
|
|
|
|
|
|
|
|
3759
|
|
|
|
|
|
|
For any $Field not found in the object's sortspecs, sortspec($Field) |
3760
|
|
|
|
|
|
|
returns the same thing returned by sortspec_default(), which is a |
3761
|
|
|
|
|
|
|
sortspec filled in with the default sort type and sort direction (see |
3762
|
|
|
|
|
|
|
below). |
3763
|
|
|
|
|
|
|
|
3764
|
|
|
|
|
|
|
For a list of available built-in SortTypes, and instructions for how |
3765
|
|
|
|
|
|
|
to define your own, see SORT ROUTINES, below. |
3766
|
|
|
|
|
|
|
|
3767
|
|
|
|
|
|
|
=cut |
3768
|
|
|
|
|
|
|
|
3769
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
3770
|
|
|
|
|
|
|
|
3771
|
|
|
|
|
|
|
sub sortspecs |
3772
|
|
|
|
|
|
|
{ |
3773
|
46
|
|
|
46
|
0
|
62
|
my $this = shift; |
3774
|
46
|
|
|
|
|
55
|
my ($SortSpecs) = @_; |
3775
|
|
|
|
|
|
|
|
3776
|
|
|
|
|
|
|
## Set if specified. |
3777
|
46
|
100
|
|
|
|
86
|
$this->{_SortSpecs} = $SortSpecs if $SortSpecs; |
3778
|
|
|
|
|
|
|
|
3779
|
|
|
|
|
|
|
## Get and return |
3780
|
46
|
|
33
|
|
|
101
|
$SortSpecs = $this->{_SortSpecs} || $this->sortspecs_default(); |
3781
|
|
|
|
|
|
|
|
3782
|
46
|
|
|
|
|
93
|
return($SortSpecs); |
3783
|
|
|
|
|
|
|
} |
3784
|
|
|
|
|
|
|
|
3785
|
|
|
|
|
|
|
sub sortspecs_default |
3786
|
|
|
|
|
|
|
{ |
3787
|
1
|
|
|
1
|
0
|
2
|
my $this = shift; |
3788
|
|
|
|
|
|
|
|
3789
|
1
|
|
|
|
|
3
|
my $SortSpecs = {}; |
3790
|
|
|
|
|
|
|
|
3791
|
1
|
|
|
|
|
6
|
return($SortSpecs); |
3792
|
|
|
|
|
|
|
} |
3793
|
|
|
|
|
|
|
|
3794
|
|
|
|
|
|
|
sub sortspec |
3795
|
|
|
|
|
|
|
{ |
3796
|
4
|
|
|
4
|
0
|
17
|
my $this = shift; |
3797
|
4
|
|
|
|
|
9
|
my ($FieldName, $SortSpec) = @_; |
3798
|
|
|
|
|
|
|
|
3799
|
|
|
|
|
|
|
## Set if specified. |
3800
|
4
|
100
|
|
|
|
18
|
$this->{_SortSpecs}->{$FieldName} = $SortSpec if $SortSpec; |
3801
|
|
|
|
|
|
|
|
3802
|
|
|
|
|
|
|
## Get and return. |
3803
|
4
|
|
33
|
|
|
19
|
my $SortSpec = ($this->{_SortSpecs}->{$FieldName} || |
3804
|
|
|
|
|
|
|
$this->sortspec_default($FieldName)); |
3805
|
|
|
|
|
|
|
|
3806
|
|
|
|
|
|
|
## Provide defaults for needed fields of sort spec. |
3807
|
4
|
|
33
|
|
|
14
|
$SortSpec->{SortType} ||= $this->sorttype_default (); |
3808
|
4
|
50
|
|
|
|
15
|
$SortSpec->{SortDirection} = $this->sortdirection_default() |
3809
|
|
|
|
|
|
|
unless defined($SortSpec->{SortDirection}); |
3810
|
|
|
|
|
|
|
|
3811
|
4
|
|
|
|
|
14
|
return($SortSpec); |
3812
|
|
|
|
|
|
|
} |
3813
|
|
|
|
|
|
|
|
3814
|
|
|
|
|
|
|
sub sortspec_default |
3815
|
|
|
|
|
|
|
{ |
3816
|
2
|
|
|
2
|
0
|
17
|
my $this = shift; |
3817
|
2
|
|
|
|
|
4
|
my ($FieldName) = @_; |
3818
|
|
|
|
|
|
|
|
3819
|
|
|
|
|
|
|
## Default sortspec for a field |
3820
|
|
|
|
|
|
|
|
3821
|
2
|
|
|
|
|
8
|
my $SortType = $this->sorttype_default (); |
3822
|
2
|
|
|
|
|
7
|
my $SortDir = $this->sortdirection_default(); |
3823
|
|
|
|
|
|
|
|
3824
|
2
|
|
|
|
|
8
|
my $Spec = {SortType => $SortType, SortDirection => $SortDir}; |
3825
|
|
|
|
|
|
|
|
3826
|
2
|
|
|
|
|
8
|
return($Spec); |
3827
|
|
|
|
|
|
|
} |
3828
|
|
|
|
|
|
|
|
3829
|
|
|
|
|
|
|
|
3830
|
|
|
|
|
|
|
=pod |
3831
|
|
|
|
|
|
|
|
3832
|
|
|
|
|
|
|
=head1 DEFAULT SORT DIRECTION |
3833
|
|
|
|
|
|
|
|
3834
|
|
|
|
|
|
|
## Getting / Setting table's default _DefaultSortDirection |
3835
|
|
|
|
|
|
|
|
3836
|
|
|
|
|
|
|
$t->sortdirection_default() ## Get default sort direction |
3837
|
|
|
|
|
|
|
|
3838
|
|
|
|
|
|
|
$t->sortdirection_default(-1) ## Set default sort direction |
3839
|
|
|
|
|
|
|
|
3840
|
|
|
|
|
|
|
Each element in a sort specification can optionally specify a sort |
3841
|
|
|
|
|
|
|
direction. |
3842
|
|
|
|
|
|
|
|
3843
|
|
|
|
|
|
|
1 = ascending, -1 = descending |
3844
|
|
|
|
|
|
|
|
3845
|
|
|
|
|
|
|
For any sort specs that don't specify a direction, the object's |
3846
|
|
|
|
|
|
|
default sort direction will be used. Use these routines to get/set |
3847
|
|
|
|
|
|
|
the default sort direction. |
3848
|
|
|
|
|
|
|
|
3849
|
|
|
|
|
|
|
=cut |
3850
|
|
|
|
|
|
|
|
3851
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
3852
|
|
|
|
|
|
|
|
3853
|
|
|
|
|
|
|
sub sortdirection_default |
3854
|
|
|
|
|
|
|
{ |
3855
|
6
|
|
|
6
|
0
|
14
|
my $this = shift; |
3856
|
6
|
|
|
|
|
10
|
my ($DefaultSortDir) = @_; |
3857
|
|
|
|
|
|
|
|
3858
|
6
|
100
|
|
|
|
17
|
if (defined($DefaultSortDir)) |
3859
|
|
|
|
|
|
|
{ |
3860
|
|
|
|
|
|
|
## Set if specified. Force to 1 or -1. Treat 0 as -1 (descending). |
3861
|
1
|
|
50
|
|
|
6
|
$this->{_DefaultSortDirection} = |
3862
|
|
|
|
|
|
|
(max(min(int($DefaultSortDir), 1), -1) || -1); |
3863
|
|
|
|
|
|
|
} |
3864
|
|
|
|
|
|
|
|
3865
|
|
|
|
|
|
|
## Get and return. If not present, then 1 (ascending) is the default. |
3866
|
6
|
50
|
|
|
|
16
|
my $SortDir = (defined($this->{_DefaultSortDirection}) ? |
3867
|
|
|
|
|
|
|
$this->{_DefaultSortDirection} : |
3868
|
|
|
|
|
|
|
1); |
3869
|
6
|
|
|
|
|
21
|
return($SortDir); |
3870
|
|
|
|
|
|
|
} |
3871
|
|
|
|
|
|
|
|
3872
|
|
|
|
|
|
|
=pod |
3873
|
|
|
|
|
|
|
|
3874
|
|
|
|
|
|
|
=head1 DEFAULT SORT TYPE |
3875
|
|
|
|
|
|
|
|
3876
|
|
|
|
|
|
|
## Getting / Setting table's default _DefaultSortType |
3877
|
|
|
|
|
|
|
|
3878
|
|
|
|
|
|
|
$t->sorttype_default() ## Get default sort type |
3879
|
|
|
|
|
|
|
|
3880
|
|
|
|
|
|
|
$t->sorttype_default('Integer') ## Set default sort type |
3881
|
|
|
|
|
|
|
|
3882
|
|
|
|
|
|
|
Each element in a sort specification can optionally specify a sort |
3883
|
|
|
|
|
|
|
type. The sort type is a string (like 'String' or 'Integer' or |
3884
|
|
|
|
|
|
|
'Date') that selects from one or more sort routines. (See Sort |
3885
|
|
|
|
|
|
|
Routines, below). |
3886
|
|
|
|
|
|
|
|
3887
|
|
|
|
|
|
|
There are several sort routines built into the CTable object, and you |
3888
|
|
|
|
|
|
|
can also add as many of your own routines (and hence Sort Types) as |
3889
|
|
|
|
|
|
|
you like or need. This allows for very flexible sorting. |
3890
|
|
|
|
|
|
|
|
3891
|
|
|
|
|
|
|
For any sort specs that don't specify a type, the object's default |
3892
|
|
|
|
|
|
|
sort type will be used. Use these routines to get/set the default |
3893
|
|
|
|
|
|
|
sort type, which initially is 'String'. |
3894
|
|
|
|
|
|
|
|
3895
|
|
|
|
|
|
|
=cut |
3896
|
|
|
|
|
|
|
|
3897
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
3898
|
|
|
|
|
|
|
|
3899
|
|
|
|
|
|
|
sub sorttype_default |
3900
|
|
|
|
|
|
|
{ |
3901
|
6
|
|
|
6
|
0
|
10
|
my $this = shift; |
3902
|
6
|
|
|
|
|
9
|
my ($DefaultSortType) = @_; |
3903
|
|
|
|
|
|
|
|
3904
|
6
|
100
|
|
|
|
15
|
if (defined($DefaultSortType)) |
3905
|
|
|
|
|
|
|
{ |
3906
|
|
|
|
|
|
|
## Set if specified. |
3907
|
1
|
|
|
|
|
4
|
$this->{_DefaultSortType} = "$DefaultSortType"; |
3908
|
|
|
|
|
|
|
} |
3909
|
|
|
|
|
|
|
|
3910
|
|
|
|
|
|
|
## Get and return. If not present, then 'String' is the default. |
3911
|
6
|
50
|
|
|
|
21
|
my $SortDir = (defined($this->{_DefaultSortType}) ? |
3912
|
|
|
|
|
|
|
$this->{_DefaultSortType} : |
3913
|
|
|
|
|
|
|
'String'); |
3914
|
6
|
|
|
|
|
18
|
return($SortDir); |
3915
|
|
|
|
|
|
|
} |
3916
|
|
|
|
|
|
|
|
3917
|
|
|
|
|
|
|
=pod |
3918
|
|
|
|
|
|
|
|
3919
|
|
|
|
|
|
|
=head1 SORT ROUTINES: BUILTIN AND CUSTOM |
3920
|
|
|
|
|
|
|
|
3921
|
|
|
|
|
|
|
## Getting / Setting table's custom sort routines (_SRoutines) |
3922
|
|
|
|
|
|
|
|
3923
|
|
|
|
|
|
|
$t->sortroutine($Type) ## Get a sort routine for $Type |
3924
|
|
|
|
|
|
|
|
3925
|
|
|
|
|
|
|
$t->sortroutine($Type, $Sub) ## Set a sort routine for $Type |
3926
|
|
|
|
|
|
|
|
3927
|
|
|
|
|
|
|
$t->sortroutine($Type, 0 ) ## Remove sort routine for $Type |
3928
|
|
|
|
|
|
|
$t->sortroutine_set($Type) |
3929
|
|
|
|
|
|
|
|
3930
|
|
|
|
|
|
|
$t->sortroutines() ## Get hash of any sort routines |
3931
|
|
|
|
|
|
|
|
3932
|
|
|
|
|
|
|
$t->sortroutines_builtin() ## Get hash of builtin routines |
3933
|
|
|
|
|
|
|
|
3934
|
|
|
|
|
|
|
Each SortType in the sortspecs should have a corresponding sort |
3935
|
|
|
|
|
|
|
routine (any unrecognized type will be sorted using the 'String' sort |
3936
|
|
|
|
|
|
|
routine). |
3937
|
|
|
|
|
|
|
|
3938
|
|
|
|
|
|
|
The sort() command looks up the appropriate sort routine for each |
3939
|
|
|
|
|
|
|
field it is asked to sort, based on the SortType for that field, as |
3940
|
|
|
|
|
|
|
given in the sortspecs, as described above. |
3941
|
|
|
|
|
|
|
|
3942
|
|
|
|
|
|
|
Builtin sort types, recognized and implemented natively by this |
3943
|
|
|
|
|
|
|
module, are: |
3944
|
|
|
|
|
|
|
|
3945
|
|
|
|
|
|
|
String ## Fastest case-sensitive compare (data is string) |
3946
|
|
|
|
|
|
|
Text ## Insensitive compare (lowercases, then compares) |
3947
|
|
|
|
|
|
|
Number ## Number works for floats or integers |
3948
|
|
|
|
|
|
|
Integer ## Faster than floats. Uses "use integer" |
3949
|
|
|
|
|
|
|
DateSecs ## Same as integer; assumes date in seconds |
3950
|
|
|
|
|
|
|
Boolean ## Treats item as a Perlish boolean (empty/undef = false) |
3951
|
|
|
|
|
|
|
|
3952
|
|
|
|
|
|
|
The above sort types are always recognized. Additional sort types may |
3953
|
|
|
|
|
|
|
be added by subclasses (and could shadow the builtin implementations |
3954
|
|
|
|
|
|
|
of the above types if desired) and/or may be added to instances (and |
3955
|
|
|
|
|
|
|
again could shadow the above implementations), and/or may be specified |
3956
|
|
|
|
|
|
|
when the sort() method is called, once again optionally shadowing any |
3957
|
|
|
|
|
|
|
deeper definitions. |
3958
|
|
|
|
|
|
|
|
3959
|
|
|
|
|
|
|
=head1 CUSTOM SORT ROUTINE INTERFACE |
3960
|
|
|
|
|
|
|
|
3961
|
|
|
|
|
|
|
A custom sort routine is called with two arguments, each of which is a |
3962
|
|
|
|
|
|
|
pointer to a scalar. The sort routine should dereference each pointer |
3963
|
|
|
|
|
|
|
and compare the resulting scalars, returning -1 if the first scalar is |
3964
|
|
|
|
|
|
|
smaller than the second, 1 if it is larger, and 0 if they are |
3965
|
|
|
|
|
|
|
considered equal. |
3966
|
|
|
|
|
|
|
|
3967
|
|
|
|
|
|
|
For example, here is the built-in comparison routine for 'String': |
3968
|
|
|
|
|
|
|
|
3969
|
|
|
|
|
|
|
sub { $ {$_[0]} cmp $ {$_[1]} } |
3970
|
|
|
|
|
|
|
|
3971
|
|
|
|
|
|
|
NOTE: Your custom sort routines should NOT compare $a and $b as with |
3972
|
|
|
|
|
|
|
Perl's builtin sort() command. |
3973
|
|
|
|
|
|
|
|
3974
|
|
|
|
|
|
|
Examine the variable $BuiltinSortRoutines in this module's |
3975
|
|
|
|
|
|
|
implementation to see some additional examples of sort routines. |
3976
|
|
|
|
|
|
|
|
3977
|
|
|
|
|
|
|
Internally, sort() calls the sortroutines() method to get a hash that |
3978
|
|
|
|
|
|
|
should consist of all builtin sort routines with the per-object sort |
3979
|
|
|
|
|
|
|
routines, if any, overlaid. sortroutines() in turn calls the |
3980
|
|
|
|
|
|
|
sortroutines_builtin() method to get a copy of the hash of all builtin |
3981
|
|
|
|
|
|
|
sort routines for the object. (So a subclass could easily add |
3982
|
|
|
|
|
|
|
additional SortTypes or reimplement them by just overriding |
3983
|
|
|
|
|
|
|
sortroutines_builtin() and adding its own additional routines to the |
3984
|
|
|
|
|
|
|
resulting hash.) |
3985
|
|
|
|
|
|
|
|
3986
|
|
|
|
|
|
|
sortroutine() may be called to get or set a custom sort routine for a |
3987
|
|
|
|
|
|
|
given type in the given object. |
3988
|
|
|
|
|
|
|
|
3989
|
|
|
|
|
|
|
There is no way to directly manipulate the builtin sort routines for |
3990
|
|
|
|
|
|
|
the entire class. To accomplish that, you should define and use a |
3991
|
|
|
|
|
|
|
subclass that extends sortroutines_builtin() to add its own routines. |
3992
|
|
|
|
|
|
|
|
3993
|
|
|
|
|
|
|
For example: |
3994
|
|
|
|
|
|
|
|
3995
|
|
|
|
|
|
|
BEGIN |
3996
|
|
|
|
|
|
|
{ ## A subclass of Data::CTable with an INetAddr SortType. |
3997
|
|
|
|
|
|
|
package IATable; use vars qw(@ISA); @ISA = qw(Data::CTable); |
3998
|
|
|
|
|
|
|
|
3999
|
|
|
|
|
|
|
sub sortroutines_builtin |
4000
|
|
|
|
|
|
|
{ |
4001
|
|
|
|
|
|
|
my $this = shift; |
4002
|
|
|
|
|
|
|
my $CustomRoutines = |
4003
|
|
|
|
|
|
|
{INetAddr => |
4004
|
|
|
|
|
|
|
sub {use integer; ip2int($ {$_[0]}) <=> ip2int($ {$_[1]})}}; |
4005
|
|
|
|
|
|
|
my $AllRoutines = |
4006
|
|
|
|
|
|
|
{%{$this->SUPER::sortroutines_builtin()} %$CustomRoutines}; |
4007
|
|
|
|
|
|
|
return($AllRoutines); |
4008
|
|
|
|
|
|
|
}; |
4009
|
|
|
|
|
|
|
|
4010
|
|
|
|
|
|
|
sub ip2int {.....} $# Could memoize & inline for efficiency |
4011
|
|
|
|
|
|
|
} |
4012
|
|
|
|
|
|
|
|
4013
|
|
|
|
|
|
|
my $Table = IATable::new(......); |
4014
|
|
|
|
|
|
|
|
4015
|
|
|
|
|
|
|
The IATable class would then have all the same features of |
4016
|
|
|
|
|
|
|
Data::CTable but would then also support the INetAddr SortType. |
4017
|
|
|
|
|
|
|
|
4018
|
|
|
|
|
|
|
=cut |
4019
|
|
|
|
|
|
|
|
4020
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
4021
|
|
|
|
|
|
|
|
4022
|
|
|
|
|
|
|
BEGIN |
4023
|
|
|
|
|
|
|
{ |
4024
|
|
|
|
|
|
|
my $BuiltinSortRoutines = |
4025
|
|
|
|
|
|
|
{( |
4026
|
64
|
|
|
|
|
68
|
String => sub { $ {$_[0]} cmp $ {$_[1]} }, |
|
64
|
|
|
|
|
79
|
|
|
64
|
|
|
|
|
122
|
|
4027
|
5
|
|
|
|
|
6
|
Text => sub { lc($ {$_[0]}) cmp lc($ {$_[1]})}, |
|
5
|
|
|
|
|
8
|
|
|
5
|
|
|
|
|
15
|
|
4028
|
8
|
|
|
|
|
9
|
Number => sub { $ {$_[0]} <=> $ {$_[1]} }, |
|
8
|
|
|
|
|
11
|
|
|
8
|
|
|
|
|
21
|
|
4029
|
1
|
|
|
1
|
|
1074
|
Integer => sub {use integer; $ {$_[0]} <=> $ {$_[1]} }, |
|
1
|
|
|
|
|
11
|
|
|
1
|
|
|
|
|
4
|
|
|
8
|
|
|
|
|
10
|
|
|
8
|
|
|
|
|
16
|
|
|
8
|
|
|
|
|
21
|
|
4030
|
1
|
|
|
1
|
|
54
|
DateSecs => sub {use integer; $ {$_[0]} <=> $ {$_[1]} }, |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
51
|
|
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
9
|
|
4031
|
1
|
|
|
1
|
|
48
|
_RecNum => sub {use integer; $ {$_[0]} <=> $ {$_[1]} }, |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
4
|
|
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
9
|
|
4032
|
4
|
|
|
|
|
5
|
Boolean => sub { !!$ {$_[0]} <=> !!$ {$_[1]} }, |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
12
|
|
4033
|
1
|
|
|
1
|
|
956
|
)}; |
4034
|
|
|
|
|
|
|
|
4035
|
|
|
|
|
|
|
sub sortroutines_builtin ## Class or instance method |
4036
|
|
|
|
|
|
|
{ |
4037
|
53
|
|
|
53
|
0
|
351
|
return({%$BuiltinSortRoutines}); ## Copy of above private hash. |
4038
|
|
|
|
|
|
|
} |
4039
|
|
|
|
|
|
|
} |
4040
|
|
|
|
|
|
|
|
4041
|
|
|
|
|
|
|
sub sortroutine |
4042
|
|
|
|
|
|
|
{ |
4043
|
38
|
|
|
38
|
0
|
179
|
my $this = shift; |
4044
|
38
|
|
|
|
|
59
|
my ($Type, $Routine) = @_; |
4045
|
|
|
|
|
|
|
|
4046
|
38
|
100
|
|
|
|
78
|
if (defined($Routine)) |
4047
|
|
|
|
|
|
|
{ |
4048
|
|
|
|
|
|
|
## Set if $Routine provided. |
4049
|
3
|
|
|
|
|
10
|
$this->sortroutine_set($Type, $Routine); |
4050
|
|
|
|
|
|
|
} |
4051
|
|
|
|
|
|
|
|
4052
|
|
|
|
|
|
|
## Get and return. |
4053
|
38
|
|
|
|
|
70
|
$Routine = $this->sortroutine_get($Type); |
4054
|
|
|
|
|
|
|
|
4055
|
38
|
|
|
|
|
123
|
return($Routine); |
4056
|
|
|
|
|
|
|
} |
4057
|
|
|
|
|
|
|
|
4058
|
|
|
|
|
|
|
sub sortroutine_get |
4059
|
|
|
|
|
|
|
{ |
4060
|
38
|
|
|
38
|
0
|
45
|
my $this = shift; |
4061
|
38
|
|
|
|
|
42
|
my ($Type) = @_; |
4062
|
38
|
|
|
|
|
72
|
my $Routines = $this->sortroutines(); |
4063
|
38
|
|
66
|
|
|
96
|
my $Routine = $Routines->{$Type} || $Routines->{'String'}; |
4064
|
|
|
|
|
|
|
|
4065
|
38
|
|
|
|
|
99
|
return($Routine); |
4066
|
|
|
|
|
|
|
} |
4067
|
|
|
|
|
|
|
|
4068
|
|
|
|
|
|
|
sub sortroutine_set |
4069
|
|
|
|
|
|
|
{ |
4070
|
4
|
|
|
4
|
0
|
5
|
my $this = shift; |
4071
|
4
|
|
|
|
|
6
|
my ($Type, $Routine) = @_; |
4072
|
|
|
|
|
|
|
|
4073
|
4
|
|
|
|
|
7
|
my $Valid = (ref($Routine) eq 'CODE'); |
4074
|
|
|
|
|
|
|
|
4075
|
4
|
100
|
|
|
|
10
|
if ($Valid) |
4076
|
|
|
|
|
|
|
{ |
4077
|
|
|
|
|
|
|
## Add / replace if a routine was supplied. |
4078
|
2
|
|
50
|
|
|
3
|
$ {$this->{_SRoutines} ||= {}}{$Type} = $Routine; |
|
2
|
|
|
|
|
8
|
|
4079
|
|
|
|
|
|
|
} |
4080
|
|
|
|
|
|
|
else |
4081
|
|
|
|
|
|
|
{ |
4082
|
|
|
|
|
|
|
## Otherwise delete. |
4083
|
2
|
|
50
|
|
|
3
|
$Routine = delete $ {$this->{_SRoutines} ||= {}}{$Type}; |
|
2
|
|
|
|
|
8
|
|
4084
|
|
|
|
|
|
|
} |
4085
|
|
|
|
|
|
|
|
4086
|
4
|
|
|
|
|
14
|
return($Routine); |
4087
|
|
|
|
|
|
|
} |
4088
|
|
|
|
|
|
|
|
4089
|
|
|
|
|
|
|
sub sortroutines |
4090
|
|
|
|
|
|
|
{ |
4091
|
42
|
|
|
42
|
0
|
54
|
my $this = shift; |
4092
|
|
|
|
|
|
|
|
4093
|
42
|
50
|
|
|
|
86
|
my $Routines = {%{$this->sortroutines_builtin()}, ## First builtin ones |
|
42
|
|
|
|
|
186
|
|
4094
|
42
|
|
|
|
|
46
|
%{$this->{_SRoutines} || {}}}; ## Shadow with object's own |
4095
|
|
|
|
|
|
|
|
4096
|
42
|
|
|
|
|
193
|
return($Routines); |
4097
|
|
|
|
|
|
|
} |
4098
|
|
|
|
|
|
|
|
4099
|
|
|
|
|
|
|
=pod |
4100
|
|
|
|
|
|
|
|
4101
|
|
|
|
|
|
|
=head1 FREEZING SELECTION & FIELD LIST |
4102
|
|
|
|
|
|
|
|
4103
|
|
|
|
|
|
|
## Freeze data layout: re-order columns; omit unused fields |
4104
|
|
|
|
|
|
|
|
4105
|
|
|
|
|
|
|
$t->cull(...params...) ## Rebuild table in order |
4106
|
|
|
|
|
|
|
my $s = $t->snapshot(...params...) ## Make copy as if rebuilt |
4107
|
|
|
|
|
|
|
|
4108
|
|
|
|
|
|
|
The cull() method re-writes all data in the table to be in the order |
4109
|
|
|
|
|
|
|
indicated in _Selection (if present). This will cause any records not |
4110
|
|
|
|
|
|
|
listed in _Selection to be omitted (unless selection is null in which |
4111
|
|
|
|
|
|
|
case all records are retained in original order). |
4112
|
|
|
|
|
|
|
|
4113
|
|
|
|
|
|
|
In addition, if there is a custom field list present, it removes any |
4114
|
|
|
|
|
|
|
fields NOT mentioned in _FieldList. |
4115
|
|
|
|
|
|
|
|
4116
|
|
|
|
|
|
|
The snapshot() method is similar, except instead of modifying the |
4117
|
|
|
|
|
|
|
object itself, it makes a copy of the object that's equivalent to what |
4118
|
|
|
|
|
|
|
cull() would have created, and returns that new object, leaving the |
4119
|
|
|
|
|
|
|
original untouched. (All data structures are deep-copied from the old |
4120
|
|
|
|
|
|
|
object to the new one, leaving the objects totally independent.) |
4121
|
|
|
|
|
|
|
|
4122
|
|
|
|
|
|
|
cull() and snapshot() both take two optional named parameters: |
4123
|
|
|
|
|
|
|
_FieldList and/or _Selection to be used in place of the corresponding |
4124
|
|
|
|
|
|
|
parameters found in the object. |
4125
|
|
|
|
|
|
|
|
4126
|
|
|
|
|
|
|
If only a single argument is supplied, it is assumed to be _Selection. |
4127
|
|
|
|
|
|
|
|
4128
|
|
|
|
|
|
|
=cut |
4129
|
|
|
|
|
|
|
|
4130
|
|
|
|
|
|
|
sub cull |
4131
|
|
|
|
|
|
|
{ |
4132
|
1
|
|
|
1
|
0
|
2
|
my $this = shift; |
4133
|
1
|
50
|
|
|
|
5
|
my $Params = (@_ == 1 ? {_Selection => $_[0]} : {@_}); |
4134
|
|
|
|
|
|
|
|
4135
|
1
|
|
|
|
|
2
|
my($Selection, $FieldList) = map {$this->getparam($Params, $_)} |
|
2
|
|
|
|
|
4
|
|
4136
|
|
|
|
|
|
|
qw(_Selection _FieldList); |
4137
|
|
|
|
|
|
|
|
4138
|
1
|
|
33
|
|
|
3
|
$FieldList ||= $this->{_FieldList}; |
4139
|
1
|
|
33
|
|
|
4
|
$Selection ||= $this->{_Selection}; |
4140
|
|
|
|
|
|
|
|
4141
|
|
|
|
|
|
|
## First cull any fields/columns not mentioned in _FieldList, if any. |
4142
|
1
|
50
|
|
|
|
3
|
if ($FieldList) |
4143
|
|
|
|
|
|
|
{ |
4144
|
1
|
|
|
|
|
2
|
my $FieldHash = {}; @$FieldHash{@$FieldList} = undef; |
|
1
|
|
|
|
|
4
|
|
4145
|
1
|
|
|
|
|
2
|
my $DeadFields = [grep {!exists($FieldHash->{$_})} @{$this->fieldlist_all()}]; |
|
4
|
|
|
|
|
8
|
|
|
1
|
|
|
|
|
4
|
|
4146
|
1
|
|
|
|
|
4
|
delete @$this{@$DeadFields}; |
4147
|
|
|
|
|
|
|
|
4148
|
|
|
|
|
|
|
## Set the (possibly) new field list in the object. |
4149
|
1
|
|
|
|
|
5
|
$this->fieldlist_set($FieldList); |
4150
|
|
|
|
|
|
|
} |
4151
|
|
|
|
|
|
|
|
4152
|
|
|
|
|
|
|
## Then cull / rearrange all the columns |
4153
|
1
|
50
|
|
|
|
5
|
if ($Selection) |
4154
|
|
|
|
|
|
|
{ |
4155
|
|
|
|
|
|
|
## Temporarily set the selection() to be the one we may have been given... |
4156
|
1
|
|
|
|
|
5
|
$this->selection($Selection); |
4157
|
|
|
|
|
|
|
|
4158
|
|
|
|
|
|
|
## Get the de-facto field list if we don't already have it explicitly |
4159
|
1
|
|
33
|
|
|
4
|
$FieldList ||= $this->fieldlist(); |
4160
|
|
|
|
|
|
|
|
4161
|
|
|
|
|
|
|
## Rearrange each column |
4162
|
1
|
|
|
|
|
2
|
foreach my $FieldName (@$FieldList) |
4163
|
|
|
|
|
|
|
{ |
4164
|
3
|
|
|
|
|
7
|
$this->{$FieldName} = $this->sel($FieldName); |
4165
|
|
|
|
|
|
|
} |
4166
|
|
|
|
|
|
|
|
4167
|
|
|
|
|
|
|
## Remove the _Selection since it is no longer valid. |
4168
|
1
|
|
|
|
|
3
|
$this->selection_delete(); |
4169
|
|
|
|
|
|
|
} |
4170
|
|
|
|
|
|
|
} |
4171
|
|
|
|
|
|
|
|
4172
|
|
|
|
|
|
|
sub snapshot |
4173
|
|
|
|
|
|
|
{ |
4174
|
50
|
|
|
50
|
0
|
1096
|
my $this = shift; |
4175
|
50
|
50
|
|
|
|
151
|
my $Params = (@_ == 1 ? {_Selection => $_[0]} : {@_}); |
4176
|
|
|
|
|
|
|
|
4177
|
50
|
|
|
|
|
85
|
my($Selection, $FieldList) = map {$this->getparam($Params, $_)} |
|
100
|
|
|
|
|
234
|
|
4178
|
|
|
|
|
|
|
qw(_Selection _FieldList); |
4179
|
|
|
|
|
|
|
|
4180
|
50
|
|
33
|
|
|
296
|
$FieldList ||= $this->{_FieldList}; |
4181
|
50
|
|
66
|
|
|
230
|
$Selection ||= $this->{_Selection}; |
4182
|
|
|
|
|
|
|
|
4183
|
|
|
|
|
|
|
## First make a shallow copy of $this |
4184
|
50
|
|
|
|
|
812
|
my $copy = {%$this}; |
4185
|
|
|
|
|
|
|
|
4186
|
|
|
|
|
|
|
## Then delete any (references to) data columns owned by $this... |
4187
|
50
|
|
|
|
|
144
|
delete @$copy{@{$this->fieldlist_all()}}; |
|
50
|
|
|
|
|
125
|
|
4188
|
|
|
|
|
|
|
|
4189
|
|
|
|
|
|
|
## Then deep-copy all other parameters from $this... |
4190
|
50
|
|
|
|
|
2619
|
$copy = dclone($copy); |
4191
|
|
|
|
|
|
|
|
4192
|
|
|
|
|
|
|
## Then new/bless/initialize the copy into the same class as $this... |
4193
|
50
|
|
|
|
|
401
|
$copy = ref($this)->new($copy); |
4194
|
|
|
|
|
|
|
|
4195
|
|
|
|
|
|
|
## Temporarily override selection if necessary. |
4196
|
50
|
|
|
|
|
240
|
my $OldSel = $this->{_Selection}; |
4197
|
50
|
|
|
|
|
122
|
$this->selection($Selection); |
4198
|
|
|
|
|
|
|
|
4199
|
|
|
|
|
|
|
## Then insert all the rearranged columns into $copy... |
4200
|
50
|
|
|
|
|
72
|
@$copy{@$FieldList} = @{$this->sels($FieldList)}; |
|
50
|
|
|
|
|
119
|
|
4201
|
|
|
|
|
|
|
|
4202
|
|
|
|
|
|
|
## Restore old selection, if any. |
4203
|
50
|
|
|
|
|
151
|
$this->selection_set($OldSel); |
4204
|
|
|
|
|
|
|
|
4205
|
|
|
|
|
|
|
## Remove the selection in copy. |
4206
|
50
|
|
|
|
|
86
|
delete $copy->{_Selection}; |
4207
|
|
|
|
|
|
|
|
4208
|
|
|
|
|
|
|
## Set copy's fieldlist to a copy of the one we used. |
4209
|
50
|
|
|
|
|
175
|
$copy->{_FieldList} = [@$FieldList]; |
4210
|
|
|
|
|
|
|
|
4211
|
50
|
|
|
|
|
204
|
return($copy); |
4212
|
|
|
|
|
|
|
} |
4213
|
|
|
|
|
|
|
|
4214
|
|
|
|
|
|
|
=pod |
4215
|
|
|
|
|
|
|
|
4216
|
|
|
|
|
|
|
=head1 LINE ENDINGS |
4217
|
|
|
|
|
|
|
|
4218
|
|
|
|
|
|
|
## Get current value |
4219
|
|
|
|
|
|
|
|
4220
|
|
|
|
|
|
|
$t->lineending() ## Get actual setting: string or symbol |
4221
|
|
|
|
|
|
|
$t->lineending_symbol() ## Get setting's symbolic value if possible |
4222
|
|
|
|
|
|
|
$t->lineending_string() ## Get setting's string value if possible |
4223
|
|
|
|
|
|
|
|
4224
|
|
|
|
|
|
|
## Set value |
4225
|
|
|
|
|
|
|
|
4226
|
|
|
|
|
|
|
$t->lineending($Ending) ## Will be converted internally to symbol |
4227
|
|
|
|
|
|
|
|
4228
|
|
|
|
|
|
|
## Convert a value to symbol or string form |
4229
|
|
|
|
|
|
|
|
4230
|
|
|
|
|
|
|
$t->lineending_symbol($L) ## Convert string form to symbolic form |
4231
|
|
|
|
|
|
|
$t->lineending_string($L) ## Convert symbol form to string form |
4232
|
|
|
|
|
|
|
|
4233
|
|
|
|
|
|
|
## Get internal conversion hash tables |
4234
|
|
|
|
|
|
|
|
4235
|
|
|
|
|
|
|
$t->lineending_symbols() ## Hash ref mapping known strings to symbols |
4236
|
|
|
|
|
|
|
$t->lineending_strings() ## Hash ref mapping known symbols to strings |
4237
|
|
|
|
|
|
|
|
4238
|
|
|
|
|
|
|
Use these accessor functions to get/set the _LineEnding parameter. |
4239
|
|
|
|
|
|
|
|
4240
|
|
|
|
|
|
|
You can set the parameter in either string or symbol form as you wish. |
4241
|
|
|
|
|
|
|
You can get it in its raw, as-stored, form, or, you can get it in |
4242
|
|
|
|
|
|
|
string form or symbol form as desired. |
4243
|
|
|
|
|
|
|
|
4244
|
|
|
|
|
|
|
Finally, some utility conversion calls allows you to convert a string |
4245
|
|
|
|
|
|
|
you have on hand to a symbolic form. For example: |
4246
|
|
|
|
|
|
|
|
4247
|
|
|
|
|
|
|
$L = "\x0D"; |
4248
|
|
|
|
|
|
|
print ("This file uses " . $t->lineending_symbol($L) . " endings."); |
4249
|
|
|
|
|
|
|
|
4250
|
|
|
|
|
|
|
This would print: |
4251
|
|
|
|
|
|
|
|
4252
|
|
|
|
|
|
|
This file uses mac endings. |
4253
|
|
|
|
|
|
|
|
4254
|
|
|
|
|
|
|
=cut |
4255
|
|
|
|
|
|
|
|
4256
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
4257
|
|
|
|
|
|
|
|
4258
|
|
|
|
|
|
|
BEGIN |
4259
|
|
|
|
|
|
|
{ |
4260
|
|
|
|
|
|
|
## Map any recognized _LineEnding value to its actual string |
4261
|
1
|
|
|
1
|
|
8
|
my $LineEnding_Strings = |
4262
|
|
|
|
|
|
|
{( |
4263
|
|
|
|
|
|
|
dos => "\x0D\x0A", |
4264
|
|
|
|
|
|
|
mac => "\x0D", |
4265
|
|
|
|
|
|
|
unix => "\x0A", |
4266
|
|
|
|
|
|
|
"\x0D\x0A" => "\x0D\x0A", |
4267
|
|
|
|
|
|
|
"\x0D" => "\x0D", |
4268
|
|
|
|
|
|
|
"\x0A" => "\x0A", |
4269
|
|
|
|
|
|
|
)}; |
4270
|
|
|
|
|
|
|
|
4271
|
|
|
|
|
|
|
## Map any recognized _LineEnding value to its logical form |
4272
|
1
|
|
|
|
|
781
|
my $LineEnding_Symbols = |
4273
|
|
|
|
|
|
|
{( |
4274
|
|
|
|
|
|
|
"\x0D\x0A" => "dos", |
4275
|
|
|
|
|
|
|
"\x0D" => "mac", |
4276
|
|
|
|
|
|
|
"\x0A" => "unix", |
4277
|
|
|
|
|
|
|
dos => "dos", |
4278
|
|
|
|
|
|
|
mac => "mac", |
4279
|
|
|
|
|
|
|
unix => "unix", |
4280
|
|
|
|
|
|
|
)}; |
4281
|
|
|
|
|
|
|
|
4282
|
|
|
|
|
|
|
sub lineending_strings ## Class or instance method |
4283
|
|
|
|
|
|
|
{ |
4284
|
33
|
|
|
33
|
0
|
311
|
return({%$LineEnding_Strings}); ## Copy of above private hash. |
4285
|
|
|
|
|
|
|
} |
4286
|
|
|
|
|
|
|
|
4287
|
|
|
|
|
|
|
sub lineending_symbols ## Class or instance method |
4288
|
|
|
|
|
|
|
{ |
4289
|
25
|
|
|
25
|
0
|
1388
|
return({%$LineEnding_Symbols}); ## Copy of above private hash. |
4290
|
|
|
|
|
|
|
} |
4291
|
|
|
|
|
|
|
} |
4292
|
|
|
|
|
|
|
|
4293
|
|
|
|
|
|
|
sub lineending() |
4294
|
|
|
|
|
|
|
{ |
4295
|
3
|
|
|
3
|
0
|
12
|
my $this = shift; |
4296
|
3
|
|
|
|
|
3
|
my ($LineEnding) = @_; |
4297
|
|
|
|
|
|
|
|
4298
|
|
|
|
|
|
|
## Set if specified. Try to convert to symbolic form if possible. |
4299
|
3
|
50
|
0
|
|
|
7
|
$this->{_LineEnding} = $this->lineending_symbol($LineEnding) || $LineEnding if $LineEnding; |
4300
|
|
|
|
|
|
|
|
4301
|
|
|
|
|
|
|
## Otherwise / either case, get whatever value we have and return it.... |
4302
|
3
|
|
|
|
|
4
|
my $LineEnding = $this->{_LineEnding}; |
4303
|
|
|
|
|
|
|
|
4304
|
3
|
|
|
|
|
7
|
return($LineEnding); |
4305
|
|
|
|
|
|
|
} |
4306
|
|
|
|
|
|
|
|
4307
|
|
|
|
|
|
|
sub lineending_symbol |
4308
|
|
|
|
|
|
|
{ |
4309
|
22
|
|
|
22
|
0
|
34
|
my $this = shift; |
4310
|
22
|
|
|
|
|
30
|
my ($LineEnding) = @_; |
4311
|
|
|
|
|
|
|
|
4312
|
22
|
|
66
|
|
|
57
|
$LineEnding ||= $this->{_LineEnding}; |
4313
|
|
|
|
|
|
|
|
4314
|
22
|
|
33
|
|
|
30
|
return($ {$this->lineending_symbols()}{$LineEnding} || $LineEnding); |
4315
|
|
|
|
|
|
|
} |
4316
|
|
|
|
|
|
|
|
4317
|
|
|
|
|
|
|
|
4318
|
|
|
|
|
|
|
sub lineending_string |
4319
|
|
|
|
|
|
|
{ |
4320
|
30
|
|
|
30
|
0
|
50
|
my $this = shift; |
4321
|
30
|
|
|
|
|
42
|
my ($LineEnding) = @_; |
4322
|
|
|
|
|
|
|
|
4323
|
30
|
|
100
|
|
|
125
|
$LineEnding ||= $this->{_LineEnding}; |
4324
|
|
|
|
|
|
|
|
4325
|
30
|
|
66
|
|
|
33
|
return($ {$this->lineending_strings()}{$LineEnding} || $LineEnding); |
4326
|
|
|
|
|
|
|
} |
4327
|
|
|
|
|
|
|
|
4328
|
|
|
|
|
|
|
=pod |
4329
|
|
|
|
|
|
|
|
4330
|
|
|
|
|
|
|
=head1 AUTOMATIC CACHEING |
4331
|
|
|
|
|
|
|
|
4332
|
|
|
|
|
|
|
By default, Data::CTable makes cached versions of files it reads so it |
4333
|
|
|
|
|
|
|
can read them much more quickly the next time. Optionally, it can |
4334
|
|
|
|
|
|
|
also cache any file it writes for quicker re-reading later. |
4335
|
|
|
|
|
|
|
|
4336
|
|
|
|
|
|
|
On Unix systems, cache files are always created with 0666 |
4337
|
|
|
|
|
|
|
(world-write) permissions for easy cleanup. |
4338
|
|
|
|
|
|
|
|
4339
|
|
|
|
|
|
|
When reading files, Data::CTable checks the _CacheOnRead parameter. |
4340
|
|
|
|
|
|
|
If that parameter is true, which it is by default, the module tries to |
4341
|
|
|
|
|
|
|
find an up-to-date cache file to read instead of the original. |
4342
|
|
|
|
|
|
|
Reading a cache file can be 10x faster than reading and parsing the |
4343
|
|
|
|
|
|
|
original text file. |
4344
|
|
|
|
|
|
|
|
4345
|
|
|
|
|
|
|
In order to look for the cache file, it must first calculate the path |
4346
|
|
|
|
|
|
|
where the cache file should be located, based on the _FileName of the |
4347
|
|
|
|
|
|
|
file to be read. |
4348
|
|
|
|
|
|
|
|
4349
|
|
|
|
|
|
|
The path of the cache file is calculated as follows: |
4350
|
|
|
|
|
|
|
|
4351
|
|
|
|
|
|
|
If the _CacheSubDir parameter is a RELATIVE PATH, then it is appended |
4352
|
|
|
|
|
|
|
to the directory component of _FileName to arrive at the directory to |
4353
|
|
|
|
|
|
|
use to store the cache file. If it is an ABSOLUTE PATH, then |
4354
|
|
|
|
|
|
|
_CacheSubDir is used by itself. (The trailing path separator is |
4355
|
|
|
|
|
|
|
optional and an appropriate one will be added by Data::CTable if it is |
4356
|
|
|
|
|
|
|
missing.) |
4357
|
|
|
|
|
|
|
|
4358
|
|
|
|
|
|
|
The file name of the cache file is calculated as follows: |
4359
|
|
|
|
|
|
|
|
4360
|
|
|
|
|
|
|
If the _CacheExtension parameter is specified, it is appended to the |
4361
|
|
|
|
|
|
|
base file name component from the _FileName parameter. If you want |
4362
|
|
|
|
|
|
|
the cached file name to be the same as the name of the original file, |
4363
|
|
|
|
|
|
|
you can set _CacheExtension to "", which is not recommended. |
4364
|
|
|
|
|
|
|
|
4365
|
|
|
|
|
|
|
Then, the cache path and cache file name are joined to arrive at the |
4366
|
|
|
|
|
|
|
name of the cache file. If both _CacheSubDir and _CacheExtension were |
4367
|
|
|
|
|
|
|
empty, then the cache file path will be the same as the _FileName, and |
4368
|
|
|
|
|
|
|
Data::CTable will refuse to either read or write a cache file, so |
4369
|
|
|
|
|
|
|
setting these fields both to empty is equivalent to setting |
4370
|
|
|
|
|
|
|
_CacheOnRead to false. |
4371
|
|
|
|
|
|
|
|
4372
|
|
|
|
|
|
|
The cache file contains a highly-efficient representation of all the |
4373
|
|
|
|
|
|
|
following data that would otherwise have to be determined by reading |
4374
|
|
|
|
|
|
|
and parsing the entire text file: |
4375
|
|
|
|
|
|
|
|
4376
|
|
|
|
|
|
|
- All the data columns (field values) |
4377
|
|
|
|
|
|
|
- _FieldList: The list of fields, in order |
4378
|
|
|
|
|
|
|
- _HeaderRow: Whether a header row is / should be present |
4379
|
|
|
|
|
|
|
- _LineEnding: The line ending setting |
4380
|
|
|
|
|
|
|
- _FDelimiter: The field delimiter setting |
4381
|
|
|
|
|
|
|
|
4382
|
|
|
|
|
|
|
If found prior to a read(), AND, the date of the cache file is LATER |
4383
|
|
|
|
|
|
|
than the date of the original file, the cache file is used instead. |
4384
|
|
|
|
|
|
|
(If the date is EARLIER, then the cache file is ignored because it can |
4385
|
|
|
|
|
|
|
be presumed that the data inside the text file is newer.) |
4386
|
|
|
|
|
|
|
|
4387
|
|
|
|
|
|
|
If cacheing is ON, then after successfully reading the text file |
4388
|
|
|
|
|
|
|
(either because there was no cache file yet or the cache file was out |
4389
|
|
|
|
|
|
|
of date or corrupted or otherwise unusable), read() will then try to |
4390
|
|
|
|
|
|
|
create a cache file. This, of course, takes some time, but the time |
4391
|
|
|
|
|
|
|
taken will be more than made up in the speedup of the next read() |
4392
|
|
|
|
|
|
|
operation on the same file. |
4393
|
|
|
|
|
|
|
|
4394
|
|
|
|
|
|
|
If creating the cache file fails (for example, because file |
4395
|
|
|
|
|
|
|
permissions didn't allow the cache directory to be created or the |
4396
|
|
|
|
|
|
|
cache file to be written), read() generates a warning explaining why |
4397
|
|
|
|
|
|
|
cacheing failed, but the read() operation itself still succeeds. |
4398
|
|
|
|
|
|
|
|
4399
|
|
|
|
|
|
|
No parameters in the object itself are set or modified to indicate the |
4400
|
|
|
|
|
|
|
success or failure of writing the cache file. |
4401
|
|
|
|
|
|
|
|
4402
|
|
|
|
|
|
|
Similarly, there is no way to tell whether a successful read() |
4403
|
|
|
|
|
|
|
operation read from the cache or from the original data file. If you |
4404
|
|
|
|
|
|
|
want to be SURE the reading was from the data file, either turn off |
4405
|
|
|
|
|
|
|
_CacheOnRead, or call the read_file() method instead of read(). |
4406
|
|
|
|
|
|
|
|
4407
|
|
|
|
|
|
|
NOTE: because the name of the cache file to be used is calculated just |
4408
|
|
|
|
|
|
|
before the read() is actually done, the cache file can only be found |
4409
|
|
|
|
|
|
|
if the _CacheSubDir and _CacheExtension are the same as they were when |
4410
|
|
|
|
|
|
|
the cache was last created. If you change these parameters after |
4411
|
|
|
|
|
|
|
having previously cached a file, the older caches could be "orphaned" |
4412
|
|
|
|
|
|
|
and just sit around wasting disk space. |
4413
|
|
|
|
|
|
|
|
4414
|
|
|
|
|
|
|
=head2 Cacheing on write() |
4415
|
|
|
|
|
|
|
|
4416
|
|
|
|
|
|
|
You may optionally set _CacheOnWrite (default = false) to true. If |
4417
|
|
|
|
|
|
|
done, then a cache file will be saved for files written using the |
4418
|
|
|
|
|
|
|
write() command. Read about write() below for more about why you |
4419
|
|
|
|
|
|
|
might want to do this. |
4420
|
|
|
|
|
|
|
|
4421
|
|
|
|
|
|
|
=head1 AUTOMATIC DIRECTORY CREATION |
4422
|
|
|
|
|
|
|
|
4423
|
|
|
|
|
|
|
When Data::CTable needs to write a file, (a cache file or a data |
4424
|
|
|
|
|
|
|
file), it automatically tries to create any directories or |
4425
|
|
|
|
|
|
|
subdirectories you specify in the _FileName or _CacheSubDir |
4426
|
|
|
|
|
|
|
parameters. |
4427
|
|
|
|
|
|
|
|
4428
|
|
|
|
|
|
|
If it fails while writing a data file, write() will fail (and you will |
4429
|
|
|
|
|
|
|
be warned). If it fails to create a directory while writing a cache |
4430
|
|
|
|
|
|
|
file, a warning will be issued, but the overall read() or write() |
4431
|
|
|
|
|
|
|
operation will still return a result indicating success. |
4432
|
|
|
|
|
|
|
|
4433
|
|
|
|
|
|
|
Any directories created will have the permissions 0777 (world-write) |
4434
|
|
|
|
|
|
|
for easy cleanup. |
4435
|
|
|
|
|
|
|
|
4436
|
|
|
|
|
|
|
Generally, the only directory the module will have to create is a |
4437
|
|
|
|
|
|
|
subdirectory to hold cache files. |
4438
|
|
|
|
|
|
|
|
4439
|
|
|
|
|
|
|
However, since other directories could be created, be sure to exercise |
4440
|
|
|
|
|
|
|
caution when allowing the module to create any directories for you on |
4441
|
|
|
|
|
|
|
any system where security might be an issue. |
4442
|
|
|
|
|
|
|
|
4443
|
|
|
|
|
|
|
Also, if the 0666 permissions on the cache files themselves are too |
4444
|
|
|
|
|
|
|
liberal, you can either 1) turn off cacheing, or 2) call the |
4445
|
|
|
|
|
|
|
prep_cache_file() method to get the name of the cache file that would |
4446
|
|
|
|
|
|
|
have been written, if any, and then restrict its permissions: |
4447
|
|
|
|
|
|
|
|
4448
|
|
|
|
|
|
|
chmod (0600, $this->prep_cache_file()); |
4449
|
|
|
|
|
|
|
|
4450
|
|
|
|
|
|
|
=head1 READING DATA FILES |
4451
|
|
|
|
|
|
|
|
4452
|
|
|
|
|
|
|
## Replacing data in table with data read from a file |
4453
|
|
|
|
|
|
|
|
4454
|
|
|
|
|
|
|
$t->read($Path) ## Simple calling convention |
4455
|
|
|
|
|
|
|
|
4456
|
|
|
|
|
|
|
$t->read( ## Named-parameter convention |
4457
|
|
|
|
|
|
|
|
4458
|
|
|
|
|
|
|
## Params that override params in the object if supplied... |
4459
|
|
|
|
|
|
|
|
4460
|
|
|
|
|
|
|
_FileName => $Path, ## Full or partial path of file to read |
4461
|
|
|
|
|
|
|
|
4462
|
|
|
|
|
|
|
_FieldList => [...], ## Fields to read; others to be discarded |
4463
|
|
|
|
|
|
|
|
4464
|
|
|
|
|
|
|
_HeaderRow => 0, ## No header row (_FieldList required!) |
4465
|
|
|
|
|
|
|
|
4466
|
|
|
|
|
|
|
_LineEnding => undef, ## Text line ending (undef means guess) |
4467
|
|
|
|
|
|
|
_FDelimiter => undef, ## Field delimiter (undef means guess) |
4468
|
|
|
|
|
|
|
|
4469
|
|
|
|
|
|
|
_ReturnMap => 1, ## Whether to decode internal returns |
4470
|
|
|
|
|
|
|
_ReturnEncoding=>"\x0B", ## How to decode returns. |
4471
|
|
|
|
|
|
|
_MacRomanMap => undef, ## Whether/when to read Mac char set |
4472
|
|
|
|
|
|
|
|
4473
|
|
|
|
|
|
|
_CacheOnRead => 0, ## Enable/disable cacheing behavior |
4474
|
|
|
|
|
|
|
_CacheExtension=> ".x", ## Extension to add to cache file name |
4475
|
|
|
|
|
|
|
_CacheSubDir => "", ## (Sub-)dir, if any, for cache files |
4476
|
|
|
|
|
|
|
|
4477
|
|
|
|
|
|
|
## Params specific to the read()/write() methods... |
4478
|
|
|
|
|
|
|
|
4479
|
|
|
|
|
|
|
_MaxRecords => 200, ## Limit on how many records to read |
4480
|
|
|
|
|
|
|
) |
4481
|
|
|
|
|
|
|
|
4482
|
|
|
|
|
|
|
$t->read_file() ## Internal: same as read(); ignores cacheing |
4483
|
|
|
|
|
|
|
|
4484
|
|
|
|
|
|
|
read() opens a Merge, CSV, or Tab-delimited file and reads in all or |
4485
|
|
|
|
|
|
|
some fields, and all or some records, REPLACING ANY EXISTING DATA in |
4486
|
|
|
|
|
|
|
the CTable object. |
4487
|
|
|
|
|
|
|
|
4488
|
|
|
|
|
|
|
Using the simple calling convention, just pass it a file name. All |
4489
|
|
|
|
|
|
|
other parameters will come from the object (or will be defaulted if |
4490
|
|
|
|
|
|
|
absent). To specify additional parameters or override any parameters |
4491
|
|
|
|
|
|
|
in the object while reading, use the named-parameter calling |
4492
|
|
|
|
|
|
|
convention. |
4493
|
|
|
|
|
|
|
|
4494
|
|
|
|
|
|
|
See the full PARAMETER LIST, above, or read on for some extra details: |
4495
|
|
|
|
|
|
|
|
4496
|
|
|
|
|
|
|
_ReturnMap controls whether return characters encoded as ASCII 11 |
4497
|
|
|
|
|
|
|
should be mapped back to real newlines (C<"\n">) when read into memory. |
4498
|
|
|
|
|
|
|
If false, they are left as ASCII 11 characters. (default is "true") |
4499
|
|
|
|
|
|
|
|
4500
|
|
|
|
|
|
|
_ReturnEncoding controls the character that returns are encoded as, if |
4501
|
|
|
|
|
|
|
different from ASCII 11. |
4502
|
|
|
|
|
|
|
|
4503
|
|
|
|
|
|
|
_FieldList is an array (reference) listing the names of fields to |
4504
|
|
|
|
|
|
|
import, in order (and will become the object's _FieldList upon |
4505
|
|
|
|
|
|
|
successful completion of the read() operation). If not provided and |
4506
|
|
|
|
|
|
|
not found in the object, or empty, then all fields found in the file |
4507
|
|
|
|
|
|
|
are imported and the object's field list will be set from those found |
4508
|
|
|
|
|
|
|
in the file, in the order found there. If _HeaderRow is false, then |
4509
|
|
|
|
|
|
|
this parameter is required (either in the object or as a formal |
4510
|
|
|
|
|
|
|
parameter) and is assumed to give the correct names for the fields as |
4511
|
|
|
|
|
|
|
they actually occur in the file. If _HeaderRow is true and _FieldList |
4512
|
|
|
|
|
|
|
is provided, then _FieldList specifies the (sub-)set of fields to be |
4513
|
|
|
|
|
|
|
read from the file and others will be ignored. |
4514
|
|
|
|
|
|
|
|
4515
|
|
|
|
|
|
|
_HeaderRow, which defaults to true, if set to false, tells read() to |
4516
|
|
|
|
|
|
|
not expect a header row showing the field names in the file. Instead, |
4517
|
|
|
|
|
|
|
it assumes that the _FieldList gives those (and _FieldList must |
4518
|
|
|
|
|
|
|
therefore be specified either as a parameter or an existing parameter |
4519
|
|
|
|
|
|
|
in the object). |
4520
|
|
|
|
|
|
|
|
4521
|
|
|
|
|
|
|
_MaxRecords (optional) is an upper limit on the number of fields to |
4522
|
|
|
|
|
|
|
import. If not specified, or zero, or undef, then there is no limit; |
4523
|
|
|
|
|
|
|
all records will be imported or memory will be exhausted. |
4524
|
|
|
|
|
|
|
|
4525
|
|
|
|
|
|
|
read() returns a Boolean "success" code. |
4526
|
|
|
|
|
|
|
|
4527
|
|
|
|
|
|
|
If read() returns false, then it will also have set the _ErrorMsg |
4528
|
|
|
|
|
|
|
parameter in the object. It may or may not have partially altered |
4529
|
|
|
|
|
|
|
data in the object if an error is encountered. |
4530
|
|
|
|
|
|
|
|
4531
|
|
|
|
|
|
|
After a successful read: |
4532
|
|
|
|
|
|
|
|
4533
|
|
|
|
|
|
|
fieldlist() (the object's _FieldList parameter) tells which |
4534
|
|
|
|
|
|
|
fields were actually read, in what order. It may omit any fields |
4535
|
|
|
|
|
|
|
requested in _FieldList that were not actually found in the file for |
4536
|
|
|
|
|
|
|
whatever reason. |
4537
|
|
|
|
|
|
|
|
4538
|
|
|
|
|
|
|
length() tells how many fields were read. |
4539
|
|
|
|
|
|
|
|
4540
|
|
|
|
|
|
|
The selection() is reset to no selection (all selected / unsorted) |
4541
|
|
|
|
|
|
|
|
4542
|
|
|
|
|
|
|
The object's _FileName parameter contains the path to the file |
4543
|
|
|
|
|
|
|
that was read. If the _FileName you specified did not have a path, |
4544
|
|
|
|
|
|
|
then _FileName will be prepended with a path component indicating |
4545
|
|
|
|
|
|
|
"current directory" (e.g. "./" on Unix). |
4546
|
|
|
|
|
|
|
|
4547
|
|
|
|
|
|
|
_FDelimiter will contain the actual delimiter character that was |
4548
|
|
|
|
|
|
|
used to read the file (either tab or comma if the delimiter was |
4549
|
|
|
|
|
|
|
guessed, or whatever delimiter you specified). |
4550
|
|
|
|
|
|
|
|
4551
|
|
|
|
|
|
|
_LineEnding will contain the actual line-ending setting used to |
4552
|
|
|
|
|
|
|
read the file. This will be either "mac" ("\x0D"), "unix" ("\x0D"), |
4553
|
|
|
|
|
|
|
or "dos" ("\x0D\x0A") if the line endings were guessed by read(). |
4554
|
|
|
|
|
|
|
Otherwise it will be whatever _LineEnding you specified. |
4555
|
|
|
|
|
|
|
|
4556
|
|
|
|
|
|
|
|
4557
|
|
|
|
|
|
|
=head1 FILE FORMAT NOTES |
4558
|
|
|
|
|
|
|
|
4559
|
|
|
|
|
|
|
As mentioned, read() allows the following flexibilities in reading |
4560
|
|
|
|
|
|
|
text-based tabular data files: |
4561
|
|
|
|
|
|
|
|
4562
|
|
|
|
|
|
|
You may specify the line endings (record delimiters), or it can |
4563
|
|
|
|
|
|
|
guess them (mac, unix, dos are supported). |
4564
|
|
|
|
|
|
|
|
4565
|
|
|
|
|
|
|
You may specify the field delimiters, or it can guess them (tab |
4566
|
|
|
|
|
|
|
and comma are supported). |
4567
|
|
|
|
|
|
|
|
4568
|
|
|
|
|
|
|
It can get field names from a header row, or, if there is no |
4569
|
|
|
|
|
|
|
header row, you can tell it the field names, in order. |
4570
|
|
|
|
|
|
|
|
4571
|
|
|
|
|
|
|
You can tell it whether or not to decode embedded returns in |
4572
|
|
|
|
|
|
|
data fields, and if so, which character they were encoded as. |
4573
|
|
|
|
|
|
|
|
4574
|
|
|
|
|
|
|
Beyond supporting the above flexible options, read() makes the |
4575
|
|
|
|
|
|
|
following non-flexible assumptions: |
4576
|
|
|
|
|
|
|
|
4577
|
|
|
|
|
|
|
Fields must NOT contain unencoded returns -- that is: whatever |
4578
|
|
|
|
|
|
|
character sequence is specified for _LineEnding will NEVER occur |
4579
|
|
|
|
|
|
|
inside a field in the text file; in addition, the current platform's |
4580
|
|
|
|
|
|
|
definition of C<"\n"> will NEVER occur; these characters if present in |
4581
|
|
|
|
|
|
|
field data, MUST have been encoded to some safe character string |
4582
|
|
|
|
|
|
|
before the file was created. |
4583
|
|
|
|
|
|
|
|
4584
|
|
|
|
|
|
|
Each field may OPTIONALLY be surrounded with double-quote marks. |
4585
|
|
|
|
|
|
|
However, if the field data itself contains either a double-quote |
4586
|
|
|
|
|
|
|
character (C<">) or the current file's field delimiter (such as tab or |
4587
|
|
|
|
|
|
|
comma), then the field MUST be surrounded with double-quotes. |
4588
|
|
|
|
|
|
|
(Currently, all data written by Data::CTable have all field values |
4589
|
|
|
|
|
|
|
surrounded by double-quotes, but a more selective policy may be used |
4590
|
|
|
|
|
|
|
in the future.) |
4591
|
|
|
|
|
|
|
|
4592
|
|
|
|
|
|
|
If a field contains a double-quote character, then each double-quote |
4593
|
|
|
|
|
|
|
character in the field must be encoded as C<""> -- i.e. each C<"> in the |
4594
|
|
|
|
|
|
|
original data becomes C<""> in the text file. |
4595
|
|
|
|
|
|
|
|
4596
|
|
|
|
|
|
|
Data files may not mix line-ending types or field delimiter types. |
4597
|
|
|
|
|
|
|
Once determined, the same endings and delimiters will be used to read |
4598
|
|
|
|
|
|
|
the entire file. |
4599
|
|
|
|
|
|
|
|
4600
|
|
|
|
|
|
|
The fields recognized on each line will either be determined by |
4601
|
|
|
|
|
|
|
the header row or the _FieldList provided by the caller. Any extra |
4602
|
|
|
|
|
|
|
fields on any given line will be ignored. Any missing fields will be |
4603
|
|
|
|
|
|
|
treated as undef/empty. |
4604
|
|
|
|
|
|
|
|
4605
|
|
|
|
|
|
|
If you are having trouble reading a delimited text file, check that |
4606
|
|
|
|
|
|
|
all data in the file obeys these assumptions. |
4607
|
|
|
|
|
|
|
|
4608
|
|
|
|
|
|
|
=cut |
4609
|
|
|
|
|
|
|
|
4610
|
|
|
|
|
|
|
sub read ## Read, cacheing if possible |
4611
|
|
|
|
|
|
|
{ |
4612
|
62
|
|
|
62
|
1
|
99
|
my $this = shift; |
4613
|
62
|
|
|
|
|
191
|
return($this->read_file_or_cache(@_)); |
4614
|
|
|
|
|
|
|
} |
4615
|
|
|
|
|
|
|
|
4616
|
|
|
|
|
|
|
sub read_file ## Read, ignoring cacheing |
4617
|
|
|
|
|
|
|
{ |
4618
|
19
|
|
|
19
|
0
|
32
|
my $this = shift; |
4619
|
19
|
50
|
|
|
|
82
|
my $Params = (@_ == 1 ? {_FileName => $_[0]} : {@_}); |
4620
|
|
|
|
|
|
|
|
4621
|
171
|
|
|
|
|
312
|
my($FileName, $FieldList, $MaxRecords, $LineEnding, $FDelimiter, $ReturnMap, $ReturnEncoding, $MacRomanMap, $HeaderRow) |
4622
|
19
|
|
|
|
|
54
|
= map {$this->getparam($Params, $_)} |
4623
|
|
|
|
|
|
|
qw(_FileName _FieldList _MaxRecords _LineEnding _FDelimiter _ReturnMap _ReturnEncoding _MacRomanMap _HeaderRow); |
4624
|
|
|
|
|
|
|
|
4625
|
19
|
|
|
|
|
34
|
my $Success; |
4626
|
|
|
|
|
|
|
|
4627
|
|
|
|
|
|
|
## Default error message is none. |
4628
|
19
|
|
|
|
|
38
|
$this->{_ErrorMsg} = ""; |
4629
|
|
|
|
|
|
|
|
4630
|
|
|
|
|
|
|
## Default for HeaderRow is true. |
4631
|
19
|
50
|
|
|
|
45
|
$HeaderRow = 1 unless defined($HeaderRow); |
4632
|
|
|
|
|
|
|
|
4633
|
|
|
|
|
|
|
## Default for ReturnEncoding is "\x0B" (control-K; ASCII 11) |
4634
|
19
|
50
|
|
|
|
51
|
$ReturnEncoding = "\x0B" unless length($ReturnEncoding); |
4635
|
|
|
|
|
|
|
|
4636
|
|
|
|
|
|
|
## Default for ReturnMap is true. |
4637
|
19
|
50
|
|
|
|
42
|
$ReturnMap = 1 unless defined($ReturnMap); |
4638
|
|
|
|
|
|
|
|
4639
|
|
|
|
|
|
|
## Default for MacRomanMap is undef ("Auto"); |
4640
|
19
|
100
|
|
|
|
49
|
$MacRomanMap = undef unless defined($MacRomanMap); |
4641
|
|
|
|
|
|
|
|
4642
|
|
|
|
|
|
|
## Default for MaxRecords is 0 (meaning import all records) |
4643
|
19
|
50
|
|
|
|
69
|
$MaxRecords = 0 unless (int($MaxRecords) == $MaxRecords); |
4644
|
|
|
|
|
|
|
|
4645
|
|
|
|
|
|
|
## Precompile a regex for the return encoding since we'll call it often (on each field!) later. |
4646
|
19
|
|
|
|
|
101
|
my $RetRegex = qr/$ReturnEncoding/; |
4647
|
|
|
|
|
|
|
|
4648
|
19
|
|
|
|
|
70
|
$this->progress("Reading $FileName..."); |
4649
|
|
|
|
|
|
|
|
4650
|
|
|
|
|
|
|
## Open the data file. |
4651
|
0
|
|
|
|
|
0
|
my $File = IO::File->new("<$FileName") or |
4652
|
19
|
50
|
|
|
|
135
|
do {$this->{_ErrorMsg} = "Failed to open $FileName: $!"; goto done}; |
|
0
|
|
|
|
|
0
|
|
4653
|
|
|
|
|
|
|
|
4654
|
|
|
|
|
|
|
## Get its total file size (useful for estimating table size later on). |
4655
|
19
|
50
|
|
|
|
1437
|
my $FileSize = (stat($File))[7] or |
4656
|
|
|
|
|
|
|
$this->{_ErrorMsg} = "File $FileName contains no data.", goto done; |
4657
|
|
|
|
|
|
|
|
4658
|
|
|
|
|
|
|
## Convert from optional "dos", "mac", "unix" symbolic values. |
4659
|
19
|
|
|
|
|
68
|
$LineEnding = $this->lineending_string($LineEnding); |
4660
|
|
|
|
|
|
|
|
4661
|
|
|
|
|
|
|
## Default for LineEnding is found by inspecting data in the file. |
4662
|
19
|
50
|
33
|
|
|
114
|
$LineEnding ||= guess_endings($File) or |
4663
|
|
|
|
|
|
|
$this->{_ErrorMsg} = "Could not find any line endings in the file $FileName.", goto done; |
4664
|
|
|
|
|
|
|
|
4665
|
|
|
|
|
|
|
## DoMacMapping is the actual setting for auto charset mapping |
4666
|
19
|
|
66
|
|
|
110
|
my $DoMacMapping = |
4667
|
|
|
|
|
|
|
((!defined($MacRomanMap) && ($LineEnding eq "\x0D")) || ## Auto |
4668
|
|
|
|
|
|
|
($MacRomanMap)); ## On |
4669
|
|
|
|
|
|
|
|
4670
|
19
|
100
|
|
|
|
146
|
$this->progress("Will convert upper-ascii characters if any, from Mac Roman to ISO 8859-1.") if $DoMacMapping; |
4671
|
|
|
|
|
|
|
|
4672
|
|
|
|
|
|
|
## FieldList is usable is it is a list and has at least one entry. |
4673
|
19
|
|
66
|
|
|
73
|
my $FieldListValid = ((ref($FieldList) eq 'ARRAY') && @$FieldList); |
4674
|
|
|
|
|
|
|
|
4675
|
|
|
|
|
|
|
## Set <$File> to use the line ending sequence we no known we are looking for. |
4676
|
19
|
|
|
|
|
91
|
local $/ = $LineEnding; |
4677
|
|
|
|
|
|
|
|
4678
|
|
|
|
|
|
|
## We use $_ explicitly, so must localize. |
4679
|
19
|
|
|
|
|
27
|
local $_; |
4680
|
|
|
|
|
|
|
|
4681
|
19
|
|
|
|
|
29
|
my $IncomingFields; |
4682
|
|
|
|
|
|
|
|
4683
|
19
|
50
|
|
|
|
48
|
if ($HeaderRow) |
4684
|
|
|
|
|
|
|
{ |
4685
|
|
|
|
|
|
|
## Get the list of fields available in the file (first line of file). |
4686
|
|
|
|
|
|
|
|
4687
|
19
|
50
|
|
|
|
190
|
$_ = <$File> or |
4688
|
|
|
|
|
|
|
$this->{_ErrorMsg} = "Could not find a first line with field names in $FileName.", goto done; |
4689
|
|
|
|
|
|
|
|
4690
|
|
|
|
|
|
|
## Try to guess file delimiter from the header row if not yet specified. |
4691
|
19
|
50
|
33
|
|
|
93
|
$FDelimiter ||= guess_delimiter($_) or |
4692
|
|
|
|
|
|
|
$this->{_ErrorMsg} = "Could not find comma or tab delimiters in $FileName.", goto done; |
4693
|
|
|
|
|
|
|
|
4694
|
|
|
|
|
|
|
## Maybe convert entire line (all records) Mac to ISO before splitting. |
4695
|
19
|
100
|
|
|
|
78
|
&MacRomanToISORoman8859_1(\ $_) if $DoMacMapping; |
4696
|
|
|
|
|
|
|
|
4697
|
19
|
|
|
|
|
48
|
chomp; |
4698
|
|
|
|
|
|
|
|
4699
|
19
|
|
|
|
|
42
|
s/^\"//; s/\"$//; ## remove possible leading, trailing quotes surrounding header row (rare) |
|
19
|
|
|
|
|
31
|
|
4700
|
|
|
|
|
|
|
|
4701
|
|
|
|
|
|
|
## Split header row into field names, removing optional "" around each at the same time. |
4702
|
19
|
|
|
|
|
306
|
$IncomingFields = [split(/\"?$FDelimiter\"?/, $_)]; |
4703
|
|
|
|
|
|
|
|
4704
|
|
|
|
|
|
|
} |
4705
|
|
|
|
|
|
|
else |
4706
|
|
|
|
|
|
|
{ |
4707
|
|
|
|
|
|
|
## Otherwise, require that the caller specifies it in _FieldList |
4708
|
|
|
|
|
|
|
|
4709
|
0
|
0
|
|
|
|
0
|
$this->{_ErrorMsg} = "Must specify a _FieldList if _HeaderRow says no header row is present.", goto done |
4710
|
|
|
|
|
|
|
unless $FieldListValid; |
4711
|
|
|
|
|
|
|
|
4712
|
0
|
|
|
|
|
0
|
$IncomingFields = [@$FieldList]; |
4713
|
|
|
|
|
|
|
} |
4714
|
|
|
|
|
|
|
|
4715
|
|
|
|
|
|
|
## Remove any leading underscores in the names of the incoming |
4716
|
|
|
|
|
|
|
## fields (not allowed because such field names are reserved for |
4717
|
|
|
|
|
|
|
## other object data). Note: this could result in |
4718
|
|
|
|
|
|
|
## duplicate/overwritten field names that were otherwise |
4719
|
|
|
|
|
|
|
## apparently unique in the incoming data file. |
4720
|
|
|
|
|
|
|
|
4721
|
19
|
|
|
|
|
55
|
$IncomingFields = [map {(/^_*(.*)/)[0]} @$IncomingFields]; |
|
79
|
|
|
|
|
259
|
|
4722
|
|
|
|
|
|
|
|
4723
|
|
|
|
|
|
|
## Make a hash that can be used to map these fields' names to their numbers. |
4724
|
1
|
|
|
1
|
|
725
|
my $IncomingFieldNameToNum = {}; @$IncomingFieldNameToNum{@$IncomingFields} = ($[ .. $#$IncomingFields); |
|
1
|
|
|
|
|
409
|
|
|
1
|
|
|
|
|
2533
|
|
|
19
|
|
|
|
|
59
|
|
|
19
|
|
|
|
|
188
|
|
4725
|
|
|
|
|
|
|
|
4726
|
|
|
|
|
|
|
## Make a list of the fields we'll be importing (by taking the |
4727
|
|
|
|
|
|
|
## list the caller requested, and paring it down to only those |
4728
|
|
|
|
|
|
|
## fields that are actually available in the table.) |
4729
|
|
|
|
|
|
|
|
4730
|
71
|
|
|
|
|
172
|
my $FieldsToGet = |
4731
|
19
|
100
|
|
|
|
61
|
[grep {exists($IncomingFieldNameToNum->{$_})} |
4732
|
|
|
|
|
|
|
($FieldListValid ? @$FieldList : @$IncomingFields)]; |
4733
|
|
|
|
|
|
|
|
4734
|
|
|
|
|
|
|
## Make a note of whether we're getting a subset of available |
4735
|
|
|
|
|
|
|
## fields because the caller requested such. If we are, we'll add |
4736
|
|
|
|
|
|
|
## a _Subset => 1 marker to the data for use later in ensuring the |
4737
|
|
|
|
|
|
|
## cache is OK. |
4738
|
|
|
|
|
|
|
|
4739
|
19
|
|
100
|
|
|
59
|
my $GettingSubset = ($FieldListValid && ("@{[sort @$IncomingFields]}" ne |
4740
|
|
|
|
|
|
|
"@{[sort @$FieldList ]}")); |
4741
|
|
|
|
|
|
|
|
4742
|
|
|
|
|
|
|
## Make an array of the incoming indices of these fields. |
4743
|
|
|
|
|
|
|
|
4744
|
|
|
|
|
|
|
## Allocate a list of empty arrays into which we can import the |
4745
|
|
|
|
|
|
|
## data. Initially they'll each have 100 empty slots for data; |
4746
|
|
|
|
|
|
|
## after we have imported 100 records, we'll re-consider the size |
4747
|
|
|
|
|
|
|
## estimate. When we're all done, we'll prune them back. |
4748
|
|
|
|
|
|
|
|
4749
|
19
|
|
|
|
|
69
|
my $FieldNums = [@$IncomingFieldNameToNum{@$FieldsToGet}]; |
4750
|
19
|
|
|
|
|
29
|
my $FieldVectors = []; foreach (@$FieldNums) {$#{$FieldVectors->[$_] = []} = 100}; |
|
19
|
|
|
|
|
95
|
|
|
71
|
|
|
|
|
78
|
|
|
71
|
|
|
|
|
431
|
|
4751
|
|
|
|
|
|
|
|
4752
|
|
|
|
|
|
|
## We want to be cool and support any embedded NULL (ascii zero) |
4753
|
|
|
|
|
|
|
## characters should they exist in the data, even though we are |
4754
|
|
|
|
|
|
|
## going to use NULL chars to encode embedded delimiters before we |
4755
|
|
|
|
|
|
|
## split.... |
4756
|
|
|
|
|
|
|
|
4757
|
|
|
|
|
|
|
## First we create a sufficiently obscure placeholder for any |
4758
|
|
|
|
|
|
|
## ascii zero characters in the input text (a rare occurrence |
4759
|
|
|
|
|
|
|
## anyway). |
4760
|
|
|
|
|
|
|
|
4761
|
19
|
|
|
|
|
78
|
my $ZeroMarker = "\001ASCII_ZERO" . time() . "\001"; |
4762
|
|
|
|
|
|
|
|
4763
|
|
|
|
|
|
|
## Now ready to go through the file line-by-line (record-by-record) |
4764
|
|
|
|
|
|
|
|
4765
|
19
|
|
|
|
|
26
|
my $WroteProg; |
4766
|
19
|
|
|
|
|
23
|
my $RecordsRead = 0; |
4767
|
19
|
|
|
|
|
86
|
while (<$File>) |
4768
|
|
|
|
|
|
|
{ |
4769
|
|
|
|
|
|
|
## Try to guess file delimiter from the header row if not yet specified. |
4770
|
57
|
50
|
33
|
|
|
164
|
$FDelimiter ||= guess_delimiter($_) or |
4771
|
|
|
|
|
|
|
$this->{_ErrorMsg} = "Could not find comma or tab delimiters in $FileName.", goto done; |
4772
|
|
|
|
|
|
|
|
4773
|
|
|
|
|
|
|
## Maybe convert entire line (all records) ISO to Mac before splitting. |
4774
|
57
|
100
|
|
|
|
152
|
&MacRomanToISORoman8859_1(\ $_) if $DoMacMapping; |
4775
|
|
|
|
|
|
|
|
4776
|
|
|
|
|
|
|
## Manipulate the single line of data fields into a splittable format. |
4777
|
|
|
|
|
|
|
|
4778
|
57
|
|
|
|
|
197
|
chomp; |
4779
|
|
|
|
|
|
|
|
4780
|
|
|
|
|
|
|
## Replace any delimiters inside quotes with ASCII 0. |
4781
|
|
|
|
|
|
|
## Split fields on delimiters. |
4782
|
|
|
|
|
|
|
## Delete leading or trailing quote marks from each field. |
4783
|
|
|
|
|
|
|
## Restore delimiters ASCII 0 back to delimiters. |
4784
|
|
|
|
|
|
|
|
4785
|
|
|
|
|
|
|
## Protect delimiters inside fields. |
4786
|
57
|
|
|
|
|
88
|
s/\000/$ZeroMarker/go; ## Preserve genuine ASCII 0 chars. |
4787
|
57
|
|
|
|
|
75
|
my $InQuote = 0; ## Initialize InQuote flag to zero. |
4788
|
57
|
|
|
|
|
368
|
s/(\")|($FDelimiter)/ ## Replace delimiters inside quotes with ASCII 0 ... |
4789
|
180
|
50
|
|
|
|
1239
|
($1 ? do{$InQuote^=1; $1} : ## ... if quote char, toggle InQuote flag |
|
0
|
50
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
4790
|
|
|
|
|
|
|
($InQuote ? "\000" : $2))/eg; ## ... if delimiter, InQuote sez whether to replace or retain. |
4791
|
|
|
|
|
|
|
|
4792
|
|
|
|
|
|
|
## Split record into fields, then clean each field. |
4793
|
|
|
|
|
|
|
|
4794
|
57
|
|
|
|
|
119
|
s/^\"//; s/\"$//; ## Kill leading, trailing quotes surrounding each record |
|
57
|
|
|
|
|
90
|
|
4795
|
|
|
|
|
|
|
my @FieldVals = |
4796
|
|
|
|
|
|
|
map |
4797
|
57
|
50
|
|
|
|
439
|
{if (length($_)) |
|
237
|
|
|
|
|
473
|
|
4798
|
|
|
|
|
|
|
{ |
4799
|
237
|
|
|
|
|
290
|
s/\"\"/\"/g; ## Restore Merge format's quoted double-quotes. ("" ==> ") |
4800
|
237
|
|
|
|
|
259
|
s/\000/$FDelimiter/g; ## Restore delimiters inside fields |
4801
|
237
|
|
|
|
|
262
|
s/$ZeroMarker/\000/go; ## Restore preserved ASCII 0 chars. |
4802
|
237
|
50
|
|
|
|
836
|
s/$RetRegex/\n/g if $ReturnMap;## Restore return characters that were coded as ASCII 11 (^K) |
4803
|
|
|
|
|
|
|
} |
4804
|
237
|
|
|
|
|
504
|
$_;} ## Return field val after above mods. |
4805
|
|
|
|
|
|
|
split(/\"?$FDelimiter\"?/, $_); ## Split on delimiters, killing optional surrounding quotes at same time. |
4806
|
|
|
|
|
|
|
|
4807
|
|
|
|
|
|
|
## Put the data into the vectors |
4808
|
57
|
|
|
|
|
140
|
foreach (@$FieldNums) |
4809
|
|
|
|
|
|
|
{ |
4810
|
213
|
50
|
|
|
|
623
|
$FieldVectors->[$_]->[$RecordsRead] = $FieldVals[$_] if (length($FieldVals[$_])); |
4811
|
|
|
|
|
|
|
} |
4812
|
57
|
|
|
|
|
80
|
$RecordsRead++; |
4813
|
|
|
|
|
|
|
|
4814
|
|
|
|
|
|
|
## Stop if we've read all the records we wanted. |
4815
|
57
|
50
|
33
|
|
|
175
|
last if ($MaxRecords && ($RecordsRead >= $MaxRecords)); |
4816
|
|
|
|
|
|
|
|
4817
|
|
|
|
|
|
|
## Optimization: |
4818
|
|
|
|
|
|
|
|
4819
|
|
|
|
|
|
|
## After importing 100, 200, 300, 400, etc. records, we |
4820
|
|
|
|
|
|
|
## re-estimate the size of the table. To help make the field |
4821
|
|
|
|
|
|
|
## insertion more efficient (by avoiding frequent |
4822
|
|
|
|
|
|
|
## array-resizing), we can set the sizes of the field vectors |
4823
|
|
|
|
|
|
|
## to hold at least our estimated number of records). |
4824
|
|
|
|
|
|
|
## Ideally, this estimation/resize step will happen at most 2 |
4825
|
|
|
|
|
|
|
## or 3 times no matter how big the incoming data file is. |
4826
|
|
|
|
|
|
|
|
4827
|
57
|
|
|
|
|
59
|
my $EstTotalRecords; |
4828
|
57
|
50
|
33
|
|
|
140
|
if ((($RecordsRead % 100) == 0) && ## If we're on a record divisble by 100... |
|
0
|
|
|
|
|
0
|
|
4829
|
|
|
|
|
|
|
(($RecordsRead + 100 > $#{$FieldVectors->[$FieldNums->[0]]}))) ## ... and we're getting close to max size... |
4830
|
|
|
|
|
|
|
{ |
4831
|
|
|
|
|
|
|
## Then estimate the size we'd like to resize it to. |
4832
|
0
|
|
|
|
|
0
|
$EstTotalRecords = (100 + int($RecordsRead * ($FileSize / tell($File)))); |
4833
|
0
|
0
|
0
|
|
|
0
|
$EstTotalRecords = $MaxRecords if ($MaxRecords && ($MaxRecords < $EstTotalRecords)); |
4834
|
|
|
|
|
|
|
|
4835
|
|
|
|
|
|
|
## If this size is greater than the actual size... |
4836
|
0
|
0
|
|
|
|
0
|
if ($EstTotalRecords > $#{$FieldVectors->[$FieldNums->[0]]}) |
|
0
|
|
|
|
|
0
|
|
4837
|
|
|
|
|
|
|
{ |
4838
|
|
|
|
|
|
|
## Then resize all the vectors. |
4839
|
|
|
|
|
|
|
## $this->progress("$RecordsRead: Resizing to $EstTotalRecords...\n"); ## Debugging |
4840
|
0
|
|
|
|
|
0
|
foreach (@$FieldNums) {$#{$FieldVectors->[$_]} = $EstTotalRecords}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
4841
|
|
|
|
|
|
|
} |
4842
|
|
|
|
|
|
|
} |
4843
|
|
|
|
|
|
|
|
4844
|
|
|
|
|
|
|
## Try doing timed (throttled to 1 per 2 secs) progress at |
4845
|
|
|
|
|
|
|
## most every 100th record. |
4846
|
57
|
0
|
|
|
|
111
|
my $Did = ($EstTotalRecords ? |
|
|
50
|
|
|
|
|
|
4847
|
|
|
|
|
|
|
$this->progress_timed("Reading", "$RecordsRead of $EstTotalRecords (est.)", tell($File), $FileSize, 1) : |
4848
|
|
|
|
|
|
|
$this->progress_timed("Reading", "$RecordsRead" , tell($File), $FileSize, 1)) |
4849
|
|
|
|
|
|
|
if (($RecordsRead % 100) == 0); |
4850
|
57
|
|
33
|
|
|
477
|
$WroteProg ||= $Did; |
4851
|
|
|
|
|
|
|
} |
4852
|
|
|
|
|
|
|
|
4853
|
|
|
|
|
|
|
## If we wrote timed progress but didn't get to give the 100% |
4854
|
|
|
|
|
|
|
## message yet, print the 100% message now. |
4855
|
|
|
|
|
|
|
|
4856
|
19
|
50
|
|
|
|
48
|
if ($WroteProg) |
4857
|
|
|
|
|
|
|
{ |
4858
|
0
|
0
|
|
|
|
0
|
$this->progress_timed("Reading", "$RecordsRead of $RecordsRead", $FileSize, $FileSize, 1) |
4859
|
|
|
|
|
|
|
unless (($RecordsRead % 100) == 0); |
4860
|
|
|
|
|
|
|
} |
4861
|
|
|
|
|
|
|
|
4862
|
|
|
|
|
|
|
## Print the regular Done message. |
4863
|
19
|
|
|
|
|
78
|
$this->progress("Read $FileName."); |
4864
|
|
|
|
|
|
|
|
4865
|
|
|
|
|
|
|
## Set the field vectors' length to the exact length we really |
4866
|
|
|
|
|
|
|
## read. |
4867
|
|
|
|
|
|
|
|
4868
|
|
|
|
|
|
|
## $this->progress("$RecordsRead: Truncating to @{[$RecordsRead - 1]}... \n"); ## Debugging |
4869
|
19
|
|
|
|
|
41
|
foreach (@$FieldNums) {$#{$FieldVectors->[$_]} = ($RecordsRead - 1)}; |
|
71
|
|
|
|
|
79
|
|
|
71
|
|
|
|
|
160
|
|
4870
|
|
|
|
|
|
|
|
4871
|
|
|
|
|
|
|
## Delete any existing columns in the object. |
4872
|
19
|
|
|
|
|
25
|
delete @$this{@{$this->fieldlist_all()}}; |
|
19
|
|
|
|
|
64
|
|
4873
|
|
|
|
|
|
|
|
4874
|
|
|
|
|
|
|
## Put the new columns into the object. |
4875
|
19
|
|
|
|
|
96
|
@$this{@$FieldsToGet} = @$FieldVectors[@$FieldNums]; |
4876
|
|
|
|
|
|
|
|
4877
|
|
|
|
|
|
|
## Set fieldlist to fields we actually read, in order. |
4878
|
19
|
|
|
|
|
60
|
$this->fieldlist($FieldsToGet); |
4879
|
|
|
|
|
|
|
|
4880
|
|
|
|
|
|
|
## Remember the line ending char or chars that were successfully |
4881
|
|
|
|
|
|
|
## used to read the file. The same ending will be used by default |
4882
|
|
|
|
|
|
|
## to write any file based on this object. |
4883
|
19
|
|
|
|
|
57
|
$this->{_LineEnding} = $this->lineending_symbol($LineEnding); |
4884
|
|
|
|
|
|
|
|
4885
|
|
|
|
|
|
|
## Remember the field delimiter that was successfully used to read |
4886
|
|
|
|
|
|
|
## the file. The same delimiter will be used by default to write |
4887
|
|
|
|
|
|
|
## any file based on this object. |
4888
|
19
|
|
|
|
|
66
|
$this->{_FDelimiter} = $FDelimiter; |
4889
|
|
|
|
|
|
|
|
4890
|
|
|
|
|
|
|
## Remember the header row setting. |
4891
|
19
|
|
|
|
|
29
|
$this->{_HeaderRow} = $HeaderRow; |
4892
|
|
|
|
|
|
|
|
4893
|
|
|
|
|
|
|
## Remember the filename used for reading. |
4894
|
19
|
|
|
|
|
30
|
$this->{_FileName} = $FileName; |
4895
|
|
|
|
|
|
|
|
4896
|
|
|
|
|
|
|
## Remember whether we read (and maybe will cache) a subset of available fields. |
4897
|
19
|
|
100
|
|
|
76
|
$this->{_Subset} = $GettingSubset || 0; |
4898
|
|
|
|
|
|
|
|
4899
|
|
|
|
|
|
|
## Clean out _Selection and verify _SortOrder to ensure compatibility |
4900
|
|
|
|
|
|
|
## with current _FieldList. |
4901
|
19
|
|
|
|
|
49
|
$this->read_postcheck(); |
4902
|
|
|
|
|
|
|
|
4903
|
|
|
|
|
|
|
## Other informational data and options, like sort specs, sort |
4904
|
|
|
|
|
|
|
## routines and so on, need not be changed or replaced when data |
4905
|
|
|
|
|
|
|
## changes. |
4906
|
|
|
|
|
|
|
|
4907
|
19
|
|
|
|
|
26
|
$Success = 1; |
4908
|
|
|
|
|
|
|
|
4909
|
19
|
50
|
|
|
|
40
|
done: |
4910
|
|
|
|
|
|
|
|
4911
|
|
|
|
|
|
|
$this->warn("FAILURE: $this->{_ErrorMsg}") unless $Success; |
4912
|
|
|
|
|
|
|
|
4913
|
19
|
50
|
|
|
|
281
|
close $File if $File; |
4914
|
19
|
|
|
|
|
258
|
return($Success); |
4915
|
|
|
|
|
|
|
} |
4916
|
|
|
|
|
|
|
|
4917
|
|
|
|
|
|
|
sub read_postcheck ## Called to clean up after a successful read |
4918
|
|
|
|
|
|
|
{ |
4919
|
62
|
|
|
62
|
0
|
92
|
my $this = shift; |
4920
|
|
|
|
|
|
|
|
4921
|
|
|
|
|
|
|
## Run select_all to empty out the _Selection. |
4922
|
62
|
|
|
|
|
180
|
$this->select_all(); |
4923
|
|
|
|
|
|
|
|
4924
|
|
|
|
|
|
|
## Remove any bogus field names from the sort order, if any. |
4925
|
62
|
|
|
|
|
167
|
$this->sortorder_check(); |
4926
|
|
|
|
|
|
|
} |
4927
|
|
|
|
|
|
|
|
4928
|
|
|
|
|
|
|
sub read_file_or_cache ## Read, cacheing if possible |
4929
|
|
|
|
|
|
|
{ |
4930
|
62
|
|
|
62
|
0
|
91
|
my $this = shift; |
4931
|
62
|
100
|
|
|
|
219
|
my $Params = (@_ == 1 ? {_FileName => $_[0]} : {@_}); |
4932
|
|
|
|
|
|
|
|
4933
|
62
|
|
|
|
|
117
|
my($FileName, $FieldList, $CacheOnRead, $CacheExtension, $CacheSubDir) = map {$this->getparam($Params, $_)} |
|
310
|
|
|
|
|
784
|
|
4934
|
|
|
|
|
|
|
qw(_FileName _FieldList _CacheOnRead _CacheExtension _CacheSubDir); |
4935
|
|
|
|
|
|
|
|
4936
|
62
|
|
|
|
|
106
|
my $Success; |
4937
|
|
|
|
|
|
|
|
4938
|
|
|
|
|
|
|
## If cacheing is turned off, just bail prematurely and treat this |
4939
|
|
|
|
|
|
|
## as a call to read_file(). |
4940
|
|
|
|
|
|
|
|
4941
|
62
|
100
|
|
|
|
163
|
return($this->read_file(%$Params)) unless $CacheOnRead; |
4942
|
|
|
|
|
|
|
|
4943
|
|
|
|
|
|
|
## Otherwise... check if cacheing is possible. |
4944
|
|
|
|
|
|
|
|
4945
|
|
|
|
|
|
|
## Calculate the cache file name. If it comes back empty, it |
4946
|
|
|
|
|
|
|
## means the cache directory probably could not be created, or the |
4947
|
|
|
|
|
|
|
## cache file itself either does not exist or could not be |
4948
|
|
|
|
|
|
|
## preflighted (either read or touched/deleted). |
4949
|
|
|
|
|
|
|
|
4950
|
51
|
|
|
|
|
185
|
my $CacheFileName = $this->prep_cache_file($FileName, $CacheExtension, $CacheSubDir); |
4951
|
|
|
|
|
|
|
|
4952
|
|
|
|
|
|
|
## If the cache file preflight failed, treat this as a regular |
4953
|
|
|
|
|
|
|
## read_file() without cacheing. |
4954
|
|
|
|
|
|
|
|
4955
|
51
|
50
|
|
|
|
141
|
return($this->read_file(%$Params)) unless length($CacheFileName); |
4956
|
|
|
|
|
|
|
|
4957
|
|
|
|
|
|
|
## At this point we believe we'll either be able to read or write |
4958
|
|
|
|
|
|
|
## the cache file as needed. |
4959
|
|
|
|
|
|
|
|
4960
|
|
|
|
|
|
|
## Try to read the cache if both files exist and the mod date is |
4961
|
|
|
|
|
|
|
## later. |
4962
|
|
|
|
|
|
|
|
4963
|
51
|
|
|
|
|
63
|
my $Data; |
4964
|
51
|
100
|
66
|
|
|
2501
|
if ((-e $FileName ) && |
|
|
|
100
|
|
|
|
|
4965
|
|
|
|
|
|
|
(-e $CacheFileName ) && |
4966
|
|
|
|
|
|
|
(((stat($CacheFileName))[9]) > ((stat($FileName))[9])) ) |
4967
|
|
|
|
|
|
|
{ |
4968
|
46
|
|
|
|
|
205
|
$this->progress("Thawing $CacheFileName..."); |
4969
|
46
|
|
|
|
|
237
|
$Data = &retrieve($CacheFileName); |
4970
|
|
|
|
|
|
|
|
4971
|
46
|
50
|
|
|
|
4512
|
$this-warn("Cache restore from $CacheFileName failed: $!"), unlink($CacheFileName) |
4972
|
|
|
|
|
|
|
unless defined ($Data); |
4973
|
|
|
|
|
|
|
} |
4974
|
|
|
|
|
|
|
|
4975
|
51
|
100
|
|
|
|
217
|
if (ref($Data) eq 'HASH') |
4976
|
|
|
|
|
|
|
{ |
4977
|
|
|
|
|
|
|
## Retrieval succeeded. |
4978
|
|
|
|
|
|
|
|
4979
|
|
|
|
|
|
|
## Verify that the data in the cache is usable. |
4980
|
|
|
|
|
|
|
|
4981
|
|
|
|
|
|
|
## First, check newline-encoding compatibility. |
4982
|
|
|
|
|
|
|
( |
4983
|
46
|
50
|
|
|
|
135
|
$this->warn("Abandoning cache due to incompatible newline encoding"), |
4984
|
|
|
|
|
|
|
unlink $CacheFileName, goto cache_failed) unless $Data->{_Newline} eq "\n"; |
4985
|
|
|
|
|
|
|
|
4986
|
|
|
|
|
|
|
## Simulate an actual read_file() using data |
4987
|
|
|
|
|
|
|
## from the cache instead of the original file. |
4988
|
|
|
|
|
|
|
|
4989
|
46
|
100
|
66
|
|
|
218
|
if ((ref($FieldList) eq 'ARRAY') && @$FieldList) |
|
|
100
|
|
|
|
|
|
4990
|
|
|
|
|
|
|
{ |
4991
|
|
|
|
|
|
|
## If $FieldList requests fields not found in the cache, |
4992
|
|
|
|
|
|
|
## we abandon (delete and maybe rewrite) the cache: maybe |
4993
|
|
|
|
|
|
|
## we previously cached a different subset of fields from |
4994
|
|
|
|
|
|
|
## a previous request and so the cache is no longer |
4995
|
|
|
|
|
|
|
## adequate. |
4996
|
|
|
|
|
|
|
|
4997
|
18
|
|
|
|
|
33
|
my $MissingFields = [grep {!exists($Data->{$_})} @$FieldList]; |
|
48
|
|
|
|
|
134
|
|
4998
|
|
|
|
|
|
|
( |
4999
|
|
|
|
|
|
|
## $this->warn("Abandoning cache due to change in requested field list"), |
5000
|
18
|
100
|
|
|
|
79
|
unlink $CacheFileName, goto cache_failed) if @$MissingFields; |
5001
|
|
|
|
|
|
|
|
5002
|
|
|
|
|
|
|
## If there was a _FieldList supplied in $Params or $this, |
5003
|
|
|
|
|
|
|
## we might need to omit any fields read from cache but |
5004
|
|
|
|
|
|
|
## not mentioned (just as read_file() would have done). |
5005
|
|
|
|
|
|
|
|
5006
|
16
|
|
|
|
|
113
|
my $FieldHash = {}; @$FieldHash{@$FieldList} = undef; |
|
16
|
|
|
|
|
53
|
|
5007
|
16
|
|
|
|
|
61
|
my $OmitFields = [grep {!exists($FieldHash->{$_})} grep {!/^_/} keys %$Data]; |
|
64
|
|
|
|
|
120
|
|
|
160
|
|
|
|
|
327
|
|
5008
|
|
|
|
|
|
|
|
5009
|
16
|
|
|
|
|
52
|
delete @$Data{@$OmitFields}; |
5010
|
|
|
|
|
|
|
|
5011
|
|
|
|
|
|
|
## Finally, also pare down _FieldList to only mention |
5012
|
|
|
|
|
|
|
## those fields that were desired.... |
5013
|
|
|
|
|
|
|
|
5014
|
16
|
|
|
|
|
26
|
my $AvailFields = $Data->{_FieldList}; |
5015
|
16
|
|
|
|
|
27
|
$Data->{_FieldList} = [grep {exists($Data->{$_})} @$FieldList]; |
|
42
|
|
|
|
|
105
|
|
5016
|
|
|
|
|
|
|
|
5017
|
|
|
|
|
|
|
## We might have ended up reading a subset of the |
5018
|
|
|
|
|
|
|
## available fields in the cache. (This is logically |
5019
|
|
|
|
|
|
|
## equivalent to reading a subset of the available fields |
5020
|
|
|
|
|
|
|
## in the real file.) If so, then change the value of |
5021
|
|
|
|
|
|
|
## $Data->{_Subset} to indicate that. |
5022
|
|
|
|
|
|
|
|
5023
|
16
|
|
100
|
|
|
80
|
my $GettingSubset = (@$OmitFields && 1 || 0); |
5024
|
16
|
|
66
|
|
|
92
|
$Data->{_Subset} ||= $GettingSubset; |
5025
|
|
|
|
|
|
|
} |
5026
|
|
|
|
|
|
|
elsif ($Data->{_Subset}) |
5027
|
|
|
|
|
|
|
{ |
5028
|
|
|
|
|
|
|
|
5029
|
|
|
|
|
|
|
## Conversely, if no field list was specified (hence all |
5030
|
|
|
|
|
|
|
## fields are desired), but a subset of fields has |
5031
|
|
|
|
|
|
|
## previously been cached, we have to delete / abandon the |
5032
|
|
|
|
|
|
|
## cache and re-read the file so we are getting all |
5033
|
|
|
|
|
|
|
## fields. |
5034
|
|
|
|
|
|
|
|
5035
|
|
|
|
|
|
|
( |
5036
|
|
|
|
|
|
|
## $this->warn("Abandoning partial cache due to request of full field list"), |
5037
|
1
|
|
|
|
|
20
|
unlink $CacheFileName, goto cache_failed); |
5038
|
|
|
|
|
|
|
} |
5039
|
|
|
|
|
|
|
|
5040
|
|
|
|
|
|
|
## Copy all elements from $Data, including possibly overridden |
5041
|
|
|
|
|
|
|
## _FieldList element if any, but excepting the cache-only |
5042
|
|
|
|
|
|
|
## _Newline element, into $this. |
5043
|
|
|
|
|
|
|
|
5044
|
43
|
|
|
|
|
102
|
delete $Data->{_Newline}; |
5045
|
43
|
|
|
|
|
312
|
@$this{keys %$Data} = values %$Data; |
5046
|
|
|
|
|
|
|
|
5047
|
|
|
|
|
|
|
## Set the file name to the name of the original (not the |
5048
|
|
|
|
|
|
|
## cache) file, just as read() would have done. |
5049
|
|
|
|
|
|
|
|
5050
|
43
|
|
|
|
|
125
|
$this->{_FileName} = $FileName; |
5051
|
|
|
|
|
|
|
|
5052
|
|
|
|
|
|
|
## Run the "read post-check" -- the same things we do in |
5053
|
|
|
|
|
|
|
## read_file() after completing the read process: Clean out |
5054
|
|
|
|
|
|
|
## _Selection and verify _SortOrder to ensure compatibility |
5055
|
|
|
|
|
|
|
## with current _FieldList. |
5056
|
|
|
|
|
|
|
|
5057
|
43
|
|
|
|
|
176
|
$this->read_postcheck(); |
5058
|
|
|
|
|
|
|
|
5059
|
43
|
|
|
|
|
148
|
$this->progress("Thawed $FileName."); |
5060
|
|
|
|
|
|
|
|
5061
|
43
|
|
|
|
|
100
|
$Success = 1; |
5062
|
43
|
|
|
|
|
621
|
goto done; ## Successful completion: we read from the cache. |
5063
|
|
|
|
|
|
|
} |
5064
|
|
|
|
|
|
|
|
5065
|
|
|
|
|
|
|
## Could not retrieve for whatever reason (maybe cache did not |
5066
|
|
|
|
|
|
|
## exist yet or was out of date or had to be abandoned). So just |
5067
|
|
|
|
|
|
|
## read normally and possibly write the cache. |
5068
|
|
|
|
|
|
|
|
5069
|
|
|
|
|
|
|
cache_failed: |
5070
|
|
|
|
|
|
|
{ |
5071
|
8
|
50
|
|
|
|
15
|
$Success = $this->read_file(%$Params) or goto done; |
|
8
|
|
|
|
|
70
|
|
5072
|
|
|
|
|
|
|
|
5073
|
|
|
|
|
|
|
## Now, having read successfully, we try to write the cache |
5074
|
|
|
|
|
|
|
## for next time. Writing the cache is optional; failing to |
5075
|
|
|
|
|
|
|
## write it is not a failure of the method. |
5076
|
|
|
|
|
|
|
|
5077
|
|
|
|
|
|
|
{ ## Code in this block may fail and that's OK. |
5078
|
|
|
|
|
|
|
|
5079
|
|
|
|
|
|
|
## First, pre-flight. |
5080
|
8
|
50
|
|
|
|
19
|
$this->warn("Cache file $CacheFileName cannot be created/overwritten: $!"), |
|
8
|
|
|
|
|
37
|
|
5081
|
|
|
|
|
|
|
goto done ## Successful completion. |
5082
|
|
|
|
|
|
|
unless $this->try_file_write($CacheFileName); |
5083
|
|
|
|
|
|
|
|
5084
|
|
|
|
|
|
|
## The data to be stored is: |
5085
|
|
|
|
|
|
|
|
5086
|
|
|
|
|
|
|
## 1) All data columns read by read_file() |
5087
|
|
|
|
|
|
|
## 2) Any parameters set by read_file() |
5088
|
|
|
|
|
|
|
## 3) _Subset param indicating partial fieldlist was read from file. |
5089
|
|
|
|
|
|
|
## 4) _Newline setting so we know if it's compatabile when read back. |
5090
|
|
|
|
|
|
|
|
5091
|
|
|
|
|
|
|
## No other parameters should be cached because we want a |
5092
|
|
|
|
|
|
|
## read from the cache to produce exactly the same result |
5093
|
|
|
|
|
|
|
## as a read from the file itself would have produced. |
5094
|
|
|
|
|
|
|
|
5095
|
|
|
|
|
|
|
## After a read, fieldlist() will contain the fields |
5096
|
|
|
|
|
|
|
## actually read, so cols_hash() WILL yield all the |
5097
|
|
|
|
|
|
|
## columns. |
5098
|
|
|
|
|
|
|
|
5099
|
8
|
|
|
|
|
39
|
my $Data = {( |
5100
|
|
|
|
|
|
|
## Refs to each column read by read_file() |
5101
|
|
|
|
|
|
|
%{ $this->cols_hash() }, |
5102
|
|
|
|
|
|
|
|
5103
|
|
|
|
|
|
|
## Other parameters set by read_file() |
5104
|
|
|
|
|
|
|
_FieldList => $this->{_FieldList }, |
5105
|
|
|
|
|
|
|
_LineEnding => $this->{_LineEnding}, |
5106
|
|
|
|
|
|
|
_FDelimiter => $this->{_FDelimiter}, |
5107
|
|
|
|
|
|
|
_HeaderRow => $this->{_HeaderRow }, |
5108
|
|
|
|
|
|
|
_Subset => $this->{_Subset }, |
5109
|
8
|
|
|
|
|
23
|
_Newline => "\n", |
5110
|
|
|
|
|
|
|
)}; |
5111
|
|
|
|
|
|
|
|
5112
|
8
|
50
|
|
|
|
48
|
$this->warn("Failed to cache $CacheFileName"), |
5113
|
|
|
|
|
|
|
unlink($CacheFileName), |
5114
|
|
|
|
|
|
|
goto done ## Successful completion. |
5115
|
|
|
|
|
|
|
unless $this->write_cache($Data, $CacheFileName); |
5116
|
8
|
|
|
|
|
235
|
chmod 0666, $CacheFileName; ## Liberal perms if possible. |
5117
|
|
|
|
|
|
|
} |
5118
|
|
|
|
|
|
|
|
5119
|
8
|
|
|
|
|
73
|
goto done; ## Successful completion: we read from the file & maybe saved cache. |
5120
|
|
|
|
|
|
|
} |
5121
|
|
|
|
|
|
|
|
5122
|
|
|
|
|
|
|
done: |
5123
|
51
|
|
|
|
|
317
|
return ($Success); |
5124
|
|
|
|
|
|
|
} |
5125
|
|
|
|
|
|
|
|
5126
|
|
|
|
|
|
|
=pod |
5127
|
|
|
|
|
|
|
|
5128
|
|
|
|
|
|
|
=head1 WRITING DATA FILES |
5129
|
|
|
|
|
|
|
|
5130
|
|
|
|
|
|
|
## Writing some or all data from table into a data file |
5131
|
|
|
|
|
|
|
|
5132
|
|
|
|
|
|
|
$t->write($Path) ## Simple calling convention |
5133
|
|
|
|
|
|
|
|
5134
|
|
|
|
|
|
|
$t->write( ## Named-parameter convention |
5135
|
|
|
|
|
|
|
|
5136
|
|
|
|
|
|
|
## Params that override params in the object if supplied... |
5137
|
|
|
|
|
|
|
|
5138
|
|
|
|
|
|
|
_FileName => $Path, ## "Base path"; see _WriteExtension |
5139
|
|
|
|
|
|
|
|
5140
|
|
|
|
|
|
|
_WriteExtension=> ".out",## Insert/append extension to _FileName |
5141
|
|
|
|
|
|
|
|
5142
|
|
|
|
|
|
|
_FieldList => [...], ## Fields to write; others ignored |
5143
|
|
|
|
|
|
|
_Selection => [...], ## Record (#s) to write; others ignored |
5144
|
|
|
|
|
|
|
|
5145
|
|
|
|
|
|
|
_HeaderRow => 0, ## Include header row in file |
5146
|
|
|
|
|
|
|
|
5147
|
|
|
|
|
|
|
_LineEnding => undef, ## Record delimiter (default is "\n") |
5148
|
|
|
|
|
|
|
_FDelimiter => undef, ## Field delimiter (default is comma) |
5149
|
|
|
|
|
|
|
|
5150
|
|
|
|
|
|
|
_ReturnMap => 1, ## Whether to encode internal returns |
5151
|
|
|
|
|
|
|
_ReturnEncoding=>"\x0B", ## How to encode returns |
5152
|
|
|
|
|
|
|
_MacRomanMap => undef, ## Whether/when to write Mac char set |
5153
|
|
|
|
|
|
|
|
5154
|
|
|
|
|
|
|
|
5155
|
|
|
|
|
|
|
_CacheOnWrite => 1, ## Enable saving cache after write() |
5156
|
|
|
|
|
|
|
_CacheExtension=> ".x", ## Extension to add to cache file name |
5157
|
|
|
|
|
|
|
_CacheSubDir => "", ## (Sub-)dir, if any, for cache files |
5158
|
|
|
|
|
|
|
|
5159
|
|
|
|
|
|
|
## Params specific to the read()/write() methods... |
5160
|
|
|
|
|
|
|
|
5161
|
|
|
|
|
|
|
_MaxRecords => 200, ## Limit on how many records to write |
5162
|
|
|
|
|
|
|
) |
5163
|
|
|
|
|
|
|
|
5164
|
|
|
|
|
|
|
$t->write_file() ## Internal: same as write(); ignores cacheing |
5165
|
|
|
|
|
|
|
|
5166
|
|
|
|
|
|
|
write() writes a Merge, CSV, or Tab-delimited file. |
5167
|
|
|
|
|
|
|
|
5168
|
|
|
|
|
|
|
It uses parameters as described above. Any parameters not supplied |
5169
|
|
|
|
|
|
|
will be gotten from the object. |
5170
|
|
|
|
|
|
|
|
5171
|
|
|
|
|
|
|
Using the simple calling convention, just pass it a path which will |
5172
|
|
|
|
|
|
|
override the _FileName parameter in the object, if any. |
5173
|
|
|
|
|
|
|
|
5174
|
|
|
|
|
|
|
All other parameters will come from the object (or will be defaulted |
5175
|
|
|
|
|
|
|
if absent). |
5176
|
|
|
|
|
|
|
|
5177
|
|
|
|
|
|
|
If no _FileName or path is specified, or it is the special string "-" |
5178
|
|
|
|
|
|
|
(dash), then the file handle \ * STDOUT will be used by default (and |
5179
|
|
|
|
|
|
|
you could redirect it to a file). You can supply any open file handle |
5180
|
|
|
|
|
|
|
or IO::File object of your own for the _FileName parameter. |
5181
|
|
|
|
|
|
|
|
5182
|
|
|
|
|
|
|
If write() is writing to a file handle by default or because you |
5183
|
|
|
|
|
|
|
specified one, then no write-cacheing will occur. |
5184
|
|
|
|
|
|
|
|
5185
|
|
|
|
|
|
|
To specify additional parameters or override any parameters in the |
5186
|
|
|
|
|
|
|
object while reading, use the named-parameter calling convention. |
5187
|
|
|
|
|
|
|
|
5188
|
|
|
|
|
|
|
If the object's data was previously filled in using new() or read(), |
5189
|
|
|
|
|
|
|
then the file format parameters from the previous read() method will |
5190
|
|
|
|
|
|
|
still be in the object, so the format of the written file will |
5191
|
|
|
|
|
|
|
correspond as much as possible to the file that was read(). |
5192
|
|
|
|
|
|
|
|
5193
|
|
|
|
|
|
|
write() returns the path name of the file actually written, or the |
5194
|
|
|
|
|
|
|
empty string if a supplied file handle or STDOUT was written to, or |
5195
|
|
|
|
|
|
|
undef if there was a failure. |
5196
|
|
|
|
|
|
|
|
5197
|
|
|
|
|
|
|
If write() returns undef, then it will also have set the _ErrorMsg |
5198
|
|
|
|
|
|
|
parameter in the object. |
5199
|
|
|
|
|
|
|
|
5200
|
|
|
|
|
|
|
write() never modifies any data in the object itself. |
5201
|
|
|
|
|
|
|
|
5202
|
|
|
|
|
|
|
Consequently, if you specify a _FieldList or a _Selection, only those |
5203
|
|
|
|
|
|
|
fields or records will be written, but the corresponding parameters in |
5204
|
|
|
|
|
|
|
the object itself will be left untouched. |
5205
|
|
|
|
|
|
|
|
5206
|
|
|
|
|
|
|
=head2 How write() calculates the Path |
5207
|
|
|
|
|
|
|
|
5208
|
|
|
|
|
|
|
The _FileName parameter is shared with the read() method. This |
5209
|
|
|
|
|
|
|
parameter is set by read() and may be overridden when calling write(). |
5210
|
|
|
|
|
|
|
|
5211
|
|
|
|
|
|
|
In the base implementation of Data::CTable, write() will try not to |
5212
|
|
|
|
|
|
|
overwrite the same file that was read, which could possibly cause data |
5213
|
|
|
|
|
|
|
loss. |
5214
|
|
|
|
|
|
|
|
5215
|
|
|
|
|
|
|
To avoid this, it does not use the _FileName parameter directly. |
5216
|
|
|
|
|
|
|
Instead, it starts with _FileName and inserts or appends the value of |
5217
|
|
|
|
|
|
|
the _WriteExtension parameter (which defaults to ".out") into the file |
5218
|
|
|
|
|
|
|
name before writing. |
5219
|
|
|
|
|
|
|
|
5220
|
|
|
|
|
|
|
If the _FileName already has an extension at the end, write() will |
5221
|
|
|
|
|
|
|
place the _WriteExtension BEFORE the final extension; otherwise the |
5222
|
|
|
|
|
|
|
_WriteExtension will be placed at the end of the _FileName. |
5223
|
|
|
|
|
|
|
|
5224
|
|
|
|
|
|
|
For example: |
5225
|
|
|
|
|
|
|
|
5226
|
|
|
|
|
|
|
Foobar.txt ==> Foobar.out.txt |
5227
|
|
|
|
|
|
|
Foobar.merge.txt ==> Foobar.merge.out.txt |
5228
|
|
|
|
|
|
|
My_Merge_Data ==> My_Merge_Data.out |
5229
|
|
|
|
|
|
|
|
5230
|
|
|
|
|
|
|
If you DON'T want write() to add a _WriteExtension to _FileName before |
5231
|
|
|
|
|
|
|
it writes the file, then you must set _WriteExtension to empty/undef |
5232
|
|
|
|
|
|
|
either in the object or when calling write(). Or, you could make a |
5233
|
|
|
|
|
|
|
subclass that initializes _WriteExtension to be empty. If |
5234
|
|
|
|
|
|
|
_WriteExtension is empty, then _FileName will be used exactly, which |
5235
|
|
|
|
|
|
|
may result in overwriting the original data file. |
5236
|
|
|
|
|
|
|
|
5237
|
|
|
|
|
|
|
Remember: write() returns the path name it actually used to |
5238
|
|
|
|
|
|
|
successfully write the file. Just as with read(), if the _FileName |
5239
|
|
|
|
|
|
|
you specified did not have a path, then write() will prepend a path |
5240
|
|
|
|
|
|
|
component indicating "current directory" (e.g. "./" on Unix) and this |
5241
|
|
|
|
|
|
|
will be part of the return value. |
5242
|
|
|
|
|
|
|
|
5243
|
|
|
|
|
|
|
|
5244
|
|
|
|
|
|
|
=head2 Cacheing with write() |
5245
|
|
|
|
|
|
|
|
5246
|
|
|
|
|
|
|
By default, Data::CTable only creates a cached version of a file when |
5247
|
|
|
|
|
|
|
it reads that file for the first time (on the assumption that it will |
5248
|
|
|
|
|
|
|
need to read the file again more often than the file's data will |
5249
|
|
|
|
|
|
|
change.) |
5250
|
|
|
|
|
|
|
|
5251
|
|
|
|
|
|
|
But by default, it does not create a cached version of a file when |
5252
|
|
|
|
|
|
|
writing it, on the assumption that the current program probably will |
5253
|
|
|
|
|
|
|
not be re-reading the written file and any other program that wants to |
5254
|
|
|
|
|
|
|
read it can cache it at that time. |
5255
|
|
|
|
|
|
|
|
5256
|
|
|
|
|
|
|
However, if you want write() to create a cache for its output file, it |
5257
|
|
|
|
|
|
|
is much faster to create it on write() than waiting for the next |
5258
|
|
|
|
|
|
|
read() because the next read() will be able to use the cache the very |
5259
|
|
|
|
|
|
|
first time. |
5260
|
|
|
|
|
|
|
|
5261
|
|
|
|
|
|
|
To enable write-cacheing, set _CacheOnWrite to true. Then, after the |
5262
|
|
|
|
|
|
|
write() successfully completes (and only if it does), the cached |
5263
|
|
|
|
|
|
|
version will be written. |
5264
|
|
|
|
|
|
|
|
5265
|
|
|
|
|
|
|
=head1 FORMATTED TABLES (Using Data::ShowTable) |
5266
|
|
|
|
|
|
|
|
5267
|
|
|
|
|
|
|
## Get formatted data in memory |
5268
|
|
|
|
|
|
|
|
5269
|
|
|
|
|
|
|
my $StringRef = $t->format(); ## Format same data as write() |
5270
|
|
|
|
|
|
|
my $StringRef = $t->format(10); ## Limit records to 10 |
5271
|
|
|
|
|
|
|
my $StringRef = $t->format(...); ## Specify arbitrary params |
5272
|
|
|
|
|
|
|
print $$StringRef; |
5273
|
|
|
|
|
|
|
|
5274
|
|
|
|
|
|
|
## Write formatted table to file or terminal |
5275
|
|
|
|
|
|
|
|
5276
|
|
|
|
|
|
|
$t->out($Dest, ....);## $Dest as follows; other params to format() |
5277
|
|
|
|
|
|
|
$t->out($Dest, 10, ....) ## Limit recs to 10; params to format() |
5278
|
|
|
|
|
|
|
|
5279
|
|
|
|
|
|
|
$t->out() ## print formatted data to STDOUT |
5280
|
|
|
|
|
|
|
$t->out(\*STDERR) ## print to STDERR (or any named handle) |
5281
|
|
|
|
|
|
|
$t->out("Foo.txt") ## print to any path (file to be overwritten) |
5282
|
|
|
|
|
|
|
$t->out($FileObj) ## print to any object with a print() method |
5283
|
|
|
|
|
|
|
|
5284
|
|
|
|
|
|
|
out() takes a first argument specifying a destination for the output, |
5285
|
|
|
|
|
|
|
then passes all other arguments to format() to create a nice-looking |
5286
|
|
|
|
|
|
|
table designed to be human-readable; it takes the resulting buffer and |
5287
|
|
|
|
|
|
|
print()s it to the destination you specified. |
5288
|
|
|
|
|
|
|
|
5289
|
|
|
|
|
|
|
Sample output: |
5290
|
|
|
|
|
|
|
|
5291
|
|
|
|
|
|
|
+-------+------+-----+-------+ |
5292
|
|
|
|
|
|
|
| First | Last | Age | State | |
5293
|
|
|
|
|
|
|
+-------+------+-----+-------+ |
5294
|
|
|
|
|
|
|
| Chris | Zack | 43 | CA | |
5295
|
|
|
|
|
|
|
| Marco | Bart | 22 | NV | |
5296
|
|
|
|
|
|
|
| Pearl | Muth | 15 | HI | |
5297
|
|
|
|
|
|
|
+-------+------+-----+-------+ |
5298
|
|
|
|
|
|
|
|
5299
|
|
|
|
|
|
|
(Note extra space character before each line.) |
5300
|
|
|
|
|
|
|
|
5301
|
|
|
|
|
|
|
The destination may be a file handle (default if undef is \*STDOUT), a |
5302
|
|
|
|
|
|
|
string (treated as a path to be overwritten), or any object that has a |
5303
|
|
|
|
|
|
|
print() method, especially an object of type IO::File. |
5304
|
|
|
|
|
|
|
|
5305
|
|
|
|
|
|
|
The main purpose of out() is to give you a quick way to dump a table |
5306
|
|
|
|
|
|
|
when debugging. out() calls format() to create the output, so read |
5307
|
|
|
|
|
|
|
on... |
5308
|
|
|
|
|
|
|
|
5309
|
|
|
|
|
|
|
format() produces a human-readable version of a table, in the form of |
5310
|
|
|
|
|
|
|
a reference to a string buffer (which could be very large), and |
5311
|
|
|
|
|
|
|
returns the buffer to you. Dereference the resulting string reference |
5312
|
|
|
|
|
|
|
before using. |
5313
|
|
|
|
|
|
|
|
5314
|
|
|
|
|
|
|
If format() is given one argument, that argument is the _MaxRecords |
5315
|
|
|
|
|
|
|
parameter, which limits the length of the output. |
5316
|
|
|
|
|
|
|
|
5317
|
|
|
|
|
|
|
Otherwise, format() takes the following named-parameter arguments, |
5318
|
|
|
|
|
|
|
which can optionally override the corresponding parameters, if any, in |
5319
|
|
|
|
|
|
|
the object: |
5320
|
|
|
|
|
|
|
|
5321
|
|
|
|
|
|
|
_FieldList ## Fields to include in table |
5322
|
|
|
|
|
|
|
_Selection ## Records to be included, in order |
5323
|
|
|
|
|
|
|
|
5324
|
|
|
|
|
|
|
_SortSpecs ## SortType controls number formatting |
5325
|
|
|
|
|
|
|
_DefaultSortType |
5326
|
|
|
|
|
|
|
|
5327
|
|
|
|
|
|
|
_MaxRecords ## Limit number of records output |
5328
|
|
|
|
|
|
|
|
5329
|
|
|
|
|
|
|
_MaxWidth ## Limit width of per-col. data in printout |
5330
|
|
|
|
|
|
|
|
5331
|
|
|
|
|
|
|
format() will obey _MaxRecords, if you'd like to limit the number of |
5332
|
|
|
|
|
|
|
rows to be output. _MaxRecords can also be a single argument to |
5333
|
|
|
|
|
|
|
format(), or a second argument to out() if no other parameters are |
5334
|
|
|
|
|
|
|
passed. |
5335
|
|
|
|
|
|
|
|
5336
|
|
|
|
|
|
|
format() also recognizes the _SortSpecs->{SortType} and |
5337
|
|
|
|
|
|
|
_DefaultSortType parameters to help it determine the data types of the |
5338
|
|
|
|
|
|
|
fields being formatted. Fields of type "Number" are output as |
5339
|
|
|
|
|
|
|
right-justified floats; "Integer" or "Boolean" are output as |
5340
|
|
|
|
|
|
|
right-justified integers, and all others (including the default: |
5341
|
|
|
|
|
|
|
String) are output as left-justified strings. |
5342
|
|
|
|
|
|
|
|
5343
|
|
|
|
|
|
|
In addition, there is one parameter uniquely supported by format() and |
5344
|
|
|
|
|
|
|
out(): |
5345
|
|
|
|
|
|
|
|
5346
|
|
|
|
|
|
|
=over 4 |
5347
|
|
|
|
|
|
|
|
5348
|
|
|
|
|
|
|
=item _MaxWidth ||= 15; |
5349
|
|
|
|
|
|
|
|
5350
|
|
|
|
|
|
|
=back |
5351
|
|
|
|
|
|
|
|
5352
|
|
|
|
|
|
|
_MaxWidth specifies the maximum width of columns. If unspecifed, this |
5353
|
|
|
|
|
|
|
will be 15; the minimum legal value is 2. Each column may actually |
5354
|
|
|
|
|
|
|
take up 3 more characters than _MaxWidth due to divider characters. |
5355
|
|
|
|
|
|
|
|
5356
|
|
|
|
|
|
|
The data to be output will be examined, and only the necessary width |
5357
|
|
|
|
|
|
|
will be used for each column. _MaxWidth just limits the upper bound, |
5358
|
|
|
|
|
|
|
not the lower. |
5359
|
|
|
|
|
|
|
|
5360
|
|
|
|
|
|
|
Data values that are too wide to fit in _MaxWidth spaces will be |
5361
|
|
|
|
|
|
|
truncated and the tilde character "~" will appear as the last |
5362
|
|
|
|
|
|
|
character to indicate the truncation. |
5363
|
|
|
|
|
|
|
|
5364
|
|
|
|
|
|
|
Data values with internal returns will have the return characters |
5365
|
|
|
|
|
|
|
mapped to slashes for display. |
5366
|
|
|
|
|
|
|
|
5367
|
|
|
|
|
|
|
format() and out() will NOT wrap entries onto a second line, |
5368
|
|
|
|
|
|
|
like you may have seen Data::ShowTable::ShowBoxTable do in some cases. |
5369
|
|
|
|
|
|
|
Each record will get exactly one line. |
5370
|
|
|
|
|
|
|
|
5371
|
|
|
|
|
|
|
format() and out() ignore the _HeaderRow parameter. A header |
5372
|
|
|
|
|
|
|
row showing the field names is always printed. |
5373
|
|
|
|
|
|
|
|
5374
|
|
|
|
|
|
|
format() and out() make no attempt to map upper-ascii characters from |
5375
|
|
|
|
|
|
|
or to any particular dataset. The encoding used in memory (generally |
5376
|
|
|
|
|
|
|
ISO 8859-1 by default) is the encoding used in the output. If you |
5377
|
|
|
|
|
|
|
want to manipulate the encoding, first call format(), then change the |
5378
|
|
|
|
|
|
|
encoding, then format the resulting table. |
5379
|
|
|
|
|
|
|
|
5380
|
|
|
|
|
|
|
=cut |
5381
|
|
|
|
|
|
|
|
5382
|
|
|
|
|
|
|
sub write ## Write, cacheing afterward if possible |
5383
|
|
|
|
|
|
|
{ |
5384
|
8
|
|
|
8
|
1
|
122
|
my $this = shift; |
5385
|
8
|
|
|
|
|
26
|
return($this->write_file_and_cache(@_)); |
5386
|
|
|
|
|
|
|
} |
5387
|
|
|
|
|
|
|
|
5388
|
|
|
|
|
|
|
sub write_file_and_cache ## Write, cacheing afterward if possible |
5389
|
|
|
|
|
|
|
{ |
5390
|
8
|
|
|
8
|
0
|
14
|
my $this = shift; |
5391
|
|
|
|
|
|
|
|
5392
|
8
|
50
|
|
|
|
34
|
my $Params = (@_ == 1 ? {_FileName => $_[0]} : {@_}); |
5393
|
|
|
|
|
|
|
|
5394
|
8
|
|
|
|
|
20
|
my($FieldList, $LineEnding, $FDelimiter, $HeaderRow, $CacheOnWrite, $CacheExtension, $CacheSubDir) = map {$this->getparam($Params, $_)} |
|
56
|
|
|
|
|
99
|
|
5395
|
|
|
|
|
|
|
qw($FieldList _LineEnding _FDelimiter _HeaderRow _CacheOnWrite _CacheExtension _CacheSubDir); |
5396
|
|
|
|
|
|
|
|
5397
|
|
|
|
|
|
|
## First write the file and go to done if it failed. |
5398
|
8
|
50
|
|
|
|
36
|
my $WriteFileName = $this->write_file(@_) or goto done; |
5399
|
|
|
|
|
|
|
|
5400
|
|
|
|
|
|
|
## Only try to cache if we got a non-empty $WriteFileName back. |
5401
|
|
|
|
|
|
|
## We won't get a name back in the case where we wrote directly to |
5402
|
|
|
|
|
|
|
## an open file handle. |
5403
|
|
|
|
|
|
|
|
5404
|
8
|
50
|
|
|
|
21
|
goto done unless $WriteFileName; |
5405
|
|
|
|
|
|
|
|
5406
|
|
|
|
|
|
|
## Only try to cache if $CacheOnWrite has been turned ON. |
5407
|
8
|
50
|
|
|
|
566
|
goto done unless $CacheOnWrite; |
5408
|
|
|
|
|
|
|
|
5409
|
|
|
|
|
|
|
## Now, having written successfully, we try to write the cache for |
5410
|
|
|
|
|
|
|
## next time. Writing the cache is always optional; failing to |
5411
|
|
|
|
|
|
|
## write it is not a failure of this method. Consequently, any |
5412
|
|
|
|
|
|
|
## "goto done" statements beyond this point will still result in a |
5413
|
|
|
|
|
|
|
## successful outcome since $WriteFileName will have a value. |
5414
|
|
|
|
|
|
|
|
5415
|
|
|
|
|
|
|
## Calculate the name of the cache file and fail the directory |
5416
|
|
|
|
|
|
|
## creation fails. prep_cache_file will have generated a warning |
5417
|
|
|
|
|
|
|
## if an attempt to create needed subdirectories has failed. |
5418
|
|
|
|
|
|
|
|
5419
|
0
|
0
|
|
|
|
0
|
my $CacheFileName = $this->prep_cache_file($WriteFileName, $CacheExtension, $CacheSubDir) |
5420
|
|
|
|
|
|
|
or goto done; |
5421
|
|
|
|
|
|
|
|
5422
|
|
|
|
|
|
|
## Pre-flight the cache file for writing. |
5423
|
0
|
0
|
|
|
|
0
|
$this->warn("Cache file $CacheFileName cannot be created/overwritten: $!"), |
5424
|
|
|
|
|
|
|
goto done ## Successful completion. |
5425
|
|
|
|
|
|
|
unless $this->try_file_write($CacheFileName); |
5426
|
|
|
|
|
|
|
|
5427
|
|
|
|
|
|
|
## The data to be stored is: |
5428
|
|
|
|
|
|
|
|
5429
|
|
|
|
|
|
|
## 1) All data columns written by write_file() |
5430
|
|
|
|
|
|
|
## 2) Any file format parameters used by write_file() |
5431
|
|
|
|
|
|
|
|
5432
|
|
|
|
|
|
|
## Calculate the main writing-related parameters using the same |
5433
|
|
|
|
|
|
|
## logic that write_file() uses... |
5434
|
|
|
|
|
|
|
|
5435
|
|
|
|
|
|
|
## Default for FieldList is all fields. |
5436
|
0
|
|
0
|
|
|
0
|
$FieldList ||= $this->fieldlist(); |
5437
|
|
|
|
|
|
|
|
5438
|
|
|
|
|
|
|
## Convert from optional "dos", "mac", "unix" symbolic values. |
5439
|
0
|
|
|
|
|
0
|
$LineEnding = $this->lineending_string($LineEnding); |
5440
|
|
|
|
|
|
|
|
5441
|
|
|
|
|
|
|
## Default for LineEnding is "\n" (CR on Mac; LF on Unix; CR/LF on DOS) |
5442
|
0
|
0
|
|
|
|
0
|
$LineEnding = "\n" unless length($LineEnding); |
5443
|
|
|
|
|
|
|
|
5444
|
|
|
|
|
|
|
## Default for FDelimiter is comma |
5445
|
0
|
0
|
|
|
|
0
|
$FDelimiter = ',' unless length($FDelimiter); |
5446
|
|
|
|
|
|
|
|
5447
|
|
|
|
|
|
|
## Default for HeaderRow is true. |
5448
|
0
|
0
|
|
|
|
0
|
$HeaderRow = 1 unless defined($HeaderRow); |
5449
|
|
|
|
|
|
|
|
5450
|
|
|
|
|
|
|
## No other parameters should be cached because we want a |
5451
|
|
|
|
|
|
|
## read from the cache to produce exactly the same result |
5452
|
|
|
|
|
|
|
## as a read from the file itself would have produced. |
5453
|
|
|
|
|
|
|
|
5454
|
0
|
|
|
|
|
0
|
my $Data = {( |
5455
|
|
|
|
|
|
|
## Refs to each column written |
5456
|
0
|
|
0
|
|
|
0
|
%{ $this->cols_hash($FieldList)}, |
5457
|
|
|
|
|
|
|
|
5458
|
|
|
|
|
|
|
## Other relevant file-format parameters |
5459
|
|
|
|
|
|
|
_FieldList => $FieldList, |
5460
|
|
|
|
|
|
|
_LineEnding => $LineEnding, |
5461
|
|
|
|
|
|
|
_FDelimiter => $FDelimiter, |
5462
|
|
|
|
|
|
|
_HeaderRow => $HeaderRow, |
5463
|
|
|
|
|
|
|
_Subset => $this->{_Subset} || 0, |
5464
|
|
|
|
|
|
|
_Newline => "\n", |
5465
|
|
|
|
|
|
|
|
5466
|
|
|
|
|
|
|
## We don't need to save _ReturnMap and |
5467
|
|
|
|
|
|
|
## _ReturnEncoding because those only are relevant |
5468
|
|
|
|
|
|
|
## when reading physical files. Cached data has the |
5469
|
|
|
|
|
|
|
## return chars already encoded as returns. |
5470
|
|
|
|
|
|
|
|
5471
|
|
|
|
|
|
|
)}; |
5472
|
|
|
|
|
|
|
|
5473
|
0
|
0
|
|
|
|
0
|
$this->warn("Failed to cache $CacheFileName"), |
5474
|
|
|
|
|
|
|
unlink($CacheFileName), ## Delete cache if failure |
5475
|
|
|
|
|
|
|
goto done ## Successful completion. |
5476
|
|
|
|
|
|
|
unless $this->write_cache($Data, $CacheFileName); |
5477
|
0
|
|
|
|
|
0
|
chmod 0666, $CacheFileName; ## Liberal perms if possible. |
5478
|
|
|
|
|
|
|
|
5479
|
8
|
|
|
|
|
36
|
done: |
5480
|
|
|
|
|
|
|
return($WriteFileName); |
5481
|
|
|
|
|
|
|
} |
5482
|
|
|
|
|
|
|
|
5483
|
|
|
|
|
|
|
sub write_cache |
5484
|
|
|
|
|
|
|
{ |
5485
|
8
|
|
|
8
|
0
|
17
|
my $this = shift; |
5486
|
8
|
|
|
|
|
15
|
my ($Data, $CacheFileName) = @_; |
5487
|
|
|
|
|
|
|
|
5488
|
8
|
|
|
|
|
34
|
$this->progress("Storing $CacheFileName..."); |
5489
|
|
|
|
|
|
|
|
5490
|
8
|
|
|
|
|
42
|
my $Success = nstore($Data, $CacheFileName); |
5491
|
|
|
|
|
|
|
|
5492
|
8
|
50
|
|
|
|
1641
|
$this->progress("Stored $CacheFileName.") if $Success; |
5493
|
|
|
|
|
|
|
|
5494
|
8
|
|
|
|
|
36
|
done: |
5495
|
|
|
|
|
|
|
return($Success); |
5496
|
|
|
|
|
|
|
} |
5497
|
|
|
|
|
|
|
|
5498
|
|
|
|
|
|
|
sub write_file ## Just write; don't worry about cacheing |
5499
|
|
|
|
|
|
|
{ |
5500
|
8
|
|
|
8
|
0
|
14
|
my $this = shift; |
5501
|
8
|
50
|
|
|
|
31
|
my $Params = (@_ == 1 ? {_FileName => $_[0]} : {@_}); |
5502
|
|
|
|
|
|
|
|
5503
|
96
|
|
|
|
|
267
|
my($FileName, $FieldList, $Selection, $MaxRecords, $LineEnding, $FDelimiter, $QuoteFields, $ReturnMap, $ReturnEncoding, $MacRomanMap, $HeaderRow, $WriteExtension) |
5504
|
8
|
|
|
|
|
23
|
= map {$this->getparam($Params, $_)} |
5505
|
|
|
|
|
|
|
qw(_FileName _FieldList _Selection _MaxRecords _LineEnding _FDelimiter _QuoteFields _ReturnMap _ReturnEncoding _MacRomanMap _HeaderRow _WriteExtension); |
5506
|
|
|
|
|
|
|
|
5507
|
8
|
|
|
|
|
17
|
my $Success; |
5508
|
|
|
|
|
|
|
|
5509
|
8
|
|
|
|
|
27
|
$this->{_ErrorMsg} = ""; |
5510
|
|
|
|
|
|
|
|
5511
|
|
|
|
|
|
|
## if FileName is unspecified, or is the single character "-", |
5512
|
|
|
|
|
|
|
## then default to STDOUT. |
5513
|
8
|
50
|
|
|
|
42
|
$FileName = \ *STDOUT if ($FileName =~ /^-?$/); |
5514
|
|
|
|
|
|
|
|
5515
|
|
|
|
|
|
|
## If we have a regular file handle, bless it into IO::File. |
5516
|
8
|
50
|
|
|
|
30
|
$FileName = bless ($FileName, 'IO::File') if ref($FileName) =~ /(HANDLE)|(GLOB)/; |
5517
|
|
|
|
|
|
|
|
5518
|
|
|
|
|
|
|
## If we have a file handle either passed or constructed, make note of that fact. |
5519
|
8
|
|
|
|
|
18
|
my $GotHandle = ref($FileName) eq 'IO::File'; |
5520
|
|
|
|
|
|
|
|
5521
|
8
|
50
|
33
|
|
|
6343
|
$this->{_ErrorMsg} = "FileName must be specified for write()", goto done |
5522
|
|
|
|
|
|
|
unless $GotHandle or length($FileName); |
5523
|
|
|
|
|
|
|
|
5524
|
|
|
|
|
|
|
## Default for FieldList is all fields. |
5525
|
8
|
|
33
|
|
|
20
|
$FieldList ||= $this->fieldlist(); |
5526
|
|
|
|
|
|
|
|
5527
|
|
|
|
|
|
|
## Default for Selection is all records. |
5528
|
8
|
|
33
|
|
|
38
|
$Selection ||= $this->selection(); |
5529
|
|
|
|
|
|
|
|
5530
|
|
|
|
|
|
|
## Default for MaxRecords is 0 (meaning write all records) |
5531
|
8
|
50
|
|
|
|
27
|
$MaxRecords = 0 unless (int($MaxRecords) == $MaxRecords); |
5532
|
|
|
|
|
|
|
|
5533
|
|
|
|
|
|
|
## Convert from optional "dos", "mac", "unix" symbolic values. |
5534
|
8
|
|
|
|
|
20
|
$LineEnding = $this->lineending_string($LineEnding); |
5535
|
|
|
|
|
|
|
|
5536
|
|
|
|
|
|
|
## Default for LineEnding is "\n" (CR on Mac; LF on Unix; CR/LF on DOS) |
5537
|
8
|
50
|
|
|
|
30
|
$LineEnding = "\n" unless length($LineEnding); |
5538
|
|
|
|
|
|
|
|
5539
|
|
|
|
|
|
|
## Default for FDelimiter is comma |
5540
|
8
|
50
|
|
|
|
22
|
$FDelimiter = ',' unless length($FDelimiter); |
5541
|
|
|
|
|
|
|
|
5542
|
|
|
|
|
|
|
## Default for QuoteFields is undef (auto) |
5543
|
8
|
50
|
|
|
|
81
|
$QuoteFields = undef unless defined($QuoteFields); |
5544
|
|
|
|
|
|
|
|
5545
|
|
|
|
|
|
|
## "QuoteCheck" mode means check each field -- this is the "auto" |
5546
|
|
|
|
|
|
|
## mode that kicks in when _QuoteFields is undef. |
5547
|
8
|
|
|
|
|
13
|
my $QuoteCheck = (!defined($QuoteFields)); |
5548
|
|
|
|
|
|
|
|
5549
|
|
|
|
|
|
|
## Default for ReturnMap is true. |
5550
|
8
|
50
|
|
|
|
22
|
$ReturnMap = 1 unless defined($ReturnMap); |
5551
|
|
|
|
|
|
|
|
5552
|
|
|
|
|
|
|
## Default for MacRomanMap is undef ("Auto"); |
5553
|
8
|
50
|
|
|
|
17
|
$MacRomanMap = undef unless defined($MacRomanMap); |
5554
|
|
|
|
|
|
|
|
5555
|
|
|
|
|
|
|
## DoMacMapping is the actual setting for auto charset mapping |
5556
|
8
|
|
66
|
|
|
54
|
my $DoMacMapping = |
5557
|
|
|
|
|
|
|
((!defined($MacRomanMap) && ($LineEnding eq "\x0D")) || ## Auto |
5558
|
|
|
|
|
|
|
($MacRomanMap)); ## On |
5559
|
|
|
|
|
|
|
|
5560
|
8
|
100
|
|
|
|
28
|
$this->progress("Will convert upper-ascii characters if any, from ISO-8859-1 to Mac Roman.") if $DoMacMapping; |
5561
|
|
|
|
|
|
|
|
5562
|
|
|
|
|
|
|
## Default for ReturnEncoding is "\x0B" (control-K; ASCII 11) |
5563
|
8
|
50
|
|
|
|
22
|
$ReturnEncoding = "\x0B" unless length($ReturnEncoding); |
5564
|
|
|
|
|
|
|
|
5565
|
|
|
|
|
|
|
## Default for HeaderRow is true. |
5566
|
8
|
50
|
|
|
|
17
|
$HeaderRow = 1 unless defined($HeaderRow); |
5567
|
|
|
|
|
|
|
|
5568
|
|
|
|
|
|
|
## Default for $WriteExtension is "" (none) -- meaning use exact $FileName |
5569
|
8
|
50
|
|
|
|
18
|
$WriteExtension = "" unless defined($WriteExtension); |
5570
|
|
|
|
|
|
|
|
5571
|
|
|
|
|
|
|
## Get a hash of fields actually present... |
5572
|
|
|
|
|
|
|
|
5573
|
8
|
|
|
|
|
22
|
my $AllFields = $this->fieldlist_all(); |
5574
|
8
|
|
|
|
|
14
|
my $AllFieldsHash = {}; @$AllFieldsHash{@$AllFields} = undef; |
|
8
|
|
|
|
|
32
|
|
5575
|
|
|
|
|
|
|
|
5576
|
|
|
|
|
|
|
## Cull $FieldList to only include fields we have... |
5577
|
|
|
|
|
|
|
|
5578
|
8
|
|
|
|
|
18
|
$FieldList = [grep {exists($AllFieldsHash->{$_})} @$FieldList]; |
|
33
|
|
|
|
|
74
|
|
5579
|
|
|
|
|
|
|
|
5580
|
|
|
|
|
|
|
## Ensure $Selection contains only valid record numbers... |
5581
|
|
|
|
|
|
|
|
5582
|
8
|
|
|
|
|
31
|
$Selection = $this->selection_validate_internal($Selection); |
5583
|
|
|
|
|
|
|
|
5584
|
|
|
|
|
|
|
## Get an ordered list of the columns. |
5585
|
8
|
|
|
|
|
34
|
my $Columns = [@$this{@$FieldList}]; |
5586
|
|
|
|
|
|
|
|
5587
|
|
|
|
|
|
|
## Calculate the name of the file we'll write to, if any, and get |
5588
|
|
|
|
|
|
|
## the file handle either from the one we were given or by opening |
5589
|
|
|
|
|
|
|
## the specified file for writing. |
5590
|
|
|
|
|
|
|
|
5591
|
8
|
|
|
|
|
12
|
my $WriteFileName; |
5592
|
|
|
|
|
|
|
my $OutFile; |
5593
|
|
|
|
|
|
|
|
5594
|
8
|
50
|
|
|
|
26
|
if ($GotHandle) |
5595
|
|
|
|
|
|
|
{ |
5596
|
0
|
|
|
|
|
0
|
$WriteFileName = ""; |
5597
|
0
|
|
|
|
|
0
|
$OutFile = $FileName; ## Actually a handle. |
5598
|
|
|
|
|
|
|
} |
5599
|
|
|
|
|
|
|
else |
5600
|
|
|
|
|
|
|
{ |
5601
|
|
|
|
|
|
|
## Calculate the name of the file we're going to try to write to. |
5602
|
8
|
50
|
|
|
|
36
|
$WriteFileName = |
5603
|
|
|
|
|
|
|
|
5604
|
|
|
|
|
|
|
## Use file name exactly if specified in write() call. |
5605
|
|
|
|
|
|
|
($Params->{_FileName} ? $FileName : |
5606
|
|
|
|
|
|
|
|
5607
|
|
|
|
|
|
|
## Otherwise, calculate the name by adding/appending the _WriteExtension. |
5608
|
|
|
|
|
|
|
$this->write_file_name($FileName, $WriteExtension)); |
5609
|
|
|
|
|
|
|
|
5610
|
|
|
|
|
|
|
## Ensure the directory that will hold the file actually exists. |
5611
|
1
|
|
|
1
|
|
7
|
use File::Basename qw(fileparse); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
212
|
|
5612
|
8
|
|
|
|
|
182
|
my ($Basename, $Path, $Ext) = fileparse($WriteFileName, '\.[^\.]+'); |
5613
|
8
|
|
|
|
|
18
|
my ($Sep, $Up, $Cur) = @{$this->path_info()}{qw(sep up cur)}; |
|
8
|
|
|
|
|
17
|
|
5614
|
8
|
|
33
|
|
|
26
|
$Path ||= $Cur; ## Once again, default $Path to cwd just in case. |
5615
|
|
|
|
|
|
|
|
5616
|
8
|
50
|
|
|
|
24
|
$this->{_ErrorMsg} = "Can't make directory $Path to save $WriteFileName: $!", goto done |
5617
|
|
|
|
|
|
|
unless $this->verify_or_create_path($Path, $Sep); |
5618
|
|
|
|
|
|
|
|
5619
|
|
|
|
|
|
|
## Ensure the directory is writeable and the file is overwriteable |
5620
|
|
|
|
|
|
|
## if it exists. |
5621
|
8
|
50
|
|
|
|
93
|
$this->{_ErrorMsg} = "Directory $Path is not writeable.", goto done |
5622
|
|
|
|
|
|
|
unless (-w $Path); |
5623
|
|
|
|
|
|
|
|
5624
|
8
|
50
|
33
|
|
|
206
|
$this->{_ErrorMsg} = "File $WriteFileName cannot be overwritten.", goto done |
5625
|
|
|
|
|
|
|
if (-e $WriteFileName && !(-w $WriteFileName)); |
5626
|
|
|
|
|
|
|
|
5627
|
|
|
|
|
|
|
## Open the file for write. |
5628
|
|
|
|
|
|
|
|
5629
|
1
|
|
|
1
|
|
6
|
use IO::File; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1613
|
|
5630
|
8
|
|
|
|
|
60
|
$OutFile = IO::File->new(">$WriteFileName"); |
5631
|
8
|
50
|
|
|
|
983
|
$this->{_ErrorMsg} = "Failed to open $WriteFileName for writing: $!", goto done |
5632
|
|
|
|
|
|
|
unless $OutFile; |
5633
|
|
|
|
|
|
|
|
5634
|
8
|
|
|
|
|
37
|
$this->progress("Writing $WriteFileName..."); |
5635
|
|
|
|
|
|
|
} |
5636
|
|
|
|
|
|
|
|
5637
|
|
|
|
|
|
|
## Figure out the line initiator & ender strings, and delimiter sequence. |
5638
|
|
|
|
|
|
|
|
5639
|
8
|
|
|
|
|
12
|
my ($LineStartQuote, $Delim, $LineEndQuote); |
5640
|
8
|
50
|
|
|
|
17
|
if ($QuoteFields) |
5641
|
|
|
|
|
|
|
{ |
5642
|
|
|
|
|
|
|
## In "forced" quote mode, we just do the quoting by always |
5643
|
|
|
|
|
|
|
## putting them at the start and end of lines and in between |
5644
|
|
|
|
|
|
|
## each field. |
5645
|
|
|
|
|
|
|
|
5646
|
0
|
|
|
|
|
0
|
$LineStartQuote = "\""; |
5647
|
0
|
|
|
|
|
0
|
$LineEndQuote = "\""; |
5648
|
0
|
|
|
|
|
0
|
$Delim = "\"$FDelimiter\""; |
5649
|
|
|
|
|
|
|
} |
5650
|
|
|
|
|
|
|
else |
5651
|
|
|
|
|
|
|
{ |
5652
|
|
|
|
|
|
|
## In no-quote or auto-quote mode, we don't put the quotes in |
5653
|
|
|
|
|
|
|
## these places; they'll either be omitted entirely or |
5654
|
|
|
|
|
|
|
## inserted per-field. |
5655
|
|
|
|
|
|
|
|
5656
|
8
|
|
|
|
|
59
|
$LineStartQuote = ''; |
5657
|
8
|
|
|
|
|
82
|
$LineEndQuote = ''; |
5658
|
8
|
|
|
|
|
14
|
$Delim = $FDelimiter; |
5659
|
|
|
|
|
|
|
} |
5660
|
|
|
|
|
|
|
|
5661
|
|
|
|
|
|
|
## Precompile a regex that checks for quotes, the field delimiter |
5662
|
|
|
|
|
|
|
## sequence, the line ending sequence, or any line-ending-ish |
5663
|
|
|
|
|
|
|
## characters at all. In $QuoteCheck mode, we'll use this to |
5664
|
|
|
|
|
|
|
## identify fields needing double-quotes. |
5665
|
|
|
|
|
|
|
|
5666
|
8
|
50
|
|
|
|
211
|
my $QuoteOrDelimCheck = qr{(?:\Q$LineEnding\E)|(?:\Q$FDelimiter\E)|[\"\x0D\x0A]} if $QuoteCheck; |
5667
|
|
|
|
|
|
|
|
5668
|
|
|
|
|
|
|
## Precompile a return-map-checking regex that checks first for |
5669
|
|
|
|
|
|
|
## the line ending actively in use and then for the platform's |
5670
|
|
|
|
|
|
|
## "\n". These may be the same, but are not necessarily always |
5671
|
|
|
|
|
|
|
## the same. We don't return-map any old \x0D or \x0A because, |
5672
|
|
|
|
|
|
|
## for example in a pure Unix world, \x0D would not be interpreted |
5673
|
|
|
|
|
|
|
## as line-ending-related and hence is a valid character in a |
5674
|
|
|
|
|
|
|
## field. |
5675
|
|
|
|
|
|
|
|
5676
|
8
|
50
|
|
|
|
125
|
my $ReturnMapCheck = qr{(?:\Q$LineEnding\E)|(?:\n)} if $ReturnMap; |
5677
|
|
|
|
|
|
|
|
5678
|
|
|
|
|
|
|
## Print out the header row that lists the field names in order. |
5679
|
|
|
|
|
|
|
|
5680
|
8
|
50
|
|
|
|
27
|
if ($HeaderRow) |
5681
|
|
|
|
|
|
|
{ |
5682
|
33
|
|
|
|
|
62
|
my $Line = ($LineStartQuote . |
5683
|
|
|
|
|
|
|
join($Delim, |
5684
|
|
|
|
|
|
|
map |
5685
|
|
|
|
|
|
|
{ |
5686
|
|
|
|
|
|
|
## Quote any " character as "" |
5687
|
8
|
|
|
|
|
19
|
(my $X = $_) =~ s/\"/\"\"/g; |
5688
|
|
|
|
|
|
|
|
5689
|
|
|
|
|
|
|
## In QuoteCheck mode _QuoteFields => |
5690
|
|
|
|
|
|
|
## undef ("auto"): put quotes around |
5691
|
|
|
|
|
|
|
## field only if required. |
5692
|
|
|
|
|
|
|
|
5693
|
33
|
50
|
33
|
|
|
258
|
$X = "\"$X\"" if $QuoteCheck && $X =~ $QuoteOrDelimCheck; |
5694
|
|
|
|
|
|
|
|
5695
|
|
|
|
|
|
|
## Convert returns back to \x0B |
5696
|
33
|
50
|
|
|
|
142
|
$X =~ s/$ReturnMapCheck/$ReturnEncoding/g if $ReturnMap; |
5697
|
|
|
|
|
|
|
|
5698
|
33
|
|
|
|
|
105
|
$X; |
5699
|
|
|
|
|
|
|
} @$FieldList) . |
5700
|
|
|
|
|
|
|
$LineEndQuote . |
5701
|
|
|
|
|
|
|
$LineEnding); |
5702
|
|
|
|
|
|
|
|
5703
|
|
|
|
|
|
|
## Maybe convert entire line (all records) Mac to ISO before writing it. |
5704
|
8
|
100
|
|
|
|
41
|
&ISORoman8859_1ToMacRoman(\ $Line) if $DoMacMapping; |
5705
|
|
|
|
|
|
|
|
5706
|
8
|
50
|
|
|
|
55
|
$OutFile->print($Line) if $HeaderRow; |
5707
|
|
|
|
|
|
|
} |
5708
|
|
|
|
|
|
|
|
5709
|
|
|
|
|
|
|
## Print out each row (record). Fields are output in $FieldList |
5710
|
|
|
|
|
|
|
## order (same order as they were in the header row, if any). |
5711
|
|
|
|
|
|
|
## Records are printed in the order specified in $Selection. |
5712
|
|
|
|
|
|
|
|
5713
|
8
|
|
|
|
|
114
|
my $WroteProg; |
5714
|
8
|
|
|
|
|
16
|
my $TotalLen = @$Selection+0; |
5715
|
8
|
50
|
|
|
|
16
|
my $RecordsToWrite = ($MaxRecords ? min($this, $MaxRecords, $TotalLen) : $TotalLen); |
5716
|
8
|
|
|
|
|
9
|
my $RecordsWritten = 0; |
5717
|
|
|
|
|
|
|
|
5718
|
8
|
|
|
|
|
15
|
foreach my $i (@$Selection) |
5719
|
|
|
|
|
|
|
{ |
5720
|
99
|
|
|
|
|
175
|
my $Line = ($LineStartQuote . |
5721
|
|
|
|
|
|
|
join($Delim, |
5722
|
|
|
|
|
|
|
map |
5723
|
|
|
|
|
|
|
{ |
5724
|
|
|
|
|
|
|
## Quote any " character as "" |
5725
|
24
|
|
|
|
|
42
|
(my $X = $_->[$i]) =~ s/\"/\"\"/g; |
5726
|
|
|
|
|
|
|
|
5727
|
|
|
|
|
|
|
## In QuoteCheck mode _QuoteFields => |
5728
|
|
|
|
|
|
|
## undef ("auto"): put quotes around |
5729
|
|
|
|
|
|
|
## field only if required. |
5730
|
|
|
|
|
|
|
|
5731
|
99
|
50
|
33
|
|
|
713
|
$X = "\"$X\"" if $QuoteCheck && $X =~ $QuoteOrDelimCheck; |
5732
|
|
|
|
|
|
|
|
5733
|
|
|
|
|
|
|
## Convert returns back to \x0B |
5734
|
99
|
50
|
|
|
|
369
|
$X =~ s/$ReturnMapCheck/$ReturnEncoding/g if $ReturnMap; |
5735
|
|
|
|
|
|
|
|
5736
|
99
|
|
|
|
|
252
|
$X; |
5737
|
|
|
|
|
|
|
} @$Columns) . |
5738
|
|
|
|
|
|
|
$LineEndQuote . |
5739
|
|
|
|
|
|
|
$LineEnding); |
5740
|
|
|
|
|
|
|
|
5741
|
|
|
|
|
|
|
## Maybe convert entire line (all records) ISO to Mac before writing it. |
5742
|
24
|
100
|
|
|
|
79
|
&ISORoman8859_1ToMacRoman(\ $Line) if $DoMacMapping; |
5743
|
|
|
|
|
|
|
|
5744
|
24
|
|
|
|
|
70
|
$OutFile->print($Line); |
5745
|
|
|
|
|
|
|
|
5746
|
24
|
|
|
|
|
117
|
$RecordsWritten++; |
5747
|
|
|
|
|
|
|
|
5748
|
|
|
|
|
|
|
## Try doing timed (throttled to 1 per 2 secs) progress at |
5749
|
|
|
|
|
|
|
## most every 100th record. |
5750
|
24
|
50
|
|
|
|
54
|
my $Did = $this->progress_timed("Writing", $RecordsWritten, $RecordsWritten, $RecordsToWrite, 1) |
5751
|
|
|
|
|
|
|
if (($RecordsWritten % 100) == 0); |
5752
|
24
|
|
33
|
|
|
82
|
$WroteProg ||= $Did; |
5753
|
|
|
|
|
|
|
|
5754
|
|
|
|
|
|
|
## Stop if we have written all the records we wanted. |
5755
|
24
|
100
|
|
|
|
65
|
last if ($RecordsWritten >= $RecordsToWrite); |
5756
|
|
|
|
|
|
|
} |
5757
|
|
|
|
|
|
|
|
5758
|
|
|
|
|
|
|
## If we wrote timed progress but didn't get to give the 100% |
5759
|
|
|
|
|
|
|
## message yet, print the 100% message now. |
5760
|
8
|
50
|
|
|
|
18
|
if ($WroteProg) |
5761
|
|
|
|
|
|
|
{ |
5762
|
0
|
0
|
|
|
|
0
|
my $FinalProg = $this->progress_timed("Writing", $RecordsWritten, $RecordsWritten, $RecordsToWrite, 1) |
5763
|
|
|
|
|
|
|
unless (($RecordsWritten % 100) == 0); |
5764
|
|
|
|
|
|
|
} |
5765
|
|
|
|
|
|
|
|
5766
|
|
|
|
|
|
|
## Print the regular Done message. |
5767
|
8
|
50
|
|
|
|
20
|
if ($GotHandle) |
5768
|
|
|
|
|
|
|
{ |
5769
|
0
|
|
|
|
|
0
|
$this->progress("Done writing."); |
5770
|
0
|
|
|
|
|
0
|
$Success = 1; |
5771
|
|
|
|
|
|
|
} |
5772
|
|
|
|
|
|
|
else |
5773
|
|
|
|
|
|
|
{ |
5774
|
8
|
|
|
|
|
33
|
$this->progress("Wrote $WriteFileName."); |
5775
|
|
|
|
|
|
|
} |
5776
|
|
|
|
|
|
|
|
5777
|
8
|
50
|
|
|
|
29
|
if (!$GotHandle) |
5778
|
|
|
|
|
|
|
{ |
5779
|
|
|
|
|
|
|
## Close the file and check the exit code. |
5780
|
8
|
|
|
|
|
36
|
$OutFile->close(); |
5781
|
8
|
|
|
|
|
425
|
$Success = (($?>>8) == 0); |
5782
|
|
|
|
|
|
|
|
5783
|
8
|
50
|
|
|
|
27
|
$this->{_ErrorMsg} = "Unexpected failure writing $WriteFileName ($!)", goto done |
5784
|
|
|
|
|
|
|
unless $Success; |
5785
|
|
|
|
|
|
|
} |
5786
|
|
|
|
|
|
|
|
5787
|
|
|
|
|
|
|
done: |
5788
|
8
|
50
|
|
|
|
22
|
$this->warn("FAILURE: $this->{_ErrorMsg}") unless $Success; |
5789
|
|
|
|
|
|
|
|
5790
|
8
|
50
|
|
|
|
105
|
return($Success ? $WriteFileName : undef); |
5791
|
|
|
|
|
|
|
} |
5792
|
|
|
|
|
|
|
|
5793
|
|
|
|
|
|
|
sub write_file_name ## Calculate the name of the file to be written. |
5794
|
|
|
|
|
|
|
{ |
5795
|
8
|
|
|
8
|
0
|
15
|
my $this = shift; |
5796
|
8
|
|
|
|
|
14
|
my ($FileName, $WriteExtension) = @_; |
5797
|
|
|
|
|
|
|
|
5798
|
|
|
|
|
|
|
## Break the path into its parts... |
5799
|
1
|
|
|
1
|
|
6
|
use File::Basename qw(fileparse); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
4249
|
|
5800
|
8
|
|
|
|
|
246
|
my ($Basename, $Path, $Ext) = fileparse($FileName, '\.[^\.]+'); |
5801
|
|
|
|
|
|
|
|
5802
|
|
|
|
|
|
|
## If no directory is explicitly named, set $Path to be the |
5803
|
|
|
|
|
|
|
## implicit "current directory" (e.g. "./") |
5804
|
|
|
|
|
|
|
|
5805
|
8
|
|
|
|
|
19
|
my ($Sep, $Up, $Cur) = @{$this->path_info()}{qw(sep up cur)}; |
|
8
|
|
|
|
|
21
|
|
5806
|
8
|
|
33
|
|
|
35
|
$Path ||= $Cur; |
5807
|
|
|
|
|
|
|
|
5808
|
|
|
|
|
|
|
## If $WriteExtension is empty, which is allowed, then the result |
5809
|
|
|
|
|
|
|
## will be the same as $FileName, which could in some cases result |
5810
|
|
|
|
|
|
|
## in the overwriting of the same file that was read in (and may |
5811
|
|
|
|
|
|
|
## be what is intended). |
5812
|
|
|
|
|
|
|
|
5813
|
8
|
|
|
|
|
19
|
my $WriteFileName = "$Path$Basename$WriteExtension$Ext"; |
5814
|
|
|
|
|
|
|
|
5815
|
8
|
|
|
|
|
24
|
return($WriteFileName); |
5816
|
|
|
|
|
|
|
} |
5817
|
|
|
|
|
|
|
|
5818
|
|
|
|
|
|
|
sub out |
5819
|
|
|
|
|
|
|
{ |
5820
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
5821
|
0
|
|
|
|
|
0
|
my $Dest = shift; |
5822
|
|
|
|
|
|
|
|
5823
|
0
|
|
|
|
|
0
|
my $Success; |
5824
|
|
|
|
|
|
|
|
5825
|
|
|
|
|
|
|
## First do the formatting (or fail) -- format() will warn if needed. |
5826
|
0
|
0
|
|
|
|
0
|
my $Data = $this->format(@_) or goto done; |
5827
|
|
|
|
|
|
|
|
5828
|
|
|
|
|
|
|
## If $Dest is empty or not defined, use STDOUT. |
5829
|
0
|
|
0
|
|
|
0
|
$Dest ||= \*STDOUT; |
5830
|
|
|
|
|
|
|
|
5831
|
|
|
|
|
|
|
## If given an IO handle (such as \*STDERR), bless and use it. |
5832
|
0
|
0
|
|
|
|
0
|
if (ref($Dest) eq 'HANDLE') {$Dest = bless($Dest , 'IO::File')}; |
|
0
|
|
|
|
|
0
|
|
5833
|
|
|
|
|
|
|
|
5834
|
|
|
|
|
|
|
## If it is not an object, treat it as a file name to be opened. |
5835
|
0
|
0
|
0
|
|
|
0
|
if (!ref($Dest)) {$Dest = (IO::File->new(">$Dest") or |
|
0
|
|
|
|
|
0
|
|
5836
|
|
|
|
|
|
|
$this->warn("Can't open file $Dest: $!"), goto done)}; |
5837
|
|
|
|
|
|
|
|
5838
|
|
|
|
|
|
|
## At this point treat $Dest as an object with a print method and |
5839
|
|
|
|
|
|
|
## complain if the print method doesn't return a true value. |
5840
|
|
|
|
|
|
|
|
5841
|
0
|
0
|
|
|
|
0
|
$Dest->print($$Data) or $this->warn("Had trouble writing file: $!"), goto done; |
5842
|
|
|
|
|
|
|
|
5843
|
0
|
|
|
|
|
0
|
$Success = 1; |
5844
|
0
|
|
|
|
|
0
|
done: |
5845
|
|
|
|
|
|
|
return($Success); |
5846
|
|
|
|
|
|
|
} |
5847
|
|
|
|
|
|
|
|
5848
|
|
|
|
|
|
|
sub format ## use Data::ShowTable to format the table in a pretty way. |
5849
|
|
|
|
|
|
|
{ |
5850
|
1
|
|
|
1
|
1
|
3
|
my $this = shift; |
5851
|
1
|
50
|
|
|
|
7
|
my $Params = (@_ == 1 ? {_MaxRecords => $_[0]} : {@_}); |
5852
|
|
|
|
|
|
|
|
5853
|
1
|
|
|
|
|
3
|
my($Selection, $FieldList, $SortSpecs, $DefaultSortType, $MaxRecords, $MaxWidth) = map {$this->getparam($Params, $_)} |
|
6
|
|
|
|
|
24
|
|
5854
|
|
|
|
|
|
|
qw(_Selection _FieldList _SortSpecs _DefaultSortType _MaxRecords _MaxWidth); |
5855
|
|
|
|
|
|
|
|
5856
|
|
|
|
|
|
|
## This method relies on Data::ShowTable. |
5857
|
1
|
50
|
|
|
|
7
|
$this->warn("@{[__PACKAGE__]}::show() requires optional Data::ShowTable module."), goto done |
|
1
|
|
|
|
|
10
|
|
5858
|
|
|
|
|
|
|
unless $HaveShowTable; |
5859
|
|
|
|
|
|
|
|
5860
|
0
|
|
0
|
|
|
0
|
$FieldList ||= $this->fieldlist(); |
5861
|
0
|
|
0
|
|
|
0
|
$Selection ||= $this->selection(); |
5862
|
0
|
|
0
|
|
|
0
|
$SortSpecs ||= {}; |
5863
|
0
|
0
|
|
|
|
0
|
$DefaultSortType = 'String' unless (length($DefaultSortType)); |
5864
|
0
|
|
0
|
|
|
0
|
$MaxRecords ||= 0; ## Default is no maximum (all records). |
5865
|
0
|
|
0
|
|
|
0
|
$MaxWidth ||= 15; ## Zero or undef means use default. |
5866
|
0
|
|
|
|
|
0
|
$MaxWidth = max(2, $MaxWidth); ## MaxWidth must not be less than 2 |
5867
|
|
|
|
|
|
|
|
5868
|
0
|
|
|
|
|
0
|
my $TypeMap = {qw(string char |
5869
|
|
|
|
|
|
|
text text |
5870
|
|
|
|
|
|
|
integer int |
5871
|
|
|
|
|
|
|
number numeric |
5872
|
|
|
|
|
|
|
boolean int)}; |
5873
|
|
|
|
|
|
|
|
5874
|
0
|
0
|
0
|
|
|
0
|
my $Types = [map {$TypeMap->{lc(@{$SortSpecs->{$_} || {}}{SortType} || |
|
0
|
|
|
|
|
0
|
|
5875
|
|
|
|
|
|
|
$DefaultSortType) } || 'string'} |
5876
|
|
|
|
|
|
|
@$FieldList]; |
5877
|
|
|
|
|
|
|
|
5878
|
0
|
|
|
|
|
0
|
my $TotalLen = @$Selection+0; |
5879
|
0
|
0
|
|
|
|
0
|
my $RecordsToWrite = ($MaxRecords ? min($this, $MaxRecords, $TotalLen) : $TotalLen); |
5880
|
|
|
|
|
|
|
|
5881
|
|
|
|
|
|
|
## The row-yielder subroutine and its private state variables. |
5882
|
0
|
|
|
|
|
0
|
my $SelNum = 0; |
5883
|
|
|
|
|
|
|
my $RowSub = sub ## A closure over the local vars in this subroutine. |
5884
|
|
|
|
|
|
|
{ |
5885
|
|
|
|
|
|
|
## We might be asked to rewind. |
5886
|
0
|
|
|
0
|
|
0
|
my ($Rewind) = @_; |
5887
|
0
|
0
|
|
|
|
0
|
$SelNum = 0, return(1) if ($Rewind); |
5888
|
|
|
|
|
|
|
|
5889
|
|
|
|
|
|
|
## Done if we've written all rows. |
5890
|
0
|
0
|
|
|
|
0
|
return() if $SelNum >= $RecordsToWrite; |
5891
|
|
|
|
|
|
|
|
5892
|
|
|
|
|
|
|
## Otherwise, yield a row if we still can. |
5893
|
0
|
0
|
|
|
|
0
|
my $List = [map |
5894
|
|
|
|
|
|
|
{ |
5895
|
|
|
|
|
|
|
## Truncate as needed. |
5896
|
0
|
|
|
|
|
0
|
my $X = (length > $MaxWidth ? |
5897
|
|
|
|
|
|
|
(substr($_, 0, ($MaxWidth - 1)) . '>') : $_); |
5898
|
|
|
|
|
|
|
|
5899
|
|
|
|
|
|
|
## Encode returns, tabs as carets. |
5900
|
0
|
|
|
|
|
0
|
$X =~ s{(?:\x0D\x0A)|[\x0D\x0A\x09]}{^}g; |
5901
|
|
|
|
|
|
|
|
5902
|
0
|
|
|
|
|
0
|
$X; |
5903
|
|
|
|
|
|
|
} |
5904
|
0
|
|
|
|
|
0
|
@{$this->row_list($Selection->[$SelNum++], $FieldList)}]; |
5905
|
|
|
|
|
|
|
|
5906
|
0
|
|
|
|
|
0
|
return(@$List); |
5907
|
0
|
|
|
|
|
0
|
}; |
5908
|
|
|
|
|
|
|
|
5909
|
|
|
|
|
|
|
## Locally replace put() and out() in Data::ShowTable so we can |
5910
|
|
|
|
|
|
|
## gather the data into memory instead of having it go right out |
5911
|
|
|
|
|
|
|
## to STDOUT before we may want it to. |
5912
|
|
|
|
|
|
|
|
5913
|
|
|
|
|
|
|
## Too bad Data::ShowTable is not a subclassable object instead. |
5914
|
|
|
|
|
|
|
|
5915
|
0
|
|
|
|
|
0
|
my $Data = [""]; ## Array to hold output from out() and put() |
5916
|
|
|
|
|
|
|
{ |
5917
|
0
|
|
|
|
|
0
|
local *{Data::ShowTable::out} = sub ## See sub out in ShowTable.pm |
5918
|
|
|
|
|
|
|
{ |
5919
|
0
|
|
|
0
|
|
0
|
my $fmt = shift; |
5920
|
0
|
0
|
|
|
|
0
|
$fmt .= "\n" unless $fmt =~ /\n$/; |
5921
|
0
|
|
|
|
|
0
|
$Data->[-1] .= sprintf($fmt, @_); |
5922
|
0
|
|
|
|
|
0
|
push @$Data, ""; |
5923
|
0
|
|
|
|
|
0
|
}; |
5924
|
|
|
|
|
|
|
|
5925
|
|
|
|
|
|
|
local *{Data::ShowTable::put} = sub ## See sub put in ShowTable.pm |
5926
|
|
|
|
|
|
|
{ |
5927
|
0
|
|
|
0
|
|
0
|
my $fmt = shift(); |
5928
|
0
|
|
|
|
|
0
|
$Data->[-1] .= sprintf($fmt, @_); |
5929
|
0
|
|
|
|
|
0
|
}; |
5930
|
|
|
|
|
|
|
|
5931
|
0
|
|
|
|
|
0
|
&ShowBoxTable({titles => $FieldList, |
5932
|
|
|
|
|
|
|
types => $Types, |
5933
|
|
|
|
|
|
|
row_sub => $RowSub, |
5934
|
|
|
|
|
|
|
widths => [], ## Will calculate from the data |
5935
|
|
|
|
|
|
|
}); |
5936
|
|
|
|
|
|
|
} |
5937
|
|
|
|
|
|
|
|
5938
|
|
|
|
|
|
|
## Remove spurious extra newline entries at end of $Data |
5939
|
0
|
0
|
|
|
|
0
|
pop @$Data if $Data->[-1] =~ /^\s*$/s; |
5940
|
0
|
0
|
|
|
|
0
|
pop @$Data if $Data->[-1] =~ /^\s*$/s; |
5941
|
|
|
|
|
|
|
|
5942
|
0
|
|
|
|
|
0
|
my $Formatted = join("", @$Data); |
5943
|
|
|
|
|
|
|
|
5944
|
1
|
|
|
|
|
7
|
done: |
5945
|
|
|
|
|
|
|
return(\ $Formatted); |
5946
|
|
|
|
|
|
|
} |
5947
|
|
|
|
|
|
|
|
5948
|
|
|
|
|
|
|
|
5949
|
|
|
|
|
|
|
=pod |
5950
|
|
|
|
|
|
|
|
5951
|
|
|
|
|
|
|
=head1 APPENDING / MERGING / JOINING TABLES |
5952
|
|
|
|
|
|
|
|
5953
|
|
|
|
|
|
|
## Append all records from a second table |
5954
|
|
|
|
|
|
|
|
5955
|
|
|
|
|
|
|
$t->append($Other) ## Append records from $Other |
5956
|
|
|
|
|
|
|
$t->append_file($File, $Params) ## Append from new($Params, $File) |
5957
|
|
|
|
|
|
|
$t->append_files($Files, $Params) ## Call append_file for all files |
5958
|
|
|
|
|
|
|
$t->append_files_new($Files, $Params) ## Internal helper routine |
5959
|
|
|
|
|
|
|
|
5960
|
|
|
|
|
|
|
## Combine all fields from a second table |
5961
|
|
|
|
|
|
|
|
5962
|
|
|
|
|
|
|
$t->combine($Other) ## Combine fields from $Other |
5963
|
|
|
|
|
|
|
$t->combine_file($File, $Params) ## Combine new($Params, $File) |
5964
|
|
|
|
|
|
|
$t->combine_files($Files, $Params) ## combine_file on each file |
5965
|
|
|
|
|
|
|
|
5966
|
|
|
|
|
|
|
## Left-join records from a second table (lookup field vals) |
5967
|
|
|
|
|
|
|
|
5968
|
|
|
|
|
|
|
$t->join ($Other, $KeyField1, [$KeyField2, $Fields]) |
5969
|
|
|
|
|
|
|
$t->join_file ($File, $Params, $KeyField1, [$KeyField2, $Fields]) |
5970
|
|
|
|
|
|
|
$t->join_files($Files, $Params, $KeyField1, [$KeyField2, $Fields]) |
5971
|
|
|
|
|
|
|
|
5972
|
|
|
|
|
|
|
The append() method concatenates all the records from two CTable |
5973
|
|
|
|
|
|
|
objects together -- even if the two tables didn't start out with |
5974
|
|
|
|
|
|
|
exactly the same fields (or even any of the same fields). |
5975
|
|
|
|
|
|
|
|
5976
|
|
|
|
|
|
|
It takes all the data records from another CTable object and appends |
5977
|
|
|
|
|
|
|
them into the present table. Any columns present in the $Other table |
5978
|
|
|
|
|
|
|
but not in the first table, are created (and the corresponding field |
5979
|
|
|
|
|
|
|
values in the first table will all be empty/undef). Similarly, any |
5980
|
|
|
|
|
|
|
columns present in $t but not present in $Other will be extended |
5981
|
|
|
|
|
|
|
to the correct new length as necessary and the field values in the |
5982
|
|
|
|
|
|
|
original columns will be empty/undef. Columns present in both will, |
5983
|
|
|
|
|
|
|
of course, have all the data from both the original sets of data. |
5984
|
|
|
|
|
|
|
|
5985
|
|
|
|
|
|
|
All data from the second table is brought into the first one. No |
5986
|
|
|
|
|
|
|
attempt whatsoever is made to eliminate any duplicate records that |
5987
|
|
|
|
|
|
|
might result. |
5988
|
|
|
|
|
|
|
|
5989
|
|
|
|
|
|
|
The number of records (length()) after this call is the sum of the |
5990
|
|
|
|
|
|
|
length() of each of the tables before the operation. |
5991
|
|
|
|
|
|
|
|
5992
|
|
|
|
|
|
|
IMPORTANT NOTE: The data from the $Other object is COPIED in memory |
5993
|
|
|
|
|
|
|
into the new object. This could be hard on memory if $Other is big. |
5994
|
|
|
|
|
|
|
Might want to be sure to discard $Other when you're done with it. |
5995
|
|
|
|
|
|
|
|
5996
|
|
|
|
|
|
|
$Other is left untouched by the operation. |
5997
|
|
|
|
|
|
|
|
5998
|
|
|
|
|
|
|
All columns from both tables are combined whether or not they are |
5999
|
|
|
|
|
|
|
mentioned in the custom field list of either. |
6000
|
|
|
|
|
|
|
|
6001
|
|
|
|
|
|
|
The custom field lists, if present in either table object, are |
6002
|
|
|
|
|
|
|
concatenated into this object's custom field list, but with |
6003
|
|
|
|
|
|
|
duplications eliminated, and order retained. |
6004
|
|
|
|
|
|
|
|
6005
|
|
|
|
|
|
|
Any existing custom selections, custom sort order, sort specs, and/or |
6006
|
|
|
|
|
|
|
sort routines are also combined appropriately, with settings from this |
6007
|
|
|
|
|
|
|
object taking precedence over those from $Other anywhere the two have |
6008
|
|
|
|
|
|
|
conflicting settings. |
6009
|
|
|
|
|
|
|
|
6010
|
|
|
|
|
|
|
append_file() takes a file name and optional $Params hash. It uses |
6011
|
|
|
|
|
|
|
those to create a new() object with data read from the file. Then, |
6012
|
|
|
|
|
|
|
the new table is appended to $t using append() and then the new table |
6013
|
|
|
|
|
|
|
is discarded. |
6014
|
|
|
|
|
|
|
|
6015
|
|
|
|
|
|
|
append_files() is a convenience function that calls append_file() on |
6016
|
|
|
|
|
|
|
each file in a list, using the same optional $Params for each. |
6017
|
|
|
|
|
|
|
|
6018
|
|
|
|
|
|
|
append_files_new() is the internal routine that implements the |
6019
|
|
|
|
|
|
|
processing done by new() on the optional list of files to be read. It |
6020
|
|
|
|
|
|
|
does the following: It calls read() on the first file in the list. |
6021
|
|
|
|
|
|
|
Then, it calls append_files() to read the remaining into their own |
6022
|
|
|
|
|
|
|
new() objects of the same class as $t and using the same $Params to |
6023
|
|
|
|
|
|
|
new() (if any were supplied). Then each of these is append()-ed in |
6024
|
|
|
|
|
|
|
turn to $t and discarded. The final result will be that $t will hold |
6025
|
|
|
|
|
|
|
a concatenation of all the data in all the files mentioned. However, |
6026
|
|
|
|
|
|
|
consistent with the behavior of append(), the _FileName parameter and |
6027
|
|
|
|
|
|
|
other read()-controlled settings will correspond to the first file |
6028
|
|
|
|
|
|
|
read. The intermediate objects are discarded. |
6029
|
|
|
|
|
|
|
|
6030
|
|
|
|
|
|
|
NOTE: As with new() and read(), if a non-empty _FieldList Param is |
6031
|
|
|
|
|
|
|
specified, the read() methods called internally by the append_file*() |
6032
|
|
|
|
|
|
|
methods will read only the fields mentioned and will ignore any other |
6033
|
|
|
|
|
|
|
fields in the files. |
6034
|
|
|
|
|
|
|
|
6035
|
|
|
|
|
|
|
=head2 Combining tables |
6036
|
|
|
|
|
|
|
|
6037
|
|
|
|
|
|
|
combine() adds columns from a second table into the current one. |
6038
|
|
|
|
|
|
|
|
6039
|
|
|
|
|
|
|
CAUTION: You should only use combine() when you have two tables where |
6040
|
|
|
|
|
|
|
all the (possibly selected) records in the second table line up |
6041
|
|
|
|
|
|
|
perfectly with all the (unselected) records in the first table -- in |
6042
|
|
|
|
|
|
|
other words, each table before combine() should contain a few of the |
6043
|
|
|
|
|
|
|
columns of the new table -- for example, maybe one table contains a |
6044
|
|
|
|
|
|
|
column of file names, and the other contains columns of corresponding |
6045
|
|
|
|
|
|
|
file sizes and modification times. If you don't understand the |
6046
|
|
|
|
|
|
|
consequences of combine, don't use it or you could end up with some |
6047
|
|
|
|
|
|
|
records whose field values don't refer to the same object. (Maybe you |
6048
|
|
|
|
|
|
|
meant to use append() or join() instead.) |
6049
|
|
|
|
|
|
|
|
6050
|
|
|
|
|
|
|
If the second table has a custom field list, only those columns are |
6051
|
|
|
|
|
|
|
brought in. |
6052
|
|
|
|
|
|
|
|
6053
|
|
|
|
|
|
|
If any column in the second table has the same name as one in the |
6054
|
|
|
|
|
|
|
current table, the incoming column replaces the one by the same name. |
6055
|
|
|
|
|
|
|
|
6056
|
|
|
|
|
|
|
All columns are COPIED from the second table, so the first table owns |
6057
|
|
|
|
|
|
|
the new data exclusively. |
6058
|
|
|
|
|
|
|
|
6059
|
|
|
|
|
|
|
If the second table has a selection, only those records are copied, in |
6060
|
|
|
|
|
|
|
selection order. (select_all() first if that's not what you want.) |
6061
|
|
|
|
|
|
|
|
6062
|
|
|
|
|
|
|
The selection in the first table, if any, is ignored during the |
6063
|
|
|
|
|
|
|
combine. If this isn't what you want, then consider using cull() |
6064
|
|
|
|
|
|
|
before combine(). |
6065
|
|
|
|
|
|
|
|
6066
|
|
|
|
|
|
|
Field list and sort order are concatenated (but retaining uniqueness: |
6067
|
|
|
|
|
|
|
second mentions of a field in the combined lists are omitted). |
6068
|
|
|
|
|
|
|
|
6069
|
|
|
|
|
|
|
Custom sort routines and sort specs are combined, with those in the |
6070
|
|
|
|
|
|
|
first table taking precedence over any copied in with the same name. |
6071
|
|
|
|
|
|
|
|
6072
|
|
|
|
|
|
|
The custom _Selection from the first table, if any, is retained. (It |
6073
|
|
|
|
|
|
|
will initially omit any records added by extend()). |
6074
|
|
|
|
|
|
|
|
6075
|
|
|
|
|
|
|
All other parameters from the first table are retained, and from the |
6076
|
|
|
|
|
|
|
second table are ignored. |
6077
|
|
|
|
|
|
|
|
6078
|
|
|
|
|
|
|
combine() calls extend() after combining to ensure that all columns |
6079
|
|
|
|
|
|
|
have the same length: if either the older or newer columns were |
6080
|
|
|
|
|
|
|
shorter, they will all be set to the length of the longest columns in |
6081
|
|
|
|
|
|
|
the table -- creating some empty field values at the end of the |
6082
|
|
|
|
|
|
|
lengthened columns. |
6083
|
|
|
|
|
|
|
|
6084
|
|
|
|
|
|
|
combine_file() does the same as combine() except starting with a file |
6085
|
|
|
|
|
|
|
name, first creating the $Other object by creating it using |
6086
|
|
|
|
|
|
|
new($Params, $File), then discarding it after combining. |
6087
|
|
|
|
|
|
|
|
6088
|
|
|
|
|
|
|
=head2 Joining tables (Looking up data from another table) |
6089
|
|
|
|
|
|
|
|
6090
|
|
|
|
|
|
|
join() looks up field values from a second table, based on common |
6091
|
|
|
|
|
|
|
values in key fields which may have different or the same names in |
6092
|
|
|
|
|
|
|
each table. It adds columns to the current table if necessary to hold |
6093
|
|
|
|
|
|
|
any new field values that must be brought in. |
6094
|
|
|
|
|
|
|
|
6095
|
|
|
|
|
|
|
join() never adds any new or non-matching records to the table: |
6096
|
|
|
|
|
|
|
records where the lookup fails will simply have empty/undef values in |
6097
|
|
|
|
|
|
|
the corresponding columns. |
6098
|
|
|
|
|
|
|
|
6099
|
|
|
|
|
|
|
## Example: |
6100
|
|
|
|
|
|
|
|
6101
|
|
|
|
|
|
|
$t->join ($People, 'FullName', 'FirstAndLast'); ## or |
6102
|
|
|
|
|
|
|
$t->join_file("People.txt", {}, 'FullName', 'FirstAndLast'); |
6103
|
|
|
|
|
|
|
|
6104
|
|
|
|
|
|
|
|
6105
|
|
|
|
|
|
|
Here's how join() calculates the list of fields to bring in: |
6106
|
|
|
|
|
|
|
|
6107
|
|
|
|
|
|
|
- Legal field names from the optional $Fields list, if supplied |
6108
|
|
|
|
|
|
|
- Otherwise, the fieldlist() from second table |
6109
|
|
|
|
|
|
|
- ... minus any fields with same name as $KeyField1 or $KeyField2 |
6110
|
|
|
|
|
|
|
|
6111
|
|
|
|
|
|
|
Join starts by adding new empty columns in the first table for any |
6112
|
|
|
|
|
|
|
field to be brought in from the second but not yet present in the |
6113
|
|
|
|
|
|
|
first. |
6114
|
|
|
|
|
|
|
|
6115
|
|
|
|
|
|
|
Here's how join() calculates the records eligible for lookup: |
6116
|
|
|
|
|
|
|
|
6117
|
|
|
|
|
|
|
- Join only modifies the selected records in the first table |
6118
|
|
|
|
|
|
|
- Join only looks up values from selected records in second table |
6119
|
|
|
|
|
|
|
|
6120
|
|
|
|
|
|
|
(If you want all records to be used in both or either table, call the |
6121
|
|
|
|
|
|
|
table's select_all() method before calling join().) |
6122
|
|
|
|
|
|
|
|
6123
|
|
|
|
|
|
|
Then, for every selected record in $t (using the example above), join |
6124
|
|
|
|
|
|
|
examines the FullName field ($KeyField1), and looks up a corresponding |
6125
|
|
|
|
|
|
|
entry (must be 'eq') in the FirstAndLast field ($KeyField2) in the |
6126
|
|
|
|
|
|
|
second table. |
6127
|
|
|
|
|
|
|
|
6128
|
|
|
|
|
|
|
IMPORTANT NOTE ABOUT KEY LENGTH: To speed lookup, hash-based indices |
6129
|
|
|
|
|
|
|
are made. The strings in $Key1 and $Key2 fields should not be so long |
6130
|
|
|
|
|
|
|
that the hash lookups bog down or things could get ugly fast. There |
6131
|
|
|
|
|
|
|
is no fixed limit to hash key length in Perl, but fewer than 128 |
6132
|
|
|
|
|
|
|
characters in length is longer than customary for such things. (Many |
6133
|
|
|
|
|
|
|
systems require text-based keys to be no longer than 31 characters.) |
6134
|
|
|
|
|
|
|
So be judicious about the values in $Key1 and $Key2 fields. |
6135
|
|
|
|
|
|
|
|
6136
|
|
|
|
|
|
|
The first record found in the second table's selection with a matching |
6137
|
|
|
|
|
|
|
value in the key field is then copied over (but only the appropriate |
6138
|
|
|
|
|
|
|
fields are copied, as explained above). Any field values being |
6139
|
|
|
|
|
|
|
brought over will REPLACE corresponding field values in the first |
6140
|
|
|
|
|
|
|
table, possibly overwriting any previous values if the field being |
6141
|
|
|
|
|
|
|
looked up was already present in the first table and contained data. |
6142
|
|
|
|
|
|
|
|
6143
|
|
|
|
|
|
|
The first table's _FieldList is updated to reflect new fields added. |
6144
|
|
|
|
|
|
|
|
6145
|
|
|
|
|
|
|
Its _Selection is untouched. |
6146
|
|
|
|
|
|
|
|
6147
|
|
|
|
|
|
|
Its _SortOrder is untouched. |
6148
|
|
|
|
|
|
|
|
6149
|
|
|
|
|
|
|
Its _SortSpecs are augmented to include any entries from the second |
6150
|
|
|
|
|
|
|
table that should be brought over due to the field additions. |
6151
|
|
|
|
|
|
|
|
6152
|
|
|
|
|
|
|
Its _SRoutines are augmented to add new ones from the second table. |
6153
|
|
|
|
|
|
|
|
6154
|
|
|
|
|
|
|
All other parameters of table 1 are untouched. |
6155
|
|
|
|
|
|
|
|
6156
|
|
|
|
|
|
|
The second table is not modified. No data structures will be shared |
6157
|
|
|
|
|
|
|
between the tables. Data is only copied. |
6158
|
|
|
|
|
|
|
|
6159
|
|
|
|
|
|
|
join_file() calls join() after creating a seond table from your $File. |
6160
|
|
|
|
|
|
|
|
6161
|
|
|
|
|
|
|
join_files() calls join_file() repeatedly for each file in a list, but |
6162
|
|
|
|
|
|
|
it is important to note that each file in the list of files to be |
6163
|
|
|
|
|
|
|
joined must have a $Key2 field -- AND, that any values looked up from |
6164
|
|
|
|
|
|
|
the second file will overwrite any values of the same key found in the |
6165
|
|
|
|
|
|
|
first file, and so on. You probably will not ever need join_files(). |
6166
|
|
|
|
|
|
|
It is mainly here for completeness. |
6167
|
|
|
|
|
|
|
|
6168
|
|
|
|
|
|
|
=cut |
6169
|
|
|
|
|
|
|
|
6170
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
6171
|
|
|
|
|
|
|
|
6172
|
|
|
|
|
|
|
sub append ## ($this, $OtherCTable) |
6173
|
|
|
|
|
|
|
{ |
6174
|
10
|
|
|
10
|
0
|
41
|
my $this = shift; |
6175
|
10
|
|
|
|
|
16
|
my ($that) = @_; |
6176
|
|
|
|
|
|
|
|
6177
|
10
|
|
|
|
|
13
|
my $Success; |
6178
|
|
|
|
|
|
|
|
6179
|
|
|
|
|
|
|
## Get all fields in $this, but only selected ones in $that |
6180
|
10
|
|
|
|
|
28
|
my $ThisFieldsAll = $this->fieldlist_all(); |
6181
|
10
|
|
|
|
|
23
|
my $ThatFields = $that->fieldlist(); |
6182
|
|
|
|
|
|
|
|
6183
|
|
|
|
|
|
|
## Figure out how many data fields in each of the tables. |
6184
|
10
|
|
|
|
|
25
|
my $ThisFieldCount = @$ThisFieldsAll+0; |
6185
|
10
|
|
|
|
|
15
|
my $ThatFieldCount = @$ThatFields +0; |
6186
|
|
|
|
|
|
|
|
6187
|
|
|
|
|
|
|
## We're going to bring over only the selected records in $that. |
6188
|
10
|
|
|
|
|
21
|
my $ThisSel = $this->selection(); |
6189
|
10
|
|
|
|
|
20
|
my $ThatSel = $that->selection(); |
6190
|
|
|
|
|
|
|
|
6191
|
|
|
|
|
|
|
## Figure out how many records there were to start with. |
6192
|
10
|
|
|
|
|
22
|
my $ThisLength = $this->length(); |
6193
|
10
|
|
|
|
|
16
|
my $ThatLength = @$ThatSel+0; |
6194
|
|
|
|
|
|
|
|
6195
|
|
|
|
|
|
|
## New record count is sum of the other two. |
6196
|
10
|
|
|
|
|
13
|
my $NewLength = $ThisLength + $ThatLength; |
6197
|
|
|
|
|
|
|
|
6198
|
|
|
|
|
|
|
## Create any missing columns not yet present in $this and, |
6199
|
|
|
|
|
|
|
## whether new or not, presize all vectors to the new length, |
6200
|
|
|
|
|
|
|
## which will create empty/undef entries as necessary. |
6201
|
|
|
|
|
|
|
|
6202
|
10
|
|
50
|
|
|
179
|
foreach (@$ThisFieldsAll, @$ThatFields) {$#{$this->{$_} ||= []} = ($NewLength - 1)}; |
|
72
|
|
|
|
|
81
|
|
|
72
|
|
|
|
|
735
|
|
6203
|
|
|
|
|
|
|
|
6204
|
|
|
|
|
|
|
## Then copy the field data from the second table into the already |
6205
|
|
|
|
|
|
|
## pre-sized columns in this one. |
6206
|
|
|
|
|
|
|
|
6207
|
10
|
|
|
|
|
185
|
foreach my $FieldName (@$ThatFields) |
6208
|
|
|
|
|
|
|
{ |
6209
|
34
|
|
|
|
|
48
|
my $NewVector = $this->{$FieldName}; |
6210
|
34
|
|
|
|
|
71
|
my $OldVector = $that->sel_get($FieldName, $ThatSel); |
6211
|
|
|
|
|
|
|
|
6212
|
34
|
|
|
|
|
65
|
foreach my $RecordNum (0..$#$OldVector) |
|
86
|
|
|
|
|
750
|
|
6213
|
|
|
|
|
|
|
{($NewVector->[$ThisLength + $RecordNum] = |
6214
|
|
|
|
|
|
|
$OldVector->[ $RecordNum])}; |
6215
|
|
|
|
|
|
|
} |
6216
|
|
|
|
|
|
|
|
6217
|
|
|
|
|
|
|
## Now all the data columns have been combined. We just have to |
6218
|
|
|
|
|
|
|
## combine any custom metadata. |
6219
|
|
|
|
|
|
|
|
6220
|
|
|
|
|
|
|
## If either table had a custom fieldlist, then make a new custom |
6221
|
|
|
|
|
|
|
## field list which is the result of concatenating both field |
6222
|
|
|
|
|
|
|
## lists together, without duplicates, and of course preserving |
6223
|
|
|
|
|
|
|
## the original order as completely as possible (with the order |
6224
|
|
|
|
|
|
|
## given in the first table taking precedence). |
6225
|
|
|
|
|
|
|
|
6226
|
10
|
50
|
33
|
|
|
35
|
if (defined($this->{_FieldList}) || |
6227
|
|
|
|
|
|
|
defined($that->{_FieldList})) |
6228
|
|
|
|
|
|
|
|
6229
|
|
|
|
|
|
|
{ |
6230
|
10
|
|
|
|
|
23
|
my $ThisFields = $this->fieldlist(); |
6231
|
10
|
|
|
|
|
221
|
my $ThatFields = $that->fieldlist(); |
6232
|
|
|
|
|
|
|
|
6233
|
|
|
|
|
|
|
## Make a hash mapping field names from both tables to the |
6234
|
|
|
|
|
|
|
## order they should appear |
6235
|
|
|
|
|
|
|
|
6236
|
10
|
|
|
|
|
18
|
my $FieldOrderHash = {}; |
6237
|
10
|
|
66
|
|
|
21
|
foreach (@$ThisFields, @$ThatFields) {$FieldOrderHash->{$_} ||= (keys %$FieldOrderHash) + 1}; |
|
69
|
|
|
|
|
226
|
|
6238
|
|
|
|
|
|
|
|
6239
|
10
|
|
|
|
|
33
|
my $FieldList = [sort {$FieldOrderHash->{$a} <=> $FieldOrderHash->{$b}} keys %$FieldOrderHash]; |
|
42
|
|
|
|
|
71
|
|
6240
|
|
|
|
|
|
|
|
6241
|
10
|
|
|
|
|
37
|
$this->{_FieldList} = $FieldList; |
6242
|
|
|
|
|
|
|
} |
6243
|
|
|
|
|
|
|
|
6244
|
|
|
|
|
|
|
## If either table had a custom sortorder, then make a new custom |
6245
|
|
|
|
|
|
|
## sort order which is the result of concatenating both orders |
6246
|
|
|
|
|
|
|
## together, without duplicates, and of course preserving the |
6247
|
|
|
|
|
|
|
## original order as completely as possible (with the order given |
6248
|
|
|
|
|
|
|
## in the first table taking precedence). |
6249
|
|
|
|
|
|
|
|
6250
|
10
|
50
|
33
|
|
|
50
|
if (defined($this->{_SortOrder}) || |
6251
|
|
|
|
|
|
|
defined($that->{_SortOrder})) |
6252
|
|
|
|
|
|
|
{ |
6253
|
0
|
|
|
|
|
0
|
my $ThisOrder = $this->sortorder(); |
6254
|
0
|
|
|
|
|
0
|
my $ThatOrder = $that->sortorder(); |
6255
|
|
|
|
|
|
|
|
6256
|
|
|
|
|
|
|
## Make a hash mapping field names from both lists to the |
6257
|
|
|
|
|
|
|
## order they should appear |
6258
|
|
|
|
|
|
|
|
6259
|
0
|
|
|
|
|
0
|
my $OrderHash = {}; |
6260
|
0
|
|
0
|
|
|
0
|
foreach (@$ThisOrder, @$ThatOrder) {$OrderHash->{$_} ||= (keys %$OrderHash) + 1}; |
|
0
|
|
|
|
|
0
|
|
6261
|
|
|
|
|
|
|
|
6262
|
0
|
|
|
|
|
0
|
my $OrderList = [sort {$OrderHash->{$a} <=> $OrderHash->{$b}} keys %$OrderHash]; |
|
0
|
|
|
|
|
0
|
|
6263
|
|
|
|
|
|
|
|
6264
|
0
|
|
|
|
|
0
|
$this->{_SortOrder} = $OrderList; |
6265
|
|
|
|
|
|
|
} |
6266
|
|
|
|
|
|
|
|
6267
|
|
|
|
|
|
|
## If either table had a custom selection, then create a new |
6268
|
|
|
|
|
|
|
## selection which is the concatenation of the two selections. |
6269
|
|
|
|
|
|
|
|
6270
|
10
|
100
|
100
|
|
|
46
|
if (defined($this->{_Selection}) || |
6271
|
|
|
|
|
|
|
defined($that->{_Selection})) |
6272
|
|
|
|
|
|
|
{ |
6273
|
3
|
|
|
|
|
11
|
$this->{_Selection} = [@$ThisSel, ## Original selected records... |
6274
|
|
|
|
|
|
|
## Plus an adjusted entry for newly-added ones. |
6275
|
|
|
|
|
|
|
($ThisLength .. ($ThisLength + @$ThatSel - 1)) |
6276
|
|
|
|
|
|
|
]; |
6277
|
|
|
|
|
|
|
} |
6278
|
|
|
|
|
|
|
|
6279
|
|
|
|
|
|
|
## If either table had custom sortspecs, then create a new |
6280
|
|
|
|
|
|
|
## sortspecs hash by starting with all the entries from the other |
6281
|
|
|
|
|
|
|
## table and adding/overwriting with those from this table. |
6282
|
|
|
|
|
|
|
|
6283
|
10
|
50
|
33
|
|
|
34
|
if (defined($this->{_SortSpecs}) || |
6284
|
|
|
|
|
|
|
defined($that->{_SortSpecs})) |
6285
|
|
|
|
|
|
|
{ |
6286
|
10
|
|
|
|
|
24
|
my $ThisSpecs = $this->sortspecs(); |
6287
|
10
|
|
|
|
|
20
|
my $ThatSpecs = $that->sortspecs(); |
6288
|
|
|
|
|
|
|
|
6289
|
10
|
|
|
|
|
37
|
$this->{_SortSpecs} = {%$ThatSpecs, %$ThisSpecs}; |
6290
|
|
|
|
|
|
|
} |
6291
|
|
|
|
|
|
|
|
6292
|
|
|
|
|
|
|
## If either table had custom sortroutines, then create a new |
6293
|
|
|
|
|
|
|
## sortroutines hash by starting with all the entries from the |
6294
|
|
|
|
|
|
|
## other table and adding/overwriting with those from this table. |
6295
|
|
|
|
|
|
|
|
6296
|
10
|
50
|
33
|
|
|
32
|
if (defined($this->{_SRoutines}) || |
6297
|
|
|
|
|
|
|
defined($that->{_SRoutines})) |
6298
|
|
|
|
|
|
|
{ |
6299
|
10
|
|
50
|
|
|
24
|
my $ThisRoutines = $this->{_SRoutines} || {}; |
6300
|
10
|
|
50
|
|
|
22
|
my $ThatRoutines = $that->{_SRoutines} || {}; |
6301
|
|
|
|
|
|
|
|
6302
|
10
|
|
|
|
|
29
|
$this->{_SRoutines} = {%$ThatRoutines, %$ThisRoutines}; |
6303
|
|
|
|
|
|
|
} |
6304
|
|
|
|
|
|
|
|
6305
|
|
|
|
|
|
|
## All other settings are kept from $this and those from $that are |
6306
|
|
|
|
|
|
|
## ignored. |
6307
|
|
|
|
|
|
|
|
6308
|
10
|
|
|
|
|
15
|
$Success = 1; |
6309
|
10
|
|
|
|
|
41
|
done: |
6310
|
|
|
|
|
|
|
return($Success); |
6311
|
|
|
|
|
|
|
} |
6312
|
|
|
|
|
|
|
|
6313
|
|
|
|
|
|
|
sub append_file |
6314
|
|
|
|
|
|
|
{ |
6315
|
2
|
|
|
2
|
0
|
3
|
my $this = shift; |
6316
|
2
|
|
|
|
|
5
|
my ($FileName, $Params) = @_; |
6317
|
|
|
|
|
|
|
|
6318
|
2
|
|
|
|
|
2
|
my $Success; |
6319
|
|
|
|
|
|
|
|
6320
|
|
|
|
|
|
|
## $Params argument is optional. If supplied, it must be a hash. |
6321
|
2
|
|
100
|
|
|
8
|
$Params ||= {}; |
6322
|
|
|
|
|
|
|
|
6323
|
|
|
|
|
|
|
## Create a new empty table object of the same class as $this and |
6324
|
|
|
|
|
|
|
## read just the specified file into it. (note: this could be a |
6325
|
|
|
|
|
|
|
## recursive call here). |
6326
|
|
|
|
|
|
|
|
6327
|
2
|
50
|
|
|
|
13
|
my $that = ref($this)->new($Params, $FileName) or goto done; |
6328
|
|
|
|
|
|
|
|
6329
|
|
|
|
|
|
|
## Append the data from $that table into this one. |
6330
|
|
|
|
|
|
|
|
6331
|
2
|
50
|
|
|
|
8
|
$this->append($that) or goto done; |
6332
|
|
|
|
|
|
|
|
6333
|
2
|
|
|
|
|
4
|
$Success = 1; |
6334
|
2
|
|
|
|
|
17
|
done: |
6335
|
|
|
|
|
|
|
return($Success); |
6336
|
|
|
|
|
|
|
} |
6337
|
|
|
|
|
|
|
|
6338
|
|
|
|
|
|
|
sub append_files |
6339
|
|
|
|
|
|
|
{ |
6340
|
109
|
|
|
109
|
0
|
134
|
my $this = shift; |
6341
|
109
|
|
|
|
|
157
|
my ($FileNames, $Params) = @_; |
6342
|
|
|
|
|
|
|
|
6343
|
109
|
|
|
|
|
107
|
my $Success; |
6344
|
|
|
|
|
|
|
|
6345
|
109
|
|
|
|
|
255
|
foreach my $FileName (@$FileNames) |
6346
|
|
|
|
|
|
|
{ |
6347
|
2
|
50
|
|
|
|
7
|
goto done unless $this->append_file($FileName, $Params); |
6348
|
|
|
|
|
|
|
} |
6349
|
|
|
|
|
|
|
|
6350
|
109
|
|
|
|
|
128
|
$Success = 1; |
6351
|
109
|
|
|
|
|
274
|
done: |
6352
|
|
|
|
|
|
|
return($Success); |
6353
|
|
|
|
|
|
|
} |
6354
|
|
|
|
|
|
|
|
6355
|
|
|
|
|
|
|
sub append_files_new ## Called by new() to process its file name args. |
6356
|
|
|
|
|
|
|
{ |
6357
|
109
|
|
|
109
|
0
|
137
|
my $this = shift; |
6358
|
109
|
|
|
|
|
149
|
my ($FileNames, $Params) = @_; |
6359
|
|
|
|
|
|
|
|
6360
|
109
|
|
|
|
|
133
|
my $Success; |
6361
|
|
|
|
|
|
|
|
6362
|
|
|
|
|
|
|
## First we read the first file, if any, into this object using |
6363
|
|
|
|
|
|
|
## the read() method. |
6364
|
|
|
|
|
|
|
|
6365
|
109
|
|
|
|
|
155
|
my $FirstFile = shift @$FileNames; |
6366
|
109
|
100
|
|
|
|
233
|
if (defined($FirstFile)) |
6367
|
|
|
|
|
|
|
{ |
6368
|
57
|
100
|
|
|
|
63
|
goto done unless $this->read(%{$Params || {}}, _FileName => $FirstFile); |
|
57
|
50
|
|
|
|
305
|
|
6369
|
|
|
|
|
|
|
} |
6370
|
|
|
|
|
|
|
|
6371
|
109
|
50
|
|
|
|
366
|
goto done unless $this->append_files($FileNames, $Params); |
6372
|
|
|
|
|
|
|
|
6373
|
109
|
|
|
|
|
145
|
$Success = 1; |
6374
|
109
|
|
|
|
|
266
|
done: |
6375
|
|
|
|
|
|
|
return($Success); |
6376
|
|
|
|
|
|
|
} |
6377
|
|
|
|
|
|
|
|
6378
|
|
|
|
|
|
|
sub combine |
6379
|
|
|
|
|
|
|
{ |
6380
|
6
|
|
|
6
|
0
|
10
|
my $this = shift; |
6381
|
6
|
|
|
|
|
8
|
my ($that) = @_; |
6382
|
|
|
|
|
|
|
|
6383
|
6
|
|
|
|
|
6
|
my $Success; |
6384
|
|
|
|
|
|
|
|
6385
|
|
|
|
|
|
|
## Get a snapshot of field lists before any modifications. |
6386
|
6
|
|
|
|
|
9
|
my $ThisFields = $this->fieldlist(); |
6387
|
6
|
|
|
|
|
12
|
my $ThatFields = $that->fieldlist(); |
6388
|
|
|
|
|
|
|
|
6389
|
|
|
|
|
|
|
## Bring in all (listed) fields from other table. |
6390
|
6
|
|
|
|
|
8
|
my $IncomingFields = $ThatFields; |
6391
|
|
|
|
|
|
|
|
6392
|
|
|
|
|
|
|
## Preserve any previous non-selection and force one to be saved |
6393
|
|
|
|
|
|
|
## in the interim. This will prevent $that->sel() from |
6394
|
|
|
|
|
|
|
## recalculating the selection each time if there is none. |
6395
|
6
|
|
|
|
|
7
|
my $OldSel = $that->{_Selection}; |
6396
|
6
|
|
|
|
|
12
|
$that->{_Selection} = $that->selection(); ## Might be a no-op |
6397
|
|
|
|
|
|
|
|
6398
|
|
|
|
|
|
|
## Copy columns from $that in selection order; (re)place into $this |
6399
|
6
|
|
|
|
|
11
|
foreach (@$IncomingFields) {$this->col_set($_, $that->sel($_))}; |
|
12
|
|
|
|
|
22
|
|
6400
|
|
|
|
|
|
|
|
6401
|
|
|
|
|
|
|
## Restore the possibly-undef selection in other table. |
6402
|
6
|
|
|
|
|
12
|
$that->{_Selection} = $OldSel; ## Might be a no-op |
6403
|
|
|
|
|
|
|
|
6404
|
|
|
|
|
|
|
## Extend any short columns (whether originating from other table |
6405
|
|
|
|
|
|
|
## or from this one) to be the same length as all others. |
6406
|
|
|
|
|
|
|
|
6407
|
6
|
|
|
|
|
15
|
$this->extend(); |
6408
|
|
|
|
|
|
|
|
6409
|
|
|
|
|
|
|
## If either table had a custom fieldlist, then make a new custom |
6410
|
|
|
|
|
|
|
## field list which is the result of concatenating both field |
6411
|
|
|
|
|
|
|
## lists together, without duplicates, and of course preserving |
6412
|
|
|
|
|
|
|
## the original order as completely as possible (with the order |
6413
|
|
|
|
|
|
|
## given in the first table taking precedence). |
6414
|
|
|
|
|
|
|
|
6415
|
6
|
50
|
33
|
|
|
20
|
if (defined($this->{_FieldList}) || |
6416
|
|
|
|
|
|
|
defined($that->{_FieldList})) |
6417
|
|
|
|
|
|
|
{ |
6418
|
|
|
|
|
|
|
## Make a hash mapping field names from both tables to the |
6419
|
|
|
|
|
|
|
## order they should appear |
6420
|
6
|
|
|
|
|
9
|
my $FieldOrderHash = {}; |
6421
|
6
|
|
66
|
|
|
13
|
foreach (@$ThisFields, @$ThatFields) {$FieldOrderHash->{$_} ||= (keys %$FieldOrderHash) + 1}; |
|
47
|
|
|
|
|
148
|
|
6422
|
|
|
|
|
|
|
|
6423
|
6
|
|
|
|
|
20
|
my $FieldList = [sort {$FieldOrderHash->{$a} <=> $FieldOrderHash->{$b}} keys %$FieldOrderHash]; |
|
60
|
|
|
|
|
75
|
|
6424
|
|
|
|
|
|
|
|
6425
|
6
|
|
|
|
|
19
|
$this->{_FieldList} = $FieldList; |
6426
|
|
|
|
|
|
|
} |
6427
|
|
|
|
|
|
|
|
6428
|
|
|
|
|
|
|
## If either table had a custom sortorder, then make a new custom |
6429
|
|
|
|
|
|
|
## sort order which is the result of concatenating both orders |
6430
|
|
|
|
|
|
|
## together, without duplicates, and of course preserving the |
6431
|
|
|
|
|
|
|
## original order as completely as possible (with the order given |
6432
|
|
|
|
|
|
|
## in the first table taking precedence). |
6433
|
|
|
|
|
|
|
|
6434
|
6
|
50
|
33
|
|
|
30
|
if (defined($this->{_SortOrder}) || |
6435
|
|
|
|
|
|
|
defined($that->{_SortOrder})) |
6436
|
|
|
|
|
|
|
{ |
6437
|
0
|
|
|
|
|
0
|
my $ThisOrder = $this->sortorder(); |
6438
|
0
|
|
|
|
|
0
|
my $ThatOrder = $that->sortorder(); |
6439
|
|
|
|
|
|
|
|
6440
|
|
|
|
|
|
|
## Make a hash mapping field names from both lists to the |
6441
|
|
|
|
|
|
|
## order they should appear |
6442
|
|
|
|
|
|
|
|
6443
|
0
|
|
|
|
|
0
|
my $OrderHash = {}; |
6444
|
0
|
|
0
|
|
|
0
|
foreach (@$ThisOrder, @$ThatOrder) {$OrderHash->{$_} ||= (keys %$OrderHash) + 1}; |
|
0
|
|
|
|
|
0
|
|
6445
|
|
|
|
|
|
|
|
6446
|
0
|
|
|
|
|
0
|
my $OrderList = [sort {$OrderHash->{$a} <=> $OrderHash->{$b}} keys %$OrderHash]; |
|
0
|
|
|
|
|
0
|
|
6447
|
|
|
|
|
|
|
|
6448
|
0
|
|
|
|
|
0
|
$this->{_SortOrder} = $OrderList; |
6449
|
|
|
|
|
|
|
} |
6450
|
|
|
|
|
|
|
|
6451
|
|
|
|
|
|
|
## If either table had custom sortspecs, then create a new |
6452
|
|
|
|
|
|
|
## sortspecs hash by starting with all the entries from the other |
6453
|
|
|
|
|
|
|
## table and adding/overwriting with those from this table. |
6454
|
|
|
|
|
|
|
|
6455
|
6
|
50
|
33
|
|
|
19
|
if (defined($this->{_SortSpecs}) || |
6456
|
|
|
|
|
|
|
defined($that->{_SortSpecs})) |
6457
|
|
|
|
|
|
|
{ |
6458
|
6
|
|
|
|
|
193
|
my $ThisSpecs = $this->sortspecs(); |
6459
|
6
|
|
|
|
|
14
|
my $ThatSpecs = $that->sortspecs(); |
6460
|
|
|
|
|
|
|
|
6461
|
6
|
|
|
|
|
21
|
$this->{_SortSpecs} = {%$ThatSpecs, %$ThisSpecs}; |
6462
|
|
|
|
|
|
|
} |
6463
|
|
|
|
|
|
|
|
6464
|
|
|
|
|
|
|
## If either table had custom sortroutines, then create a new |
6465
|
|
|
|
|
|
|
## sortroutines hash by starting with all the entries from the |
6466
|
|
|
|
|
|
|
## other table and adding/overwriting with those from this table. |
6467
|
|
|
|
|
|
|
|
6468
|
6
|
50
|
33
|
|
|
19
|
if (defined($this->{_SRoutines}) || |
6469
|
|
|
|
|
|
|
defined($that->{_SRoutines})) |
6470
|
|
|
|
|
|
|
{ |
6471
|
6
|
|
50
|
|
|
16
|
my $ThisRoutines = $this->{_SRoutines} || {}; |
6472
|
6
|
|
50
|
|
|
12
|
my $ThatRoutines = $that->{_SRoutines} || {}; |
6473
|
|
|
|
|
|
|
|
6474
|
6
|
|
|
|
|
16
|
$this->{_SRoutines} = {%$ThatRoutines, %$ThisRoutines}; |
6475
|
|
|
|
|
|
|
} |
6476
|
|
|
|
|
|
|
|
6477
|
|
|
|
|
|
|
## All other settings are kept from $this and those from $that are |
6478
|
|
|
|
|
|
|
## ignored. |
6479
|
|
|
|
|
|
|
|
6480
|
6
|
|
|
|
|
9
|
$Success = 1; |
6481
|
6
|
|
|
|
|
21
|
done: |
6482
|
|
|
|
|
|
|
return($Success); |
6483
|
|
|
|
|
|
|
} |
6484
|
|
|
|
|
|
|
|
6485
|
|
|
|
|
|
|
sub combine_file |
6486
|
|
|
|
|
|
|
{ |
6487
|
5
|
|
|
5
|
0
|
29
|
my $this = shift; |
6488
|
5
|
|
|
|
|
7
|
my ($FileName, $Params) = @_; |
6489
|
|
|
|
|
|
|
|
6490
|
5
|
|
|
|
|
6
|
my $Success; |
6491
|
|
|
|
|
|
|
|
6492
|
|
|
|
|
|
|
## $Params argument is optional. If supplied, it must be a hash. |
6493
|
5
|
|
50
|
|
|
39
|
$Params ||= {}; |
6494
|
|
|
|
|
|
|
|
6495
|
|
|
|
|
|
|
## Create a new empty table object of the same class as $this and |
6496
|
|
|
|
|
|
|
## read just the specified file into it. |
6497
|
|
|
|
|
|
|
|
6498
|
5
|
50
|
|
|
|
17
|
my $that = ref($this)->new($Params, $FileName) or goto done; |
6499
|
|
|
|
|
|
|
|
6500
|
|
|
|
|
|
|
## Combine the data from $that table into this one. |
6501
|
|
|
|
|
|
|
|
6502
|
5
|
50
|
|
|
|
15
|
$this->combine($that) or goto done; |
6503
|
|
|
|
|
|
|
|
6504
|
5
|
|
|
|
|
8
|
$Success = 1; |
6505
|
5
|
|
|
|
|
35
|
done: |
6506
|
|
|
|
|
|
|
return($Success); |
6507
|
|
|
|
|
|
|
} |
6508
|
|
|
|
|
|
|
|
6509
|
|
|
|
|
|
|
sub combine_files |
6510
|
|
|
|
|
|
|
{ |
6511
|
1
|
|
|
1
|
0
|
13
|
my $this = shift; |
6512
|
1
|
|
|
|
|
3
|
my ($FileNames, $Params) = @_; |
6513
|
|
|
|
|
|
|
|
6514
|
1
|
|
|
|
|
2
|
my $Success; |
6515
|
|
|
|
|
|
|
|
6516
|
1
|
|
|
|
|
3
|
foreach my $FileName (@$FileNames) |
6517
|
|
|
|
|
|
|
{ |
6518
|
1
|
50
|
|
|
|
6
|
goto done unless $this->combine_file($FileName, $Params); |
6519
|
|
|
|
|
|
|
} |
6520
|
|
|
|
|
|
|
|
6521
|
1
|
|
|
|
|
3
|
$Success = 1; |
6522
|
1
|
|
|
|
|
4
|
done: |
6523
|
|
|
|
|
|
|
return($Success); |
6524
|
|
|
|
|
|
|
} |
6525
|
|
|
|
|
|
|
|
6526
|
|
|
|
|
|
|
sub join |
6527
|
|
|
|
|
|
|
{ |
6528
|
5
|
|
|
5
|
0
|
31
|
my $this = shift; |
6529
|
5
|
|
|
|
|
9
|
my ($that, $Key1, $Key2, $Fields) = @_; |
6530
|
|
|
|
|
|
|
|
6531
|
5
|
|
|
|
|
7
|
my $Success; |
6532
|
|
|
|
|
|
|
|
6533
|
|
|
|
|
|
|
## $Key1 is required. |
6534
|
5
|
50
|
|
|
|
12
|
$this->warn("Key1 is required for join()"), goto done |
6535
|
|
|
|
|
|
|
unless ($Key1); |
6536
|
|
|
|
|
|
|
|
6537
|
|
|
|
|
|
|
## $Key2 defaults to the same as $Key1. |
6538
|
5
|
|
33
|
|
|
10
|
$Key2 ||= $Key1; |
6539
|
|
|
|
|
|
|
|
6540
|
|
|
|
|
|
|
## The fields we'll be getting can optionally be overridden by |
6541
|
|
|
|
|
|
|
## caller; otherwise, they're the field list of the other table. |
6542
|
5
|
|
33
|
|
|
17
|
my $IncomingFields = $Fields || $that->fieldlist(); |
6543
|
|
|
|
|
|
|
|
6544
|
|
|
|
|
|
|
## Cull $Key1 and $Key2 from the incoming field list. |
6545
|
5
|
50
|
|
|
|
9
|
$IncomingFields = [grep {($_ ne $Key1) && ($_ ne $Key2)} @$IncomingFields]; |
|
26
|
|
|
|
|
89
|
|
6546
|
|
|
|
|
|
|
|
6547
|
|
|
|
|
|
|
## Preserve any previous non-selection and force one to be saved |
6548
|
|
|
|
|
|
|
## in the interim. This will prevent $that->sel() from |
6549
|
|
|
|
|
|
|
## recalculating the selection each time if there is none. |
6550
|
5
|
|
|
|
|
11
|
my $OldSel = $that->{_Selection}; |
6551
|
5
|
|
|
|
|
11
|
$that->{_Selection} = $that->selection(); ## Might be a no-op |
6552
|
|
|
|
|
|
|
|
6553
|
|
|
|
|
|
|
## Make an index mapping values in $Key2 to record numbers in |
6554
|
|
|
|
|
|
|
## $that. We reverse the order of insertion into the $Index |
6555
|
|
|
|
|
|
|
## because we want the items earliest in selection order to have |
6556
|
|
|
|
|
|
|
## precedence in case keys are not unique as they should be. |
6557
|
|
|
|
|
|
|
|
6558
|
5
|
|
|
|
|
9
|
my $Index = {}; @$Index{reverse @{$that->sel($Key2)}} = reverse @{$that->{_Selection}}; |
|
5
|
|
|
|
|
6
|
|
|
5
|
|
|
|
|
15
|
|
|
5
|
|
|
|
|
12
|
|
6559
|
|
|
|
|
|
|
|
6560
|
|
|
|
|
|
|
## Get a list of record numbers in $this that we'll be copying data into. |
6561
|
5
|
|
|
|
|
14
|
my $Recs1 = $this->selection(); |
6562
|
|
|
|
|
|
|
|
6563
|
|
|
|
|
|
|
## Get a corresponding list of keys we're going to look up. |
6564
|
5
|
|
|
|
|
9
|
my $Key1s = $this->sel($Key1); |
6565
|
|
|
|
|
|
|
|
6566
|
|
|
|
|
|
|
## The default "record number" in table 2 is $that->length() |
6567
|
|
|
|
|
|
|
## ... i.e. an invalid record number past the end of the table. |
6568
|
|
|
|
|
|
|
## This will ensure that failed lookups result in lookups to this |
6569
|
|
|
|
|
|
|
## illegal record number, correctly producing undef in the |
6570
|
|
|
|
|
|
|
## corresponding joined fields, whereas looking up "undef" would |
6571
|
|
|
|
|
|
|
## have produced record number zero. |
6572
|
|
|
|
|
|
|
|
6573
|
5
|
|
|
|
|
9
|
my $DefaultRecNum = $that->length(); |
6574
|
|
|
|
|
|
|
|
6575
|
|
|
|
|
|
|
## Look up @$Key1s in @$Index to get a list of data-source record |
6576
|
|
|
|
|
|
|
## numbers in $that. Failed lookups map to $DefaultRecNum. |
6577
|
|
|
|
|
|
|
|
6578
|
5
|
100
|
|
|
|
16
|
my $Recs2 = [map {defined() ? $_ : $DefaultRecNum} @$Index{@$Key1s}]; |
|
15
|
|
|
|
|
29
|
|
6579
|
|
|
|
|
|
|
|
6580
|
|
|
|
|
|
|
## Copy data into selected positions within columns of $this, one |
6581
|
|
|
|
|
|
|
## column at a time, creating pre-sized columns in $this as |
6582
|
|
|
|
|
|
|
## necessary (col()). These array slice operations are very, very |
6583
|
|
|
|
|
|
|
## fast. |
6584
|
|
|
|
|
|
|
|
6585
|
5
|
|
|
|
|
11
|
foreach my $Field (@$IncomingFields) |
6586
|
|
|
|
|
|
|
{ |
6587
|
21
|
|
|
|
|
36
|
(@{$this->col($Field)}[@$Recs1] = ## Put values into selected records of $this |
|
21
|
|
|
|
|
36
|
|
6588
|
21
|
|
|
|
|
26
|
@{$that->col($Field)}[@$Recs2]); ## Get values from looked-up records of $that |
6589
|
|
|
|
|
|
|
} |
6590
|
|
|
|
|
|
|
|
6591
|
|
|
|
|
|
|
## Restore the possibly-undef selection in other table. |
6592
|
5
|
|
|
|
|
8
|
$that->{_Selection} = $OldSel; ## Might be a no-op |
6593
|
|
|
|
|
|
|
|
6594
|
|
|
|
|
|
|
## If this table had a custom fieldlist, then make a new custom |
6595
|
|
|
|
|
|
|
## field list which is the result of concatenating both field |
6596
|
|
|
|
|
|
|
## lists together, without duplicates, and of course preserving |
6597
|
|
|
|
|
|
|
## the original order as completely as possible (with the order |
6598
|
|
|
|
|
|
|
## given in the first table taking precedence). |
6599
|
|
|
|
|
|
|
|
6600
|
5
|
50
|
|
|
|
13
|
if (defined($this->{_FieldList})) |
6601
|
|
|
|
|
|
|
{ |
6602
|
5
|
|
|
|
|
10
|
my $ThisFields = $this->fieldlist(); |
6603
|
5
|
|
|
|
|
8
|
my $ThatFields = $IncomingFields; |
6604
|
|
|
|
|
|
|
|
6605
|
|
|
|
|
|
|
## Make a hash mapping field names from both tables to the |
6606
|
|
|
|
|
|
|
## order they should appear |
6607
|
|
|
|
|
|
|
|
6608
|
5
|
|
|
|
|
7
|
my $FieldOrderHash = {}; |
6609
|
5
|
|
66
|
|
|
12
|
foreach (@$ThisFields, @$ThatFields) {$FieldOrderHash->{$_} ||= (keys %$FieldOrderHash) + 1}; |
|
56
|
|
|
|
|
223
|
|
6610
|
|
|
|
|
|
|
|
6611
|
5
|
|
|
|
|
21
|
my $FieldList = [sort {$FieldOrderHash->{$a} <=> $FieldOrderHash->{$b}} keys %$FieldOrderHash]; |
|
64
|
|
|
|
|
86
|
|
6612
|
|
|
|
|
|
|
|
6613
|
5
|
|
|
|
|
23
|
$this->{_FieldList} = $FieldList; |
6614
|
|
|
|
|
|
|
} |
6615
|
|
|
|
|
|
|
|
6616
|
|
|
|
|
|
|
## If either table had custom sortspecs, then create a new |
6617
|
|
|
|
|
|
|
## sortspecs hash by starting with all the entries from the other |
6618
|
|
|
|
|
|
|
## table and adding/overwriting with those from this table. |
6619
|
|
|
|
|
|
|
|
6620
|
5
|
50
|
33
|
|
|
19
|
if (defined($this->{_SortSpecs}) || |
6621
|
|
|
|
|
|
|
defined($that->{_SortSpecs})) |
6622
|
|
|
|
|
|
|
{ |
6623
|
5
|
|
|
|
|
15
|
my $ThisSpecs = $this->sortspecs(); |
6624
|
5
|
|
|
|
|
9
|
my $ThatSpecs = $that->sortspecs(); |
6625
|
|
|
|
|
|
|
|
6626
|
5
|
|
|
|
|
16
|
$this->{_SortSpecs} = {%$ThatSpecs, %$ThisSpecs}; |
6627
|
|
|
|
|
|
|
} |
6628
|
|
|
|
|
|
|
|
6629
|
|
|
|
|
|
|
## If either table had custom sortroutines, then create a new |
6630
|
|
|
|
|
|
|
## sortroutines hash by starting with all the entries from the |
6631
|
|
|
|
|
|
|
## other table and adding/overwriting with those from this table. |
6632
|
|
|
|
|
|
|
|
6633
|
5
|
50
|
33
|
|
|
24
|
if (defined($this->{_SRoutines}) || |
6634
|
|
|
|
|
|
|
defined($that->{_SRoutines})) |
6635
|
|
|
|
|
|
|
{ |
6636
|
5
|
|
50
|
|
|
12
|
my $ThisRoutines = $this->{_SRoutines} || {}; |
6637
|
5
|
|
50
|
|
|
11
|
my $ThatRoutines = $that->{_SRoutines} || {}; |
6638
|
|
|
|
|
|
|
|
6639
|
5
|
|
|
|
|
13
|
$this->{_SRoutines} = {%$ThatRoutines, %$ThisRoutines}; |
6640
|
|
|
|
|
|
|
} |
6641
|
|
|
|
|
|
|
|
6642
|
|
|
|
|
|
|
## All other settings are kept from $this and those from $that are |
6643
|
|
|
|
|
|
|
## ignored. |
6644
|
|
|
|
|
|
|
|
6645
|
5
|
|
|
|
|
7
|
$Success = 1; |
6646
|
5
|
|
|
|
|
24
|
done: |
6647
|
|
|
|
|
|
|
return($Success); |
6648
|
|
|
|
|
|
|
} |
6649
|
|
|
|
|
|
|
|
6650
|
|
|
|
|
|
|
sub join_file |
6651
|
|
|
|
|
|
|
{ |
6652
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
6653
|
0
|
|
|
|
|
0
|
my ($FileName, $Params, $Key1, $Key2, $Fields) = @_; |
6654
|
|
|
|
|
|
|
|
6655
|
0
|
|
|
|
|
0
|
my $Success; |
6656
|
|
|
|
|
|
|
|
6657
|
|
|
|
|
|
|
## $Params argument may be undef. If supplied, it must be a hash. |
6658
|
0
|
|
0
|
|
|
0
|
$Params ||= {}; |
6659
|
|
|
|
|
|
|
|
6660
|
|
|
|
|
|
|
## Create a new empty table object of the same class as $this and |
6661
|
|
|
|
|
|
|
## read just the specified file into it. |
6662
|
|
|
|
|
|
|
|
6663
|
0
|
0
|
|
|
|
0
|
my $that = ref($this)->new($Params, $FileName) or goto done; |
6664
|
|
|
|
|
|
|
|
6665
|
|
|
|
|
|
|
## Join the data from $that table into this one. |
6666
|
|
|
|
|
|
|
|
6667
|
0
|
0
|
|
|
|
0
|
$this->join($that, $Key1, $Key2, $Fields) or goto done; |
6668
|
|
|
|
|
|
|
|
6669
|
0
|
|
|
|
|
0
|
$Success = 1; |
6670
|
0
|
|
|
|
|
0
|
done: |
6671
|
|
|
|
|
|
|
return($Success); |
6672
|
|
|
|
|
|
|
} |
6673
|
|
|
|
|
|
|
|
6674
|
|
|
|
|
|
|
sub join_files |
6675
|
|
|
|
|
|
|
{ |
6676
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
6677
|
0
|
|
|
|
|
0
|
my ($FileNames, $Params, $Key1, $Key2, $Fields) = @_; |
6678
|
|
|
|
|
|
|
|
6679
|
0
|
|
|
|
|
0
|
my $Success; |
6680
|
|
|
|
|
|
|
|
6681
|
0
|
|
|
|
|
0
|
foreach my $FileName (@$FileNames) |
6682
|
|
|
|
|
|
|
{ |
6683
|
0
|
0
|
|
|
|
0
|
goto done unless $this->join_file($FileName, $Params, $Key1, $Key2, $Fields); |
6684
|
|
|
|
|
|
|
} |
6685
|
|
|
|
|
|
|
|
6686
|
0
|
|
|
|
|
0
|
$Success = 1; |
6687
|
0
|
|
|
|
|
0
|
done: |
6688
|
|
|
|
|
|
|
return($Success); |
6689
|
|
|
|
|
|
|
} |
6690
|
|
|
|
|
|
|
|
6691
|
|
|
|
|
|
|
=pod |
6692
|
|
|
|
|
|
|
|
6693
|
|
|
|
|
|
|
=head1 INVERTING A TABLE'S ROWS/COLUMNS |
6694
|
|
|
|
|
|
|
|
6695
|
|
|
|
|
|
|
## Re-orient table's data using vals from $ColName as field names... |
6696
|
|
|
|
|
|
|
$t-invert($ColName) |
6697
|
|
|
|
|
|
|
|
6698
|
|
|
|
|
|
|
Sometimes a situation gives you a table that's initially organized |
6699
|
|
|
|
|
|
|
with column data in rows, and field names in one of the columns, so |
6700
|
|
|
|
|
|
|
you need to flip the table in order to be able to work meaningfully |
6701
|
|
|
|
|
|
|
with it. |
6702
|
|
|
|
|
|
|
|
6703
|
|
|
|
|
|
|
"Inverting" a table means to rewrite each row as a column. One row is |
6704
|
|
|
|
|
|
|
designated to be used as the field names. |
6705
|
|
|
|
|
|
|
|
6706
|
|
|
|
|
|
|
For example, consider this table: |
6707
|
|
|
|
|
|
|
|
6708
|
|
|
|
|
|
|
F01 F02 F03 F04 |
6709
|
|
|
|
|
|
|
------------------------ |
6710
|
|
|
|
|
|
|
First Chris Agnes James |
6711
|
|
|
|
|
|
|
Last Bart Marco Nixon |
6712
|
|
|
|
|
|
|
Age 22 33 44 |
6713
|
|
|
|
|
|
|
|
6714
|
|
|
|
|
|
|
Calling invert() using field names from "F01"... |
6715
|
|
|
|
|
|
|
|
6716
|
|
|
|
|
|
|
$t->invert('F01'); |
6717
|
|
|
|
|
|
|
|
6718
|
|
|
|
|
|
|
... would change the table to look like this: |
6719
|
|
|
|
|
|
|
|
6720
|
|
|
|
|
|
|
First Last Age |
6721
|
|
|
|
|
|
|
---------------- |
6722
|
|
|
|
|
|
|
Chris Bart 22 |
6723
|
|
|
|
|
|
|
Agnes Marco 33 |
6724
|
|
|
|
|
|
|
James Nixon 44 |
6725
|
|
|
|
|
|
|
|
6726
|
|
|
|
|
|
|
The field F01 which formerly contained the field names, is now gone, |
6727
|
|
|
|
|
|
|
and the remaining data columns have been converted from their old row |
6728
|
|
|
|
|
|
|
orientation into a column orientation. |
6729
|
|
|
|
|
|
|
|
6730
|
|
|
|
|
|
|
=cut |
6731
|
|
|
|
|
|
|
|
6732
|
|
|
|
|
|
|
sub invert |
6733
|
|
|
|
|
|
|
{ |
6734
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
6735
|
0
|
|
|
|
|
0
|
my ($HeaderField) = @_; |
6736
|
|
|
|
|
|
|
|
6737
|
0
|
|
|
|
|
0
|
my $Success; |
6738
|
|
|
|
|
|
|
|
6739
|
|
|
|
|
|
|
## Get new field names from an existing column and delete it at the same time. |
6740
|
0
|
0
|
|
|
|
0
|
my $NewColNames = $this->col_delete($HeaderField) or |
6741
|
|
|
|
|
|
|
$this->warn("Invalid field name given to invert() method"), goto done; |
6742
|
|
|
|
|
|
|
|
6743
|
|
|
|
|
|
|
## Get a hash of all existing (remaining) data columns. |
6744
|
0
|
|
|
|
|
0
|
my $OldColNames = $this->fieldlist_all(); |
6745
|
0
|
|
|
|
|
0
|
my $OldCols = $this->cols_hash($OldColNames); |
6746
|
|
|
|
|
|
|
|
6747
|
|
|
|
|
|
|
## Make the new columns... |
6748
|
0
|
|
|
|
|
0
|
my $NewCols = [map {$this->row_list($_, $OldColNames)} (0..$#$NewColNames)]; |
|
0
|
|
|
|
|
0
|
|
6749
|
|
|
|
|
|
|
|
6750
|
|
|
|
|
|
|
## Delete old columns from the object. |
6751
|
0
|
|
|
|
|
0
|
delete @$this{@$OldColNames}; |
6752
|
|
|
|
|
|
|
|
6753
|
|
|
|
|
|
|
## Add new columns |
6754
|
0
|
|
|
|
|
0
|
@$this{@$NewColNames} = @$NewCols; |
6755
|
|
|
|
|
|
|
|
6756
|
|
|
|
|
|
|
## Set the field name list... |
6757
|
0
|
|
|
|
|
0
|
$this->{_FieldList} = $NewColNames; |
6758
|
|
|
|
|
|
|
|
6759
|
0
|
|
|
|
|
0
|
$Success = 1; |
6760
|
0
|
|
|
|
|
0
|
done: |
6761
|
|
|
|
|
|
|
return($Success); |
6762
|
|
|
|
|
|
|
} |
6763
|
|
|
|
|
|
|
|
6764
|
|
|
|
|
|
|
=pod |
6765
|
|
|
|
|
|
|
|
6766
|
|
|
|
|
|
|
=head1 PROGRESS MESSAGES |
6767
|
|
|
|
|
|
|
|
6768
|
|
|
|
|
|
|
## Printing a progress message.... |
6769
|
|
|
|
|
|
|
|
6770
|
|
|
|
|
|
|
$t->progress($Msg) ## Print a message per current settings |
6771
|
|
|
|
|
|
|
|
6772
|
|
|
|
|
|
|
## Progress settings applying to this object only... |
6773
|
|
|
|
|
|
|
|
6774
|
|
|
|
|
|
|
$t->progress_get() ## Get current progress setting |
6775
|
|
|
|
|
|
|
|
6776
|
|
|
|
|
|
|
$t->progress_set(1) ## Use progress_default() method |
6777
|
|
|
|
|
|
|
$t->progress_set($Sub) ## Set a custom progress routine |
6778
|
|
|
|
|
|
|
$t->progress_set(0) ## Disable progress |
6779
|
|
|
|
|
|
|
$t->progress_set(undef) ## Use class's settings (default)... |
6780
|
|
|
|
|
|
|
|
6781
|
|
|
|
|
|
|
## Class's settings (for instances with _Progress == undef) |
6782
|
|
|
|
|
|
|
|
6783
|
|
|
|
|
|
|
$t->progress_class() ## Get current setting. |
6784
|
|
|
|
|
|
|
|
6785
|
|
|
|
|
|
|
$t->progress_class(1) ## Use progress_default() method |
6786
|
|
|
|
|
|
|
$t->progress_class($Sub) ## Set shared custom prog routine |
6787
|
|
|
|
|
|
|
$t->progress_class(0) ## Disable class-default progress |
6788
|
|
|
|
|
|
|
|
6789
|
|
|
|
|
|
|
Data::CTable->progress_class(..) ## Call without an object |
6790
|
|
|
|
|
|
|
|
6791
|
|
|
|
|
|
|
## Call builtin default progress method regardless of settings |
6792
|
|
|
|
|
|
|
|
6793
|
|
|
|
|
|
|
$t->progress_default($Msg) ## Default prog. routine for class |
6794
|
|
|
|
|
|
|
|
6795
|
|
|
|
|
|
|
## Generate a warning (used internally by other methods) |
6796
|
|
|
|
|
|
|
|
6797
|
|
|
|
|
|
|
$t->warn($Msg) ## In this class, calls progress_default() |
6798
|
|
|
|
|
|
|
|
6799
|
|
|
|
|
|
|
## Timed progress: print msg to start, then at most once/2 sec |
6800
|
|
|
|
|
|
|
|
6801
|
|
|
|
|
|
|
$t->progress_timed($Op, $Item) ## Re-print msg every 2 sec |
6802
|
|
|
|
|
|
|
$t->progress_timed($Op, $Item, $Pos, $Tot) ##... with % readout |
6803
|
|
|
|
|
|
|
$t->progress_timed($Op, $Item, $Pos, $Tot, $Wait) ## Not 1st x |
6804
|
|
|
|
|
|
|
|
6805
|
|
|
|
|
|
|
$t->progress_timed_default($Msg) ## Called by progress_timed |
6806
|
|
|
|
|
|
|
|
6807
|
|
|
|
|
|
|
Data::CTable is especially useful in creating batch-oriented |
6808
|
|
|
|
|
|
|
applications for processing data. As such, routines that may perform |
6809
|
|
|
|
|
|
|
time-consuming tasks will, by default, generate helpful progress |
6810
|
|
|
|
|
|
|
messages. The progress mechanism is highly customizable, however, to |
6811
|
|
|
|
|
|
|
suit the needs of applications that don't require this output, or that |
6812
|
|
|
|
|
|
|
require the output to go somewhere other than STDERR or the console. |
6813
|
|
|
|
|
|
|
|
6814
|
|
|
|
|
|
|
The default progress routine is one that prints a message with a |
6815
|
|
|
|
|
|
|
date/time stamp to STDERR if and only if STDERR is an interactive |
6816
|
|
|
|
|
|
|
terminal, and otherwise is silent. |
6817
|
|
|
|
|
|
|
|
6818
|
|
|
|
|
|
|
You could write a custom progress routine that does something else or |
6819
|
|
|
|
|
|
|
something in addition (e.g. logs to a file or syslog). The custom |
6820
|
|
|
|
|
|
|
routine could either be implemented by overriding the |
6821
|
|
|
|
|
|
|
progress_default() method in a subclass, or by calling progress_set() |
6822
|
|
|
|
|
|
|
in any instance. |
6823
|
|
|
|
|
|
|
|
6824
|
|
|
|
|
|
|
The custom progress routine, if any, is stored in the _Progress |
6825
|
|
|
|
|
|
|
parameter of the object. But use progress_set() and progress_get() to |
6826
|
|
|
|
|
|
|
access it. |
6827
|
|
|
|
|
|
|
|
6828
|
|
|
|
|
|
|
The interface for your custom progress routine should be: |
6829
|
|
|
|
|
|
|
|
6830
|
|
|
|
|
|
|
sub MyProgress {my ($Obj, $Message) = @_; chomp $Message; .....} |
6831
|
|
|
|
|
|
|
|
6832
|
|
|
|
|
|
|
In other words, the routine takes a single message which may or may |
6833
|
|
|
|
|
|
|
not have a trailing newline. It should always chomp the newline if |
6834
|
|
|
|
|
|
|
present, and then do its business... which generally will include |
6835
|
|
|
|
|
|
|
printing or logging a message (usually with a newline added). |
6836
|
|
|
|
|
|
|
|
6837
|
|
|
|
|
|
|
The default, built-in progress routine for Data::CTable is: |
6838
|
|
|
|
|
|
|
|
6839
|
|
|
|
|
|
|
sub progress_default |
6840
|
|
|
|
|
|
|
{ |
6841
|
|
|
|
|
|
|
my ($this, $msg) = @_; |
6842
|
|
|
|
|
|
|
chomp $msg; |
6843
|
|
|
|
|
|
|
|
6844
|
|
|
|
|
|
|
print STDERR (localtime() . " $msg\n") if -t STDERR; |
6845
|
|
|
|
|
|
|
|
6846
|
|
|
|
|
|
|
return(1); ## Indicate progress actually completed |
6847
|
|
|
|
|
|
|
} |
6848
|
|
|
|
|
|
|
|
6849
|
|
|
|
|
|
|
Of course, you are free to call this method directly at any time, and |
6850
|
|
|
|
|
|
|
it will do its thing regardless of other progress-enabling settings. |
6851
|
|
|
|
|
|
|
But the preferred way is to first set the settings and then call |
6852
|
|
|
|
|
|
|
progress(). |
6853
|
|
|
|
|
|
|
|
6854
|
|
|
|
|
|
|
The warn() method always calls progress_default() -- i.e. warnings |
6855
|
|
|
|
|
|
|
will display even if progress is otherwise disabled or overridden at |
6856
|
|
|
|
|
|
|
the object or class level. However, you could create a subclass that |
6857
|
|
|
|
|
|
|
changes warn()'s behavior if desired. (For example, it could just |
6858
|
|
|
|
|
|
|
call perl's builtin warn function, or be even more forceful, |
6859
|
|
|
|
|
|
|
generating warnings even if STDERR is not a terminal, for example.) |
6860
|
|
|
|
|
|
|
|
6861
|
|
|
|
|
|
|
The progress_set() method may be used to override the progress routine |
6862
|
|
|
|
|
|
|
for an individual object (set to 1/true for default behavior, or |
6863
|
|
|
|
|
|
|
0/undef/false to disable progress for that object entirely). |
6864
|
|
|
|
|
|
|
|
6865
|
|
|
|
|
|
|
Call progress_class() to set similar values to control the global |
6866
|
|
|
|
|
|
|
default behavior (e.g. turning on/off default progress behavior for |
6867
|
|
|
|
|
|
|
all instances), but be cautious about using this approach in any |
6868
|
|
|
|
|
|
|
environment where other programs might be accessing the same loaded |
6869
|
|
|
|
|
|
|
class data, since the setting is stored in a class-owned global |
6870
|
|
|
|
|
|
|
($Data::CTable::DefaultProgress). |
6871
|
|
|
|
|
|
|
|
6872
|
|
|
|
|
|
|
Manipulating the class-default settings is only recommended in batch |
6873
|
|
|
|
|
|
|
or shell-script environments, not in mod_perl Web applications where |
6874
|
|
|
|
|
|
|
the module stays loaded into the Perl environment across multiple |
6875
|
|
|
|
|
|
|
invocations, for example. |
6876
|
|
|
|
|
|
|
|
6877
|
|
|
|
|
|
|
If you want a particular method (e.g. read() but not write()) to be |
6878
|
|
|
|
|
|
|
silent, you could make a subclass and could override that method with |
6879
|
|
|
|
|
|
|
an implementation that first disables progress, calls the SUPER:: |
6880
|
|
|
|
|
|
|
method, and then restores the progress setting to its original |
6881
|
|
|
|
|
|
|
setting. |
6882
|
|
|
|
|
|
|
|
6883
|
|
|
|
|
|
|
=head2 Timed progress |
6884
|
|
|
|
|
|
|
|
6885
|
|
|
|
|
|
|
Timed progress is a way of printing periodically-recurring progress |
6886
|
|
|
|
|
|
|
messages about potentially time-consuming processes to the terminal. |
6887
|
|
|
|
|
|
|
|
6888
|
|
|
|
|
|
|
For example, consider the following messages which might appear every |
6889
|
|
|
|
|
|
|
2 seconds during a lengthy read() operation: |
6890
|
|
|
|
|
|
|
|
6891
|
|
|
|
|
|
|
Reading... 0 (0%) |
6892
|
|
|
|
|
|
|
Reading... 2000 (4%) |
6893
|
|
|
|
|
|
|
... |
6894
|
|
|
|
|
|
|
Reading... 38000 (96%) |
6895
|
|
|
|
|
|
|
Reading... 40000 (100%) |
6896
|
|
|
|
|
|
|
|
6897
|
|
|
|
|
|
|
The progress_timed() method is called internally by potentially |
6898
|
|
|
|
|
|
|
time-consuming processes (read(), write(), and sort()), and you may |
6899
|
|
|
|
|
|
|
want to call it yourself from your own scripts, to produce |
6900
|
|
|
|
|
|
|
weary-programmer-soothing visual output during otherwise |
6901
|
|
|
|
|
|
|
panic-producing long delays. |
6902
|
|
|
|
|
|
|
|
6903
|
|
|
|
|
|
|
Generally, progress_timed() is called with the $Wait parameter set to |
6904
|
|
|
|
|
|
|
true, which delays the display of any messages until 2 seconds have |
6905
|
|
|
|
|
|
|
passed, so no messages will be displayed unless the process actually |
6906
|
|
|
|
|
|
|
does end up being slower than 2 seconds. |
6907
|
|
|
|
|
|
|
|
6908
|
|
|
|
|
|
|
Parameters are: |
6909
|
|
|
|
|
|
|
|
6910
|
|
|
|
|
|
|
$Op The string that identifies the "operation" taking place |
6911
|
|
|
|
|
|
|
$Item A milestone such as a number or datum to indicate progress |
6912
|
|
|
|
|
|
|
$Pos A numeric position against the (maybe estimated) baseline |
6913
|
|
|
|
|
|
|
$Tot The baseline. If estimated, don't re-estimate too often |
6914
|
|
|
|
|
|
|
$Wait If true, skip printing the first message for this $Op |
6915
|
|
|
|
|
|
|
|
6916
|
|
|
|
|
|
|
All parameters except $Op are optional. |
6917
|
|
|
|
|
|
|
|
6918
|
|
|
|
|
|
|
progress_timed() has a throttle that keeps it from re-triggering more |
6919
|
|
|
|
|
|
|
often than every 2 seconds for any given sequence of the same $Op. |
6920
|
|
|
|
|
|
|
The clock is restarted each time you call it with a different $Op or |
6921
|
|
|
|
|
|
|
$Tot from the previous call (on the assumption that if the operation |
6922
|
|
|
|
|
|
|
or the baseline changes then that fact should be noted). |
6923
|
|
|
|
|
|
|
|
6924
|
|
|
|
|
|
|
The messages printed will start with "$Op... ". |
6925
|
|
|
|
|
|
|
|
6926
|
|
|
|
|
|
|
If you supply $Item, which could be a number or a string, the messages |
6927
|
|
|
|
|
|
|
will then show the $Item after the $Op. |
6928
|
|
|
|
|
|
|
|
6929
|
|
|
|
|
|
|
If you supply BOTH $Pos and $Tot, then a percentage will be calculated |
6930
|
|
|
|
|
|
|
and added to the readout; otherwise omitted. |
6931
|
|
|
|
|
|
|
|
6932
|
|
|
|
|
|
|
If you supply $Wait, the first message (only) that uses this $Op will |
6933
|
|
|
|
|
|
|
be skipped, and the next one won't appear for at least 2 seconds. |
6934
|
|
|
|
|
|
|
|
6935
|
|
|
|
|
|
|
If using $Pos and $Tot to display percentages for your user, be sure |
6936
|
|
|
|
|
|
|
to call progress_timed() one final time when $Pos == $Tot so your user |
6937
|
|
|
|
|
|
|
sees the satisfying 100% milestone. This "completion" call will not |
6938
|
|
|
|
|
|
|
be skipped even if 2 seconds have not passed since the previous timed |
6939
|
|
|
|
|
|
|
progress message was printed. |
6940
|
|
|
|
|
|
|
|
6941
|
|
|
|
|
|
|
Althought progress_timed() is designed to cut down on too much visual |
6942
|
|
|
|
|
|
|
output when called often in a tight loop, remember that it still takes |
6943
|
|
|
|
|
|
|
some processing time to call it and so if you call it too frequently, |
6944
|
|
|
|
|
|
|
you're slowing down the very loop you wish were running faster. |
6945
|
|
|
|
|
|
|
|
6946
|
|
|
|
|
|
|
So, you might want to call it every tenth or 100th or even 1000th time |
6947
|
|
|
|
|
|
|
through a tight loop, instead of every time through, using the mod (%) |
6948
|
|
|
|
|
|
|
operator: |
6949
|
|
|
|
|
|
|
|
6950
|
|
|
|
|
|
|
$t->progress_timed(....) if ($LoopCount % 100) == 0; |
6951
|
|
|
|
|
|
|
|
6952
|
|
|
|
|
|
|
progress_timed_default() is the method called internally by |
6953
|
|
|
|
|
|
|
progress_timed() to actually print the messages it has prepared. In |
6954
|
|
|
|
|
|
|
this implementation, progress_timed_default() just calls |
6955
|
|
|
|
|
|
|
progress_default(). That is, it ignores all other progress-inhibiting |
6956
|
|
|
|
|
|
|
or -enhancing settings so delay-soothing messages will print on the |
6957
|
|
|
|
|
|
|
terminal even if other messages are turned off. |
6958
|
|
|
|
|
|
|
|
6959
|
|
|
|
|
|
|
This is because the author assumes that even if you don't want all |
6960
|
|
|
|
|
|
|
those other progress messages, you might still want these ones that |
6961
|
|
|
|
|
|
|
explain long delays. If you REALLY don't, then just make yourself a |
6962
|
|
|
|
|
|
|
lightweight subclass where progress_timed_default() is a no-op, or |
6963
|
|
|
|
|
|
|
maybe calls regular progress(). For example: |
6964
|
|
|
|
|
|
|
|
6965
|
|
|
|
|
|
|
BEGIN {package Data::CTable::Silent; use vars qw(@ISA); |
6966
|
|
|
|
|
|
|
@ISA=qw(Data::CTable); sub progress_timed_default{}} |
6967
|
|
|
|
|
|
|
|
6968
|
|
|
|
|
|
|
## Later... |
6969
|
|
|
|
|
|
|
my $t = Data::CTable::Silent->new(...); |
6970
|
|
|
|
|
|
|
|
6971
|
|
|
|
|
|
|
|
6972
|
|
|
|
|
|
|
=cut |
6973
|
|
|
|
|
|
|
|
6974
|
|
|
|
|
|
|
$Data::CTable::DefaultProgress = 1; |
6975
|
|
|
|
|
|
|
|
6976
|
|
|
|
|
|
|
sub progress_set |
6977
|
|
|
|
|
|
|
{ |
6978
|
5
|
|
|
5
|
0
|
30
|
my $this = shift; |
6979
|
5
|
|
|
|
|
8
|
my ($ProgSetting) = @_; |
6980
|
|
|
|
|
|
|
|
6981
|
5
|
|
|
|
|
14
|
$this->{_Progress} = $ProgSetting; |
6982
|
|
|
|
|
|
|
} |
6983
|
|
|
|
|
|
|
|
6984
|
|
|
|
|
|
|
sub progress_class |
6985
|
|
|
|
|
|
|
{ |
6986
|
199
|
|
|
199
|
0
|
366
|
my $Ignored = shift; |
6987
|
199
|
|
|
|
|
264
|
my ($ProgSetting) = @_; |
6988
|
|
|
|
|
|
|
|
6989
|
|
|
|
|
|
|
## Set if specified... |
6990
|
199
|
100
|
|
|
|
424
|
$Data::CTable::DefaultProgress = $ProgSetting if defined($ProgSetting); |
6991
|
|
|
|
|
|
|
|
6992
|
|
|
|
|
|
|
## Return.. |
6993
|
199
|
|
|
|
|
346
|
return($Data::CTable::DefaultProgress); |
6994
|
|
|
|
|
|
|
} |
6995
|
|
|
|
|
|
|
|
6996
|
|
|
|
|
|
|
sub progress_get |
6997
|
|
|
|
|
|
|
{ |
6998
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
6999
|
|
|
|
|
|
|
|
7000
|
0
|
|
|
|
|
0
|
my $ProgSetting = $this->{_Progress}; |
7001
|
|
|
|
|
|
|
|
7002
|
0
|
|
|
|
|
0
|
return($ProgSetting); |
7003
|
|
|
|
|
|
|
} |
7004
|
|
|
|
|
|
|
|
7005
|
|
|
|
|
|
|
sub progress_default |
7006
|
|
|
|
|
|
|
{ |
7007
|
7
|
|
|
7
|
0
|
18
|
my ($this, $msg) = @_; |
7008
|
7
|
|
|
|
|
23
|
chomp $msg; |
7009
|
|
|
|
|
|
|
|
7010
|
7
|
50
|
|
|
|
65
|
print STDERR (localtime() . " $msg\n") if -t STDERR; |
7011
|
|
|
|
|
|
|
|
7012
|
7
|
|
|
|
|
78
|
return(1); ## Indicate progress actually completed |
7013
|
|
|
|
|
|
|
} |
7014
|
|
|
|
|
|
|
|
7015
|
|
|
|
|
|
|
sub progress |
7016
|
|
|
|
|
|
|
{ |
7017
|
193
|
|
|
193
|
1
|
1965
|
my $this = shift; |
7018
|
193
|
|
|
|
|
262
|
my ($msg) = @_; |
7019
|
|
|
|
|
|
|
|
7020
|
193
|
|
|
|
|
326
|
my $Prog1 = $this->{_Progress}; ## First check object's progress setting |
7021
|
193
|
|
|
|
|
371
|
my $Prog2 = $this->progress_class(); ## Then check class's setting |
7022
|
|
|
|
|
|
|
|
7023
|
|
|
|
|
|
|
## Calling regular progress resets the timers & ops in |
7024
|
|
|
|
|
|
|
## progress_timed... |
7025
|
|
|
|
|
|
|
|
7026
|
193
|
|
|
|
|
472
|
delete $this->{_ProgTimeInfo}; |
7027
|
|
|
|
|
|
|
|
7028
|
|
|
|
|
|
|
## First examine object setting to find a progress routine... |
7029
|
|
|
|
|
|
|
|
7030
|
193
|
100
|
|
|
|
376
|
return(&$Prog1($this, $msg)) if ref($Prog1) eq 'CODE'; ## Code ref: return it. |
7031
|
191
|
100
|
|
|
|
389
|
return($this->progress_default($msg)) if $Prog1; ## true: use default progress. |
7032
|
185
|
100
|
|
|
|
321
|
return(undef) if defined($Prog1); ## false but defined: no progress. |
7033
|
|
|
|
|
|
|
|
7034
|
|
|
|
|
|
|
## undef: fall through to class settings... |
7035
|
|
|
|
|
|
|
|
7036
|
181
|
100
|
|
|
|
302
|
return(&$Prog2($this, $msg)) if ref($Prog2) eq 'CODE'; ## Code ref: return it. |
7037
|
179
|
50
|
|
|
|
303
|
return($this->progress_default($msg)) if $Prog2; ## true: use default progress. |
7038
|
179
|
|
|
|
|
268
|
return(undef); ## false/undef: no progress. |
7039
|
|
|
|
|
|
|
} |
7040
|
|
|
|
|
|
|
|
7041
|
|
|
|
|
|
|
sub progress_timed |
7042
|
|
|
|
|
|
|
{ |
7043
|
188159
|
|
|
188159
|
0
|
582041
|
my $this = shift; |
7044
|
188159
|
|
|
|
|
282096
|
my ($Op, $Item, $Pos, $Tot, $Wait) = @_; |
7045
|
|
|
|
|
|
|
|
7046
|
|
|
|
|
|
|
## Get params from previous call if any. |
7047
|
188159
|
100
|
|
|
|
200829
|
my ($LastOp, $LastItem, $LastPos, $LastTot, $LastTime) = @{$this->{_ProgTimeInfo} || []}; |
|
188159
|
|
|
|
|
555883
|
|
7048
|
|
|
|
|
|
|
|
7049
|
|
|
|
|
|
|
## print &Dumper([$Op, $Item, $Pos, $Tot, $Wait], [$LastOp, $LastItem, $LastPos, $LastTot, $LastTime]); |
7050
|
|
|
|
|
|
|
|
7051
|
|
|
|
|
|
|
## Get elapsed time. |
7052
|
188159
|
|
|
|
|
241832
|
my $Time = time(); |
7053
|
188159
|
|
66
|
|
|
369130
|
my $Elapsed = $Time - ($LastTime || $Time); |
7054
|
|
|
|
|
|
|
|
7055
|
|
|
|
|
|
|
## We're on the "same" operation if the $Op name is the same and |
7056
|
|
|
|
|
|
|
## the total (baseline) is the same. Otherwise treat as new op. |
7057
|
|
|
|
|
|
|
|
7058
|
188159
|
|
|
|
|
257075
|
my $SameOp = (($Op eq $LastOp)); |
7059
|
188159
|
|
66
|
|
|
641931
|
my $SameOpAndTot = ($SameOp && ($Tot == $LastTot)); |
7060
|
188159
|
|
100
|
|
|
593417
|
my $Finished = ($Tot && ($Pos == $Tot)); |
7061
|
|
|
|
|
|
|
|
7062
|
|
|
|
|
|
|
## We trigger a message to print if we've been on the same op for |
7063
|
|
|
|
|
|
|
## 2 seconds or more, OR this is a new op. |
7064
|
|
|
|
|
|
|
|
7065
|
188159
|
|
100
|
|
|
1554885
|
my $Trigger = (($SameOpAndTot && ($Elapsed >= 2)) || ## Yes if same op & time has passed... |
7066
|
|
|
|
|
|
|
($SameOpAndTot && $Finished) || ## Yes if we're finished (100%). |
7067
|
|
|
|
|
|
|
!$SameOpAndTot); ## Yes if new op |
7068
|
|
|
|
|
|
|
|
7069
|
|
|
|
|
|
|
## Quit now if nothing to do. |
7070
|
188159
|
100
|
|
|
|
471098
|
goto done unless $Trigger; |
7071
|
|
|
|
|
|
|
|
7072
|
|
|
|
|
|
|
## Otherwise print message and save details for next time around. |
7073
|
17
|
100
|
66
|
|
|
125
|
my $Percent = sprintf("(%2d\%)", int(($Pos * 100) / $Tot)) if (defined($Pos) && $Tot); |
7074
|
|
|
|
|
|
|
|
7075
|
|
|
|
|
|
|
## If we've been asked to "wait", we skip actually printing the |
7076
|
|
|
|
|
|
|
## message this the first time, but act as if we did (starting the timer). |
7077
|
|
|
|
|
|
|
|
7078
|
17
|
100
|
100
|
|
|
110
|
my $RetVal = $this->progress_timed_default("$Op... $Item $Percent") |
7079
|
|
|
|
|
|
|
unless (!$SameOp && $Wait); ## Skip first-time message if $Wait. |
7080
|
|
|
|
|
|
|
|
7081
|
17
|
|
|
|
|
65
|
$this->{_ProgTimeInfo} = [$Op, $Item, $Pos, $Tot, $Time]; |
7082
|
|
|
|
|
|
|
|
7083
|
188159
|
|
|
|
|
440607
|
done: |
7084
|
|
|
|
|
|
|
return($RetVal); |
7085
|
|
|
|
|
|
|
} |
7086
|
|
|
|
|
|
|
|
7087
|
|
|
|
|
|
|
sub progress_timed_default |
7088
|
|
|
|
|
|
|
{ |
7089
|
6
|
|
|
6
|
0
|
82
|
my $this = shift; |
7090
|
6
|
|
|
|
|
13
|
my ($msg) = @_; |
7091
|
|
|
|
|
|
|
|
7092
|
6
|
|
|
|
|
37
|
return($this->progress_default("$msg")); |
7093
|
|
|
|
|
|
|
} |
7094
|
|
|
|
|
|
|
|
7095
|
|
|
|
|
|
|
sub warn |
7096
|
|
|
|
|
|
|
{ |
7097
|
1
|
|
|
1
|
0
|
3
|
my $this = shift; |
7098
|
1
|
|
|
|
|
4
|
my ($msg) = @_; |
7099
|
|
|
|
|
|
|
|
7100
|
1
|
|
|
|
|
6
|
return($this->progress_default("WARNING: $msg")); |
7101
|
|
|
|
|
|
|
} |
7102
|
|
|
|
|
|
|
|
7103
|
|
|
|
|
|
|
=pod |
7104
|
|
|
|
|
|
|
|
7105
|
|
|
|
|
|
|
=head1 DEBUGGING / DUMPING |
7106
|
|
|
|
|
|
|
|
7107
|
|
|
|
|
|
|
## Print some debugging output... |
7108
|
|
|
|
|
|
|
|
7109
|
|
|
|
|
|
|
$t->out() ## Pretty-print $t using Data::ShowTable |
7110
|
|
|
|
|
|
|
|
7111
|
|
|
|
|
|
|
$t->dump() ## Dump $t using Data::Dumper |
7112
|
|
|
|
|
|
|
$t->dump($x, $y) ## Dump anything else using Data::Dumper |
7113
|
|
|
|
|
|
|
|
7114
|
|
|
|
|
|
|
## Print some debugging output and then die. |
7115
|
|
|
|
|
|
|
|
7116
|
|
|
|
|
|
|
die $t->out() ## Same but die afterwards. |
7117
|
|
|
|
|
|
|
|
7118
|
|
|
|
|
|
|
die $t->dump() ## Same but die afterwards. |
7119
|
|
|
|
|
|
|
die $t->dump($x, $y) ## Same but die afterwards. |
7120
|
|
|
|
|
|
|
|
7121
|
|
|
|
|
|
|
These two methods can be very helpful in debugging your scripts. |
7122
|
|
|
|
|
|
|
|
7123
|
|
|
|
|
|
|
The out() method, which has many options, is described in complete |
7124
|
|
|
|
|
|
|
detail in the section below titled "FORMATTED TABLES". In short, it |
7125
|
|
|
|
|
|
|
prints a nicely-formatted diagram of $t, obeying the custom field list |
7126
|
|
|
|
|
|
|
if any and custom selection if any. |
7127
|
|
|
|
|
|
|
|
7128
|
|
|
|
|
|
|
The dump() method uses the Data::Dumper module to call &Dumper() on |
7129
|
|
|
|
|
|
|
the table itself (by default) and prints the result to STDERR. If you |
7130
|
|
|
|
|
|
|
specify any number of other values, those will be dumped instead using |
7131
|
|
|
|
|
|
|
a single call to &Dumper (rather than individually). |
7132
|
|
|
|
|
|
|
|
7133
|
|
|
|
|
|
|
=head2 Optional module dependencies |
7134
|
|
|
|
|
|
|
|
7135
|
|
|
|
|
|
|
These methods require the otherwise-optional modules shown here: |
7136
|
|
|
|
|
|
|
|
7137
|
|
|
|
|
|
|
out() Data::ShowTable |
7138
|
|
|
|
|
|
|
dump() Data::Dumper |
7139
|
|
|
|
|
|
|
|
7140
|
|
|
|
|
|
|
You'll get a warning at runtime if you try to call either method |
7141
|
|
|
|
|
|
|
without the appropriate module installed on your system. |
7142
|
|
|
|
|
|
|
|
7143
|
|
|
|
|
|
|
=cut |
7144
|
|
|
|
|
|
|
|
7145
|
|
|
|
|
|
|
sub dump |
7146
|
|
|
|
|
|
|
{ |
7147
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
7148
|
0
|
|
|
|
|
0
|
my (@Things) = @_; |
7149
|
|
|
|
|
|
|
|
7150
|
|
|
|
|
|
|
## Default is to dump the object. |
7151
|
0
|
0
|
|
|
|
0
|
@Things = ($this) unless @Things; |
7152
|
|
|
|
|
|
|
|
7153
|
0
|
0
|
|
|
|
0
|
if ($HaveDumper) |
7154
|
|
|
|
|
|
|
{ |
7155
|
0
|
|
|
|
|
0
|
print STDERR &Dumper(@Things); |
7156
|
|
|
|
|
|
|
} |
7157
|
|
|
|
|
|
|
else |
7158
|
|
|
|
|
|
|
{ |
7159
|
0
|
|
|
|
|
0
|
carp("Data::Dumper module is not installed. Can't dump."); |
7160
|
|
|
|
|
|
|
} |
7161
|
|
|
|
|
|
|
|
7162
|
0
|
|
|
|
|
0
|
return(1); |
7163
|
|
|
|
|
|
|
} |
7164
|
|
|
|
|
|
|
|
7165
|
|
|
|
|
|
|
=pod |
7166
|
|
|
|
|
|
|
|
7167
|
|
|
|
|
|
|
=head1 MISCELLANEOUS UTILITY METHODS |
7168
|
|
|
|
|
|
|
|
7169
|
|
|
|
|
|
|
The following utilities are methods of the Data::CTable object. They |
7170
|
|
|
|
|
|
|
may be called directly by clients, subclassed, or used by subclass |
7171
|
|
|
|
|
|
|
implementations as needed. |
7172
|
|
|
|
|
|
|
|
7173
|
|
|
|
|
|
|
## Get cache file path (all args optional: defaulted from $t) |
7174
|
|
|
|
|
|
|
|
7175
|
|
|
|
|
|
|
$f = $t->prep_cache_file($FileName, $CacheExtension, $CacheSubDir) |
7176
|
|
|
|
|
|
|
|
7177
|
|
|
|
|
|
|
## Verify all directories in a path, creating any needed ones. |
7178
|
|
|
|
|
|
|
|
7179
|
|
|
|
|
|
|
$ok = $t->verify_or_create_path($DirPath, $Sep) |
7180
|
|
|
|
|
|
|
|
7181
|
|
|
|
|
|
|
## Testing readability / writeability of a proposed file |
7182
|
|
|
|
|
|
|
|
7183
|
|
|
|
|
|
|
$ok = $t->try_file_read ($Path); ## Opens for read; then closes |
7184
|
|
|
|
|
|
|
$ok = $t->try_file_write($Path); ## Opens for write; then deletes |
7185
|
|
|
|
|
|
|
|
7186
|
|
|
|
|
|
|
## Getting parameters from object with optional overrides |
7187
|
|
|
|
|
|
|
|
7188
|
|
|
|
|
|
|
$param = $t->getparam($Params, $Param) |
7189
|
|
|
|
|
|
|
|
7190
|
|
|
|
|
|
|
prep_cache_file() is the internal method used by both read() and |
7191
|
|
|
|
|
|
|
write() to calculate the name of a cache file to be used for a given |
7192
|
|
|
|
|
|
|
$FileName. |
7193
|
|
|
|
|
|
|
|
7194
|
|
|
|
|
|
|
It calculates the path to the cache file that corresponds to the given |
7195
|
|
|
|
|
|
|
$FileName (which may be a bare file name, a relative path, or a |
7196
|
|
|
|
|
|
|
partial path, as long as it obeys the current platform's path format |
7197
|
|
|
|
|
|
|
rules). All arguments are optional and if absent (undef), will be |
7198
|
|
|
|
|
|
|
defaulted from the corresponding parameters in $t. |
7199
|
|
|
|
|
|
|
|
7200
|
|
|
|
|
|
|
In addition to calculating the path and file name, it also prepends |
7201
|
|
|
|
|
|
|
the "current directory" path if there was no path. Then it checks |
7202
|
|
|
|
|
|
|
that all directories mentioned in the path actually exist. If not, it |
7203
|
|
|
|
|
|
|
fails. Then, it checks that EITHER the file exists and is readable, |
7204
|
|
|
|
|
|
|
OR it does not exist but would be writeable in that directory. If any |
7205
|
|
|
|
|
|
|
of these directory creations or file checks fails, then undef is |
7206
|
|
|
|
|
|
|
returned (and there would be no cache file). |
7207
|
|
|
|
|
|
|
|
7208
|
|
|
|
|
|
|
You may call it with no arguments on a file that has been read() to |
7209
|
|
|
|
|
|
|
find the path to the cache file that may have been used and/or |
7210
|
|
|
|
|
|
|
created, if any. |
7211
|
|
|
|
|
|
|
|
7212
|
|
|
|
|
|
|
You may call it with a file name that was written to, to see what the |
7213
|
|
|
|
|
|
|
corresponding written cache file would be. |
7214
|
|
|
|
|
|
|
|
7215
|
|
|
|
|
|
|
For example: |
7216
|
|
|
|
|
|
|
|
7217
|
|
|
|
|
|
|
## Get name of cache file used or created by read and delete it. |
7218
|
|
|
|
|
|
|
|
7219
|
|
|
|
|
|
|
$RCache = $t->prep_cache_file() and unlink($RCache); |
7220
|
|
|
|
|
|
|
|
7221
|
|
|
|
|
|
|
## Cache on write() and get name of file and delete it. |
7222
|
|
|
|
|
|
|
|
7223
|
|
|
|
|
|
|
$Written = $t->write(_CacheOnWrite=>1, _FileName=>"Foo.txt"); |
7224
|
|
|
|
|
|
|
$WCache = $t->prep_cache_file($Written) and unlink($WCache); |
7225
|
|
|
|
|
|
|
|
7226
|
|
|
|
|
|
|
verify_or_create_path() is the internal routine used by read(), |
7227
|
|
|
|
|
|
|
write(), and the cache-writing logic, that makes sure a requested file |
7228
|
|
|
|
|
|
|
path exists (by creating it if necessary and possible) before any file |
7229
|
|
|
|
|
|
|
is written by this module. |
7230
|
|
|
|
|
|
|
|
7231
|
|
|
|
|
|
|
(If you don't like this module's tendency to try to create |
7232
|
|
|
|
|
|
|
directories, make yourself a subclass in which this routine simply |
7233
|
|
|
|
|
|
|
checks -d on its $Path argument and returns the result.) |
7234
|
|
|
|
|
|
|
|
7235
|
|
|
|
|
|
|
It must be called with a full or partial path TO A DIRECTORY, NOT A |
7236
|
|
|
|
|
|
|
FILE. You may supply $Sep, a platform-appropriate separator character |
7237
|
|
|
|
|
|
|
(which defaults correctly for the runtime platform if you don't). |
7238
|
|
|
|
|
|
|
|
7239
|
|
|
|
|
|
|
Returns true if the path verification and/or creation ultimately |
7240
|
|
|
|
|
|
|
succeeded, false otherwise (meaning that, after this call, there is no |
7241
|
|
|
|
|
|
|
such directory on the system and so you should not try to write a file |
7242
|
|
|
|
|
|
|
there). |
7243
|
|
|
|
|
|
|
|
7244
|
|
|
|
|
|
|
try_file_read() and try_file_write() are the internal methods called |
7245
|
|
|
|
|
|
|
by prep_cache_file() as well as by read() and write() to preflight |
7246
|
|
|
|
|
|
|
proposed file reading and writing locations. |
7247
|
|
|
|
|
|
|
|
7248
|
|
|
|
|
|
|
try_file_read() opens a file for read and closes it again; returns |
7249
|
|
|
|
|
|
|
true if the open was possible. |
7250
|
|
|
|
|
|
|
|
7251
|
|
|
|
|
|
|
try_file_write() opens a file for write and closes it again, deleting |
7252
|
|
|
|
|
|
|
it if successful. Returns true if the open for write and the delete |
7253
|
|
|
|
|
|
|
were successful. (Be aware that this call will actually delete any |
7254
|
|
|
|
|
|
|
existing file by this name.) |
7255
|
|
|
|
|
|
|
|
7256
|
|
|
|
|
|
|
The reason that failure to delete causes try_file_write() to fail is |
7257
|
|
|
|
|
|
|
that successful cacheing depends on the ability to delete cache files |
7258
|
|
|
|
|
|
|
as well as create them or write to them. A file in a location that |
7259
|
|
|
|
|
|
|
couldn't be deleted will not be used for cacheing. |
7260
|
|
|
|
|
|
|
|
7261
|
|
|
|
|
|
|
getparam() looks up a named parameter in a params hash if it exists |
7262
|
|
|
|
|
|
|
there, otherwise looks it up in the object, thereby allowing $Params |
7263
|
|
|
|
|
|
|
to shadow any parameters in $this. |
7264
|
|
|
|
|
|
|
|
7265
|
|
|
|
|
|
|
This internal routine is used by any methods that allow overriding of |
7266
|
|
|
|
|
|
|
parameters in the object when using a named-parameter calling |
7267
|
|
|
|
|
|
|
interface. It should be used by any subclasses that also wish to use |
7268
|
|
|
|
|
|
|
a named-parameter calling convention. For example: |
7269
|
|
|
|
|
|
|
|
7270
|
|
|
|
|
|
|
my $this = shift; |
7271
|
|
|
|
|
|
|
my $Params = (@_ == 1 ? {_FieldList => $_[0]} : {@_}); |
7272
|
|
|
|
|
|
|
|
7273
|
|
|
|
|
|
|
my($FieldList, $Selection) = map {$this->getparam($Params, $_)} |
7274
|
|
|
|
|
|
|
qw(_FieldList _Selection); |
7275
|
|
|
|
|
|
|
|
7276
|
|
|
|
|
|
|
=cut |
7277
|
|
|
|
|
|
|
|
7278
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
7279
|
|
|
|
|
|
|
|
7280
|
|
|
|
|
|
|
sub prep_cache_file |
7281
|
|
|
|
|
|
|
{ |
7282
|
53
|
|
|
53
|
0
|
90
|
my $this = shift; |
7283
|
53
|
|
|
|
|
89
|
my($FileName, $CacheExtension, $CacheSubDir) = @_; |
7284
|
|
|
|
|
|
|
|
7285
|
53
|
|
|
|
|
65
|
my $Success; |
7286
|
|
|
|
|
|
|
|
7287
|
53
|
|
66
|
|
|
115
|
$FileName ||= $this->{_FileName}; |
7288
|
53
|
|
66
|
|
|
102
|
$CacheExtension ||= $this->{_CacheExtension}; |
7289
|
53
|
|
66
|
|
|
93
|
$CacheSubDir ||= $this->{_CacheSubDir}; |
7290
|
|
|
|
|
|
|
|
7291
|
|
|
|
|
|
|
## Break the path into its parts... |
7292
|
1
|
|
|
1
|
|
10
|
use File::Basename qw(fileparse); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
506
|
|
7293
|
53
|
|
|
|
|
1923
|
my ($Basename, $Path, $Ext) = fileparse($FileName, '\.[^\.]+'); |
7294
|
|
|
|
|
|
|
|
7295
|
|
|
|
|
|
|
## Figure out what the path separator should be... |
7296
|
53
|
|
|
|
|
98
|
my ($Sep, $Up, $Cur) = @{$this->path_info()}{qw(sep up cur)}; |
|
53
|
|
|
|
|
187
|
|
7297
|
|
|
|
|
|
|
|
7298
|
|
|
|
|
|
|
## FileDir is guaranteed to either be empty or have a trailing |
7299
|
|
|
|
|
|
|
## separator (see: man File::Basename). If empty, we set it to |
7300
|
|
|
|
|
|
|
## $Cur (the current directory). |
7301
|
|
|
|
|
|
|
|
7302
|
53
|
50
|
|
|
|
187
|
my $FileDir = (length($Path) ? $Path : $Cur); |
7303
|
|
|
|
|
|
|
|
7304
|
|
|
|
|
|
|
## Ensure $CacheSubDir is either empty or has a trailing separator... |
7305
|
|
|
|
|
|
|
|
7306
|
53
|
|
|
|
|
414
|
$CacheSubDir =~ s/([^\Q$Sep\E])$/$1$Sep/; |
7307
|
|
|
|
|
|
|
|
7308
|
|
|
|
|
|
|
## Check whether it's an absolute path (and not really a "sub"-dir) |
7309
|
53
|
|
|
|
|
131
|
my $Absolute = &path_is_absolute($CacheSubDir); |
7310
|
|
|
|
|
|
|
|
7311
|
|
|
|
|
|
|
## $CacheDir is $FileDir with $CacheSubDir appended. $CacheSubDir |
7312
|
|
|
|
|
|
|
## may be empty, meaning $FileDir is to be used for the cache |
7313
|
|
|
|
|
|
|
## files. But if it's an absolute path, it stands alone. |
7314
|
|
|
|
|
|
|
|
7315
|
53
|
100
|
|
|
|
155
|
my $CacheDir = ($Absolute ? $CacheSubDir : "$FileDir$CacheSubDir"); |
7316
|
|
|
|
|
|
|
|
7317
|
|
|
|
|
|
|
## Now we need to make sure that CacheDir exists OR try to create |
7318
|
|
|
|
|
|
|
## it. (Warning will have already happened if necessary.) |
7319
|
53
|
50
|
|
|
|
152
|
goto done unless $this->verify_or_create_path($CacheDir, $Sep); |
7320
|
|
|
|
|
|
|
|
7321
|
|
|
|
|
|
|
## Verify that the dir is writeable. |
7322
|
53
|
50
|
|
|
|
673
|
$this->warn("Cache directory $CacheDir is read-only"), goto done |
7323
|
|
|
|
|
|
|
unless -r $CacheDir; |
7324
|
|
|
|
|
|
|
|
7325
|
|
|
|
|
|
|
## Full path: note $CacheExtension and $Ext may be empty. |
7326
|
53
|
|
|
|
|
136
|
my $CacheFilePath = "$CacheDir$Basename$Ext$CacheExtension"; |
7327
|
|
|
|
|
|
|
|
7328
|
|
|
|
|
|
|
## If the cache path and the full path end up being the same |
7329
|
|
|
|
|
|
|
## (probably because the _CacheSubDir and _CacheExtension are both |
7330
|
|
|
|
|
|
|
## empty), we bail. Obviously, we don't want to risk overwriting |
7331
|
|
|
|
|
|
|
## the original data with the cache (or trying to read cache data |
7332
|
|
|
|
|
|
|
## from a text file). |
7333
|
|
|
|
|
|
|
|
7334
|
53
|
50
|
|
|
|
117
|
$this->warn("Can't cache $FileName without either _CacheSubDir or _CacheExtension"), goto done |
7335
|
|
|
|
|
|
|
if $CacheFilePath eq $FileName; |
7336
|
|
|
|
|
|
|
|
7337
|
|
|
|
|
|
|
## Pre-flight the cache file: ensure we can either read it or |
7338
|
|
|
|
|
|
|
## touch and then delete it. |
7339
|
|
|
|
|
|
|
|
7340
|
53
|
50
|
66
|
|
|
173
|
$this->warn("Cache file $CacheFilePath cannot be created/overwritten: $!"), goto done |
7341
|
|
|
|
|
|
|
unless ($this->try_file_read ($CacheFilePath) || |
7342
|
|
|
|
|
|
|
$this->try_file_write($CacheFilePath)); |
7343
|
|
|
|
|
|
|
|
7344
|
53
|
|
|
|
|
83
|
$Success = 1; |
7345
|
53
|
50
|
|
|
|
522
|
done: |
7346
|
|
|
|
|
|
|
return($Success ? $CacheFilePath : undef); |
7347
|
|
|
|
|
|
|
} |
7348
|
|
|
|
|
|
|
|
7349
|
|
|
|
|
|
|
sub verify_or_create_path |
7350
|
|
|
|
|
|
|
{ |
7351
|
61
|
|
|
61
|
0
|
101
|
my $this = shift; |
7352
|
61
|
|
|
|
|
93
|
my ($Dir, $Sep) = @_; |
7353
|
|
|
|
|
|
|
|
7354
|
|
|
|
|
|
|
## Get default value for $Sep if not supplied. |
7355
|
61
|
|
33
|
|
|
135
|
$Sep ||= ${$this->path_info()}{sep}; |
|
0
|
|
|
|
|
0
|
|
7356
|
|
|
|
|
|
|
|
7357
|
|
|
|
|
|
|
## $Dir might end in $Sep; split strips trailing one if so. |
7358
|
61
|
|
|
|
|
292
|
my $Parts = [split(/\Q$Sep\E/, $Dir)]; |
7359
|
|
|
|
|
|
|
|
7360
|
61
|
|
|
|
|
138
|
my $WholePath = ""; |
7361
|
61
|
|
|
|
|
119
|
foreach (@$Parts) |
7362
|
|
|
|
|
|
|
{ |
7363
|
114
|
|
|
|
|
193
|
$WholePath = "$WholePath$_$Sep"; |
7364
|
114
|
50
|
|
|
|
1737
|
next if -d $WholePath; |
7365
|
|
|
|
|
|
|
|
7366
|
|
|
|
|
|
|
## Directory does not exist. We need to make it. |
7367
|
|
|
|
|
|
|
|
7368
|
|
|
|
|
|
|
## mkdir($WholePath, 0777) or |
7369
|
|
|
|
|
|
|
## $this->warn("Failed to create directory '$WholePath': $!"), last; |
7370
|
|
|
|
|
|
|
|
7371
|
|
|
|
|
|
|
## On some platforms (e.g. Darwin), perl's mkdir fails if |
7372
|
|
|
|
|
|
|
## there's a trailing separator. Others tolerate its absence, |
7373
|
|
|
|
|
|
|
## so we remove it. |
7374
|
|
|
|
|
|
|
|
7375
|
0
|
|
|
|
|
0
|
(my $TryDir = $WholePath) =~ s/\Q$Sep\E$//; |
7376
|
|
|
|
|
|
|
|
7377
|
0
|
0
|
|
|
|
0
|
mkdir($TryDir, 0777) or |
7378
|
|
|
|
|
|
|
$this->warn("Failed to create directory '$TryDir': $!"), last; |
7379
|
|
|
|
|
|
|
} |
7380
|
|
|
|
|
|
|
|
7381
|
61
|
|
|
|
|
777
|
return(-d $Dir); |
7382
|
|
|
|
|
|
|
} |
7383
|
|
|
|
|
|
|
|
7384
|
|
|
|
|
|
|
sub try_file_write ## like a "touch" but deletes the file if it succeeds. |
7385
|
|
|
|
|
|
|
{ |
7386
|
10
|
|
|
10
|
0
|
20
|
my $this = shift; |
7387
|
10
|
|
|
|
|
17
|
my ($Path) = @_; |
7388
|
|
|
|
|
|
|
|
7389
|
10
|
|
|
|
|
14
|
my $Success; |
7390
|
|
|
|
|
|
|
|
7391
|
|
|
|
|
|
|
## Try creating it. |
7392
|
1
|
|
|
1
|
|
8
|
use IO::File; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
314
|
|
7393
|
10
|
|
|
|
|
69
|
my $File = IO::File->new(">$Path"); |
7394
|
|
|
|
|
|
|
|
7395
|
|
|
|
|
|
|
## Created: close and delete it. |
7396
|
10
|
50
|
|
|
|
1304
|
$File->close(), unlink($Path) if $File; |
7397
|
|
|
|
|
|
|
|
7398
|
|
|
|
|
|
|
## Failed: bail. |
7399
|
10
|
50
|
|
|
|
810
|
goto done unless $File; |
7400
|
|
|
|
|
|
|
|
7401
|
|
|
|
|
|
|
## If we couldn't unlink, fail. The ability to delete a failed or |
7402
|
|
|
|
|
|
|
## part-written cache file is a critical part of cacheing. |
7403
|
|
|
|
|
|
|
|
7404
|
10
|
50
|
|
|
|
178
|
goto done if -e $Path; |
7405
|
|
|
|
|
|
|
|
7406
|
10
|
|
|
|
|
20
|
$Success = 1; |
7407
|
10
|
|
|
|
|
59
|
done: |
7408
|
|
|
|
|
|
|
return($Success); |
7409
|
|
|
|
|
|
|
} |
7410
|
|
|
|
|
|
|
|
7411
|
|
|
|
|
|
|
sub try_file_read ## Verifies that a file exists / can be read... |
7412
|
|
|
|
|
|
|
{ |
7413
|
53
|
|
|
53
|
0
|
73
|
my $this = shift; |
7414
|
53
|
|
|
|
|
76
|
my ($Path) = @_; |
7415
|
|
|
|
|
|
|
|
7416
|
53
|
|
|
|
|
71
|
my $Success; |
7417
|
|
|
|
|
|
|
|
7418
|
|
|
|
|
|
|
## Try opening it. |
7419
|
1
|
|
|
1
|
|
5
|
use IO::File; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
516
|
|
7420
|
53
|
100
|
|
|
|
394
|
my $File = IO::File->new("<$Path") or goto done; |
7421
|
|
|
|
|
|
|
|
7422
|
51
|
|
|
|
|
3867
|
$Success = 1; |
7423
|
53
|
|
|
|
|
966
|
done: |
7424
|
|
|
|
|
|
|
return($Success); |
7425
|
|
|
|
|
|
|
} |
7426
|
|
|
|
|
|
|
|
7427
|
|
|
|
|
|
|
sub getparam |
7428
|
|
|
|
|
|
|
{ |
7429
|
801
|
|
|
801
|
0
|
950
|
my $this = shift; |
7430
|
801
|
|
|
|
|
1036
|
my ($Params, $Param) = @_; |
7431
|
|
|
|
|
|
|
|
7432
|
801
|
100
|
|
|
|
2778
|
return(exists($Params->{$Param}) ? |
7433
|
|
|
|
|
|
|
( $Params->{$Param}) : |
7434
|
|
|
|
|
|
|
( $this->{$Param}) ); |
7435
|
|
|
|
|
|
|
} |
7436
|
|
|
|
|
|
|
|
7437
|
|
|
|
|
|
|
|
7438
|
|
|
|
|
|
|
|
7439
|
|
|
|
|
|
|
=pod |
7440
|
|
|
|
|
|
|
|
7441
|
|
|
|
|
|
|
=head1 GENERAL-PURPOSE UTILITY FUNCTIONS |
7442
|
|
|
|
|
|
|
|
7443
|
|
|
|
|
|
|
These general-purpose utility routines are defined in the Data::CTable |
7444
|
|
|
|
|
|
|
module but are not method calls. You may optionally import them or |
7445
|
|
|
|
|
|
|
call them by their fully-qualified name. |
7446
|
|
|
|
|
|
|
|
7447
|
|
|
|
|
|
|
use Data::CTable qw( |
7448
|
|
|
|
|
|
|
guess_endings |
7449
|
|
|
|
|
|
|
guess_delimiter |
7450
|
|
|
|
|
|
|
path_info |
7451
|
|
|
|
|
|
|
path_is_absolute |
7452
|
|
|
|
|
|
|
min |
7453
|
|
|
|
|
|
|
max |
7454
|
|
|
|
|
|
|
); |
7455
|
|
|
|
|
|
|
|
7456
|
|
|
|
|
|
|
## File-format guessing |
7457
|
|
|
|
|
|
|
|
7458
|
|
|
|
|
|
|
my $E = &guess_endings($IOHandle) ## Guess txt file line endings |
7459
|
|
|
|
|
|
|
my $D = &guess_delimiter($String) ## Tab if found, else comma |
7460
|
|
|
|
|
|
|
|
7461
|
|
|
|
|
|
|
## Cross-platform file path analysis |
7462
|
|
|
|
|
|
|
|
7463
|
|
|
|
|
|
|
my $Info = path_info(); ## Hash: 3 of platform's path values: |
7464
|
|
|
|
|
|
|
my ($Sep, ## ... path separator ( / on Unix) |
7465
|
|
|
|
|
|
|
$Up, ## ... "up" component (../ on Unix) |
7466
|
|
|
|
|
|
|
$Cur) = ## ... curr. dir path ( ./ on Unix) |
7467
|
|
|
|
|
|
|
@$Info{qw(sep up cur)}; |
7468
|
|
|
|
|
|
|
|
7469
|
|
|
|
|
|
|
my $Abs = path_is_absolute($Path) ## Check path type |
7470
|
|
|
|
|
|
|
|
7471
|
|
|
|
|
|
|
## Our old favorites min and max |
7472
|
|
|
|
|
|
|
|
7473
|
|
|
|
|
|
|
$x = max($x, 0); ## Should have been part of Perl... |
7474
|
|
|
|
|
|
|
$x = min($x, 100); |
7475
|
|
|
|
|
|
|
|
7476
|
|
|
|
|
|
|
guess_endings() tries to figure out whether an open IO::File handle |
7477
|
|
|
|
|
|
|
has DOS, Mac, or Unix file endings. It reads successively large |
7478
|
|
|
|
|
|
|
blocks of the file until it either finds evidence of at least two |
7479
|
|
|
|
|
|
|
separate line endings (of any type, but presumably they are the same), |
7480
|
|
|
|
|
|
|
or until it reaches the end of the file. Then, it takes the resulting |
7481
|
|
|
|
|
|
|
block and searches for the first qualifying line ending sequence it |
7482
|
|
|
|
|
|
|
finds, if any. This sequence is then returned to the caller. If it |
7483
|
|
|
|
|
|
|
returns undef, it was not able to find any evidence of line endings in |
7484
|
|
|
|
|
|
|
the file. |
7485
|
|
|
|
|
|
|
|
7486
|
|
|
|
|
|
|
guess_delimiter() takes a string buffer and returns a "," unless it |
7487
|
|
|
|
|
|
|
finds a tab character before the first comma in the $String, if any, |
7488
|
|
|
|
|
|
|
in which case a tab is returned. |
7489
|
|
|
|
|
|
|
|
7490
|
|
|
|
|
|
|
path_info() returns a hash of three helpful strings for building and |
7491
|
|
|
|
|
|
|
parsing paths on the current platform. Knows about Mac, Dos/Win, and |
7492
|
|
|
|
|
|
|
otherwise defaults to Unix. |
7493
|
|
|
|
|
|
|
|
7494
|
|
|
|
|
|
|
path_is_absolute($Path) returns true if it thinks the given path |
7495
|
|
|
|
|
|
|
string is an absolute path on the current platform. |
7496
|
|
|
|
|
|
|
|
7497
|
|
|
|
|
|
|
=cut |
7498
|
|
|
|
|
|
|
|
7499
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
7500
|
|
|
|
|
|
|
|
7501
|
|
|
|
|
|
|
sub guess_endings |
7502
|
|
|
|
|
|
|
{ |
7503
|
19
|
|
|
19
|
0
|
29
|
my ($File) = @_; |
7504
|
|
|
|
|
|
|
|
7505
|
19
|
|
|
|
|
29
|
my $Ending = undef; |
7506
|
|
|
|
|
|
|
|
7507
|
19
|
|
|
|
|
25
|
my $ReadCount = 0; |
7508
|
19
|
|
|
|
|
31
|
my $BlockSize = 512; |
7509
|
|
|
|
|
|
|
|
7510
|
19
|
|
|
|
|
26
|
my $Buf; |
7511
|
|
|
|
|
|
|
my $Actual; |
7512
|
|
|
|
|
|
|
|
7513
|
19
|
|
|
|
|
104
|
while ($File->seek(0, 0), $Actual = $File->read($Buf, ($BlockSize * ++$ReadCount))) |
7514
|
|
|
|
|
|
|
{ |
7515
|
|
|
|
|
|
|
## Break out of the loop if it appears a line ending match is |
7516
|
|
|
|
|
|
|
## found (but disallow initial match at very end of buffer). |
7517
|
|
|
|
|
|
|
|
7518
|
19
|
50
|
|
|
|
792
|
last if $Buf =~ /((?:\x0D\x0A)|(?:\x0D)|(?:\x0A))[^\x0D\x0A]/; |
7519
|
|
|
|
|
|
|
|
7520
|
|
|
|
|
|
|
## Break out of the loop if we just read any less than |
7521
|
|
|
|
|
|
|
## attempted (we are probably at the end of a very short, |
7522
|
|
|
|
|
|
|
## maybe one-line or even zero-line, file). |
7523
|
|
|
|
|
|
|
|
7524
|
0
|
0
|
|
|
|
0
|
last if $Actual < ($BlockSize * $ReadCount); |
7525
|
|
|
|
|
|
|
} |
7526
|
|
|
|
|
|
|
|
7527
|
|
|
|
|
|
|
## We can presume that the buffer we now have must either have |
7528
|
|
|
|
|
|
|
## line endings in it, or there is no line ending in the file at |
7529
|
|
|
|
|
|
|
## all. So we extract the first one we come to, (trying the DOS |
7530
|
|
|
|
|
|
|
## ending first since it contains the other two), if any, and we |
7531
|
|
|
|
|
|
|
## return it. |
7532
|
|
|
|
|
|
|
|
7533
|
19
|
|
|
|
|
181
|
my $Ending = ($Buf =~ /((\x0D\x0A)|(\x0D)|(\x0A))/)[0]; |
7534
|
|
|
|
|
|
|
|
7535
|
|
|
|
|
|
|
## &progress_default(undef, "DOS line endings") if $2; ## Debugging. |
7536
|
|
|
|
|
|
|
## &progress_default(undef, "Mac line endings") if $3; ## Debugging. |
7537
|
|
|
|
|
|
|
## &progress_default(undef, "Unix line endings") if $4; ## Debugging. |
7538
|
|
|
|
|
|
|
|
7539
|
19
|
|
|
|
|
70
|
done: |
7540
|
|
|
|
|
|
|
|
7541
|
|
|
|
|
|
|
## We always seek back to zero when done. |
7542
|
|
|
|
|
|
|
$File->seek(0, 0); |
7543
|
|
|
|
|
|
|
|
7544
|
19
|
|
|
|
|
223
|
return($Ending); |
7545
|
|
|
|
|
|
|
} |
7546
|
|
|
|
|
|
|
|
7547
|
|
|
|
|
|
|
sub guess_delimiter |
7548
|
|
|
|
|
|
|
{ |
7549
|
19
|
|
|
19
|
0
|
37
|
my ($String) = @_; |
7550
|
|
|
|
|
|
|
|
7551
|
19
|
|
50
|
|
|
162
|
return(($String =~ /([,\t])/)[0] || ","); |
7552
|
|
|
|
|
|
|
} |
7553
|
|
|
|
|
|
|
|
7554
|
|
|
|
|
|
|
sub path_info |
7555
|
|
|
|
|
|
|
{ |
7556
|
1
|
|
|
1
|
|
5
|
use Config qw(%Config); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
145
|
|
7557
|
70
|
|
|
70
|
0
|
673
|
my $OSName = $Config{osname}; |
7558
|
|
|
|
|
|
|
|
7559
|
70
|
50
|
|
|
|
277
|
return({sep =>':' , up =>'::' , cur =>':' }) if $OSName =~ /mac /ix; |
7560
|
70
|
50
|
|
|
|
363
|
return({sep =>'\\', up =>'..\\', cur =>'.\\'}) if $OSName =~ /(?
|
7561
|
70
|
|
|
|
|
379
|
return({sep =>'/' , up =>'../' , cur =>'./' }) ; |
7562
|
|
|
|
|
|
|
} |
7563
|
|
|
|
|
|
|
|
7564
|
|
|
|
|
|
|
sub path_is_absolute |
7565
|
|
|
|
|
|
|
{ |
7566
|
53
|
|
|
53
|
0
|
86
|
my ($Path) = @_; |
7567
|
|
|
|
|
|
|
|
7568
|
1
|
|
|
1
|
|
6
|
use Config qw(%Config); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
494
|
|
7569
|
53
|
|
|
|
|
316
|
my $OSName = $Config{osname}; |
7570
|
|
|
|
|
|
|
|
7571
|
53
|
50
|
|
|
|
190
|
return($Path =~ /^[^:]/) if $OSName =~ /mac /ix; |
7572
|
53
|
50
|
|
|
|
216
|
return($Path =~ /^(([a-z][:])|(\\\\))/i) if $OSName =~ /(?
|
7573
|
53
|
|
|
|
|
182
|
return($Path =~ /^\//) ; |
7574
|
|
|
|
|
|
|
} |
7575
|
|
|
|
|
|
|
|
7576
|
|
|
|
|
|
|
### min and max |
7577
|
|
|
|
|
|
|
|
7578
|
188169
|
100
|
|
188169
|
0
|
865915
|
sub min {return($_[0] < $_[1] ? $_[0] : $_[1])} |
7579
|
134
|
100
|
|
134
|
0
|
323
|
sub max {return($_[0] > $_[1] ? $_[0] : $_[1])} |
7580
|
|
|
|
|
|
|
|
7581
|
|
|
|
|
|
|
|
7582
|
|
|
|
|
|
|
=pod |
7583
|
|
|
|
|
|
|
|
7584
|
|
|
|
|
|
|
=head1 IMPLEMENTATION LIMITATIONS |
7585
|
|
|
|
|
|
|
|
7586
|
|
|
|
|
|
|
=over 4 |
7587
|
|
|
|
|
|
|
|
7588
|
|
|
|
|
|
|
=item Column (field) names must not start with underscore |
7589
|
|
|
|
|
|
|
|
7590
|
|
|
|
|
|
|
This object is implemented as a blessed hash reference. By |
7591
|
|
|
|
|
|
|
convention, keys that do not start with underscore are data columns |
7592
|
|
|
|
|
|
|
and the key is the field name. Keys that do start with underscore |
7593
|
|
|
|
|
|
|
refer to parameters or other data structures stored in the object. |
7594
|
|
|
|
|
|
|
|
7595
|
|
|
|
|
|
|
Consequently, no field names may start with underscore. When a file |
7596
|
|
|
|
|
|
|
is read from disk, any field names that DO start with underscores will |
7597
|
|
|
|
|
|
|
have the leading underscores stripped off. Strange things could then |
7598
|
|
|
|
|
|
|
occur if the field names are then no longer unique. For example, |
7599
|
|
|
|
|
|
|
field "A" and "_A" in the data file would be treated as the single |
7600
|
|
|
|
|
|
|
field "A" after the file was read. |
7601
|
|
|
|
|
|
|
|
7602
|
|
|
|
|
|
|
=item Field values are always read as strings |
7603
|
|
|
|
|
|
|
|
7604
|
|
|
|
|
|
|
Field values when written to a file are necessarily converted to |
7605
|
|
|
|
|
|
|
strings. When read back in, they are read as strings, regardless of |
7606
|
|
|
|
|
|
|
original format. The sole exception is the empty string which is read |
7607
|
|
|
|
|
|
|
back in as undef for efficiency. |
7608
|
|
|
|
|
|
|
|
7609
|
|
|
|
|
|
|
An exception is when the _CacheOnWrite feature is used: field values |
7610
|
|
|
|
|
|
|
stored internally as integers or other scalar types may be saved and |
7611
|
|
|
|
|
|
|
later restored as such. However, you should not rely on this |
7612
|
|
|
|
|
|
|
behavior. |
7613
|
|
|
|
|
|
|
|
7614
|
|
|
|
|
|
|
=item Undef vs. empty |
7615
|
|
|
|
|
|
|
|
7616
|
|
|
|
|
|
|
Empty field values are stored as "undef" for efficiency. This means |
7617
|
|
|
|
|
|
|
that programs should generally not rely on any differences between "" |
7618
|
|
|
|
|
|
|
and undef in field values. However, when working with large but |
7619
|
|
|
|
|
|
|
sparse tables, programs should take care not to convert undef values |
7620
|
|
|
|
|
|
|
to empty strings unnecessarily since the separate string objects |
7621
|
|
|
|
|
|
|
consume considerably more memory than undef. |
7622
|
|
|
|
|
|
|
|
7623
|
|
|
|
|
|
|
=back |
7624
|
|
|
|
|
|
|
|
7625
|
|
|
|
|
|
|
=head1 CONTRIBUTIONS |
7626
|
|
|
|
|
|
|
|
7627
|
|
|
|
|
|
|
Corrections, bug reports, bug fixes, or feature additions are |
7628
|
|
|
|
|
|
|
encouraged. Please send additions or patches with a clear explanation |
7629
|
|
|
|
|
|
|
of their purpose. Consider making additions in the form of a subclass |
7630
|
|
|
|
|
|
|
if possible. |
7631
|
|
|
|
|
|
|
|
7632
|
|
|
|
|
|
|
I'm committed to bundling useful subclasses contributed by myself or |
7633
|
|
|
|
|
|
|
others with this main distribution. |
7634
|
|
|
|
|
|
|
|
7635
|
|
|
|
|
|
|
So, if you've got a subclass of Data::CTable (which should have a name |
7636
|
|
|
|
|
|
|
like Data::CTable::YourClassName) and you would like it included in |
7637
|
|
|
|
|
|
|
the main distribution, please send it along with a test script and |
7638
|
|
|
|
|
|
|
I'll review the code and add it (at my discretion). |
7639
|
|
|
|
|
|
|
|
7640
|
|
|
|
|
|
|
If you've got a module that uses, augments, or complements this one, |
7641
|
|
|
|
|
|
|
let me know that, too, and I'll make appropriate mention of it. |
7642
|
|
|
|
|
|
|
|
7643
|
|
|
|
|
|
|
=head1 SEE ALSO |
7644
|
|
|
|
|
|
|
|
7645
|
|
|
|
|
|
|
The Data::CTable home page: |
7646
|
|
|
|
|
|
|
|
7647
|
|
|
|
|
|
|
http://christhorman.com/projects/perl/Data-CTable/ |
7648
|
|
|
|
|
|
|
|
7649
|
|
|
|
|
|
|
The implementation in CTable.pm. |
7650
|
|
|
|
|
|
|
|
7651
|
|
|
|
|
|
|
The test.pl script, other subclasses, and examples. |
7652
|
|
|
|
|
|
|
|
7653
|
|
|
|
|
|
|
The Data::ShowTable module. |
7654
|
|
|
|
|
|
|
|
7655
|
|
|
|
|
|
|
The Data::Table module by Yingyao Zhou & Guangzhou Zou. |
7656
|
|
|
|
|
|
|
|
7657
|
|
|
|
|
|
|
The perlref manual page. |
7658
|
|
|
|
|
|
|
|
7659
|
|
|
|
|
|
|
=head1 AUTHOR |
7660
|
|
|
|
|
|
|
|
7661
|
|
|
|
|
|
|
Chris Thorman |
7662
|
|
|
|
|
|
|
|
7663
|
|
|
|
|
|
|
Copyright (c) 1995-2002 Chris Thorman. All rights reserved. |
7664
|
|
|
|
|
|
|
|
7665
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify |
7666
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
7667
|
|
|
|
|
|
|
|
7668
|
|
|
|
|
|
|
=cut |
7669
|
|
|
|
|
|
|
|
7670
|
|
|
|
|
|
|
1; |
7671
|
|
|
|
|
|
|
|