| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#!/usr/bin/perl |
|
2
|
|
|
|
|
|
|
## Emacs: -*- tab-width: 4; -*- |
|
3
|
|
|
|
|
|
|
|
|
4
|
1
|
|
|
1
|
|
13071
|
use strict; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
211
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package Data::CTable; |
|
7
|
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
8
|
use vars qw($VERSION); $VERSION = '1.01'; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
333
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=pod |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Data::CTable - Read, write, manipulate tabular data |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
## Read some data files in various tabular formats |
|
19
|
|
|
|
|
|
|
use Data::CTable; |
|
20
|
|
|
|
|
|
|
my $People = Data::CTable->new("people.merge.mac.txt"); |
|
21
|
|
|
|
|
|
|
my $Stats = Data::CTable->new("stats.tabs.unix.txt"); |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
## Clean stray whitespace in fields |
|
24
|
|
|
|
|
|
|
$People->clean_ws(); |
|
25
|
|
|
|
|
|
|
$Stats ->clean_ws(); |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
## Retrieve columns |
|
28
|
|
|
|
|
|
|
my $First = $People->col('FirstName'); |
|
29
|
|
|
|
|
|
|
my $Last = $People->col('LastName' ); |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
## Calculate a new column based on two others |
|
32
|
|
|
|
|
|
|
my $Full = [map {"$First->[$_] $Last->[$_]"} @{$People->all()}]; |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
## Add new column to the table |
|
35
|
|
|
|
|
|
|
$People->col(FullName => $Full); |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
## Another way to calculate a new column |
|
38
|
|
|
|
|
|
|
$People->col('Key'); |
|
39
|
|
|
|
|
|
|
$People->calc(sub {no strict 'vars'; $Key = "$Last,$First";}); |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
## "Left join" records matching Stats:PersonID to People:Key |
|
42
|
|
|
|
|
|
|
$Stats->join($People, PersonID => 'Key'); |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
## Find certain records |
|
45
|
|
|
|
|
|
|
$Stats->select_all(); |
|
46
|
|
|
|
|
|
|
$Stats->select(Department => sub {/Sale/i }); ## Sales depts |
|
47
|
|
|
|
|
|
|
$Stats->omit (Department => sub {/Resale/i}); ## not Resales |
|
48
|
|
|
|
|
|
|
$Stats->select(UsageIndex => sub {$_ > 20.0}); ## high usage |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
## Sort the found records |
|
51
|
|
|
|
|
|
|
$Stats->sortspec('DeptNum' , {SortType => 'Integer'}); |
|
52
|
|
|
|
|
|
|
$Stats->sortspec('UsageIndex', {SortType => 'Number' }); |
|
53
|
|
|
|
|
|
|
$Stats->sort([qw(DeptNum UsageIndex Last First)]); |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
## Make copy of table with only found/sorted data, in order |
|
56
|
|
|
|
|
|
|
my $Report = $Stats->snapshot(); |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
## Write an output file |
|
59
|
|
|
|
|
|
|
$Report->write(_FileName => "Rept.txt", _LineEnding => "mac"); |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
## Print a final progress message. |
|
62
|
|
|
|
|
|
|
$Stats->progress("Done!"); |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
## Dozens more methods and parameters available... |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head1 OVERVIEW |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Data::CTable is a comprehensive utility for reading, writing, |
|
69
|
|
|
|
|
|
|
manipulating, cleaning and otherwise transforming tabular data. The |
|
70
|
|
|
|
|
|
|
distribution includes several illustrative subclasses and utility |
|
71
|
|
|
|
|
|
|
scripts. |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
A Columnar Table represents a table as a hash of data columns, making |
|
74
|
|
|
|
|
|
|
it easy to do data cleanup, formatting, searching, calculations, |
|
75
|
|
|
|
|
|
|
joins, or other complex operations. |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
The object's hash keys are the field names and the hash values hold |
|
78
|
|
|
|
|
|
|
the data columns (as array references). |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
Tables also store a "selection" -- a list of selected / sorted record |
|
81
|
|
|
|
|
|
|
numbers, and a "field list" -- an ordered list of all or some fields |
|
82
|
|
|
|
|
|
|
to be operated on. Select() and sort() methods manipulate the |
|
83
|
|
|
|
|
|
|
selection list. Later, you can optionally rewrite the table in memory |
|
84
|
|
|
|
|
|
|
or on disk to reflect changes in the selection list or field list. |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
Data::CTable reads and writes any tabular text file format including |
|
87
|
|
|
|
|
|
|
Merge, CSV, Tab-delimited, and variants. It transparently detects, |
|
88
|
|
|
|
|
|
|
reads, and preserves Unix, Mac, and/or DOS line endings and tab or |
|
89
|
|
|
|
|
|
|
comma field delimiters -- regardless of the runtime platform. |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
In addition to reading data files, CTable is a good way to gather, |
|
92
|
|
|
|
|
|
|
store, and operate on tabular data in memory, and to export data to |
|
93
|
|
|
|
|
|
|
delimited text files to be read by other programs or interactive |
|
94
|
|
|
|
|
|
|
productivity applications. |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
To achieve extremely fast data loading, CTable caches data file |
|
97
|
|
|
|
|
|
|
contents using the Storable module. This can be helpful in CGI |
|
98
|
|
|
|
|
|
|
environments or when operating on very large data files. CTable can |
|
99
|
|
|
|
|
|
|
read an entire cached table of about 120 megabytes into memory in |
|
100
|
|
|
|
|
|
|
about 10 seconds on an average mid-range computer. |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
For simple data-driven applications needing to store and quickly |
|
103
|
|
|
|
|
|
|
retrieve simple tabular data sets, CTable provides a credible |
|
104
|
|
|
|
|
|
|
alternative to DBM files or SQL. |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
For data hygiene applications, CTable forms the foundation for writing |
|
107
|
|
|
|
|
|
|
utility scripts or compilers to transfer data from external sources, |
|
108
|
|
|
|
|
|
|
such as FileMaker, Excel, Access, personal organizers, etc. into |
|
109
|
|
|
|
|
|
|
compiled or validated formats -- or even as a gateway to loading data |
|
110
|
|
|
|
|
|
|
into SQL databases or other destinations. You can easily write short, |
|
111
|
|
|
|
|
|
|
repeatable scripts in Perl to do reporting, error checking, analysis, |
|
112
|
|
|
|
|
|
|
or validation that would be hard to duplicate in less-flexible |
|
113
|
|
|
|
|
|
|
application environments. |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
The data representation is simple and open so you can directly access |
|
116
|
|
|
|
|
|
|
the data in the object if you feel like it -- or you can use accessors |
|
117
|
|
|
|
|
|
|
to request "clean" structures containing only the data or copies of |
|
118
|
|
|
|
|
|
|
it. Or you can build your own columns in memory and then when you're |
|
119
|
|
|
|
|
|
|
ready, turn them into a table object using the very flexible new() |
|
120
|
|
|
|
|
|
|
method. |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
The highly factored interface and implementation allow fine-grained |
|
123
|
|
|
|
|
|
|
subclassing so you can easily create useful lightweight subclasses. |
|
124
|
|
|
|
|
|
|
Several subclasses are included with the distribution. |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Most defaults and parameters can be customized by subclassing, |
|
127
|
|
|
|
|
|
|
overridden at the instance level (avoiding the need to subclass too |
|
128
|
|
|
|
|
|
|
often), and further overridden via optional named-parameter arguments |
|
129
|
|
|
|
|
|
|
to most major method calls. |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=head2 Similar / related modules on CPAN |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
The Data::Table module by Yingyao Zhou & Guangzhou Zou offers similar |
|
134
|
|
|
|
|
|
|
functionality, but uses a different underlying data representation |
|
135
|
|
|
|
|
|
|
(2-dimensional array), and has a somewhat different feature set. |
|
136
|
|
|
|
|
|
|
Check it out. Maybe you will prefer it for your application. |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
http://search.cpan.org/search?mode=module&query=Data::Table |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
The Data::ShowTable module renders tables in various viewable formats. |
|
141
|
|
|
|
|
|
|
CTable relies on ShowTable's ShowBoxTable method to implement its own |
|
142
|
|
|
|
|
|
|
format() and out() methods. |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
http://search.cpan.org/search?mode=module&query=Data::ShowTable |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head2 Prerequisites |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
The CTable documentation, source code, and examples assume familiarity |
|
150
|
|
|
|
|
|
|
with large nested data structures, object-oriented syntax and |
|
151
|
|
|
|
|
|
|
terminology, and comfort working with array and hash references and |
|
152
|
|
|
|
|
|
|
array and hash slice syntax. |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
See the perlref man page for more on these topics. |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=head2 How to learn more |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Dozens more methods, parameters, and examples are described below. |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
See the full source code in CTable.pm. |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Or, after installing, read the man page using: |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
man Data::CTable |
|
166
|
|
|
|
|
|
|
perldoc Data::CTable |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
See the eg/ (examples) folder in the Data::CTable distribution and the |
|
169
|
|
|
|
|
|
|
test.pl script for scripts demonstrating every CTable method. |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
For latest version and other news, check the Data::CTable home page: |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
http://christhorman.com/projects/perl/Data-CTable/ |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Or search CPAN: |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
http://search.cpan.org/search?mode=module&query=Data::CTable |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head1 INSTALLATION |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
Using CPAN module: |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
perl -MCPAN -e 'install Data::CTable' |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Or manually: |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
tar xzvf Data-CTable*gz |
|
188
|
|
|
|
|
|
|
cd Data-CTable-?.?? |
|
189
|
|
|
|
|
|
|
perl Makefile.PL |
|
190
|
|
|
|
|
|
|
make |
|
191
|
|
|
|
|
|
|
make test |
|
192
|
|
|
|
|
|
|
make install |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=head1 INCLUDED SUBCLASSES AND UTILITIES |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
In addition to the module itself, there are a number of subclasses and |
|
197
|
|
|
|
|
|
|
simple utilities included with the Data::CTable distribution. |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=head2 Subclases |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
The Data::CTable distribution includes these example subclasses. Each |
|
202
|
|
|
|
|
|
|
is installed in your Perl environment along with the main module, and |
|
203
|
|
|
|
|
|
|
so may be used by your scripts. Each has its own man/perldoc page |
|
204
|
|
|
|
|
|
|
containing more detail. |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
B is a subclass that logs all progress |
|
207
|
|
|
|
|
|
|
messages to a list within the object itself rather than (merely) |
|
208
|
|
|
|
|
|
|
echoing them to STDERR. Later, you may retrieve and examine the list. |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
B is a virtual subclass that includes class and |
|
211
|
|
|
|
|
|
|
object methods that make it easy to write a simple interactive |
|
212
|
|
|
|
|
|
|
command-line program that parses options and outputs a table. |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
B is a very useful subclass of |
|
215
|
|
|
|
|
|
|
Data::CTable::Script that implements a souped-up Unix-like "ls" (file |
|
216
|
|
|
|
|
|
|
listing) command -- it first gets an optionally-recursive listing of |
|
217
|
|
|
|
|
|
|
any number of files and/or directories, builds a list of their full |
|
218
|
|
|
|
|
|
|
absolute or relative paths, then build a Data::CTable::Listing object |
|
219
|
|
|
|
|
|
|
that contains all the paths, plus about 25+ other pieces of useful |
|
220
|
|
|
|
|
|
|
information about each file or directory. |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
The "tls" utility, below, is simply a command-line cover for this |
|
223
|
|
|
|
|
|
|
class, but you could use this class in your own scripts in order to |
|
224
|
|
|
|
|
|
|
get detailed file listings. |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=head2 Utilities |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
Each of these utilities is provided mainly so you can see mature |
|
229
|
|
|
|
|
|
|
examples of how to use Data::CTable in real-world scripts. |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
But each is also genuinely useful, too, and you may enjoy adding them |
|
232
|
|
|
|
|
|
|
to your regular bag of tricks or using them as an easily-modifiable |
|
233
|
|
|
|
|
|
|
basis for scripts of your own. |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
On most systems, these will be installed in an appropriate directory |
|
236
|
|
|
|
|
|
|
in your path when you install CTable, and hence will be executable |
|
237
|
|
|
|
|
|
|
just by typing their file name. (On Windows, they'll be wrapped by a |
|
238
|
|
|
|
|
|
|
.bat file and installed in C:\Perl\bin or equivalent. On *nix, |
|
239
|
|
|
|
|
|
|
they'll be in /usr/bin/ or equivalent.) |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
B is a command-line utility that wraps Data::CTable::Listing to |
|
242
|
|
|
|
|
|
|
implement a variant on the classic Unix "ls" command in which an |
|
243
|
|
|
|
|
|
|
internal CTable object is used to hold and calculate a very large |
|
244
|
|
|
|
|
|
|
amount of meta-data about each file and directory and then output that |
|
245
|
|
|
|
|
|
|
data in formatted tables, or as any kind of delimited text file, and |
|
246
|
|
|
|
|
|
|
with much more flexibility and control over included data, and sort |
|
247
|
|
|
|
|
|
|
and sub-sort order than with ls. |
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
B is a command-line utility that reads each of its arguments as |
|
250
|
|
|
|
|
|
|
a Data::CTable file and then calls the out() method to display its |
|
251
|
|
|
|
|
|
|
entire contents. (Warning: out() is slow with very large data sets.) |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
B is a command-line utility that takes a US zip code, |
|
254
|
|
|
|
|
|
|
grabs the local weather report from a popular weather web site and |
|
255
|
|
|
|
|
|
|
uses a CTable object to store, process, and clean, and present the |
|
256
|
|
|
|
|
|
|
table of weather data that results in a simple text format. |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=cut |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
## Required dependencies |
|
261
|
|
|
|
|
|
|
|
|
262
|
1
|
|
|
1
|
|
1022
|
use IO::File; |
|
|
1
|
|
|
|
|
12860
|
|
|
|
1
|
|
|
|
|
134
|
|
|
263
|
1
|
|
|
1
|
|
10
|
use Config; qw(%Config); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
49
|
|
|
264
|
|
|
|
|
|
|
|
|
265
|
1
|
|
|
1
|
|
4
|
use Carp qw(croak carp confess cluck); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
64
|
|
|
266
|
1
|
|
|
1
|
|
1141
|
use Storable qw(store nstore retrieve dclone); |
|
|
1
|
|
|
|
|
3435
|
|
|
|
1
|
|
|
|
|
111
|
|
|
267
|
1
|
|
|
1
|
|
8
|
use File::Basename qw(fileparse); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
137
|
|
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
## Optional dependencies |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
my $HaveDumper; |
|
273
|
|
|
|
|
|
|
my $HaveShowTable; |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
BEGIN |
|
276
|
|
|
|
|
|
|
{ |
|
277
|
1
|
|
|
1
|
|
70
|
eval " |
|
|
1
|
|
|
1
|
|
1657
|
|
|
|
1
|
|
|
1
|
|
10438
|
|
|
|
1
|
|
|
|
|
83
|
|
|
|
1
|
|
|
|
|
311
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
278
|
|
|
|
|
|
|
use Data::Dumper qw(Dumper); |
|
279
|
|
|
|
|
|
|
use Data::ShowTable qw(ShowBoxTable); |
|
280
|
|
|
|
|
|
|
"; |
|
281
|
|
|
|
|
|
|
|
|
282
|
1
|
|
|
|
|
3
|
$HaveDumper = $Data::Dumper::VERSION; |
|
283
|
1
|
|
|
|
|
21
|
$HaveShowTable = exists($Data::ShowTable::{ShowBoxTable}); |
|
284
|
|
|
|
|
|
|
}; |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
## We optionally export a few general-purpose utility routines. |
|
288
|
|
|
|
|
|
|
|
|
289
|
1
|
|
|
1
|
|
4
|
use Exporter; use vars qw(@ISA @EXPORT_OK); @ISA=qw(Exporter); |
|
|
1
|
|
|
1
|
|
1
|
|
|
|
1
|
|
|
|
|
33
|
|
|
|
1
|
|
|
|
|
5
|
|
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
3376
|
|
|
290
|
|
|
|
|
|
|
@EXPORT_OK = qw( |
|
291
|
|
|
|
|
|
|
&ISORoman8859_1ToMacRoman |
|
292
|
|
|
|
|
|
|
&MacRomanToISORoman8859_1 |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
&ISORoman8859_1ToMacRoman_clean |
|
295
|
|
|
|
|
|
|
&MacRomanToISORoman8859_1_clean |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
&guess_endings |
|
298
|
|
|
|
|
|
|
&guess_delimiter |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
&path_info |
|
301
|
|
|
|
|
|
|
&path_is_absolute |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
&min |
|
304
|
|
|
|
|
|
|
&max |
|
305
|
|
|
|
|
|
|
); |
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=pod |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=head1 CREATING TABLE OBJECTS |
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
## Create an object / read file(s) / override params |
|
312
|
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
use Data::CTable; |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
$t = Data::CTable->new() |
|
316
|
|
|
|
|
|
|
$t = Data::CTable->new($File) |
|
317
|
|
|
|
|
|
|
$t = Data::CTable->new($File1, $File2....) |
|
318
|
|
|
|
|
|
|
$t = Data::CTable->new($Params) |
|
319
|
|
|
|
|
|
|
$t = Data::CTable->new($Params, $File1) |
|
320
|
|
|
|
|
|
|
$t = Data::CTable->new($Params, $File1, $File2....) |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
## Internal initializer (subclassable): called for you by new() |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
$t->initialize() |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
If the first argument to new() is a hash ref, it is the $Params hash |
|
327
|
|
|
|
|
|
|
of initial parameters and/or data columns which, if supplied will form |
|
328
|
|
|
|
|
|
|
the starting point for the object being created. Any non-hash and/or |
|
329
|
|
|
|
|
|
|
further arguments to new() are treated as file names to be opened. |
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
If supplied, data in the $Params hash will be shallowly copied -- the |
|
332
|
|
|
|
|
|
|
original hash object passed will not be used, but any sub-structures |
|
333
|
|
|
|
|
|
|
within it will now "belong" to the resulting new object which will |
|
334
|
|
|
|
|
|
|
feel free to manipulate them or discard them. |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
Then, any parameters not supplied (usually most of them) will be |
|
337
|
|
|
|
|
|
|
defaulted for you because new() will call the internal method |
|
338
|
|
|
|
|
|
|
initialize() before the object is finished and returned. |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
See the PARAMETER REFERENCE section below for the parameters you can |
|
341
|
|
|
|
|
|
|
choose to supply or have defaulted for you. |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
Any file name arguments will be read and appended into a single |
|
344
|
|
|
|
|
|
|
object. If any of the files fails to be read, then new() will fail |
|
345
|
|
|
|
|
|
|
(return a false value) and no object will be created. |
|
346
|
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
initialize() makes sure there is a legal and consistent value for |
|
348
|
|
|
|
|
|
|
every internal parameter in the object. Generally, initialize() |
|
349
|
|
|
|
|
|
|
leaves alone any parameters you supplied to new(), and simply sets |
|
350
|
|
|
|
|
|
|
default values for any that were not yet supplied. |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
You should never need to call initialize() directly. |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
After calling initialize(), new() then calls the append_files_new() |
|
355
|
|
|
|
|
|
|
method to process the filename arguments. This method then calls |
|
356
|
|
|
|
|
|
|
read() on the first filename, and then append_file on all subsequent |
|
357
|
|
|
|
|
|
|
file names, appending them to the first, in sequence. |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
See append() for an explanation of how the data from multiple files is |
|
360
|
|
|
|
|
|
|
combined into a single table. |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=head2 Advanced: Using a template object |
|
363
|
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
## Calling new() with a template object |
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
$v = $t->new() |
|
367
|
|
|
|
|
|
|
$v = $t->new($File) |
|
368
|
|
|
|
|
|
|
$v = $t->new($File1, $File2....) |
|
369
|
|
|
|
|
|
|
$v = $t->new($Params) |
|
370
|
|
|
|
|
|
|
$v = $t->new($Params, $File1) |
|
371
|
|
|
|
|
|
|
$v = $t->new($Params, $File1, $File2....) |
|
372
|
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
You can also call $t->new() to use an existing object, $t, as a |
|
374
|
|
|
|
|
|
|
template for the new object. $t->new() will create a new object of |
|
375
|
|
|
|
|
|
|
the same class or subclass as the template object. Furthermore, the |
|
376
|
|
|
|
|
|
|
template object, if provided, will be used as a starting point for the |
|
377
|
|
|
|
|
|
|
resulting object -- in fact, it will initially share shallow copies of |
|
378
|
|
|
|
|
|
|
all data columns, if any, and all internal parameters and data |
|
379
|
|
|
|
|
|
|
structures, if any. |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
This advanced shared-data technique could be used to create two |
|
382
|
|
|
|
|
|
|
separate table objects that share and operate on the same underlying |
|
383
|
|
|
|
|
|
|
data columns in memory but have different custom field lists, custom |
|
384
|
|
|
|
|
|
|
selections, sort behavior, etc. But don't do this unless you're sure |
|
385
|
|
|
|
|
|
|
you understand what you're doing, because changing data in one table |
|
386
|
|
|
|
|
|
|
would change it in the other. |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=head1 PARAMETER REFERENCE |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
The parameters listed here are recognized by new(), initialize(), and |
|
391
|
|
|
|
|
|
|
by many functions that use the named-parameter calling convention, |
|
392
|
|
|
|
|
|
|
such as read(), write(), sort(), and others. |
|
393
|
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
Any parameter listed may be specified when using new(). |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
Most parameters should not be directly accessed or manipulated once |
|
397
|
|
|
|
|
|
|
the object has been created, except those that have appropriate |
|
398
|
|
|
|
|
|
|
accessor methods described throughout in this documentation. |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
Each parameter in the lists below is listed along with its defaulting |
|
401
|
|
|
|
|
|
|
logic as performed by new() via initialize(). |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=head2 Custom field list and custom selection (record list) |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=over 4 |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=item _FieldList ||= undef; |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
This is the ordered list (array reference) of columns / fields present |
|
410
|
|
|
|
|
|
|
in the object. It is set by read() to reflect the names and order of |
|
411
|
|
|
|
|
|
|
the fields encountered or actually read from in the incoming data |
|
412
|
|
|
|
|
|
|
file, if any. Initially this list is undefined, and if removed or |
|
413
|
|
|
|
|
|
|
left undefined, then the de-facto field list will be a list of all |
|
414
|
|
|
|
|
|
|
columns present in the table object, in alphabetical order (see |
|
415
|
|
|
|
|
|
|
fieldlist() and fieldlist_all()). |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
Normally, all fields present in the table object would be listed in |
|
418
|
|
|
|
|
|
|
the field list. |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
However, this parameter may be set in the object (or overridden in |
|
421
|
|
|
|
|
|
|
named-parameter function calls like read(), write(), etc.) to cause a |
|
422
|
|
|
|
|
|
|
subset of fields to be read, written or otherwise used. If a subset |
|
423
|
|
|
|
|
|
|
field list is specified before reading a data file, then ONLY fields |
|
424
|
|
|
|
|
|
|
listed will be read -- this is a way to read just certain fields from |
|
425
|
|
|
|
|
|
|
a very large file, but may not always be what you want. |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Specifying the field list before calling read() is required if the |
|
428
|
|
|
|
|
|
|
data file has no header row giving names to its fields -- the names |
|
429
|
|
|
|
|
|
|
you specify in the field list, in order, will be applied to the data |
|
430
|
|
|
|
|
|
|
file being read. See the _HeaderRow parameter, below. |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
If a subset field list is used after columns are already loaded in |
|
433
|
|
|
|
|
|
|
memory, the columns not listed in the field list will still be present |
|
434
|
|
|
|
|
|
|
in the object (and can be listed by calling fieldlist_all()), but they |
|
435
|
|
|
|
|
|
|
will be omitted from most operations that iterate over fields. |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=item _Selection ||= undef; |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
This is a list of the record numbers of "selected" records in the |
|
440
|
|
|
|
|
|
|
table, possibly indicating sorted order. |
|
441
|
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
If absent, then all records are considered to be selected, in |
|
443
|
|
|
|
|
|
|
"natural" order -- i.e. the order they occur in the file. |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
You can create and set your own selection list or get and modify an |
|
446
|
|
|
|
|
|
|
existing one. Deleting it resets the selection (for example, by |
|
447
|
|
|
|
|
|
|
calling select_all()). |
|
448
|
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
Calling sort() will create a _Selection if none existed. Otherwise it |
|
450
|
|
|
|
|
|
|
operates by modifying the existing _Selection, which may be a subset |
|
451
|
|
|
|
|
|
|
of all record numbers. |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
=back |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=head2 Cache behavior controls |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
See sections related to Cacheing, below. |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=over 4 |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=item _CacheOnRead = 1 unless exists |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Boolean: whether data files read by the read() method should be cached |
|
464
|
|
|
|
|
|
|
after reading. Once cached, the data will be read from the cache |
|
465
|
|
|
|
|
|
|
instead of the original file the NEXT TIME READ() IS CALLED, but only |
|
466
|
|
|
|
|
|
|
if: 1) the cache file is found, and 2) its date is later than the |
|
467
|
|
|
|
|
|
|
original. Otherwise, the cache file is ignored or re-written. |
|
468
|
|
|
|
|
|
|
Cacheing can be up to 10x faster than parsing the file, so it's almost |
|
469
|
|
|
|
|
|
|
always worth doing in any situation where you'll be reading a data |
|
470
|
|
|
|
|
|
|
file more often than writing it. |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
This parameter defaults to true. |
|
473
|
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=item _CacheOnWrite = 0 unless exists |
|
475
|
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
Boolean: whether tables written by the write() method should be cached |
|
477
|
|
|
|
|
|
|
after writing. This defaults to false on the assumption that the |
|
478
|
|
|
|
|
|
|
program won't need to re-read a file it just wrote. However, this |
|
479
|
|
|
|
|
|
|
behavior would be useful if a later step in your program or another |
|
480
|
|
|
|
|
|
|
program will be reading the file that was written and would benefit |
|
481
|
|
|
|
|
|
|
from having the cacheing already done. Cacheing a file after writing |
|
482
|
|
|
|
|
|
|
is quite fast since the data is already in memory and of course it |
|
483
|
|
|
|
|
|
|
speeds up subsequent read() operations by up to 10x. |
|
484
|
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=item _CacheExtension = ".cache" unless exists |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
This is the file name extension that is added to a file's name to |
|
488
|
|
|
|
|
|
|
determine the name of its corresponding cache file. (First, any |
|
489
|
|
|
|
|
|
|
existing extension, if any, is removed.) If this extension is empty, |
|
490
|
|
|
|
|
|
|
then the cache file will be named the same as the original assuming |
|
491
|
|
|
|
|
|
|
the cache file is being stored in a different directory. (See next |
|
492
|
|
|
|
|
|
|
setting.) |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=item _CacheSubDir = "cache" unless exists |
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
This is the absolute or relative path to the subdirectory that should |
|
497
|
|
|
|
|
|
|
be used to store the cache files. The default value is the relative |
|
498
|
|
|
|
|
|
|
path to a directory called "cache". Relative paths will be appended |
|
499
|
|
|
|
|
|
|
to the directory path containing the original file being read. |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
Absolute cache paths (such as /tmp or c:\temp\) can also be used. |
|
502
|
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
Override _CacheExtension and _CacheSubdir in a subclass, in each |
|
504
|
|
|
|
|
|
|
object, or in each call to read() or write() in order to have the |
|
505
|
|
|
|
|
|
|
cache files stored elsewhere. But remember: unless you use the same |
|
506
|
|
|
|
|
|
|
cache settings next time you read the same file, the cache files will |
|
507
|
|
|
|
|
|
|
be orphaned. |
|
508
|
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
=back |
|
510
|
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
=head2 Progress routine / setting |
|
512
|
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
=over 4 |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
=item _Progress = undef unless exists |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
The _Progress setting controls the routing of diagnostic messages. |
|
518
|
|
|
|
|
|
|
Four possible settings are recognized: |
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
undef (default) The class's progress settings are used. |
|
521
|
|
|
|
|
|
|
subroutine reference Your own custom progress routine. |
|
522
|
|
|
|
|
|
|
true Built-in progress_default() method used. |
|
523
|
|
|
|
|
|
|
0/false No progress messages for this object. |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
See the PROGRESS section for a description of the interface of custom |
|
526
|
|
|
|
|
|
|
progress routines and for details on how the builtin one works. |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=back |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=head2 File format settings |
|
531
|
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=over 4 |
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=item _LineEnding ||= undef; |
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
_LineEnding indicates the line ending string or setting to be used to |
|
537
|
|
|
|
|
|
|
read a file, the setting that actually I used to read a file, |
|
538
|
|
|
|
|
|
|
and/or the line ending that will be used to write a file. |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
Set this parameter to force a particular encoding to be used. |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
Otherwise, leave it undef. The program will Do What You Mean. |
|
543
|
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
If _LineEnding is undef when read() is called, read() will try to |
|
545
|
|
|
|
|
|
|
guess the line ending type by inspecting the first file it reads. |
|
546
|
|
|
|
|
|
|
Then it will set this setting for you. It can detect DOS, Unix, and |
|
547
|
|
|
|
|
|
|
Mac line endings. |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
If _LineEnding is undef when write() is called, write() will use |
|
550
|
|
|
|
|
|
|
C<"\n">, which yields different strings depending on the current |
|
551
|
|
|
|
|
|
|
runtime platform: \x0A on Unix; \x0D in MacPerl, \x0D\x0A on DOS. |
|
552
|
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
Otherwise, write() uses the value defined in _LineEnding, which would |
|
554
|
|
|
|
|
|
|
match the value filled in by read() if this object's data originally |
|
555
|
|
|
|
|
|
|
had been read from a file. So if you read a file and then later write |
|
556
|
|
|
|
|
|
|
it out, the line endings in the written file will match the format of |
|
557
|
|
|
|
|
|
|
original unless you override _LineEnding specifically. |
|
558
|
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
Since Data::CTable supports reading and writing all common endings, |
|
560
|
|
|
|
|
|
|
base your decision on line ending format during write() on the needs |
|
561
|
|
|
|
|
|
|
of other programs you might be using. |
|
562
|
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
For example: FileMaker Pro and Excel crash / hang if Unix line endings |
|
564
|
|
|
|
|
|
|
are used, so be sure to use the ending format that matches the needs |
|
565
|
|
|
|
|
|
|
of the other programs you plan to use. |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
As a convenience, you may specify and retrieve the _LineEnding setting |
|
568
|
|
|
|
|
|
|
using the mnemonic symbols "mac", "dos" and "unix." These special |
|
569
|
|
|
|
|
|
|
values are converted to the string values shown in this chart: |
|
570
|
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
symbol string value chars decimal octal control |
|
572
|
|
|
|
|
|
|
------------------------------------------------------------- |
|
573
|
|
|
|
|
|
|
dos "\x0D\x0A" CR/LF 13,10 "\015\012" ^M^J |
|
574
|
|
|
|
|
|
|
mac "\x0D" CR 13 "\015" ^M |
|
575
|
|
|
|
|
|
|
unix "\x0A" LF 10 "\012" ^J |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
See the section LINE ENDINGS, below, for accessor methods and |
|
578
|
|
|
|
|
|
|
conversion utilities that help you get/set this parameter in either |
|
579
|
|
|
|
|
|
|
symbolic format or string format as you prefer. |
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
=item _FDelimiter ||= undef; |
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
_FDelimiter is the field delimiter between field names in the header |
|
584
|
|
|
|
|
|
|
row (if any) and also between fields in the body of the file. If |
|
585
|
|
|
|
|
|
|
undef, read() will try to guess whether it is tab C<"\t"> or comma |
|
586
|
|
|
|
|
|
|
<",">, and set this parameter accordingly. If there is only one field |
|
587
|
|
|
|
|
|
|
in the file, then comma is assumed by read() and will be used by |
|
588
|
|
|
|
|
|
|
write(). |
|
589
|
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
To guess the delimiter, the program looks for the first comma or tab |
|
591
|
|
|
|
|
|
|
character in the header row (if present) or in the first record. |
|
592
|
|
|
|
|
|
|
Whichever character is found first is assumed to be the delimiter. |
|
593
|
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
If you don't want the program to guess, or you have a data file format |
|
595
|
|
|
|
|
|
|
that uses a custom delimiter, specify the delimiter explicitly in the |
|
596
|
|
|
|
|
|
|
object or when calling read() or make a subclass that initializes this |
|
597
|
|
|
|
|
|
|
value differently. On write(), this will default to comma if it is |
|
598
|
|
|
|
|
|
|
empty or undef. |
|
599
|
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
=item _QuoteFields = undef unless exists |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
_QuoteFields controls how field values are quoted by write() when |
|
603
|
|
|
|
|
|
|
writing the table to a delimited text file. |
|
604
|
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
An undef value (the default) means "auto" -- each field is checked |
|
606
|
|
|
|
|
|
|
individually and if it contains either the _FDelimiter character or a |
|
607
|
|
|
|
|
|
|
double-quote character, the field value will be surrounded by |
|
608
|
|
|
|
|
|
|
double-quotes as it is written to the file. This method is slower to |
|
609
|
|
|
|
|
|
|
write but faster to read, and may make the output easier for humans to |
|
610
|
|
|
|
|
|
|
read. |
|
611
|
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
A true value means always put double-quotes around every field value. |
|
613
|
|
|
|
|
|
|
This mode is faster to write but slower to read. |
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
A zero value means never to use double-quotes around field values and |
|
616
|
|
|
|
|
|
|
not to check for the need to use them. This method is the fastest to |
|
617
|
|
|
|
|
|
|
read and write. You may use it when you are certain that your data |
|
618
|
|
|
|
|
|
|
can't contain any special characters. However, if you're wrong, this |
|
619
|
|
|
|
|
|
|
mode will produce a corrupted file in the event that one of the fields |
|
620
|
|
|
|
|
|
|
does contain the active delimiter (such as comma or tab) or a quote. |
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
=item _HeaderRow = 1 unless exists |
|
623
|
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
_HeaderRow is a boolean that says whether to expect a header row in |
|
625
|
|
|
|
|
|
|
data files. The default is true: a header row is required. If false, |
|
626
|
|
|
|
|
|
|
_FieldList MUST be present before calling read() or an error will be |
|
627
|
|
|
|
|
|
|
generated. In this latter case, _FieldList will be assumed to give |
|
628
|
|
|
|
|
|
|
the correct names of the fields in the file, in order, before the file |
|
629
|
|
|
|
|
|
|
is read. In other words, the object expects that either a) it can get |
|
630
|
|
|
|
|
|
|
the field names from the file's header row or b) you will supply them |
|
631
|
|
|
|
|
|
|
before read() opens the file. |
|
632
|
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
=back |
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
=head2 Encoding of return characters within fields |
|
636
|
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
=over 4 |
|
638
|
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
=item _ReturnMap = 1 unless exists |
|
640
|
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
_ReturnMap says that returns embedded in fields should be decoded on |
|
642
|
|
|
|
|
|
|
read() and encoded again on write(). The industry-standard encoding |
|
643
|
|
|
|
|
|
|
for embedded returns is ^K (ascii 11 -- but see next setting to change |
|
644
|
|
|
|
|
|
|
it). This defaults to true but can be turned off if you want data |
|
645
|
|
|
|
|
|
|
untouched by read(). This setting has no effect on data files where |
|
646
|
|
|
|
|
|
|
no fields contain embedded returns. However, it is vital to leave |
|
647
|
|
|
|
|
|
|
this option ON when writing any data file whose fields could contain |
|
648
|
|
|
|
|
|
|
embedded returns -- if you have such data and call write() with |
|
649
|
|
|
|
|
|
|
_ReturnMap turned off, the resulting file will be an invalid Merge/CSV |
|
650
|
|
|
|
|
|
|
file and might not be re-readable. |
|
651
|
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
When these fields are decoded on read(), encoded returns are converted |
|
653
|
|
|
|
|
|
|
to C<"\n"> in memory, whatever its interpretation may be on the current |
|
654
|
|
|
|
|
|
|
platform (\x0A on Unix or DOS; \x0D on MacPerl). |
|
655
|
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
IMPORTANT NOTE: When these fields are encoded by write(), any |
|
657
|
|
|
|
|
|
|
occurrence of the current _LineEnding being used to write the file is |
|
658
|
|
|
|
|
|
|
searched and encoded FIRST, and THEN, any occurrence of "\n" is also |
|
659
|
|
|
|
|
|
|
searched and encoded. For example, if using mac line endings (^M) to |
|
660
|
|
|
|
|
|
|
write a file on a Unix machine, any ^M characters in fields will be |
|
661
|
|
|
|
|
|
|
encoded, and then any "\n" (^J) characters will ALSO be encoded. This |
|
662
|
|
|
|
|
|
|
may not be what you want, so be sure you know how your data is encoded |
|
663
|
|
|
|
|
|
|
in cases where your field values might contain any ^J and/or ^M |
|
664
|
|
|
|
|
|
|
characters. |
|
665
|
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
IMPORTANT NOTE: If you turn _ReturnMap off, fields with returns in |
|
667
|
|
|
|
|
|
|
them will still be double-quoted correctly. Some parsers of tab- or |
|
668
|
|
|
|
|
|
|
comma-delimited files are able to support reading such files. |
|
669
|
|
|
|
|
|
|
HOWEVER, the parser in this module's read() method DOES NOT currently |
|
670
|
|
|
|
|
|
|
support reading files in which a single field value appears to span |
|
671
|
|
|
|
|
|
|
multiple lines in the file. If you have a need to read such a file, |
|
672
|
|
|
|
|
|
|
you may need to write your own parser as a subclass of this module. |
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
=item _ReturnEncoding ||= "\x0B"; |
|
675
|
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
This is the default encoding to assume when embedding return |
|
677
|
|
|
|
|
|
|
characters within fields. The industry standard is "\x0B" (ascii 11 / |
|
678
|
|
|
|
|
|
|
octal \013 / ^K) so you should probably not ever change this setting. |
|
679
|
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
When fields are encoded on write(), C<"\n"> is converted to this |
|
681
|
|
|
|
|
|
|
value. Note that different platforms use different ascii values for |
|
682
|
|
|
|
|
|
|
C<"\n">, which is another good reason to leave the ReturnEncoding |
|
683
|
|
|
|
|
|
|
feature enabled when calling write(). |
|
684
|
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
To summarize: this module likes to assume, and you should too, that |
|
686
|
|
|
|
|
|
|
returns in data files on disk are encoded as "\x0B", but once loaded |
|
687
|
|
|
|
|
|
|
into memory, they are encoded as the current platform's value of |
|
688
|
|
|
|
|
|
|
C<"\n">. |
|
689
|
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
=item _MacRomanMap = undef unless exists |
|
691
|
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
Data::CTable assumes by default that you want field data in memory to |
|
693
|
|
|
|
|
|
|
be in the ISO 8859-1 character set (the standard for Latin 1 Roman |
|
694
|
|
|
|
|
|
|
characters on Unix and Windows in the English and Western European |
|
695
|
|
|
|
|
|
|
languages -- and also the default encoding for HTML Web pages). |
|
696
|
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
_MacRomanMap controls the module's optional mapping of Roman |
|
698
|
|
|
|
|
|
|
characters from Mac format on disk to ISO format in memory when |
|
699
|
|
|
|
|
|
|
reading and writing data files. These settings are recognized: |
|
700
|
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
undef ## Auto: Read/write Mac chars if using Mac line endings |
|
702
|
|
|
|
|
|
|
1 ## On: Assume Mac char set in all fields |
|
703
|
|
|
|
|
|
|
0 ## Off: Don't do any character mapping at all |
|
704
|
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
The default setting is undef, which enables "Auto" mode: files found |
|
706
|
|
|
|
|
|
|
to contain Mac line endings will be assumed to contain Mac upper-ASCII |
|
707
|
|
|
|
|
|
|
characters and will be mapped to ISO on read(); and files to be |
|
708
|
|
|
|
|
|
|
written with Mac line endings will mapped back from ISO to Mac format |
|
709
|
|
|
|
|
|
|
on write(). |
|
710
|
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
If your data uses any non-Latin-1 character sets, or binary data, or |
|
712
|
|
|
|
|
|
|
you really want Mac upper-ASCII characters in memory, or you just |
|
713
|
|
|
|
|
|
|
don't want this module messing with your encodings, set this option to |
|
714
|
|
|
|
|
|
|
0 (Off) or make a subclass that always sets it to 0. |
|
715
|
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
See also the clean() methods that can help you translate just the |
|
717
|
|
|
|
|
|
|
columns you want after reading a file or before writing it, which may |
|
718
|
|
|
|
|
|
|
be faster for you if only a few fields might contain high-ASCII |
|
719
|
|
|
|
|
|
|
characters. |
|
720
|
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
=item _FileName ||= undef; |
|
722
|
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
This is the name of the file that should be read from or WAS read |
|
724
|
|
|
|
|
|
|
from. (read() will set _FileName to the value it used to read the |
|
725
|
|
|
|
|
|
|
file, even if _FileName was only supplied as a named parameter.) |
|
726
|
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
This name will also be used, unless overridden, to re-write the file |
|
728
|
|
|
|
|
|
|
again, but with an optional extension added. (See next setting.) |
|
729
|
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
=item _WriteExtension = ".out" unless exists |
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
The _WriteExtension is provided so that CTable won't overwrite your |
|
733
|
|
|
|
|
|
|
input data file unless you tell it to. |
|
734
|
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
_WriteExtension will be added to the object's _FileName setting to |
|
736
|
|
|
|
|
|
|
create a new, related file name, before writing.... UNLESS _FileName |
|
737
|
|
|
|
|
|
|
is supplied as an direct or named parameter when calling write(). |
|
738
|
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
In the latter case, write() uses the file name you supply and adds no |
|
740
|
|
|
|
|
|
|
extension, even if this would mean overwriting the original data file. |
|
741
|
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
To add _WriteExtension, write() places it prior to any existing final |
|
743
|
|
|
|
|
|
|
extension in the _FileName: |
|
744
|
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
_FileName default file name used by write() |
|
746
|
|
|
|
|
|
|
-------------------------------------------------------------- |
|
747
|
|
|
|
|
|
|
People.merge.txt People.merge.out.txt |
|
748
|
|
|
|
|
|
|
People People.out |
|
749
|
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
If you want to always overwrite the original file without having to |
|
751
|
|
|
|
|
|
|
supply _FileName each time, simply set _WriteExtension to undef in a |
|
752
|
|
|
|
|
|
|
subclass or in each instance. |
|
753
|
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
If _CacheOnWrite is true, then the _WriteExtension logic is applied |
|
755
|
|
|
|
|
|
|
first to arrive at the actual name of the file to be written, and then |
|
756
|
|
|
|
|
|
|
the _CacheExtension logic is applied to that name to arrive at the |
|
757
|
|
|
|
|
|
|
name of the cache file to be written. |
|
758
|
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
=back |
|
760
|
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
=head2 Sorting-related parameters |
|
762
|
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
=over 4 |
|
764
|
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
=item _SortOrder ||= undef; |
|
766
|
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
_SortOrder is the list of fields which should be used as primary, |
|
768
|
|
|
|
|
|
|
secondary, etc. sort keys when sort() is called. Like other |
|
769
|
|
|
|
|
|
|
parameters, it may be initialized by a subclass, stored in the object, |
|
770
|
|
|
|
|
|
|
or provided as a named parameter on each call to sort(). |
|
771
|
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
If _SortOrder is empty or undefined, then sort() sorts the records by |
|
773
|
|
|
|
|
|
|
record number (i.e. they are returned to their "natural" order). |
|
774
|
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
=item _SortSpecs ||= {}; |
|
776
|
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
_SortSpecs is a hash of specifications for the SortType and |
|
778
|
|
|
|
|
|
|
SortDirection of fields on which sorting may be done. For any field |
|
779
|
|
|
|
|
|
|
missing a sort spec or the SortType or SortDirection components of its |
|
780
|
|
|
|
|
|
|
sort spec, the _DefaultSortType and _DefaultSortDirection settings |
|
781
|
|
|
|
|
|
|
will be used. So, for example, if all fields are of type String and |
|
782
|
|
|
|
|
|
|
you want them to sort Ascending, then you don't need to worry about |
|
783
|
|
|
|
|
|
|
_SortSpecs. You only need to provide specs for fields that don't take |
|
784
|
|
|
|
|
|
|
the default settings. |
|
785
|
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
_SortSpecs might look like this: |
|
787
|
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
{Age => {SortType => 'Integer'}, |
|
789
|
|
|
|
|
|
|
NameKey => {SortType => 'Text', SortDirection => -1}} |
|
790
|
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
=item _SRoutines ||= {}; |
|
792
|
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
_SRoutines is a hash mapping any new SortTypes invented by you to your |
|
794
|
|
|
|
|
|
|
custom subroutines for sorting that type of data. (See the section on |
|
795
|
|
|
|
|
|
|
sort routines, below, for a full discussion.) |
|
796
|
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
=back |
|
798
|
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
=head2 Sorting defaults |
|
800
|
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
=over 4 |
|
802
|
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
=item _DefaultSortType ||= 'String'; |
|
804
|
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
If you sort using a field with no sort spec supplied, or whose sort |
|
806
|
|
|
|
|
|
|
spec omits the SortType, it will get its SortType from this parameter. |
|
807
|
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
See the sections below on SORT TYPES and SORT ROUTINES. |
|
809
|
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
=item _DefaultSortDirection ||= 1; |
|
811
|
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
If you sort using a field with no sort spec supplied, or whose sort |
|
813
|
|
|
|
|
|
|
spec omits the SortDirection, it will get its SortDirection from this |
|
814
|
|
|
|
|
|
|
parameter. |
|
815
|
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
Legal sort directions are: 1 (Ascending) or -1 (Descending). |
|
817
|
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
See the section below on DEFAULT SORT DIRECTION. |
|
819
|
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
=back |
|
821
|
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
=head2 Miscellaneous parameters |
|
823
|
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
=over 4 |
|
825
|
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
=item _ErrorMsg ||= ""; |
|
827
|
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
This parameter is set by read() or write() methods that encounter an |
|
829
|
|
|
|
|
|
|
error (usually a parameter error or file-system error) that prevents |
|
830
|
|
|
|
|
|
|
them from completing. If those methods or any methods that call them |
|
831
|
|
|
|
|
|
|
return a false value indicating failure, then _ErrorMsg will contain a |
|
832
|
|
|
|
|
|
|
string explaining the problem. The message will also have been passed |
|
833
|
|
|
|
|
|
|
to the progress() method for possible console feedback. |
|
834
|
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
=item _Subset |
|
836
|
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
This parameter is set to 1 (true) by read() if the last call to read() |
|
838
|
|
|
|
|
|
|
brought in a subset of the fields available in the file; 0 otherwise. |
|
839
|
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
The object uses this field internally so it knows to abandon any cache |
|
841
|
|
|
|
|
|
|
files that might not contain all requested fields upon read(). |
|
842
|
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
=back |
|
844
|
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
=head1 SUBCLASSING |
|
846
|
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
Most subclasses will override initialize() to set default values for |
|
848
|
|
|
|
|
|
|
the parameters of the parent class and then they may provide default |
|
849
|
|
|
|
|
|
|
values for other subclass-specific parameters. Then, the subclass's |
|
850
|
|
|
|
|
|
|
initialize() should call SUPER::initialize() to let the parent |
|
851
|
|
|
|
|
|
|
class(es) take care of the remaining ones. |
|
852
|
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
Every initialize() method should always allow for parameters to have |
|
854
|
|
|
|
|
|
|
already been provided by the $Params hash or template object. It |
|
855
|
|
|
|
|
|
|
should not overwrite any valid values that already exist. |
|
856
|
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
The following sample subclass changes the default setting of the |
|
858
|
|
|
|
|
|
|
_Progress parameter from undef to 1 and then overrides the |
|
859
|
|
|
|
|
|
|
progress_default() method to log all progress messages into a new |
|
860
|
|
|
|
|
|
|
"_ProgrLog" (progress log) parameter stored in the object. |
|
861
|
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
BEGIN |
|
863
|
|
|
|
|
|
|
{ ## Data::CTable::ProgressLogger: store messages in the object |
|
864
|
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
package Data::CTable::ProgressLogger; |
|
866
|
|
|
|
|
|
|
use vars qw(@ISA); @ISA=qw(Data::CTable); |
|
867
|
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
sub initialize ## Add a new param; change one default |
|
869
|
|
|
|
|
|
|
{ |
|
870
|
|
|
|
|
|
|
my $this = shift; |
|
871
|
|
|
|
|
|
|
$this->{_Progress} = 1 unless exists($this->{_Progress}); |
|
872
|
|
|
|
|
|
|
$this->{_ProgrLog} ||= []; |
|
873
|
|
|
|
|
|
|
$this->SUPER::initialize(); |
|
874
|
|
|
|
|
|
|
} |
|
875
|
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
sub progress_default ## Log message to object's ProgMsgs list |
|
877
|
|
|
|
|
|
|
{ |
|
878
|
|
|
|
|
|
|
my $this = shift; |
|
879
|
|
|
|
|
|
|
my ($msg) = @_; |
|
880
|
|
|
|
|
|
|
chomp $msg; |
|
881
|
|
|
|
|
|
|
push @{$this->{_ProgrLog}}, localtime() . " $msg"; |
|
882
|
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
return(1); |
|
884
|
|
|
|
|
|
|
} |
|
885
|
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
sub show_log ## Use Dumper to spit out the log list |
|
887
|
|
|
|
|
|
|
{ |
|
888
|
|
|
|
|
|
|
my $this = shift; |
|
889
|
|
|
|
|
|
|
$this->dump($this->{_ProgrLog}); |
|
890
|
|
|
|
|
|
|
} |
|
891
|
|
|
|
|
|
|
} |
|
892
|
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
## Later... |
|
894
|
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
my $Table = Data::CTable::ProgressLogger->new("mydata.txt"); |
|
896
|
|
|
|
|
|
|
# ... do stuff... |
|
897
|
|
|
|
|
|
|
$Table->write(); |
|
898
|
|
|
|
|
|
|
$Table->show_log(); |
|
899
|
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
=cut |
|
901
|
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
|
903
|
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
sub new |
|
905
|
|
|
|
|
|
|
{ |
|
906
|
|
|
|
|
|
|
## First arg to new is always either class name or a template |
|
907
|
|
|
|
|
|
|
## object. This allows $obj->new() or CLASS->new(). |
|
908
|
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
## Second argument (if and only if it is a hash ref or an object |
|
910
|
|
|
|
|
|
|
## whose underlying representation is a hash ref) is an optional |
|
911
|
|
|
|
|
|
|
## anonymous hash of parameters which if supplied, will override |
|
912
|
|
|
|
|
|
|
## any parameters already found in the template object, if any. |
|
913
|
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
## See the initialize method, below, for a list of parameters that |
|
915
|
|
|
|
|
|
|
## can be supplied (and will be defaulted for you if not |
|
916
|
|
|
|
|
|
|
## supplied). |
|
917
|
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
## Note that the template object and the params hash will be |
|
919
|
|
|
|
|
|
|
## SHALLOWLY copied -- the original hash objects passed will not |
|
920
|
|
|
|
|
|
|
## be used, but any sub-structures within them will now "belong" |
|
921
|
|
|
|
|
|
|
## to the resulting new object which will feel free to manipulate |
|
922
|
|
|
|
|
|
|
## them, possibly invalidating the integrity of the original |
|
923
|
|
|
|
|
|
|
## template object. |
|
924
|
|
|
|
|
|
|
|
|
925
|
109
|
|
|
109
|
0
|
3853
|
my $ClassOrObj = shift; |
|
926
|
109
|
100
|
|
|
|
531
|
my ($Params) = {%{shift()}} if UNIVERSAL::isa($_[0], 'HASH'); |
|
|
82
|
|
|
|
|
855
|
|
|
927
|
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
## Shallow-copy all params from template object and/or optional |
|
929
|
|
|
|
|
|
|
## $Params hash into new hash. DON'T re-use caller's obj or hash. |
|
930
|
|
|
|
|
|
|
|
|
931
|
109
|
50
|
|
|
|
673
|
my $this = |
|
932
|
109
|
100
|
|
|
|
862
|
{%{(UNIVERSAL::isa($ClassOrObj, 'HASH') ? $ClassOrObj : {})}, |
|
933
|
109
|
|
|
|
|
212
|
%{(UNIVERSAL::isa($Params, 'HASH') ? $Params : {})}}; |
|
934
|
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
## Bless the new object into the class |
|
936
|
|
|
|
|
|
|
|
|
937
|
109
|
|
33
|
|
|
545
|
my $class = ref($ClassOrObj) || $ClassOrObj; |
|
938
|
109
|
|
|
|
|
273
|
bless $this, $class; |
|
939
|
|
|
|
|
|
|
|
|
940
|
109
|
|
|
|
|
133
|
my $Success; |
|
941
|
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
## Run the subclassable initialize() method to create default |
|
943
|
|
|
|
|
|
|
## settings for any private parameters. |
|
944
|
|
|
|
|
|
|
|
|
945
|
109
|
50
|
|
|
|
268
|
goto done unless $this->initialize(); |
|
946
|
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
## Finally, process any (other) arguments to new(), if any. |
|
948
|
|
|
|
|
|
|
|
|
949
|
109
|
|
|
|
|
211
|
my $RemainingArgs = [@_]; |
|
950
|
|
|
|
|
|
|
|
|
951
|
109
|
50
|
|
|
|
305
|
goto done unless $this->process_new_args($RemainingArgs, $Params); |
|
952
|
|
|
|
|
|
|
|
|
953
|
109
|
|
|
|
|
125
|
$Success = 1; |
|
954
|
109
|
50
|
|
|
|
556
|
done: |
|
955
|
|
|
|
|
|
|
return ($Success ? $this : undef); |
|
956
|
|
|
|
|
|
|
} |
|
957
|
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
### process_new_args |
|
959
|
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
### Any optional remaining (non-HASH ref) arguments to new() are |
|
961
|
|
|
|
|
|
|
### treated as file names of files to open and append to the in-memory |
|
962
|
|
|
|
|
|
|
### table, creating new columns as necessary. We call the |
|
963
|
|
|
|
|
|
|
### subclassable append_files_new() method to process these. |
|
964
|
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
sub process_new_args |
|
966
|
|
|
|
|
|
|
{ |
|
967
|
109
|
|
|
109
|
0
|
155
|
my $this = shift; |
|
968
|
109
|
|
|
|
|
148
|
my ($RemainingArgs, $Params) = @_; |
|
969
|
|
|
|
|
|
|
|
|
970
|
109
|
|
|
|
|
169
|
my $Success; |
|
971
|
|
|
|
|
|
|
|
|
972
|
109
|
50
|
|
|
|
301
|
$Success = $this->append_files_new($RemainingArgs, $Params) or goto done; |
|
973
|
|
|
|
|
|
|
|
|
974
|
109
|
|
|
|
|
132
|
$Success = 1; |
|
975
|
109
|
|
|
|
|
263
|
done: |
|
976
|
|
|
|
|
|
|
return ($Success); |
|
977
|
|
|
|
|
|
|
} |
|
978
|
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
### initialize |
|
980
|
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
### Assumptions made by initialize() (and all other methods, too): |
|
982
|
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
### The blessed object is a hash ref. |
|
984
|
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
### All hash keys beginning with _ are reserved for non-data columns. |
|
986
|
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
### Hash keys beginning with a single _ are reserved for future |
|
988
|
|
|
|
|
|
|
### versions of this parent class implementation. Subclasses might |
|
989
|
|
|
|
|
|
|
### want to use double-underscore for additional slots. |
|
990
|
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
### All other hash keys are field names; their values are data |
|
992
|
|
|
|
|
|
|
### columns (array references). |
|
993
|
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
### initialize() sets / validates initial settings for all parameters |
|
995
|
|
|
|
|
|
|
### recognized by this parent class. It exercises caution to not |
|
996
|
|
|
|
|
|
|
### override any legal values previously set by the |
|
997
|
|
|
|
|
|
|
### subclass::initialize() or by new(). |
|
998
|
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
sub initialize |
|
1000
|
|
|
|
|
|
|
{ |
|
1001
|
109
|
50
|
|
109
|
0
|
306
|
my $this = shift or goto done; |
|
1002
|
|
|
|
|
|
|
|
|
1003
|
109
|
|
|
|
|
121
|
my $Success; |
|
1004
|
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
## Reading / writing |
|
1006
|
|
|
|
|
|
|
|
|
1007
|
109
|
|
100
|
|
|
361
|
$this->{_FileName} ||= undef; ## Path of file that was read |
|
1008
|
|
|
|
|
|
|
|
|
1009
|
109
|
100
|
|
|
|
329
|
$this->{_WriteExtension} = ".out" unless exists($this->{_WriteExtension}); |
|
1010
|
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
## Cache settings |
|
1012
|
|
|
|
|
|
|
|
|
1013
|
109
|
100
|
|
|
|
297
|
$this->{_CacheOnRead} = 1 unless exists($this->{_CacheOnRead}); |
|
1014
|
109
|
100
|
|
|
|
298
|
$this->{_CacheOnWrite} = 0 unless exists($this->{_CacheOnWrite}); |
|
1015
|
109
|
100
|
|
|
|
288
|
$this->{_CacheExtension} = ".cache" unless exists($this->{_CacheExtension}); |
|
1016
|
109
|
100
|
|
|
|
323
|
$this->{_CacheSubDir} = "cache" unless exists($this->{_CacheSubDir}); |
|
1017
|
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
## File format settings |
|
1019
|
|
|
|
|
|
|
|
|
1020
|
109
|
|
100
|
|
|
374
|
$this->{_LineEnding} ||= undef; |
|
1021
|
109
|
|
100
|
|
|
361
|
$this->{_FDelimiter} ||= undef; |
|
1022
|
109
|
100
|
|
|
|
284
|
$this->{_QuoteFields} = undef unless exists ($this->{_QuoteFields}); |
|
1023
|
109
|
100
|
|
|
|
281
|
$this->{_HeaderRow} = 1 unless exists ($this->{_HeaderRow}); |
|
1024
|
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
## Return encodings |
|
1026
|
|
|
|
|
|
|
|
|
1027
|
109
|
100
|
|
|
|
261
|
$this->{_ReturnMap} = 1 unless exists ($this->{_ReturnMap}); |
|
1028
|
109
|
|
100
|
|
|
356
|
$this->{_ReturnEncoding} ||= "\x0B"; ## Char to use for return chars |
|
1029
|
109
|
100
|
|
|
|
288
|
$this->{_MacRomanMap} = undef unless exists ($this->{_MacRomanMap}); |
|
1030
|
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
## Sorting defaults |
|
1032
|
|
|
|
|
|
|
|
|
1033
|
109
|
|
100
|
|
|
334
|
$this->{_DefaultSortType} ||= 'String'; |
|
1034
|
109
|
|
100
|
|
|
330
|
$this->{_DefaultSortDirection} ||= 1; ## Ascending (-1 = desc) |
|
1035
|
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
## Progress routine / setting |
|
1037
|
|
|
|
|
|
|
|
|
1038
|
109
|
100
|
|
|
|
373
|
$this->{_Progress} = undef unless exists ($this->{_Progress}); |
|
1039
|
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
## Internal meta-structures |
|
1041
|
|
|
|
|
|
|
|
|
1042
|
109
|
|
100
|
|
|
346
|
$this->{_FieldList} ||= undef; ## List of fields; undef means all fields, alpha order |
|
1043
|
109
|
|
100
|
|
|
487
|
$this->{_Selection} ||= undef; ## List of rec #s; undef means all records, natural order |
|
1044
|
109
|
|
50
|
|
|
422
|
$this->{_SortOrder} ||= undef; ## List of fields; undef/empty means sort by record number |
|
1045
|
|
|
|
|
|
|
|
|
1046
|
109
|
|
100
|
|
|
359
|
$this->{_SortSpecs} ||= {}; ## Hash: FieldName => Sortspec |
|
1047
|
109
|
|
100
|
|
|
348
|
$this->{_SRoutines} ||= {}; ## Hash: SortType => custom sort routine for type |
|
1048
|
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
## Miscellaneous |
|
1050
|
|
|
|
|
|
|
|
|
1051
|
109
|
|
50
|
|
|
371
|
$this->{_ErrorMsg} ||= ""; ## Explains last read/write failure |
|
1052
|
109
|
|
50
|
|
|
394
|
$this->{_Subset} ||= 0; ## Flag indicating subset of available fields were read |
|
1053
|
|
|
|
|
|
|
|
|
1054
|
109
|
|
|
|
|
141
|
$Success = 1; |
|
1055
|
109
|
|
|
|
|
291
|
done: |
|
1056
|
|
|
|
|
|
|
return($Success); |
|
1057
|
|
|
|
|
|
|
} |
|
1058
|
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
=pod |
|
1060
|
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
=head1 FIELD LIST |
|
1062
|
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
## Getting / setting the object's _FieldList |
|
1064
|
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
$t->fieldlist() ## Get _FieldList or fieldlist_all() |
|
1066
|
|
|
|
|
|
|
$t->fieldlist_get() |
|
1067
|
|
|
|
|
|
|
$t->fieldlist_hash() ## Get fieldlist() as keys in a hash |
|
1068
|
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
$t->fieldlist_all() ## Get all fields (ignore _FieldList) |
|
1070
|
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
$t->fieldlist($MyList) ## Set field list (_FieldList param) |
|
1072
|
|
|
|
|
|
|
$t->fieldlist_set($MyList) |
|
1073
|
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
$t->fieldlist(0) ## Remove field list (use default) |
|
1075
|
|
|
|
|
|
|
$t->fieldlist_set() |
|
1076
|
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
$t->fieldlist_force($MyList)## Set list; remove non-matching cols |
|
1078
|
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
$t->fieldlist_truncate() ## Just remove nonmatching cols |
|
1080
|
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
$t->fieldlist_default() ## Default field list (alpha-sorted) |
|
1082
|
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
$t->fieldlist_add($MyName) ## Append new name to custom list. |
|
1084
|
|
|
|
|
|
|
$t->fieldlist_delete($MyName) ## Delete name from custom list. |
|
1085
|
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
A CTable object can optionally have a custom field list. The custom |
|
1087
|
|
|
|
|
|
|
field list can store both the ORDER of the fields (which otherwise |
|
1088
|
|
|
|
|
|
|
would be unordered since they are stored as keys in a hash), and also |
|
1089
|
|
|
|
|
|
|
can be a subset of the fields actually in the object, allowing you to |
|
1090
|
|
|
|
|
|
|
temporarily ignore certain effectively-hidden fields for the benefit |
|
1091
|
|
|
|
|
|
|
of certain operations. The custom field list can be changed or |
|
1092
|
|
|
|
|
|
|
removed at any time. |
|
1093
|
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
The custom field list is stored in the private _FieldList parameter. |
|
1095
|
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
fieldlist() always returns a list (reference). The list is either the |
|
1097
|
|
|
|
|
|
|
same list as _FieldList, if present, or it is the result of calling |
|
1098
|
|
|
|
|
|
|
fieldlist_default(). In CTable, fieldlist_default() in turn calls |
|
1099
|
|
|
|
|
|
|
fieldlist_all() -- hence fieldlist() would yield an auto-generated |
|
1100
|
|
|
|
|
|
|
list of all fields in alphabetical order. |
|
1101
|
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
fieldlist_all() can be called directly to get a list of all fields |
|
1103
|
|
|
|
|
|
|
present regardless of the presence of a _FieldList parameter. The |
|
1104
|
|
|
|
|
|
|
list is an alphabetical case-insensitively sorted list of all hash |
|
1105
|
|
|
|
|
|
|
keys whose names do not begin with an underscore. |
|
1106
|
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
You could override this method if you want a different behavior. Or, |
|
1108
|
|
|
|
|
|
|
you could create your own custom field list by calling fieldlist_all() |
|
1109
|
|
|
|
|
|
|
and removing fields or ordering them differently. |
|
1110
|
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
To set a custom field list (in _FieldList), call fieldlist() or |
|
1112
|
|
|
|
|
|
|
fieldlist_set() with a list (reference). The list must be a list of |
|
1113
|
|
|
|
|
|
|
strings (field names) that do not begin with underscore. The object |
|
1114
|
|
|
|
|
|
|
owns the list you supply. |
|
1115
|
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
To remove a custom field list (and let the default be used), call |
|
1117
|
|
|
|
|
|
|
fieldlist(0) or fieldlist_set() with no arguments (these will return |
|
1118
|
|
|
|
|
|
|
the fieldlist that was deleted, if any). |
|
1119
|
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
fieldlist_freeze() "freezes" the fieldlist in its current state. This |
|
1121
|
|
|
|
|
|
|
is equivalent to the following: |
|
1122
|
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
$t->fieldlist_set($t->fieldlist()); |
|
1124
|
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
... which would force the fieldlist to $t->fieldlist_all() if and only |
|
1126
|
|
|
|
|
|
|
if there is not already a custom _FieldList present. |
|
1127
|
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
IMPORTANT NOTE ABOUT PARTIAL FIELD LISTS: When setting a field list, |
|
1129
|
|
|
|
|
|
|
the object ensures that all fields (columns) mentioned in the list are |
|
1130
|
|
|
|
|
|
|
present in the object -- it creates empty columns of the correct |
|
1131
|
|
|
|
|
|
|
length as necessary. However, it does NOT delete any fields not |
|
1132
|
|
|
|
|
|
|
mentioned in the field list. This allows you to manipulate the field |
|
1133
|
|
|
|
|
|
|
list in order to have certain fields be temporarily ignored by all |
|
1134
|
|
|
|
|
|
|
other methods, then alter, restore, or remove it (allow it to revert |
|
1135
|
|
|
|
|
|
|
to default) and they will be effectively unhidden again. Some methods |
|
1136
|
|
|
|
|
|
|
(such as cols(), write(), etc.) also allow you to specify a custom |
|
1137
|
|
|
|
|
|
|
field list that will override any other list just during the execution |
|
1138
|
|
|
|
|
|
|
of that method call but will not modify the object itself. |
|
1139
|
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
Call fieldlist_force() to set the list AND have any non-listed fields |
|
1141
|
|
|
|
|
|
|
also deleted at the same time (by calling fieldlist_truncate() |
|
1142
|
|
|
|
|
|
|
internally). You can also just delete individual columns one-by-one, |
|
1143
|
|
|
|
|
|
|
of course, using the column-manipulation methods and the custom |
|
1144
|
|
|
|
|
|
|
fieldlist, if any, will be appropriately updated for you. |
|
1145
|
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
fieldlist_truncate() deletes any fields found in the table but not |
|
1147
|
|
|
|
|
|
|
currently present in _FieldList. A hash of the deleted columns is |
|
1148
|
|
|
|
|
|
|
returned to the caller. If there is no _FieldList, then this method |
|
1149
|
|
|
|
|
|
|
does nothing. |
|
1150
|
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
fieldlist_default() just calls fieldlist_all() in this implementation, |
|
1152
|
|
|
|
|
|
|
but could be changed in subclasses. |
|
1153
|
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
fieldlist_add() is the internal method that adds a new field name to |
|
1155
|
|
|
|
|
|
|
the custom field list (if present) and if the field name was not |
|
1156
|
|
|
|
|
|
|
already on the list. It is called by other methods any time a new |
|
1157
|
|
|
|
|
|
|
column is added to the table. Don't call it directly unless you know |
|
1158
|
|
|
|
|
|
|
what you're doing because the corresponding column won't be created. |
|
1159
|
|
|
|
|
|
|
(Instead, use col().) The field name is appended to the end of the |
|
1160
|
|
|
|
|
|
|
existing custom field list. If there is no custom field list, nothing |
|
1161
|
|
|
|
|
|
|
is done. |
|
1162
|
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
fieldlist_delete() is the internal method that deletes a field name |
|
1164
|
|
|
|
|
|
|
from the custom field list (if present). It is called by other |
|
1165
|
|
|
|
|
|
|
methods when columns are deleted, but it does not actually delete the |
|
1166
|
|
|
|
|
|
|
columns themselves, so use with caution: deleting a field from the |
|
1167
|
|
|
|
|
|
|
custom field list effectively hides the field. This method has no |
|
1168
|
|
|
|
|
|
|
effect, however, if there is no custom field list present. So don't |
|
1169
|
|
|
|
|
|
|
call this method directly unless you know what you're doing. |
|
1170
|
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
=cut |
|
1172
|
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
sub fieldlist_all |
|
1174
|
|
|
|
|
|
|
{ |
|
1175
|
223
|
|
|
223
|
0
|
298
|
my $this = shift; |
|
1176
|
223
|
|
|
|
|
1020
|
my $FieldList = [sort {lc($a) cmp lc($b)} grep {!/^_/} keys %$this]; |
|
|
883
|
|
|
|
|
1558
|
|
|
|
5726
|
|
|
|
|
12123
|
|
|
1177
|
|
|
|
|
|
|
|
|
1178
|
223
|
|
|
|
|
899
|
return($FieldList); |
|
1179
|
|
|
|
|
|
|
} |
|
1180
|
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
sub fieldlist_default ## Same as fieldlist_all() in this class. |
|
1182
|
|
|
|
|
|
|
{ |
|
1183
|
40
|
|
|
40
|
0
|
51
|
my $this = shift; |
|
1184
|
40
|
|
|
|
|
73
|
my $FieldList = $this->fieldlist_all(); |
|
1185
|
|
|
|
|
|
|
|
|
1186
|
40
|
|
|
|
|
136
|
return($FieldList); |
|
1187
|
|
|
|
|
|
|
} |
|
1188
|
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
sub fieldlist |
|
1190
|
|
|
|
|
|
|
{ |
|
1191
|
805
|
|
|
805
|
0
|
2485
|
my $this = shift; |
|
1192
|
805
|
|
|
|
|
927
|
my ($FieldList) = @_; |
|
1193
|
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
## Set if specified. |
|
1195
|
805
|
100
|
|
|
|
1560
|
$this->fieldlist_set($FieldList) if defined($FieldList); |
|
1196
|
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
## Get and return. |
|
1198
|
805
|
|
|
|
|
1336
|
$FieldList = $this->fieldlist_get(); |
|
1199
|
|
|
|
|
|
|
|
|
1200
|
805
|
|
|
|
|
1857
|
return($FieldList); |
|
1201
|
|
|
|
|
|
|
} |
|
1202
|
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
sub fieldlist_get |
|
1204
|
|
|
|
|
|
|
{ |
|
1205
|
805
|
|
|
805
|
0
|
1291
|
my $this = shift; |
|
1206
|
805
|
|
66
|
|
|
1821
|
my $FieldList = $this->{_FieldList} || $this->fieldlist_default(); |
|
1207
|
|
|
|
|
|
|
|
|
1208
|
805
|
|
|
|
|
1250
|
return($FieldList); |
|
1209
|
|
|
|
|
|
|
} |
|
1210
|
|
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
|
sub fieldlist_hash ## ([$FieldList]) |
|
1212
|
|
|
|
|
|
|
{ |
|
1213
|
96
|
|
|
96
|
0
|
124
|
my $this = shift; |
|
1214
|
96
|
|
|
|
|
123
|
my ($FieldList) = @_; |
|
1215
|
96
|
|
66
|
|
|
280
|
$FieldList ||= $this->fieldlist(); |
|
1216
|
96
|
|
|
|
|
170
|
my $FieldHash = {}; @$FieldHash{@$FieldList} = undef; |
|
|
96
|
|
|
|
|
388
|
|
|
1217
|
|
|
|
|
|
|
|
|
1218
|
96
|
|
|
|
|
189
|
return($FieldHash); |
|
1219
|
|
|
|
|
|
|
} |
|
1220
|
|
|
|
|
|
|
|
|
1221
|
|
|
|
|
|
|
sub fieldlist_set |
|
1222
|
|
|
|
|
|
|
{ |
|
1223
|
36
|
|
|
36
|
0
|
62
|
my $this = shift; |
|
1224
|
36
|
|
|
|
|
50
|
my ($FieldList) = @_; |
|
1225
|
|
|
|
|
|
|
|
|
1226
|
36
|
|
|
|
|
118
|
return($this->fieldlist_set_internal($FieldList, 0)); |
|
1227
|
|
|
|
|
|
|
} |
|
1228
|
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
sub fieldlist_freeze |
|
1230
|
|
|
|
|
|
|
{ |
|
1231
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
|
1232
|
0
|
|
|
|
|
0
|
return($this->fieldlist_set($this->fieldlist())); |
|
1233
|
|
|
|
|
|
|
} |
|
1234
|
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
sub fieldlist_force |
|
1236
|
|
|
|
|
|
|
{ |
|
1237
|
3
|
|
|
3
|
0
|
7
|
my $this = shift; |
|
1238
|
3
|
|
|
|
|
5
|
my ($FieldList) = @_; |
|
1239
|
|
|
|
|
|
|
|
|
1240
|
3
|
|
|
|
|
9
|
return($this->fieldlist_set_internal($FieldList, 1)); |
|
1241
|
|
|
|
|
|
|
} |
|
1242
|
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
sub fieldlist_set_internal |
|
1244
|
|
|
|
|
|
|
{ |
|
1245
|
39
|
|
|
39
|
0
|
49
|
my $this = shift; |
|
1246
|
39
|
|
|
|
|
50
|
my ($FieldList, $Force) = @_; |
|
1247
|
|
|
|
|
|
|
|
|
1248
|
39
|
100
|
|
|
|
106
|
if (ref($FieldList) eq 'ARRAY') |
|
1249
|
|
|
|
|
|
|
{ |
|
1250
|
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
## Whether forcing or not, ensure all fields mentioned in the |
|
1252
|
|
|
|
|
|
|
## list actually exist and are the correct length. |
|
1253
|
33
|
|
|
|
|
88
|
$this->fieldlist_check($FieldList); |
|
1254
|
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
## Set the custom list |
|
1256
|
33
|
|
|
|
|
68
|
$this->{_FieldList} = $FieldList; |
|
1257
|
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
## In "force" mode, remove any non-listed columns. |
|
1259
|
33
|
100
|
|
|
|
92
|
$this->fieldlist_truncate() if ($Force); |
|
1260
|
|
|
|
|
|
|
} |
|
1261
|
|
|
|
|
|
|
else |
|
1262
|
|
|
|
|
|
|
{ |
|
1263
|
|
|
|
|
|
|
## Remove the custom field list. |
|
1264
|
6
|
|
|
|
|
17
|
$FieldList = delete $this->{_FieldList}; |
|
1265
|
|
|
|
|
|
|
} |
|
1266
|
|
|
|
|
|
|
|
|
1267
|
39
|
|
|
|
|
73
|
return($FieldList); ## Return the one that was set or deleted. |
|
1268
|
|
|
|
|
|
|
} |
|
1269
|
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
sub fieldlist_check |
|
1271
|
|
|
|
|
|
|
{ |
|
1272
|
33
|
|
|
33
|
0
|
39
|
my $this = shift; |
|
1273
|
33
|
|
|
|
|
65
|
my ($FieldList) = @_; |
|
1274
|
|
|
|
|
|
|
|
|
1275
|
33
|
|
33
|
|
|
81
|
$FieldList ||= $this->fieldlist(); |
|
1276
|
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
## Visit each field name in the current list. Make sure it is |
|
1278
|
|
|
|
|
|
|
## present. |
|
1279
|
|
|
|
|
|
|
|
|
1280
|
33
|
|
|
|
|
60
|
foreach my $FieldName (@$FieldList) |
|
1281
|
|
|
|
|
|
|
{ |
|
1282
|
|
|
|
|
|
|
## The col method will the column exist if not present. |
|
1283
|
100
|
|
|
|
|
214
|
$this->col($FieldName); |
|
1284
|
|
|
|
|
|
|
} |
|
1285
|
|
|
|
|
|
|
} |
|
1286
|
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
sub fieldlist_truncate |
|
1288
|
|
|
|
|
|
|
{ |
|
1289
|
3
|
|
|
3
|
0
|
5
|
my $this = shift; |
|
1290
|
|
|
|
|
|
|
|
|
1291
|
3
|
|
|
|
|
8
|
my $FieldList = $this->fieldlist(); |
|
1292
|
3
|
|
|
|
|
9
|
my $AllFields = $this->fieldlist_all(); |
|
1293
|
3
|
|
|
|
|
6
|
my $FieldHash = {}; @$FieldHash{@$FieldList} = undef; |
|
|
3
|
|
|
|
|
11
|
|
|
1294
|
|
|
|
|
|
|
|
|
1295
|
3
|
|
|
|
|
6
|
my $DeletedCols = {}; |
|
1296
|
|
|
|
|
|
|
|
|
1297
|
3
|
|
|
|
|
7
|
foreach my $FieldName (@$AllFields) |
|
1298
|
|
|
|
|
|
|
{ |
|
1299
|
8
|
100
|
|
|
|
23
|
if (!exists($FieldHash->{$FieldName})) |
|
1300
|
|
|
|
|
|
|
{ |
|
1301
|
3
|
|
|
|
|
9
|
$DeletedCols->{$FieldName} = delete $this->{$FieldName}; |
|
1302
|
|
|
|
|
|
|
} |
|
1303
|
|
|
|
|
|
|
} |
|
1304
|
|
|
|
|
|
|
|
|
1305
|
3
|
|
|
|
|
12
|
return($DeletedCols); |
|
1306
|
|
|
|
|
|
|
} |
|
1307
|
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
sub fieldlist_add |
|
1309
|
|
|
|
|
|
|
{ |
|
1310
|
43
|
|
|
43
|
0
|
53
|
my $this = shift; |
|
1311
|
43
|
|
|
|
|
56
|
my ($FieldName) = @_; |
|
1312
|
|
|
|
|
|
|
|
|
1313
|
43
|
100
|
|
|
|
124
|
if (ref($this->{_FieldList}) eq 'ARRAY') |
|
1314
|
|
|
|
|
|
|
{ |
|
1315
|
34
|
|
|
|
|
47
|
my $FieldList = $this->{_FieldList}; |
|
1316
|
34
|
|
|
|
|
52
|
my $FieldHash = {}; @$FieldHash{@$FieldList} = undef; |
|
|
34
|
|
|
|
|
112
|
|
|
1317
|
|
|
|
|
|
|
|
|
1318
|
34
|
50
|
|
|
|
85
|
if (!exists($FieldHash->{$FieldName})) |
|
1319
|
|
|
|
|
|
|
{ |
|
1320
|
34
|
|
|
|
|
117
|
push @$FieldList, $FieldName; |
|
1321
|
|
|
|
|
|
|
} |
|
1322
|
|
|
|
|
|
|
} |
|
1323
|
|
|
|
|
|
|
} |
|
1324
|
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
sub fieldlist_delete |
|
1326
|
|
|
|
|
|
|
{ |
|
1327
|
3
|
|
|
3
|
0
|
10
|
my $this = shift; |
|
1328
|
3
|
|
|
|
|
8
|
my ($FieldName) = @_; |
|
1329
|
|
|
|
|
|
|
|
|
1330
|
3
|
50
|
|
|
|
14
|
if (ref($this->{_FieldList}) eq 'ARRAY') |
|
1331
|
|
|
|
|
|
|
{ |
|
1332
|
3
|
|
|
|
|
6
|
$this->{_FieldList} = [grep {$_ ne $FieldName} @{$this->{_FieldList}}]; |
|
|
11
|
|
|
|
|
33
|
|
|
|
3
|
|
|
|
|
11
|
|
|
1333
|
|
|
|
|
|
|
} |
|
1334
|
|
|
|
|
|
|
} |
|
1335
|
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
=pod |
|
1337
|
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
=head1 DATA COLUMNS (FIELD DATA) |
|
1339
|
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
## Getting or setting data in entire columns |
|
1341
|
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
$t->{$ColName} ## Get a column you know exists |
|
1343
|
|
|
|
|
|
|
$t->col($ColName) ## Get a column or make empty one. |
|
1344
|
|
|
|
|
|
|
$t->col_get($ColName) |
|
1345
|
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
$t->col($ColName, $ListRef) ## Set all of a column all at once. |
|
1347
|
|
|
|
|
|
|
$t->col_set($ColName, $ListRef) |
|
1348
|
|
|
|
|
|
|
$t->col_force($ColName, $ListRef) ## Add but don't check size or |
|
1349
|
|
|
|
|
|
|
## add to custom field list |
|
1350
|
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
$t->col_set($ColName, undef) ## Delete a column completely |
|
1352
|
|
|
|
|
|
|
$t->col_delete($ColName) |
|
1353
|
|
|
|
|
|
|
|
|
1354
|
|
|
|
|
|
|
$t->col_empty() ## An empty col presized for table |
|
1355
|
|
|
|
|
|
|
$t->col_empty(22) ## An empty col of another length |
|
1356
|
|
|
|
|
|
|
$t->col_empty($Col) ## An empty col sized to match another |
|
1357
|
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
$t->col_default() ## Default if req. column not found. |
|
1359
|
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
$t->col_exists($Field) ## Check existence of column |
|
1361
|
|
|
|
|
|
|
$t->col_active($Field) ## Restrict check to fieldlist() |
|
1362
|
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
$t->cols($ColList) ## Get list of multiple named columns |
|
1364
|
|
|
|
|
|
|
$t->cols_hash($ColList) ## Get hash " " " |
|
1365
|
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
$t->col_rename($Old => $New) ## Change name of columns |
|
1367
|
|
|
|
|
|
|
$t->col_rename($Old1 => $New1, $Old2 => $New2) ## Change several |
|
1368
|
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
A "column" is a field in the table and all its data. The column's |
|
1370
|
|
|
|
|
|
|
field name is a key in the object itself, and may also optionally be |
|
1371
|
|
|
|
|
|
|
listed in a custom field list if present. The column's data is the |
|
1372
|
|
|
|
|
|
|
key's value in the hash and is an array ref of values presumed to be |
|
1373
|
|
|
|
|
|
|
of the same data type (e. g. string, integer, etc.) |
|
1374
|
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
Sometimes the terms "column" and "field" are used interchangeably in |
|
1376
|
|
|
|
|
|
|
this documentation. |
|
1377
|
|
|
|
|
|
|
|
|
1378
|
|
|
|
|
|
|
If you already know that a column exists (because you got it from the |
|
1379
|
|
|
|
|
|
|
fieldlist() method and you've not previously manipulated _FieldList |
|
1380
|
|
|
|
|
|
|
directly but instead carefully used the method calls available for |
|
1381
|
|
|
|
|
|
|
that), then you can safely get the column by just looking it up in the |
|
1382
|
|
|
|
|
|
|
object itself. |
|
1383
|
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
The col() method does the same thing, but forces the column to spring |
|
1385
|
|
|
|
|
|
|
into existence if it did not already (which can also have the |
|
1386
|
|
|
|
|
|
|
potentially unwanted side-effect of hiding coding errors in which you |
|
1387
|
|
|
|
|
|
|
retreive mis-named columns: so beware). Columns brought into |
|
1388
|
|
|
|
|
|
|
existence this way will automatically be pre-sized (i.e. they will |
|
1389
|
|
|
|
|
|
|
will be created and set to whatever col_default() returns). |
|
1390
|
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
The col() or col_set() methods can also be used to set a column. When |
|
1392
|
|
|
|
|
|
|
the column is set, the list you pass is automatically sized |
|
1393
|
|
|
|
|
|
|
(lengthened or truncated) to match the current length of the table. |
|
1394
|
|
|
|
|
|
|
If this is not what you want, then call col_force() which will not |
|
1395
|
|
|
|
|
|
|
check whether the new column matches the size of the others. |
|
1396
|
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
No matter how you set it, the object now "owns" the list you gave it. |
|
1398
|
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
As a convenience, col(), col_set() and col_force() return the column |
|
1400
|
|
|
|
|
|
|
that was set. They silently discard any previous column. |
|
1401
|
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
All three methods of column setting will append the column to the |
|
1403
|
|
|
|
|
|
|
custom field list if one is present and the column name is not already |
|
1404
|
|
|
|
|
|
|
listed there (by calling fieldlist_add()). They will also call the |
|
1405
|
|
|
|
|
|
|
extend() method to ensure all columns have the same length (either |
|
1406
|
|
|
|
|
|
|
others will be extended to match the length of the new one, or the new |
|
1407
|
|
|
|
|
|
|
one will be extended to match the length of the others). |
|
1408
|
|
|
|
|
|
|
|
|
1409
|
|
|
|
|
|
|
col_delete() deletes a column. |
|
1410
|
|
|
|
|
|
|
|
|
1411
|
|
|
|
|
|
|
col_empty() returns an anonymous list reference that is pre-sized to |
|
1412
|
|
|
|
|
|
|
the length of the table (by default). You could use it to get an |
|
1413
|
|
|
|
|
|
|
empty column that you intend to fill up and then later insert into the |
|
1414
|
|
|
|
|
|
|
table or use to hold the results of an operation on other columns. If |
|
1415
|
|
|
|
|
|
|
you want a different length, specify it as a number or as an array ref |
|
1416
|
|
|
|
|
|
|
whose length should be matched. |
|
1417
|
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
col_default() is the internal method that implements the "springing |
|
1419
|
|
|
|
|
|
|
into existence" of missing columns. Currently it just calls |
|
1420
|
|
|
|
|
|
|
col_empty(). Other subclasses might want to have it return undef or a |
|
1421
|
|
|
|
|
|
|
string like "NO_SUCH_COLUMN" in order to help track programming errors |
|
1422
|
|
|
|
|
|
|
where nonexistent columns are requested. |
|
1423
|
|
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
|
cols($FieldList) returns an ordered list of the requested column |
|
1425
|
|
|
|
|
|
|
names. If no list is given, then fieldlist() is used. |
|
1426
|
|
|
|
|
|
|
|
|
1427
|
|
|
|
|
|
|
cols_hash($FieldList) does the same as cols(), but the result is a |
|
1428
|
|
|
|
|
|
|
hash whose keys are the field names and whose values are the columns |
|
1429
|
|
|
|
|
|
|
-- much like the original object itself, but not blessed into the |
|
1430
|
|
|
|
|
|
|
class. The resulting hash, however, could be used as the prototype |
|
1431
|
|
|
|
|
|
|
for a new Data::CTable object (by calling the new() method). However, |
|
1432
|
|
|
|
|
|
|
be warned that both objects will think they "own" the resulting shared |
|
1433
|
|
|
|
|
|
|
so be careful what you do..... which brings us to this: |
|
1434
|
|
|
|
|
|
|
|
|
1435
|
|
|
|
|
|
|
IMPORTANT NOTE ABOUT GETTING COLUMNS: The columns you retrieve from a |
|
1436
|
|
|
|
|
|
|
table are still "owned" by the table object as long as it lives. If |
|
1437
|
|
|
|
|
|
|
you modify them, you are modifying the table's data. If you change |
|
1438
|
|
|
|
|
|
|
their length, then you may be invalidating the table's own |
|
1439
|
|
|
|
|
|
|
expectations that all its columns have the same length. So beware. |
|
1440
|
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
Just make yourself a copy of the data if that isn't what you want. |
|
1442
|
|
|
|
|
|
|
For example, instead of this: |
|
1443
|
|
|
|
|
|
|
|
|
1444
|
|
|
|
|
|
|
my $Foo = $Table->col('Foo'); ## Reference to actual column |
|
1445
|
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
Do this: |
|
1447
|
|
|
|
|
|
|
|
|
1448
|
|
|
|
|
|
|
my $Foo = [@{$Table->col('Foo')}]; ## Shallow copy of the column |
|
1449
|
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
=cut |
|
1451
|
|
|
|
|
|
|
|
|
1452
|
|
|
|
|
|
|
sub col ## ($ColName, [$Vector]) |
|
1453
|
|
|
|
|
|
|
{ |
|
1454
|
1472
|
|
|
1472
|
0
|
2828
|
my $this = shift; |
|
1455
|
1472
|
|
|
|
|
1831
|
my ($ColName, $Vector) = @_; |
|
1456
|
|
|
|
|
|
|
|
|
1457
|
|
|
|
|
|
|
## Set if specified. |
|
1458
|
1472
|
100
|
|
|
|
3406
|
my $FoundVector = $this->col_set($ColName, $Vector) if defined($Vector); |
|
1459
|
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
## Get and return. |
|
1461
|
|
|
|
|
|
|
## If not specified, create it with col_default() |
|
1462
|
1472
|
|
|
|
|
2428
|
my $Col = $this->col_get($ColName); |
|
1463
|
|
|
|
|
|
|
|
|
1464
|
1472
|
|
|
|
|
3961
|
return($Col); |
|
1465
|
|
|
|
|
|
|
} |
|
1466
|
|
|
|
|
|
|
|
|
1467
|
|
|
|
|
|
|
sub col_get |
|
1468
|
|
|
|
|
|
|
{ |
|
1469
|
1475
|
|
|
1475
|
0
|
1730
|
my $this = shift; |
|
1470
|
1475
|
|
|
|
|
2103
|
my ($ColName) = @_; |
|
1471
|
|
|
|
|
|
|
|
|
1472
|
1475
|
|
66
|
|
|
3413
|
my $Col = ($this->{$ColName} || $this->col_add($ColName)); |
|
1473
|
|
|
|
|
|
|
|
|
1474
|
1475
|
|
|
|
|
2320
|
return($Col); |
|
1475
|
|
|
|
|
|
|
} |
|
1476
|
|
|
|
|
|
|
|
|
1477
|
|
|
|
|
|
|
sub col_add |
|
1478
|
|
|
|
|
|
|
{ |
|
1479
|
28
|
|
|
28
|
0
|
38
|
my $this = shift; |
|
1480
|
28
|
|
|
|
|
43
|
my ($ColName) = @_; |
|
1481
|
28
|
|
|
|
|
61
|
my $Col = $this->{$ColName} = $this->col_empty(); |
|
1482
|
|
|
|
|
|
|
|
|
1483
|
28
|
|
|
|
|
67
|
$this->fieldlist_add($ColName); |
|
1484
|
28
|
|
|
|
|
80
|
return($Col); |
|
1485
|
|
|
|
|
|
|
} |
|
1486
|
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
sub col_set_internal ## ($ColName, [$Vector], [$Force]) |
|
1488
|
|
|
|
|
|
|
{ |
|
1489
|
16
|
|
|
16
|
0
|
18
|
my $this = shift; |
|
1490
|
16
|
|
|
|
|
26
|
my ($ColName, $Vector, $Force) = @_; |
|
1491
|
|
|
|
|
|
|
|
|
1492
|
16
|
|
|
|
|
28
|
my $Valid = (ref($Vector) eq 'ARRAY'); |
|
1493
|
16
|
|
|
|
|
26
|
my $Existing = (ref($this->{$ColName}) eq 'ARRAY'); |
|
1494
|
|
|
|
|
|
|
|
|
1495
|
|
|
|
|
|
|
## Delete existing vector by this name... |
|
1496
|
16
|
100
|
66
|
|
|
99
|
if (!$Valid && $Existing) |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
1497
|
|
|
|
|
|
|
{ |
|
1498
|
1
|
|
|
|
|
3
|
$Vector = delete $this->{$ColName}; ## Delete and save to return to caller. |
|
1499
|
1
|
|
|
|
|
6
|
$this->fieldlist_delete($ColName); ## Delete from field list if needed. |
|
1500
|
|
|
|
|
|
|
} |
|
1501
|
|
|
|
|
|
|
## ...or add one... |
|
1502
|
|
|
|
|
|
|
elsif ($Valid && !$Existing) |
|
1503
|
|
|
|
|
|
|
{ |
|
1504
|
11
|
|
|
|
|
24
|
$this->{$ColName} = $Vector; |
|
1505
|
|
|
|
|
|
|
|
|
1506
|
11
|
50
|
|
|
|
20
|
if (!$Force) |
|
1507
|
|
|
|
|
|
|
{ |
|
1508
|
11
|
|
|
|
|
23
|
$this->extend(); ## Extend all vectors as needed to ensure same length. |
|
1509
|
11
|
|
|
|
|
25
|
$this->fieldlist_add($ColName); ## Add to custom field list if needed. |
|
1510
|
|
|
|
|
|
|
} |
|
1511
|
|
|
|
|
|
|
} |
|
1512
|
|
|
|
|
|
|
## ...otherwise replace. |
|
1513
|
|
|
|
|
|
|
elsif ($Valid) |
|
1514
|
|
|
|
|
|
|
{ |
|
1515
|
4
|
|
|
|
|
9
|
$this->{$ColName} = $Vector; |
|
1516
|
|
|
|
|
|
|
|
|
1517
|
4
|
50
|
|
|
|
18
|
if (!$Force) |
|
1518
|
|
|
|
|
|
|
{ |
|
1519
|
4
|
|
|
|
|
11
|
$this->extend(); ## Extend all vectors as needed to ensure same length. |
|
1520
|
|
|
|
|
|
|
} |
|
1521
|
|
|
|
|
|
|
} |
|
1522
|
|
|
|
|
|
|
|
|
1523
|
16
|
|
|
|
|
44
|
return($Vector); ## Return added or deleted vector for convenience. |
|
1524
|
|
|
|
|
|
|
} |
|
1525
|
|
|
|
|
|
|
|
|
1526
|
|
|
|
|
|
|
sub col_delete ## ($ColName) |
|
1527
|
|
|
|
|
|
|
{ |
|
1528
|
1
|
|
|
1
|
0
|
7
|
my $this = shift; |
|
1529
|
1
|
|
|
|
|
3
|
my ($ColName) = @_; |
|
1530
|
|
|
|
|
|
|
|
|
1531
|
1
|
|
|
|
|
4
|
return($this->col_set_internal($ColName)); |
|
1532
|
|
|
|
|
|
|
} |
|
1533
|
|
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
|
sub col_set ## ($ColName, $Vector) |
|
1535
|
|
|
|
|
|
|
{ |
|
1536
|
15
|
|
|
15
|
0
|
27
|
my $this = shift; |
|
1537
|
15
|
|
|
|
|
25
|
my ($ColName, $Vector) = @_; |
|
1538
|
|
|
|
|
|
|
|
|
1539
|
15
|
|
|
|
|
37
|
return($this->col_set_internal($ColName, $Vector)); |
|
1540
|
|
|
|
|
|
|
} |
|
1541
|
|
|
|
|
|
|
|
|
1542
|
|
|
|
|
|
|
sub col_force ## ($ColName, $Vector) |
|
1543
|
|
|
|
|
|
|
{ |
|
1544
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
|
1545
|
0
|
|
|
|
|
0
|
my ($ColName, $Vector) = @_; |
|
1546
|
|
|
|
|
|
|
|
|
1547
|
0
|
|
|
|
|
0
|
return($this->col_set_internal($ColName, $Vector, 1)); |
|
1548
|
|
|
|
|
|
|
} |
|
1549
|
|
|
|
|
|
|
|
|
1550
|
|
|
|
|
|
|
sub col_empty |
|
1551
|
|
|
|
|
|
|
{ |
|
1552
|
37
|
|
|
37
|
0
|
56
|
my $this = shift; |
|
1553
|
37
|
|
|
|
|
46
|
my ($Length) = @_; |
|
1554
|
|
|
|
|
|
|
|
|
1555
|
|
|
|
|
|
|
## Default to table length. Or get length from sample column. |
|
1556
|
37
|
100
|
|
|
|
114
|
$Length = $this->length() unless defined($Length); |
|
1557
|
37
|
100
|
|
|
|
89
|
$Length = @$Length if ref($Length) eq 'ARRAY'; |
|
1558
|
|
|
|
|
|
|
|
|
1559
|
37
|
|
|
|
|
54
|
my $Col = []; |
|
1560
|
37
|
|
|
|
|
119
|
$#$Col = $Length - 1; |
|
1561
|
|
|
|
|
|
|
|
|
1562
|
37
|
|
|
|
|
93
|
return($Col); |
|
1563
|
|
|
|
|
|
|
} |
|
1564
|
|
|
|
|
|
|
|
|
1565
|
|
|
|
|
|
|
sub col_default |
|
1566
|
|
|
|
|
|
|
{ |
|
1567
|
1
|
|
|
1
|
0
|
2
|
my $this = shift; |
|
1568
|
1
|
|
|
|
|
4
|
my $Col = $this->col_empty(); |
|
1569
|
|
|
|
|
|
|
|
|
1570
|
1
|
|
|
|
|
6
|
return($Col); |
|
1571
|
|
|
|
|
|
|
} |
|
1572
|
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
sub cols ## ($ColNames) |
|
1574
|
|
|
|
|
|
|
{ |
|
1575
|
33
|
|
|
33
|
0
|
68
|
my $this = shift; |
|
1576
|
33
|
|
|
|
|
49
|
my ($ColNames) = @_; |
|
1577
|
33
|
|
66
|
|
|
89
|
$ColNames ||= $this->fieldlist(); |
|
1578
|
33
|
|
|
|
|
62
|
my $Cols = [map {$this->col($_)} @$ColNames]; |
|
|
106
|
|
|
|
|
214
|
|
|
1579
|
|
|
|
|
|
|
|
|
1580
|
33
|
|
|
|
|
106
|
return($Cols); |
|
1581
|
|
|
|
|
|
|
} |
|
1582
|
|
|
|
|
|
|
|
|
1583
|
|
|
|
|
|
|
sub cols_hash ## ($ColNames) |
|
1584
|
|
|
|
|
|
|
{ |
|
1585
|
25
|
|
|
25
|
0
|
60
|
my $this = shift; |
|
1586
|
25
|
|
|
|
|
44
|
my ($ColNames) = @_; |
|
1587
|
25
|
|
66
|
|
|
88
|
$ColNames ||= $this->fieldlist(); |
|
1588
|
25
|
|
|
|
|
73
|
my $Cols = $this->cols($ColNames); |
|
1589
|
25
|
|
|
|
|
56
|
my $ColsHash = {}; @$ColsHash{@$ColNames} = @$Cols; |
|
|
25
|
|
|
|
|
93
|
|
|
1590
|
|
|
|
|
|
|
|
|
1591
|
25
|
|
|
|
|
225
|
return($ColsHash); |
|
1592
|
|
|
|
|
|
|
} |
|
1593
|
|
|
|
|
|
|
|
|
1594
|
|
|
|
|
|
|
sub col_exists ## ($ColName, [$FieldList]) |
|
1595
|
|
|
|
|
|
|
{ |
|
1596
|
24
|
|
|
24
|
0
|
93
|
my $this = shift; |
|
1597
|
24
|
|
|
|
|
40
|
my ($ColName, $FieldList) = @_; |
|
1598
|
|
|
|
|
|
|
|
|
1599
|
|
|
|
|
|
|
## Default list to search is ALL fields in object. |
|
1600
|
24
|
|
66
|
|
|
78
|
$FieldList ||= $this->fieldlist_all(); |
|
1601
|
|
|
|
|
|
|
|
|
1602
|
|
|
|
|
|
|
## Disallow column names starting with underscore. |
|
1603
|
24
|
50
|
|
|
|
60
|
return(0) if $ColName =~ /^_/; |
|
1604
|
|
|
|
|
|
|
|
|
1605
|
24
|
|
|
|
|
55
|
my $FieldHash = $this->fieldlist_hash($FieldList); |
|
1606
|
24
|
|
|
|
|
49
|
my $Exists = exists($FieldHash->{$ColName}); |
|
1607
|
|
|
|
|
|
|
|
|
1608
|
24
|
|
|
|
|
99
|
return($Exists); |
|
1609
|
|
|
|
|
|
|
} |
|
1610
|
|
|
|
|
|
|
|
|
1611
|
|
|
|
|
|
|
sub col_active ## ($ColName, [$FieldList]) |
|
1612
|
|
|
|
|
|
|
{ |
|
1613
|
8
|
|
|
8
|
0
|
42
|
my $this = shift; |
|
1614
|
8
|
|
|
|
|
14
|
my ($ColName, $FieldList) = @_; |
|
1615
|
|
|
|
|
|
|
|
|
1616
|
|
|
|
|
|
|
## Default list to search is only ACTIVE fields in object. |
|
1617
|
8
|
|
33
|
|
|
30
|
$FieldList ||= $this->fieldlist(); |
|
1618
|
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
## Disallow column names starting with underscore. |
|
1620
|
8
|
50
|
|
|
|
22
|
return(0) if $ColName =~ /^_/; |
|
1621
|
|
|
|
|
|
|
|
|
1622
|
8
|
|
|
|
|
20
|
my $FieldHash = $this->fieldlist_hash($FieldList); |
|
1623
|
8
|
|
|
|
|
17
|
my $Exists = exists($FieldHash->{$ColName}); |
|
1624
|
|
|
|
|
|
|
|
|
1625
|
8
|
|
|
|
|
30
|
return($Exists); |
|
1626
|
|
|
|
|
|
|
} |
|
1627
|
|
|
|
|
|
|
|
|
1628
|
|
|
|
|
|
|
sub col_rename ## ($Old => $New, [$Old => New...]) |
|
1629
|
|
|
|
|
|
|
{ |
|
1630
|
3
|
|
|
3
|
0
|
19
|
my $this = shift; |
|
1631
|
|
|
|
|
|
|
|
|
1632
|
3
|
|
|
|
|
4
|
my $Success; |
|
1633
|
|
|
|
|
|
|
|
|
1634
|
3
|
|
|
|
|
8
|
my $Fields = $this->fieldlist_all(); |
|
1635
|
|
|
|
|
|
|
|
|
1636
|
3
|
|
|
|
|
5
|
my ($Old, $New); |
|
1637
|
3
|
|
|
|
|
18
|
while (($Old, $New) = splice(@_, 0, 2)) |
|
1638
|
|
|
|
|
|
|
{ |
|
1639
|
3
|
50
|
|
|
|
17
|
$this->warn("Invalid column name: $New"), next |
|
1640
|
|
|
|
|
|
|
unless ($New =~ /^[^_]+/); |
|
1641
|
|
|
|
|
|
|
|
|
1642
|
3
|
50
|
|
|
|
10
|
$this->warn("Column to be renamed does not exist: $Old"), next |
|
1643
|
|
|
|
|
|
|
unless $this->col_exists($Old, $Fields); |
|
1644
|
|
|
|
|
|
|
|
|
1645
|
3
|
50
|
0
|
|
|
8
|
(($Old ne $New) && $this->warn("Failed to rename column $Old to $New: $New exists.")), next |
|
1646
|
|
|
|
|
|
|
if $this->col_exists($New, $Fields); |
|
1647
|
|
|
|
|
|
|
|
|
1648
|
3
|
|
|
|
|
9
|
my $Col = $this->col($Old); ## Creates if not present. |
|
1649
|
|
|
|
|
|
|
|
|
1650
|
|
|
|
|
|
|
## Rename the column... |
|
1651
|
3
|
|
|
|
|
11
|
$this->{$New} = delete $this->{$Old}; |
|
1652
|
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
## Then make the same change to _FieldList, _SortOrder, _SortSpecs |
|
1654
|
|
|
|
|
|
|
|
|
1655
|
3
|
100
|
|
|
|
10
|
$this->{_FieldList} = [map {$_ = $New if $_ eq $Old; $_} @{$this->{_FieldList}}] if (defined($this->{_FieldList})); |
|
|
10
|
50
|
|
|
|
21
|
|
|
|
10
|
|
|
|
|
25
|
|
|
|
3
|
|
|
|
|
7
|
|
|
1656
|
3
|
0
|
|
|
|
12
|
$this->{_SortOrder} = [map {$_ = $New if $_ eq $Old; $_} @{$this->{_SortOrder}}] if (defined($this->{_SortOrder})); |
|
|
0
|
50
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1657
|
3
|
50
|
33
|
|
|
29
|
$this->{_SortSpecs}->{$New} = delete $this->{_SortSpecs}->{$Old} if (defined($this->{_SortSpecs}) && |
|
1658
|
|
|
|
|
|
|
( $this->{_SortSpecs}->{$Old})); |
|
1659
|
|
|
|
|
|
|
} |
|
1660
|
|
|
|
|
|
|
|
|
1661
|
3
|
|
|
|
|
5
|
$Success = 1; |
|
1662
|
3
|
|
|
|
|
9
|
done: |
|
1663
|
|
|
|
|
|
|
return($Success); |
|
1664
|
|
|
|
|
|
|
} |
|
1665
|
|
|
|
|
|
|
|
|
1666
|
|
|
|
|
|
|
=pod |
|
1667
|
|
|
|
|
|
|
|
|
1668
|
|
|
|
|
|
|
=head1 CLEANUP AND VALIDATION |
|
1669
|
|
|
|
|
|
|
|
|
1670
|
|
|
|
|
|
|
## Performing your own cleanups or validations |
|
1671
|
|
|
|
|
|
|
|
|
1672
|
|
|
|
|
|
|
$t->clean($Sub) ## Clean with custom subroutine |
|
1673
|
|
|
|
|
|
|
$t->clean($Sub, $Fields) ## Clean specified columns only |
|
1674
|
|
|
|
|
|
|
|
|
1675
|
|
|
|
|
|
|
## Cleaning whitespace |
|
1676
|
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
$t->clean_ws() ## Clean whitespace in fieldlist() cols |
|
1678
|
|
|
|
|
|
|
$t->clean_ws($Fields) ## Clean whitespace in specified cols |
|
1679
|
|
|
|
|
|
|
|
|
1680
|
|
|
|
|
|
|
## Cleaning methods that map character sets |
|
1681
|
|
|
|
|
|
|
|
|
1682
|
|
|
|
|
|
|
$t->clean_mac_to_iso8859() |
|
1683
|
|
|
|
|
|
|
$t->clean_mac_to_iso8859($Fields) |
|
1684
|
|
|
|
|
|
|
|
|
1685
|
|
|
|
|
|
|
$t->clean_iso8859_to_mac() |
|
1686
|
|
|
|
|
|
|
$t->clean_iso8859_to_mac($Fields) |
|
1687
|
|
|
|
|
|
|
|
|
1688
|
|
|
|
|
|
|
## Character mapping utilities (not methods) |
|
1689
|
|
|
|
|
|
|
|
|
1690
|
|
|
|
|
|
|
use Data::CTable qw( |
|
1691
|
|
|
|
|
|
|
ISORoman8859_1ToMacRoman |
|
1692
|
|
|
|
|
|
|
MacRomanToISORoman8859_1 |
|
1693
|
|
|
|
|
|
|
|
|
1694
|
|
|
|
|
|
|
ISORoman8859_1ToMacRoman_clean |
|
1695
|
|
|
|
|
|
|
MacRomanToISORoman8859_1_clean |
|
1696
|
|
|
|
|
|
|
); |
|
1697
|
|
|
|
|
|
|
|
|
1698
|
|
|
|
|
|
|
&ISORoman8859_1ToMacRoman(\ $Str) ## Pass pointer to buffer |
|
1699
|
|
|
|
|
|
|
&MacRomanToISORoman8859_1(\ $Str) ## Pass pointer to buffer |
|
1700
|
|
|
|
|
|
|
|
|
1701
|
|
|
|
|
|
|
&ISORoman8859_1ToMacRoman_clean() ## Operates on $_ |
|
1702
|
|
|
|
|
|
|
&MacRomanToISORoman8859_1_clean() ## Operates on $_ |
|
1703
|
|
|
|
|
|
|
|
|
1704
|
|
|
|
|
|
|
One of the most important things you can do with your data once it's |
|
1705
|
|
|
|
|
|
|
been placed in a table in Perl is to use the power of Perl to scrub it |
|
1706
|
|
|
|
|
|
|
like crazy. |
|
1707
|
|
|
|
|
|
|
|
|
1708
|
|
|
|
|
|
|
The built-in clean_ws() method applies a standard white-space cleanup |
|
1709
|
|
|
|
|
|
|
to selected records in every field in the fieldlist() or other list of |
|
1710
|
|
|
|
|
|
|
fields you optionally supply (such as fieldlist_all()). |
|
1711
|
|
|
|
|
|
|
|
|
1712
|
|
|
|
|
|
|
It does the following cleanups that are deemed correct for the |
|
1713
|
|
|
|
|
|
|
majority of data out there: |
|
1714
|
|
|
|
|
|
|
|
|
1715
|
|
|
|
|
|
|
- Remove all leading whitespace, including returns (\n) |
|
1716
|
|
|
|
|
|
|
- Remove all trailing whitespace, including returns (\n) |
|
1717
|
|
|
|
|
|
|
- Convert runs of spaces to a single space |
|
1718
|
|
|
|
|
|
|
- Convert empty string values back to undef to save space |
|
1719
|
|
|
|
|
|
|
|
|
1720
|
|
|
|
|
|
|
Of course, depending on your data, clean_ws() might just be the first |
|
1721
|
|
|
|
|
|
|
thing you do in your cleanup pass. There might be many more cleanups |
|
1722
|
|
|
|
|
|
|
you'd like to apply. |
|
1723
|
|
|
|
|
|
|
|
|
1724
|
|
|
|
|
|
|
clean() is like clean_ws() except you supply as the first argument |
|
1725
|
|
|
|
|
|
|
your own cleaning subroutine (code reference). It should do its work |
|
1726
|
|
|
|
|
|
|
by modifying $_. |
|
1727
|
|
|
|
|
|
|
|
|
1728
|
|
|
|
|
|
|
Both clean_ws() and clean() apply cleaning ONLY to selected records. |
|
1729
|
|
|
|
|
|
|
If this isn't what you want, then select_all() before cleaning. |
|
1730
|
|
|
|
|
|
|
|
|
1731
|
|
|
|
|
|
|
Since a cleanup subroutine can do ANY modifications to a field that it |
|
1732
|
|
|
|
|
|
|
likes, you can imagine some cleanup routines that also supply default |
|
1733
|
|
|
|
|
|
|
values and do other validations. |
|
1734
|
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
For example, a cleanup routine could convert every value in each field |
|
1736
|
|
|
|
|
|
|
to an integer, or apply minimum or maximum numerical limits: |
|
1737
|
|
|
|
|
|
|
|
|
1738
|
|
|
|
|
|
|
sub {$_ = int($_) } |
|
1739
|
|
|
|
|
|
|
sub {$_ = max(int($_), 0) } |
|
1740
|
|
|
|
|
|
|
sub {$_ = min(int($_), 200)} |
|
1741
|
|
|
|
|
|
|
|
|
1742
|
|
|
|
|
|
|
Or your cleanup routine could use regular expressions to do |
|
1743
|
|
|
|
|
|
|
capitalizations or other regularizations of data: |
|
1744
|
|
|
|
|
|
|
|
|
1745
|
|
|
|
|
|
|
sub Capitalize {/\b([a-z])([a-z]+)\b)/\U$1\E$2/g} |
|
1746
|
|
|
|
|
|
|
|
|
1747
|
|
|
|
|
|
|
$t->clean(\ &Capitalize , ['FirstName', 'LastName']); |
|
1748
|
|
|
|
|
|
|
$t->clean(\ &PhoneFormat, ['Phone', 'Fax' ]); |
|
1749
|
|
|
|
|
|
|
$t->clean(\ &LegalUSZip, ['HomeZip', 'WorkZip' ]); |
|
1750
|
|
|
|
|
|
|
|
|
1751
|
|
|
|
|
|
|
... and so on. Cleanups are easy to write and quick and easy to apply |
|
1752
|
|
|
|
|
|
|
with Data::CTable. Do them early! Do them often! |
|
1753
|
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
=head2 Hints for writing cleanup routines |
|
1755
|
|
|
|
|
|
|
|
|
1756
|
|
|
|
|
|
|
If your cleanup routine may be used to clean up fields that could be |
|
1757
|
|
|
|
|
|
|
empty/undef and empty/undef is a legal value, it should not touch any |
|
1758
|
|
|
|
|
|
|
undef values (unintentionally converting them to strings). |
|
1759
|
|
|
|
|
|
|
|
|
1760
|
|
|
|
|
|
|
Finally, instead of setting any values to the empty string, it should |
|
1761
|
|
|
|
|
|
|
set them to undef instead. This includes any values it might have |
|
1762
|
|
|
|
|
|
|
left empty during cleanup. (Using undef instead of empty string to |
|
1763
|
|
|
|
|
|
|
represent empty values is one way that Data::CTable likes to save |
|
1764
|
|
|
|
|
|
|
memory in tables that may have lots of those.) |
|
1765
|
|
|
|
|
|
|
|
|
1766
|
|
|
|
|
|
|
For an example of a well-behaved cleanup routine, consider the |
|
1767
|
|
|
|
|
|
|
following implementation of the builtin CleanWhitespace behavior: |
|
1768
|
|
|
|
|
|
|
|
|
1769
|
|
|
|
|
|
|
sub CleanWhitespace |
|
1770
|
|
|
|
|
|
|
{ |
|
1771
|
|
|
|
|
|
|
return unless defined; ## Empty/undef values stay that way |
|
1772
|
|
|
|
|
|
|
s/ \s+$//sx; ## Remove trailing whitespace |
|
1773
|
|
|
|
|
|
|
s/^\s+ //sx; ## Remove leading whitespace |
|
1774
|
|
|
|
|
|
|
s/ +/ /g; ## Runs of spaces to single space |
|
1775
|
|
|
|
|
|
|
$_ = undef unless length; ## (Newly?) empty strings to undef |
|
1776
|
|
|
|
|
|
|
} |
|
1777
|
|
|
|
|
|
|
|
|
1778
|
|
|
|
|
|
|
=head2 Roman character set mapping |
|
1779
|
|
|
|
|
|
|
|
|
1780
|
|
|
|
|
|
|
The character set mapping cleanup routines can be used to convert |
|
1781
|
|
|
|
|
|
|
upper-ASCII characters bidirectionally between two popular Roman |
|
1782
|
|
|
|
|
|
|
Character sets -- Mac Roman 1 and ISO 8859-1 (also sometimes called |
|
1783
|
|
|
|
|
|
|
ISO Latin 1) -- i.e. the Western European Roman character sets. |
|
1784
|
|
|
|
|
|
|
|
|
1785
|
|
|
|
|
|
|
By default, read() converts all incoming data fields in data files |
|
1786
|
|
|
|
|
|
|
with Mac line endings to ISO format when reading in. Conversely, |
|
1787
|
|
|
|
|
|
|
write() does the reverse mapping (ISO to Mac) when writing a file with |
|
1788
|
|
|
|
|
|
|
Mac line endings. |
|
1789
|
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
However, you may wish to turn off these default behaviors and instead |
|
1791
|
|
|
|
|
|
|
apply the mappings manually, possibly just to certain fields. |
|
1792
|
|
|
|
|
|
|
|
|
1793
|
|
|
|
|
|
|
For example, if a table contains fields with non-Roman character sets, |
|
1794
|
|
|
|
|
|
|
you would definitely not want to apply these mappings, and instead |
|
1795
|
|
|
|
|
|
|
might want to apply some different ones that you create yourself. |
|
1796
|
|
|
|
|
|
|
|
|
1797
|
|
|
|
|
|
|
=head2 Utility routines for character mapping |
|
1798
|
|
|
|
|
|
|
|
|
1799
|
|
|
|
|
|
|
This module can optionally export four utility routines for mapping |
|
1800
|
|
|
|
|
|
|
character Latin 1 character sets. Always be sure to map the correct |
|
1801
|
|
|
|
|
|
|
direction -- otherwise you'll end up with garbage! Be careful to only |
|
1802
|
|
|
|
|
|
|
pass Western Roman strings -- not double-byte strings or strings |
|
1803
|
|
|
|
|
|
|
encoded in any single-byte Eastern European Roman or non-Roman |
|
1804
|
|
|
|
|
|
|
character set. |
|
1805
|
|
|
|
|
|
|
|
|
1806
|
|
|
|
|
|
|
&ISORoman8859_1ToMacRoman(\ $Str) ## Pass pointer to buffer |
|
1807
|
|
|
|
|
|
|
&MacRomanToISORoman8859_1(\ $Str) ## Pass pointer to buffer |
|
1808
|
|
|
|
|
|
|
|
|
1809
|
|
|
|
|
|
|
These routines translate characters whose values are 128-255 from one |
|
1810
|
|
|
|
|
|
|
Western Roman encoding to another. The argument is a string buffer of |
|
1811
|
|
|
|
|
|
|
any size passed by reference. |
|
1812
|
|
|
|
|
|
|
|
|
1813
|
|
|
|
|
|
|
The functions return a count of the number of characters that were |
|
1814
|
|
|
|
|
|
|
mapped (zero or undef if none were). |
|
1815
|
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
&ISORoman8859_1ToMacRoman_clean() ## Operates on $_ |
|
1817
|
|
|
|
|
|
|
&MacRomanToISORoman8859_1_clean() ## Operates on $_ |
|
1818
|
|
|
|
|
|
|
|
|
1819
|
|
|
|
|
|
|
These routines are variants of the above, but they're versions that |
|
1820
|
|
|
|
|
|
|
are compatible with clean() -- they operate on $_ and will take care |
|
1821
|
|
|
|
|
|
|
to leave undefined values undefined. They do not have return values. |
|
1822
|
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
=head2 More advanced cleaning and validation |
|
1824
|
|
|
|
|
|
|
|
|
1825
|
|
|
|
|
|
|
Unfortunately, clean() only lets you operate on a single field value |
|
1826
|
|
|
|
|
|
|
at a time -- and there's no way to know the record number or other |
|
1827
|
|
|
|
|
|
|
useful information inside the cleaning routine. |
|
1828
|
|
|
|
|
|
|
|
|
1829
|
|
|
|
|
|
|
For really powerful cleaning and validation involving access to all |
|
1830
|
|
|
|
|
|
|
fields of a record as well as record numbers, see the discussion of |
|
1831
|
|
|
|
|
|
|
the calc() method and other methods for doing complex field |
|
1832
|
|
|
|
|
|
|
calculations in the next section. |
|
1833
|
|
|
|
|
|
|
|
|
1834
|
|
|
|
|
|
|
=cut |
|
1835
|
|
|
|
|
|
|
|
|
1836
|
|
|
|
|
|
|
sub clean |
|
1837
|
|
|
|
|
|
|
{ |
|
1838
|
8
|
|
|
8
|
0
|
20
|
my $this = shift; |
|
1839
|
8
|
|
|
|
|
14
|
my ($Sub, $Fields) = @_; |
|
1840
|
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
## Default is fields in the list. |
|
1842
|
8
|
|
33
|
|
|
36
|
$Fields ||= $this->fieldlist(); |
|
1843
|
|
|
|
|
|
|
|
|
1844
|
8
|
|
|
|
|
22
|
my $Sel = $this->selection(); |
|
1845
|
|
|
|
|
|
|
|
|
1846
|
8
|
|
|
|
|
18
|
foreach (@$Fields) {foreach (@{$this->col($_)}[@$Sel]) {&$Sub()}}; |
|
|
38
|
|
|
|
|
76
|
|
|
|
38
|
|
|
|
|
81
|
|
|
|
114
|
|
|
|
|
237
|
|
|
1847
|
|
|
|
|
|
|
} |
|
1848
|
|
|
|
|
|
|
|
|
1849
|
|
|
|
|
|
|
sub clean_ws |
|
1850
|
|
|
|
|
|
|
{ |
|
1851
|
6
|
|
|
6
|
0
|
32
|
my $this = shift; |
|
1852
|
6
|
|
|
|
|
26
|
return($this->clean(\ &CleanWhitespace, @_)); |
|
1853
|
|
|
|
|
|
|
} |
|
1854
|
|
|
|
|
|
|
|
|
1855
|
|
|
|
|
|
|
sub CleanWhitespace |
|
1856
|
|
|
|
|
|
|
{ |
|
1857
|
84
|
50
|
|
84
|
0
|
156
|
return unless defined; ## Empty/undef values stay that way |
|
1858
|
84
|
|
|
|
|
180
|
s/ \s+$//sx; ## Remove trailing whitespace |
|
1859
|
84
|
|
|
|
|
153
|
s/^\s+ //sx; ## Remove leading whitespace |
|
1860
|
84
|
|
|
|
|
169
|
s/ +/ /g; ## Runs of spaces to single space |
|
1861
|
84
|
50
|
|
|
|
257
|
$_ = undef unless length; ## (Newly?) empty strings to undef |
|
1862
|
|
|
|
|
|
|
} |
|
1863
|
|
|
|
|
|
|
|
|
1864
|
|
|
|
|
|
|
sub clean_mac_to_iso8859 |
|
1865
|
|
|
|
|
|
|
{ |
|
1866
|
1
|
|
|
1
|
0
|
8
|
my $this = shift; |
|
1867
|
1
|
|
|
|
|
5
|
return($this->clean(\ &MacRomanToISORoman8859_1_clean, @_)); |
|
1868
|
|
|
|
|
|
|
} |
|
1869
|
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
sub clean_iso8859_to_mac |
|
1871
|
|
|
|
|
|
|
{ |
|
1872
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
|
1873
|
0
|
|
|
|
|
0
|
return($this->clean(\ &ISORoman8859_1ToMacRoman_clean, @_)); |
|
1874
|
|
|
|
|
|
|
} |
|
1875
|
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
sub ISORoman8859_1ToMacRoman |
|
1877
|
|
|
|
|
|
|
{ |
|
1878
|
20
|
|
|
20
|
0
|
32
|
return($ {$_[0]} =~ |
|
|
20
|
|
|
|
|
49
|
|
|
1879
|
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
tr/\x80-\xFF/\xDE\xDF\xE2\xC4\xE3\xC9\xA0\xE0\xF6\xE4\xBA\xDC\xCE\xAD\xB3\xB2\xB0\xD4\xD5\xD2\xD3\xA5\xF8\xD1\xF7\xAA\xF9\xDD\xCF\xF0\xDA\xD9\xCA\xC1\xA2\xA3\xDB\xB4\xF5\xA4\xAC\xA9\xBB\xC7\xC2\xD0\xA8\xC3\xA1\xB1\xFA\xFE\xAB\xB5\xA6\xE1\xFC\xFF\xBC\xC8\xC5\xFD\xFB\xC0\xCB\xE7\xE5\xCC\x80\x81\xAE\x82\xE9\x83\xE6\xE8\xED\xEA\xEB\xEC\xC6\x84\xF1\xEE\xEF\xCD\x85\xD7\xAF\xF4\xF2\xF3\x86\xB7\xB8\xA7\x88\x87\x89\x8B\x8A\x8C\xBE\x8D\x8F\x8E\x90\x91\x93\x92\x94\x95\xB6\x96\x98\x97\x99\x9B\x9A\xD6\xBF\x9D\x9C\x9E\x9F\xBD\xB9\xD8/); |
|
1881
|
|
|
|
|
|
|
|
|
1882
|
|
|
|
|
|
|
} |
|
1883
|
|
|
|
|
|
|
|
|
1884
|
|
|
|
|
|
|
sub ISORoman8859_1ToMacRoman_clean |
|
1885
|
|
|
|
|
|
|
{ |
|
1886
|
0
|
0
|
|
0
|
0
|
0
|
return unless defined; ## Empty/undef values stay that way |
|
1887
|
|
|
|
|
|
|
|
|
1888
|
0
|
|
|
|
|
0
|
tr/\x80-\xFF/\xDE\xDF\xE2\xC4\xE3\xC9\xA0\xE0\xF6\xE4\xBA\xDC\xCE\xAD\xB3\xB2\xB0\xD4\xD5\xD2\xD3\xA5\xF8\xD1\xF7\xAA\xF9\xDD\xCF\xF0\xDA\xD9\xCA\xC1\xA2\xA3\xDB\xB4\xF5\xA4\xAC\xA9\xBB\xC7\xC2\xD0\xA8\xC3\xA1\xB1\xFA\xFE\xAB\xB5\xA6\xE1\xFC\xFF\xBC\xC8\xC5\xFD\xFB\xC0\xCB\xE7\xE5\xCC\x80\x81\xAE\x82\xE9\x83\xE6\xE8\xED\xEA\xEB\xEC\xC6\x84\xF1\xEE\xEF\xCD\x85\xD7\xAF\xF4\xF2\xF3\x86\xB7\xB8\xA7\x88\x87\x89\x8B\x8A\x8C\xBE\x8D\x8F\x8E\x90\x91\x93\x92\x94\x95\xB6\x96\x98\x97\x99\x9B\x9A\xD6\xBF\x9D\x9C\x9E\x9F\xBD\xB9\xD8/; |
|
1889
|
|
|
|
|
|
|
} |
|
1890
|
|
|
|
|
|
|
|
|
1891
|
|
|
|
|
|
|
sub MacRomanToISORoman8859_1 |
|
1892
|
|
|
|
|
|
|
{ |
|
1893
|
61
|
|
|
61
|
0
|
73
|
return($ {$_[0]} =~ |
|
|
61
|
|
|
|
|
142
|
|
|
1894
|
|
|
|
|
|
|
|
|
1895
|
|
|
|
|
|
|
tr/\x80-\xFF/\xC4\xC5\xC7\xC9\xD1\xD6\xDC\xE1\xE0\xE2\xE4\xE3\xE5\xE7\xE9\xE8\xEA\xEB\xED\xEC\xEE\xEF\xF1\xF3\xF2\xF4\xF6\xF5\xFA\xF9\xFB\xFC\x86\xB0\xA2\xA3\xA7\x95\xB6\xDF\xAE\xA9\x99\xB4\xA8\x8D\xC6\xD8\x90\xB1\x8F\x8E\xA5\xB5\xF0\xDD\xDE\xFE\x8A\xAA\xBA\xFD\xE6\xF8\xBF\xA1\xAC\xAF\x83\xBC\xD0\xAB\xBB\x85\xA0\xC0\xC3\xD5\x8C\x9C\xAD\x97\x93\x94\x91\x92\xF7\xD7\xFF\x9F\x9E\xA4\x8B\x9B\x80\x81\x87\xB7\x82\x84\x89\xC2\xCA\xC1\xCB\xC8\xCD\xCE\xCF\xCC\xD3\xD4\x9D\xD2\xDA\xDB\xD9\xA6\x88\x98\x96\x9A\xB2\xBE\xB8\xBD\xB3\xB9/); |
|
1896
|
|
|
|
|
|
|
|
|
1897
|
|
|
|
|
|
|
} |
|
1898
|
|
|
|
|
|
|
|
|
1899
|
|
|
|
|
|
|
sub MacRomanToISORoman8859_1_clean |
|
1900
|
|
|
|
|
|
|
{ |
|
1901
|
15
|
50
|
|
15
|
0
|
27
|
return unless defined; ## Empty/undef values stay that way |
|
1902
|
|
|
|
|
|
|
|
|
1903
|
15
|
|
|
|
|
33
|
tr/\x80-\xFF/\xC4\xC5\xC7\xC9\xD1\xD6\xDC\xE1\xE0\xE2\xE4\xE3\xE5\xE7\xE9\xE8\xEA\xEB\xED\xEC\xEE\xEF\xF1\xF3\xF2\xF4\xF6\xF5\xFA\xF9\xFB\xFC\x86\xB0\xA2\xA3\xA7\x95\xB6\xDF\xAE\xA9\x99\xB4\xA8\x8D\xC6\xD8\x90\xB1\x8F\x8E\xA5\xB5\xF0\xDD\xDE\xFE\x8A\xAA\xBA\xFD\xE6\xF8\xBF\xA1\xAC\xAF\x83\xBC\xD0\xAB\xBB\x85\xA0\xC0\xC3\xD5\x8C\x9C\xAD\x97\x93\x94\x91\x92\xF7\xD7\xFF\x9F\x9E\xA4\x8B\x9B\x80\x81\x87\xB7\x82\x84\x89\xC2\xCA\xC1\xCB\xC8\xCD\xCE\xCF\xCC\xD3\xD4\x9D\xD2\xDA\xDB\xD9\xA6\x88\x98\x96\x9A\xB2\xBE\xB8\xBD\xB3\xB9/; |
|
1904
|
|
|
|
|
|
|
|
|
1905
|
|
|
|
|
|
|
} |
|
1906
|
|
|
|
|
|
|
|
|
1907
|
|
|
|
|
|
|
=pod |
|
1908
|
|
|
|
|
|
|
|
|
1909
|
|
|
|
|
|
|
=head1 CALCULATIONS USING calc() |
|
1910
|
|
|
|
|
|
|
|
|
1911
|
|
|
|
|
|
|
## Calculate a new field's values based on two others |
|
1912
|
|
|
|
|
|
|
|
|
1913
|
|
|
|
|
|
|
$t->calc($Sub) ## Run $Sub for each row, with |
|
1914
|
|
|
|
|
|
|
## fields bound to local vars |
|
1915
|
|
|
|
|
|
|
|
|
1916
|
|
|
|
|
|
|
$t->calc($Sub, $Sel) ## Use these row nums |
|
1917
|
|
|
|
|
|
|
$t->calc($Sub, undef, $Fields) ## Use only these fields |
|
1918
|
|
|
|
|
|
|
$t->calc($Sub, $Sel, $Fields) ## Use custom rows, fields |
|
1919
|
|
|
|
|
|
|
|
|
1920
|
|
|
|
|
|
|
my $Col = $t->calc($Sub) ## Gather return vals in vector |
|
1921
|
|
|
|
|
|
|
|
|
1922
|
|
|
|
|
|
|
|
|
1923
|
|
|
|
|
|
|
## Example 1: Overwrite values in an existing column. |
|
1924
|
|
|
|
|
|
|
|
|
1925
|
|
|
|
|
|
|
$t->calc(sub{no strict 'vars'; $Size = (stat($Path))[7]}); |
|
1926
|
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
|
|
1928
|
|
|
|
|
|
|
## Example 2: Create empty column; fill fields 1 by 1 |
|
1929
|
|
|
|
|
|
|
|
|
1930
|
|
|
|
|
|
|
$t->col('PersonID'); |
|
1931
|
|
|
|
|
|
|
$t->calc(sub{no strict 'vars'; $PersonID = "$Last$First"}); |
|
1932
|
|
|
|
|
|
|
|
|
1933
|
|
|
|
|
|
|
|
|
1934
|
|
|
|
|
|
|
## Example 3: Calculate values; put into to table if desired |
|
1935
|
|
|
|
|
|
|
|
|
1936
|
|
|
|
|
|
|
$PersonID = $t->calc(sub{no strict 'vars'; "$Last$First"}); |
|
1937
|
|
|
|
|
|
|
$t->sel('PersonID', $PersonID); |
|
1938
|
|
|
|
|
|
|
|
|
1939
|
|
|
|
|
|
|
|
|
1940
|
|
|
|
|
|
|
## Example 4: Using fully-qualified variable names |
|
1941
|
|
|
|
|
|
|
|
|
1942
|
|
|
|
|
|
|
$t->calc(sub{$main::PersonID = "$main::Last$main::First"}); |
|
1943
|
|
|
|
|
|
|
|
|
1944
|
|
|
|
|
|
|
calc() runs your custom calculation subroutine $Sub once for every row |
|
1945
|
|
|
|
|
|
|
in the current selection() or other list of row numbers that you |
|
1946
|
|
|
|
|
|
|
specify in the optional $Sel argument. |
|
1947
|
|
|
|
|
|
|
|
|
1948
|
|
|
|
|
|
|
This lets you apply a complex calculation to every record in a table |
|
1949
|
|
|
|
|
|
|
in a single statement, storing the results in one or more columns, or |
|
1950
|
|
|
|
|
|
|
retrieving them as a list. |
|
1951
|
|
|
|
|
|
|
|
|
1952
|
|
|
|
|
|
|
Your custom subroutine may refer to the value in any field in the |
|
1953
|
|
|
|
|
|
|
current row by using a global variable with the field's name: |
|
1954
|
|
|
|
|
|
|
|
|
1955
|
|
|
|
|
|
|
For example, if the table has fields First, Last, and Age, then $Sub |
|
1956
|
|
|
|
|
|
|
may use, modify, and set the variables $First, $Last, $Age. (Also |
|
1957
|
|
|
|
|
|
|
known as $main::First, $main::Last, $main::Age). |
|
1958
|
|
|
|
|
|
|
|
|
1959
|
|
|
|
|
|
|
Modifying any of these specially-bound variables actually modifies the |
|
1960
|
|
|
|
|
|
|
data in the correct record and field within the table. |
|
1961
|
|
|
|
|
|
|
|
|
1962
|
|
|
|
|
|
|
By default, the fields available to $Sub are all fields in the table. |
|
1963
|
|
|
|
|
|
|
calc() must bind all the field names for you for each row, which can |
|
1964
|
|
|
|
|
|
|
be time-consuming for tables with very large numbers of fields. |
|
1965
|
|
|
|
|
|
|
|
|
1966
|
|
|
|
|
|
|
You can speed up the operation of calc() by listing only the fields |
|
1967
|
|
|
|
|
|
|
your $Sub needs in the optional parameter $Fields. Any field names |
|
1968
|
|
|
|
|
|
|
you don't mention won't be available to $Sub. Conversely, calc() will |
|
1969
|
|
|
|
|
|
|
run faster because it can bind only the fields you actually need. |
|
1970
|
|
|
|
|
|
|
|
|
1971
|
|
|
|
|
|
|
If you include non-existent fields in your custom $Fields list, calc() |
|
1972
|
|
|
|
|
|
|
creates them for you before $Sub runs the first time. Then your $Sub |
|
1973
|
|
|
|
|
|
|
can store field values into the new column, referring to it by name. |
|
1974
|
|
|
|
|
|
|
|
|
1975
|
|
|
|
|
|
|
Variables in $Sub are in the "main" package. So you should set $Sub |
|
1976
|
|
|
|
|
|
|
to use pacakge "main" in case the rest of your code is not in "main". |
|
1977
|
|
|
|
|
|
|
|
|
1978
|
|
|
|
|
|
|
Similarly, if you "use strict", Perl will complain about global |
|
1979
|
|
|
|
|
|
|
variables in $Sub. So you may need to assert "no strict 'vars'". |
|
1980
|
|
|
|
|
|
|
|
|
1981
|
|
|
|
|
|
|
{ package Foo; use strict; |
|
1982
|
|
|
|
|
|
|
|
|
1983
|
|
|
|
|
|
|
$t = ...; |
|
1984
|
|
|
|
|
|
|
$t->calc(sub {package main; no strict 'vars'; |
|
1985
|
|
|
|
|
|
|
$Age = int($Age)}); |
|
1986
|
|
|
|
|
|
|
} |
|
1987
|
|
|
|
|
|
|
|
|
1988
|
|
|
|
|
|
|
## Or this: |
|
1989
|
|
|
|
|
|
|
|
|
1990
|
|
|
|
|
|
|
{ package Foo; use strict; |
|
1991
|
|
|
|
|
|
|
|
|
1992
|
|
|
|
|
|
|
$t = ...; |
|
1993
|
|
|
|
|
|
|
{ package main; no strict 'vars'; |
|
1994
|
|
|
|
|
|
|
my $Sub = sub {$Age = int($Age)}; |
|
1995
|
|
|
|
|
|
|
} |
|
1996
|
|
|
|
|
|
|
$t->calc($Sub); |
|
1997
|
|
|
|
|
|
|
} |
|
1998
|
|
|
|
|
|
|
|
|
1999
|
|
|
|
|
|
|
You may be able to get around both problems more easily by prefixing |
|
2000
|
|
|
|
|
|
|
each variable reference in $Sub with "main::". This takes care of the |
|
2001
|
|
|
|
|
|
|
package name issue and bypasses "use strict" at the same time, at the |
|
2002
|
|
|
|
|
|
|
slight cost of making the calculation itself a bit harder to read. |
|
2003
|
|
|
|
|
|
|
|
|
2004
|
|
|
|
|
|
|
$t->calc(sub {$main::Age = int($main::Age)}); ## OK in any package |
|
2005
|
|
|
|
|
|
|
|
|
2006
|
|
|
|
|
|
|
In addition to the field names, the following three values are defined |
|
2007
|
|
|
|
|
|
|
during each invocation of $Sub: |
|
2008
|
|
|
|
|
|
|
|
|
2009
|
|
|
|
|
|
|
$_r ($main::_r) -- the row number in the entire table |
|
2010
|
|
|
|
|
|
|
$_s ($main::_s) -- the item number in selection or $Recs |
|
2011
|
|
|
|
|
|
|
$_t ($main::_t) -- the table object itself |
|
2012
|
|
|
|
|
|
|
|
|
2013
|
|
|
|
|
|
|
You could use these values to print diagnostic information or to |
|
2014
|
|
|
|
|
|
|
access any of the data, parameters, or methods of the table itself |
|
2015
|
|
|
|
|
|
|
from within $Sub. Or you could even calculate field values using $_r |
|
2016
|
|
|
|
|
|
|
or $_s. |
|
2017
|
|
|
|
|
|
|
|
|
2018
|
|
|
|
|
|
|
For example, after searching & sorting, you could make a field which |
|
2019
|
|
|
|
|
|
|
preserves the resulting sort order for future reference: |
|
2020
|
|
|
|
|
|
|
|
|
2021
|
|
|
|
|
|
|
$t->col('Ranking'); ## Create the empty column first. |
|
2022
|
|
|
|
|
|
|
$t->calc(sub{$main::Ranking = $main::_s}); |
|
2023
|
|
|
|
|
|
|
|
|
2024
|
|
|
|
|
|
|
This last example is equivalent to: |
|
2025
|
|
|
|
|
|
|
|
|
2026
|
|
|
|
|
|
|
$t->sel(Ranking => [0 .. $#{$t->selection()}]); ## See sel() below |
|
2027
|
|
|
|
|
|
|
|
|
2028
|
|
|
|
|
|
|
|
|
2029
|
|
|
|
|
|
|
=head1 "MANUAL" CALCULATIONS |
|
2030
|
|
|
|
|
|
|
|
|
2031
|
|
|
|
|
|
|
calc() (see previous secion) is the briefer, more elegant way to do |
|
2032
|
|
|
|
|
|
|
batch calculations on entire columns in a table, but it can be |
|
2033
|
|
|
|
|
|
|
slightly slower than doing the calculations yourself. |
|
2034
|
|
|
|
|
|
|
|
|
2035
|
|
|
|
|
|
|
If you have extremely large tables, and you notice the processing time |
|
2036
|
|
|
|
|
|
|
for your calculations taking more than a second, you might want to |
|
2037
|
|
|
|
|
|
|
rewrite your calculations to use the more efficient techniques shown |
|
2038
|
|
|
|
|
|
|
here. |
|
2039
|
|
|
|
|
|
|
|
|
2040
|
|
|
|
|
|
|
You will often need to create new calculated columns based on one or |
|
2041
|
|
|
|
|
|
|
more existing ones, and then either insert the columns back in the |
|
2042
|
|
|
|
|
|
|
tables or use them for further calculations or indexing. |
|
2043
|
|
|
|
|
|
|
|
|
2044
|
|
|
|
|
|
|
Examples 1a and 1b create a new field 'NameOK' containing either the |
|
2045
|
|
|
|
|
|
|
string "OK" or undef (empty) depending on whether the field 'Name' is |
|
2046
|
|
|
|
|
|
|
empty. Just use map() to iterate over the existing values in the |
|
2047
|
|
|
|
|
|
|
other column, binding $_ to each value in turn. |
|
2048
|
|
|
|
|
|
|
|
|
2049
|
|
|
|
|
|
|
### Example 1a: Calculation based on one other column |
|
2050
|
|
|
|
|
|
|
|
|
2051
|
|
|
|
|
|
|
## Retrieve column |
|
2052
|
|
|
|
|
|
|
my $Name = $t->col('Name'); |
|
2053
|
|
|
|
|
|
|
|
|
2054
|
|
|
|
|
|
|
## Make new column |
|
2055
|
|
|
|
|
|
|
my $NameOK = [map {!!length && 'OK'} @$Name]; |
|
2056
|
|
|
|
|
|
|
|
|
2057
|
|
|
|
|
|
|
## Insert column back into table: |
|
2058
|
|
|
|
|
|
|
$t->col(NameOK => $NameOK); |
|
2059
|
|
|
|
|
|
|
|
|
2060
|
|
|
|
|
|
|
### Example 1b: Same calculation, in a single statement: |
|
2061
|
|
|
|
|
|
|
|
|
2062
|
|
|
|
|
|
|
$t->col(NameOK => [map {!!length && 'OK'} @{$t->col('Name')}]); |
|
2063
|
|
|
|
|
|
|
|
|
2064
|
|
|
|
|
|
|
In order to iterate over MULTIPLE columns at once, you need a list of |
|
2065
|
|
|
|
|
|
|
the row numbers generated by $t->all() so you can index the two |
|
2066
|
|
|
|
|
|
|
columns in tandem. Then, you use map to bind $_ to each row number, |
|
2067
|
|
|
|
|
|
|
and then use the expression $t->col($ColName)->[$_] to retreive each |
|
2068
|
|
|
|
|
|
|
value. |
|
2069
|
|
|
|
|
|
|
|
|
2070
|
|
|
|
|
|
|
Examples 2a and 2b demonstrate this method. They create a new field |
|
2071
|
|
|
|
|
|
|
'FullName' which is a string joining the values in the 'First' and |
|
2072
|
|
|
|
|
|
|
'Last' columns with a space between. |
|
2073
|
|
|
|
|
|
|
|
|
2074
|
|
|
|
|
|
|
### Example 2a: Calculation based on multiple columns |
|
2075
|
|
|
|
|
|
|
|
|
2076
|
|
|
|
|
|
|
## Retrieve columns |
|
2077
|
|
|
|
|
|
|
my $First = $t->col('First'); |
|
2078
|
|
|
|
|
|
|
my $Last = $t->col('Last' ); |
|
2079
|
|
|
|
|
|
|
|
|
2080
|
|
|
|
|
|
|
## Retreive row nums |
|
2081
|
|
|
|
|
|
|
my $Nums = $t->all(); |
|
2082
|
|
|
|
|
|
|
|
|
2083
|
|
|
|
|
|
|
## Calculate a new column based on two others |
|
2084
|
|
|
|
|
|
|
my $Full = [map {"$First->[$_] $Last->[$_]"} @$Nums]; |
|
2085
|
|
|
|
|
|
|
|
|
2086
|
|
|
|
|
|
|
## Add new column to the table |
|
2087
|
|
|
|
|
|
|
$t->col(FullName => $Full); |
|
2088
|
|
|
|
|
|
|
|
|
2089
|
|
|
|
|
|
|
### Example 2b: Same calculation, in a single statement: |
|
2090
|
|
|
|
|
|
|
|
|
2091
|
|
|
|
|
|
|
$t->col(FullName => |
|
2092
|
|
|
|
|
|
|
[map {"$t->col('First')->[$_] t->col('Last')->[$_]"} |
|
2093
|
|
|
|
|
|
|
@{$t->all()}]); |
|
2094
|
|
|
|
|
|
|
|
|
2095
|
|
|
|
|
|
|
In examples 1 and 2, you create entirely new columns and then add or |
|
2096
|
|
|
|
|
|
|
replace them in the table. |
|
2097
|
|
|
|
|
|
|
|
|
2098
|
|
|
|
|
|
|
Using the approach in Examples 3a and 3b, you can assign calculated |
|
2099
|
|
|
|
|
|
|
results directly into each value of an existing column as you go. |
|
2100
|
|
|
|
|
|
|
|
|
2101
|
|
|
|
|
|
|
## Example 3a: Calculate by assigning directly into fields... |
|
2102
|
|
|
|
|
|
|
|
|
2103
|
|
|
|
|
|
|
my $A = $t->col->('A'); ## This column will be modified |
|
2104
|
|
|
|
|
|
|
my $B = $t->col->('B'); |
|
2105
|
|
|
|
|
|
|
my $C = $t->col->('C'); |
|
2106
|
|
|
|
|
|
|
|
|
2107
|
|
|
|
|
|
|
foreach @($t->all()) {$A->[$_] = $B->[$_] + $C->[$_];} |
|
2108
|
|
|
|
|
|
|
|
|
2109
|
|
|
|
|
|
|
## Example 3b: Same calculation, in a single statement: |
|
2110
|
|
|
|
|
|
|
|
|
2111
|
|
|
|
|
|
|
foreach @($t->all()) {($t->col('A')->[$_] = |
|
2112
|
|
|
|
|
|
|
$t->col('B')->[$_] + |
|
2113
|
|
|
|
|
|
|
$t->col('C')->[$_])}; |
|
2114
|
|
|
|
|
|
|
|
|
2115
|
|
|
|
|
|
|
Before writing your code, think about which calculation paradigms best |
|
2116
|
|
|
|
|
|
|
suit your needs and your data set. |
|
2117
|
|
|
|
|
|
|
|
|
2118
|
|
|
|
|
|
|
Just as Perl Hackers know: There's More Than One Way To Do It! |
|
2119
|
|
|
|
|
|
|
|
|
2120
|
|
|
|
|
|
|
=cut |
|
2121
|
|
|
|
|
|
|
|
|
2122
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
|
2123
|
|
|
|
|
|
|
|
|
2124
|
|
|
|
|
|
|
sub calc |
|
2125
|
|
|
|
|
|
|
{ |
|
2126
|
|
|
|
|
|
|
## We operate in package main for this subroutine so the local |
|
2127
|
|
|
|
|
|
|
## object and row number can be available to the caller's $Sub. |
|
2128
|
|
|
|
|
|
|
|
|
2129
|
|
|
|
|
|
|
## The following local vars will be available to $Sub |
|
2130
|
|
|
|
|
|
|
## $_r ($main::_r) -- the row number in the entire table |
|
2131
|
|
|
|
|
|
|
## $_s ($main::_s) -- the row number in selection or $Recs |
|
2132
|
|
|
|
|
|
|
## $_t ($main::_t) -- the table object itself |
|
2133
|
|
|
|
|
|
|
|
|
2134
|
|
|
|
|
|
|
package main; |
|
2135
|
1
|
|
|
1
|
|
13
|
use vars qw($_r $_s $_t); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
139
|
|
|
2136
|
3
|
|
|
3
|
0
|
37
|
local ($_r, $_s, $_t); |
|
2137
|
|
|
|
|
|
|
|
|
2138
|
3
|
|
|
|
|
6
|
$_t = shift; |
|
2139
|
3
|
|
|
|
|
7
|
my ($Sub, $Recs, $Fields) = @_; |
|
2140
|
|
|
|
|
|
|
|
|
2141
|
|
|
|
|
|
|
## These optional params default to current field and current sel |
|
2142
|
|
|
|
|
|
|
|
|
2143
|
3
|
|
33
|
|
|
21
|
$Recs ||= $_t->selection(); |
|
2144
|
3
|
|
33
|
|
|
16
|
$Fields ||= $_t->fieldlist_all(); |
|
2145
|
|
|
|
|
|
|
|
|
2146
|
|
|
|
|
|
|
## Local copy of symbol table. Didn't seem to help. Odd. |
|
2147
|
|
|
|
|
|
|
## local %main::; |
|
2148
|
|
|
|
|
|
|
|
|
2149
|
|
|
|
|
|
|
## We'll build a column of return values from $Sub if needed. |
|
2150
|
3
|
|
|
|
|
5
|
my $WantVals = defined(wantarray); |
|
2151
|
3
|
50
|
|
|
|
10
|
my $Vals = $_t->col_empty() if $WantVals; |
|
2152
|
|
|
|
|
|
|
|
|
2153
|
|
|
|
|
|
|
## Call col() on each field in list to make sure it exists. |
|
2154
|
3
|
|
|
|
|
9
|
foreach (@$Fields) {$_t->col($_)}; |
|
|
15
|
|
|
|
|
31
|
|
|
2155
|
|
|
|
|
|
|
|
|
2156
|
3
|
|
|
|
|
12
|
foreach $_s (0..$#$Recs) |
|
2157
|
|
|
|
|
|
|
{ |
|
2158
|
9
|
|
|
|
|
28
|
$_r = $Recs->[$_s]; |
|
2159
|
|
|
|
|
|
|
|
|
2160
|
|
|
|
|
|
|
## Bind $FieldName1, $FieldName2, (etc. for each field name in |
|
2161
|
|
|
|
|
|
|
## $Fields) point to address of the current value for that |
|
2162
|
|
|
|
|
|
|
## field in this record. |
|
2163
|
|
|
|
|
|
|
|
|
2164
|
1
|
|
|
1
|
|
5
|
no strict 'refs'; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
4377
|
|
|
2165
|
9
|
|
|
|
|
18
|
foreach my $F (@$Fields) {*{$F} = \ $_t->{$F}->[$_r]}; |
|
|
45
|
|
|
|
|
64
|
|
|
|
45
|
|
|
|
|
113
|
|
|
2166
|
|
|
|
|
|
|
|
|
2167
|
|
|
|
|
|
|
## Now $Sub may refer to $_r, $_s, $_t, and ${any field name} |
|
2168
|
|
|
|
|
|
|
|
|
2169
|
|
|
|
|
|
|
## Call $Sub and capture return values iff caller wants them |
|
2170
|
9
|
50
|
|
|
|
30
|
($WantVals ? $Vals->[$_r] = &$Sub() : &$Sub()); |
|
2171
|
|
|
|
|
|
|
} |
|
2172
|
|
|
|
|
|
|
|
|
2173
|
|
|
|
|
|
|
## Return scalar column ref unless return context is undef |
|
2174
|
3
|
50
|
|
|
|
22
|
return($WantVals ? $Vals : ()); |
|
2175
|
|
|
|
|
|
|
} |
|
2176
|
|
|
|
|
|
|
|
|
2177
|
|
|
|
|
|
|
=pod |
|
2178
|
|
|
|
|
|
|
|
|
2179
|
|
|
|
|
|
|
=head1 INDEXES |
|
2180
|
|
|
|
|
|
|
|
|
2181
|
|
|
|
|
|
|
## Make indexes of columns or just selected data in columns |
|
2182
|
|
|
|
|
|
|
|
|
2183
|
|
|
|
|
|
|
my $Index1 = $t->index_all($Key); ## entire column |
|
2184
|
|
|
|
|
|
|
my $Index2 = $t->index_sel($Key); ## selected data only |
|
2185
|
|
|
|
|
|
|
|
|
2186
|
|
|
|
|
|
|
## Make hashes of 2 columns or just selected data in columns |
|
2187
|
|
|
|
|
|
|
|
|
2188
|
|
|
|
|
|
|
my $Index1 = $t->hash_all($KeyFld, $ValFld); ## entire column |
|
2189
|
|
|
|
|
|
|
my $Index2 = $t->hash_sel($KeyFld, $ValFld); ## selected data |
|
2190
|
|
|
|
|
|
|
|
|
2191
|
|
|
|
|
|
|
index_all() creates and returns a hash (reference) that maps keys |
|
2192
|
|
|
|
|
|
|
found in the column called $Key to corresponding record numbers. |
|
2193
|
|
|
|
|
|
|
|
|
2194
|
|
|
|
|
|
|
Ideally, values in $Key would be unique (that's up to you). |
|
2195
|
|
|
|
|
|
|
|
|
2196
|
|
|
|
|
|
|
If any values in $Key are NOT unique, then later values (higher record |
|
2197
|
|
|
|
|
|
|
numbers) will be ignored. |
|
2198
|
|
|
|
|
|
|
|
|
2199
|
|
|
|
|
|
|
index_sel() creates and returns a hash (ref) that maps keys found in |
|
2200
|
|
|
|
|
|
|
the SELECTED RECORDS of column $Key to corresponding record numbers. |
|
2201
|
|
|
|
|
|
|
|
|
2202
|
|
|
|
|
|
|
Any keys in unselected records are ignored. Otherwise, the behavior |
|
2203
|
|
|
|
|
|
|
is equivalent to index_all(). |
|
2204
|
|
|
|
|
|
|
|
|
2205
|
|
|
|
|
|
|
hash_all() and hash_sel() are similar, except they create and return |
|
2206
|
|
|
|
|
|
|
hashes whose keys are taken from column $KeyFld, but whose values are |
|
2207
|
|
|
|
|
|
|
from $ValFld in the corresponding records. |
|
2208
|
|
|
|
|
|
|
|
|
2209
|
|
|
|
|
|
|
So, for example, imagine you have a tab-delimited file on disk with |
|
2210
|
|
|
|
|
|
|
just a single tab per line (2 fields) and no header row. The entries |
|
2211
|
|
|
|
|
|
|
on the left side of the tab on each line are keys and the right side |
|
2212
|
|
|
|
|
|
|
are values. You could convert that file into a hash in memory like |
|
2213
|
|
|
|
|
|
|
this: |
|
2214
|
|
|
|
|
|
|
|
|
2215
|
|
|
|
|
|
|
my $t = Data::CTable->new({_HeaderRow=>0, _FieldList=>[qw(F1 F2)]}, |
|
2216
|
|
|
|
|
|
|
"DeptLookup.txt"); |
|
2217
|
|
|
|
|
|
|
|
|
2218
|
|
|
|
|
|
|
my $DeptLookup = $t->hash_all(qw(F1 F2)); |
|
2219
|
|
|
|
|
|
|
|
|
2220
|
|
|
|
|
|
|
=head2 Reverse indexes |
|
2221
|
|
|
|
|
|
|
|
|
2222
|
|
|
|
|
|
|
If you'd like an index mapping record number to key, just get |
|
2223
|
|
|
|
|
|
|
$t->col($Key). That's what the data columns in Data::CTable are. |
|
2224
|
|
|
|
|
|
|
|
|
2225
|
|
|
|
|
|
|
=cut |
|
2226
|
|
|
|
|
|
|
|
|
2227
|
|
|
|
|
|
|
sub index_all |
|
2228
|
|
|
|
|
|
|
{ |
|
2229
|
2
|
|
|
2
|
0
|
8
|
my $this = shift; |
|
2230
|
2
|
|
|
|
|
4
|
my ($Key) = @_; |
|
2231
|
|
|
|
|
|
|
|
|
2232
|
2
|
|
|
|
|
4
|
my $Index = {}; @$Index{reverse @{$this->col($Key)}} = reverse @{$this->all()}; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
6
|
|
|
|
2
|
|
|
|
|
5
|
|
|
2233
|
|
|
|
|
|
|
|
|
2234
|
2
|
|
|
|
|
16
|
return($Index); |
|
2235
|
|
|
|
|
|
|
} |
|
2236
|
|
|
|
|
|
|
|
|
2237
|
|
|
|
|
|
|
sub index_sel |
|
2238
|
|
|
|
|
|
|
{ |
|
2239
|
2
|
|
|
2
|
0
|
6
|
my $this = shift; |
|
2240
|
2
|
|
|
|
|
3
|
my ($Key) = @_; |
|
2241
|
|
|
|
|
|
|
|
|
2242
|
2
|
|
|
|
|
5
|
my $Index = {}; @$Index{reverse @{$this->sel($Key)}} = reverse @{$this->selection()}; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
6
|
|
|
2243
|
|
|
|
|
|
|
|
|
2244
|
2
|
|
|
|
|
15
|
return($Index); |
|
2245
|
|
|
|
|
|
|
} |
|
2246
|
|
|
|
|
|
|
|
|
2247
|
|
|
|
|
|
|
|
|
2248
|
|
|
|
|
|
|
sub hash_all |
|
2249
|
|
|
|
|
|
|
{ |
|
2250
|
2
|
|
|
2
|
0
|
3
|
my $this = shift; |
|
2251
|
2
|
|
|
|
|
5
|
my ($Key, $Val) = @_; |
|
2252
|
|
|
|
|
|
|
|
|
2253
|
2
|
|
|
|
|
3
|
my $Hash = {}; @$Hash{reverse @{$this->col($Key)}} = reverse @{$this->col($Val)}; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
6
|
|
|
2254
|
|
|
|
|
|
|
|
|
2255
|
2
|
|
|
|
|
15
|
return($Hash); |
|
2256
|
|
|
|
|
|
|
} |
|
2257
|
|
|
|
|
|
|
|
|
2258
|
|
|
|
|
|
|
sub hash_sel |
|
2259
|
|
|
|
|
|
|
{ |
|
2260
|
2
|
|
|
2
|
0
|
4
|
my $this = shift; |
|
2261
|
2
|
|
|
|
|
4
|
my ($Key, $Val) = @_; |
|
2262
|
|
|
|
|
|
|
|
|
2263
|
2
|
|
|
|
|
5
|
my $Hash = {}; @$Hash{reverse @{$this->sel($Key)}} = reverse @{$this->sel($Val)}; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
7
|
|
|
|
2
|
|
|
|
|
6
|
|
|
2264
|
|
|
|
|
|
|
|
|
2265
|
2
|
|
|
|
|
15
|
return($Hash); |
|
2266
|
|
|
|
|
|
|
} |
|
2267
|
|
|
|
|
|
|
|
|
2268
|
|
|
|
|
|
|
|
|
2269
|
|
|
|
|
|
|
=pod |
|
2270
|
|
|
|
|
|
|
|
|
2271
|
|
|
|
|
|
|
=head1 DATA ROWS (RECORDS) |
|
2272
|
|
|
|
|
|
|
|
|
2273
|
|
|
|
|
|
|
## Getting or setting rows / records |
|
2274
|
|
|
|
|
|
|
|
|
2275
|
|
|
|
|
|
|
$t->row($Num) ## Get a row or make empty one. |
|
2276
|
|
|
|
|
|
|
$t->row_get($Num) |
|
2277
|
|
|
|
|
|
|
|
|
2278
|
|
|
|
|
|
|
$t->row($Num, $HashRef) ## Set all of a row all at once. |
|
2279
|
|
|
|
|
|
|
$t->row_set($Num, $HashRef) |
|
2280
|
|
|
|
|
|
|
|
|
2281
|
|
|
|
|
|
|
$t->row_set($Num, undef) ## Delete a row completely |
|
2282
|
|
|
|
|
|
|
$t->row_delete($Num) |
|
2283
|
|
|
|
|
|
|
$t->row_delete($Beg, $End)## Delete a range of rows |
|
2284
|
|
|
|
|
|
|
|
|
2285
|
|
|
|
|
|
|
$t->row_move($Old, $New) ## Move a row to before $New |
|
2286
|
|
|
|
|
|
|
|
|
2287
|
|
|
|
|
|
|
$t->row_empty() ## An empty hash |
|
2288
|
|
|
|
|
|
|
$t->row_exists($Num) ## True if $Num < $t->length() |
|
2289
|
|
|
|
|
|
|
|
|
2290
|
|
|
|
|
|
|
$t->rows($RowList) ## Get list of multiple row nums |
|
2291
|
|
|
|
|
|
|
|
|
2292
|
|
|
|
|
|
|
$t->row_list($Num) ## Get row vals as a list |
|
2293
|
|
|
|
|
|
|
$t->row_list($Num, $Fields) ## Get row vals: specified fields |
|
2294
|
|
|
|
|
|
|
|
|
2295
|
|
|
|
|
|
|
$t->row_list_set($Num, undef, $Vals) ## Set row vals as a list |
|
2296
|
|
|
|
|
|
|
$t->row_list_set($Num, $Fields, $Vals) ## Set row vals as a list |
|
2297
|
|
|
|
|
|
|
$t->row_list_set($Num, $Fields) ## Set vals to empty/undef |
|
2298
|
|
|
|
|
|
|
|
|
2299
|
|
|
|
|
|
|
Usually, when working with Data::CTable objects, you are operating on |
|
2300
|
|
|
|
|
|
|
entire columns or tables at a time (after all: any transformation you |
|
2301
|
|
|
|
|
|
|
do on one record you almost always want to do on all records or all |
|
2302
|
|
|
|
|
|
|
selected ones). |
|
2303
|
|
|
|
|
|
|
|
|
2304
|
|
|
|
|
|
|
You should very rarely need to access data by retrieving rows or |
|
2305
|
|
|
|
|
|
|
setting rows, moving them around individually, and so on. (It's much |
|
2306
|
|
|
|
|
|
|
cleaner, and much more efficient to manipulate the selection() (list |
|
2307
|
|
|
|
|
|
|
of selected row numbers) instead -- just delete a row number from the |
|
2308
|
|
|
|
|
|
|
selection, for example, and then for most operations it's almost as if |
|
2309
|
|
|
|
|
|
|
the row is gone from the table, except the data is really still |
|
2310
|
|
|
|
|
|
|
there.) |
|
2311
|
|
|
|
|
|
|
|
|
2312
|
|
|
|
|
|
|
However, if on rare occasions you really do need direct row |
|
2313
|
|
|
|
|
|
|
operations, you're reading the right section. |
|
2314
|
|
|
|
|
|
|
|
|
2315
|
|
|
|
|
|
|
A row is generally accessed as a hash. The hash you provide or get |
|
2316
|
|
|
|
|
|
|
back is not saved by the object in any way. Data values are always |
|
2317
|
|
|
|
|
|
|
copied in or out of it, so you always "own" the hash. |
|
2318
|
|
|
|
|
|
|
|
|
2319
|
|
|
|
|
|
|
Rows are specified by $Num -- the row number with in the unsorted |
|
2320
|
|
|
|
|
|
|
columns (the raw data in the table). These numbers are just array |
|
2321
|
|
|
|
|
|
|
indices into the data columns, and so their legal range is: |
|
2322
|
|
|
|
|
|
|
|
|
2323
|
|
|
|
|
|
|
[0 .. ($t->length() - 1)] ## (Zero-based row numbering.) |
|
2324
|
|
|
|
|
|
|
|
|
2325
|
|
|
|
|
|
|
The row hash (or "record") has keys that are field names and values |
|
2326
|
|
|
|
|
|
|
that are copies of the scalar values stored in the data columns within |
|
2327
|
|
|
|
|
|
|
the table. |
|
2328
|
|
|
|
|
|
|
|
|
2329
|
|
|
|
|
|
|
row() always copies only the fields in fieldlist(), except for |
|
2330
|
|
|
|
|
|
|
row_list() which allows you to specify an optional $Fields parameter |
|
2331
|
|
|
|
|
|
|
which can override the current fieldlist(). |
|
2332
|
|
|
|
|
|
|
|
|
2333
|
|
|
|
|
|
|
If the fieldlist happens to be a subset of all fields, but you really |
|
2334
|
|
|
|
|
|
|
want to get all fields in your record, then call fieldlist_set(0) |
|
2335
|
|
|
|
|
|
|
first to permanently or temporarily delete it. |
|
2336
|
|
|
|
|
|
|
|
|
2337
|
|
|
|
|
|
|
row() and row_get() always return a hash. |
|
2338
|
|
|
|
|
|
|
|
|
2339
|
|
|
|
|
|
|
row($Num, $Hash), row_set() take a hash and set just the fields you |
|
2340
|
|
|
|
|
|
|
specify in the hash (in the given row of course). Any non-existent |
|
2341
|
|
|
|
|
|
|
field names in the hash are created, so be careful. |
|
2342
|
|
|
|
|
|
|
|
|
2343
|
|
|
|
|
|
|
In fact, in general with either getting or setting rows, any |
|
2344
|
|
|
|
|
|
|
non-existent fields mentioned will be created for you (by internally |
|
2345
|
|
|
|
|
|
|
calling col()). So you could build a whole table of 100 rows by |
|
2346
|
|
|
|
|
|
|
starting with an empty, new, table and setting row 99 from a hash that |
|
2347
|
|
|
|
|
|
|
gives the field names. |
|
2348
|
|
|
|
|
|
|
|
|
2349
|
|
|
|
|
|
|
Setting a row number higher than any existing row number with row(), |
|
2350
|
|
|
|
|
|
|
row_set() or row_force() will automatically set the new length of the |
|
2351
|
|
|
|
|
|
|
entire table to match (extending all the columns with empty rows as |
|
2352
|
|
|
|
|
|
|
necessary). |
|
2353
|
|
|
|
|
|
|
|
|
2354
|
|
|
|
|
|
|
IMPORTANT: IF YOU SIMPLY MUST ADD ROWS SEQUENTIALLY, do not let the |
|
2355
|
|
|
|
|
|
|
table auto-extend by one with each row you set. This is slow and gets |
|
2356
|
|
|
|
|
|
|
rapidly slower if there's lots of data because the arrays holding the |
|
2357
|
|
|
|
|
|
|
data columns will keep getting reallocated on every insert. Instead, |
|
2358
|
|
|
|
|
|
|
first pre-extend the table to your highest row number by calling |
|
2359
|
|
|
|
|
|
|
length($Len), and then set your rows. Or easier: if convenient just |
|
2360
|
|
|
|
|
|
|
set your rows starting with the highest-numbered one first. If you |
|
2361
|
|
|
|
|
|
|
don't know how many you'll have, guess or estimate and pre-extend to |
|
2362
|
|
|
|
|
|
|
the estimated number and then cut back later. This will be faster |
|
2363
|
|
|
|
|
|
|
than extending all columns by one each time. |
|
2364
|
|
|
|
|
|
|
|
|
2365
|
|
|
|
|
|
|
row_delete() removes a row or range of rows completely from the table. |
|
2366
|
|
|
|
|
|
|
Any rows above the deleted ones will move down and the table's |
|
2367
|
|
|
|
|
|
|
length() will decrease. If the data columns are very large, this |
|
2368
|
|
|
|
|
|
|
could be a bit slow because a lot of data could be moved around. The |
|
2369
|
|
|
|
|
|
|
low and high row numbers will be limited for you to 0 and length() - |
|
2370
|
|
|
|
|
|
|
1, respectively. Null ranges are OK and are silently ignored. The |
|
2371
|
|
|
|
|
|
|
range is inclusive, so to delete just row 99, call row_delete(99) or |
|
2372
|
|
|
|
|
|
|
row_delete(99,99). |
|
2373
|
|
|
|
|
|
|
|
|
2374
|
|
|
|
|
|
|
EFFICIENCY NOTE: Don't call row_delete() to remove lots of individual |
|
2375
|
|
|
|
|
|
|
rows. Instead, select those row numbers by setting the selection (if |
|
2376
|
|
|
|
|
|
|
not already selected), and then invert the selection using |
|
2377
|
|
|
|
|
|
|
selection_invert(), so the undesired rows are deselected, and then use |
|
2378
|
|
|
|
|
|
|
the cull() method to rewrite the entire table at once. The deselected |
|
2379
|
|
|
|
|
|
|
rows will be omitted very efficiently this way. |
|
2380
|
|
|
|
|
|
|
|
|
2381
|
|
|
|
|
|
|
row_move() moves a row from its $Old row number to the position before |
|
2382
|
|
|
|
|
|
|
the row currently in row $New (specify $New = length() to move the row |
|
2383
|
|
|
|
|
|
|
to the end). Again, in shuffling data in columns, lots of data could |
|
2384
|
|
|
|
|
|
|
get moved around by this operation, so expect it to be slow. If as |
|
2385
|
|
|
|
|
|
|
with row_delete(), if you will be doing several moves, consider |
|
2386
|
|
|
|
|
|
|
building an appropriate selection() first, and then using cull() |
|
2387
|
|
|
|
|
|
|
instead. |
|
2388
|
|
|
|
|
|
|
|
|
2389
|
|
|
|
|
|
|
Using row_delete() and row_move() to shift records around changes the |
|
2390
|
|
|
|
|
|
|
record numbers of the affected records and many others in the table. |
|
2391
|
|
|
|
|
|
|
The record numbers in the custom selection, if any, are updated to |
|
2392
|
|
|
|
|
|
|
reflect these changes, so the records that were selected before will |
|
2393
|
|
|
|
|
|
|
still be selected after the move (except those that were deleted of |
|
2394
|
|
|
|
|
|
|
course). If you had a private copy of the selection, your copy will |
|
2395
|
|
|
|
|
|
|
likely become outdated after these operations. You should get it |
|
2396
|
|
|
|
|
|
|
again by calling selection(). |
|
2397
|
|
|
|
|
|
|
|
|
2398
|
|
|
|
|
|
|
row_empty() returns a hash whose keys are the entries in fieldlist() |
|
2399
|
|
|
|
|
|
|
and whose values are undef. (You could use it to fill in values |
|
2400
|
|
|
|
|
|
|
before calling row_set()). Note: in this class, row_empty() does |
|
2401
|
|
|
|
|
|
|
exactly the same thing as fieldlist_hash() when the latter is called |
|
2402
|
|
|
|
|
|
|
with no arguments. |
|
2403
|
|
|
|
|
|
|
|
|
2404
|
|
|
|
|
|
|
row_exists() returns true if C<(($Num E= 0) && ($Num E $t-Elength()))>. |
|
2405
|
|
|
|
|
|
|
|
|
2406
|
|
|
|
|
|
|
rows() calls row() for each row num in a list and returns a list of |
|
2407
|
|
|
|
|
|
|
the resulting hashes. |
|
2408
|
|
|
|
|
|
|
|
|
2409
|
|
|
|
|
|
|
row_list() gets row values as a list instead of a hash. They appear |
|
2410
|
|
|
|
|
|
|
in the order specified in fieldlist() unless you supply an optional |
|
2411
|
|
|
|
|
|
|
$Fields parameter listing the fields you want to get. |
|
2412
|
|
|
|
|
|
|
|
|
2413
|
|
|
|
|
|
|
row_list_set() sets row values as a list instead of a hash. Pass your |
|
2414
|
|
|
|
|
|
|
own $Fields list or undef and fieldlist() will be used. $Values |
|
2415
|
|
|
|
|
|
|
should be a list with the same number of values as fields expected; |
|
2416
|
|
|
|
|
|
|
any shortage will result in undef/empty values being set. |
|
2417
|
|
|
|
|
|
|
|
|
2418
|
|
|
|
|
|
|
=cut |
|
2419
|
|
|
|
|
|
|
|
|
2420
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
|
2421
|
|
|
|
|
|
|
|
|
2422
|
|
|
|
|
|
|
sub row |
|
2423
|
|
|
|
|
|
|
{ |
|
2424
|
69
|
|
|
69
|
0
|
139
|
my $this = shift; |
|
2425
|
69
|
|
|
|
|
95
|
my ($Num, $Row) = @_; |
|
2426
|
|
|
|
|
|
|
|
|
2427
|
|
|
|
|
|
|
## Set if specified. |
|
2428
|
69
|
50
|
|
|
|
141
|
return($this->row_set($Num, $Row)) if defined($Row); |
|
2429
|
|
|
|
|
|
|
|
|
2430
|
|
|
|
|
|
|
## Else get. |
|
2431
|
69
|
|
|
|
|
132
|
return($this->row_get($Num)); |
|
2432
|
|
|
|
|
|
|
} |
|
2433
|
|
|
|
|
|
|
|
|
2434
|
|
|
|
|
|
|
sub row_get |
|
2435
|
|
|
|
|
|
|
{ |
|
2436
|
69
|
|
|
69
|
0
|
81
|
my $this = shift; |
|
2437
|
69
|
|
|
|
|
94
|
my ($Num) = @_; |
|
2438
|
|
|
|
|
|
|
|
|
2439
|
69
|
|
|
|
|
123
|
my $Fields = $this->fieldlist(); |
|
2440
|
69
|
|
|
|
|
100
|
my $Row = {}; @$Row{@$Fields} = map {$this->col($_)->[$Num]} @$Fields; |
|
|
69
|
|
|
|
|
115
|
|
|
|
270
|
|
|
|
|
532
|
|
|
2441
|
|
|
|
|
|
|
|
|
2442
|
69
|
|
|
|
|
615
|
return($Row); |
|
2443
|
|
|
|
|
|
|
} |
|
2444
|
|
|
|
|
|
|
|
|
2445
|
|
|
|
|
|
|
sub row_set |
|
2446
|
|
|
|
|
|
|
{ |
|
2447
|
3
|
|
|
3
|
0
|
18
|
my $this = shift; |
|
2448
|
3
|
|
|
|
|
6
|
my ($Num, $Row) = @_; |
|
2449
|
|
|
|
|
|
|
|
|
2450
|
|
|
|
|
|
|
## We thoughtfully sort the keys in case columns will get created |
|
2451
|
|
|
|
|
|
|
## in this order. |
|
2452
|
|
|
|
|
|
|
|
|
2453
|
3
|
|
|
|
|
14
|
my $Fields = [sort keys %$Row]; |
|
2454
|
|
|
|
|
|
|
|
|
2455
|
|
|
|
|
|
|
## Pre-extend the table to accommodate row $Num if necessary. |
|
2456
|
|
|
|
|
|
|
## This will do nothing if there are not yet any fields in the |
|
2457
|
|
|
|
|
|
|
## table (and the length will still be effectively zero). |
|
2458
|
|
|
|
|
|
|
|
|
2459
|
3
|
100
|
|
|
|
10
|
$this->length($Num + 1) unless $this->length() >= $Num + 1; |
|
2460
|
|
|
|
|
|
|
|
|
2461
|
|
|
|
|
|
|
## Insert into columns, creating them if necessary. |
|
2462
|
3
|
|
|
|
|
8
|
foreach (@$Fields) {$this->col($_)->[$Num] = $Row->{$_}}; |
|
|
10
|
|
|
|
|
27
|
|
|
2463
|
|
|
|
|
|
|
|
|
2464
|
3
|
|
|
|
|
12
|
return($Row); ## Why not? |
|
2465
|
|
|
|
|
|
|
} |
|
2466
|
|
|
|
|
|
|
|
|
2467
|
|
|
|
|
|
|
sub row_delete |
|
2468
|
|
|
|
|
|
|
{ |
|
2469
|
10
|
|
|
10
|
0
|
92
|
my $this = shift; |
|
2470
|
10
|
|
|
|
|
18
|
my ($First, $Last) = @_; |
|
2471
|
|
|
|
|
|
|
|
|
2472
|
|
|
|
|
|
|
## Nothing to do if $First not specified. |
|
2473
|
10
|
100
|
|
|
|
23
|
return() unless defined($First); |
|
2474
|
|
|
|
|
|
|
|
|
2475
|
|
|
|
|
|
|
## Default is $Last is same as first (remove one row only) |
|
2476
|
9
|
100
|
|
|
|
23
|
$Last = $First unless defined($Last); |
|
2477
|
|
|
|
|
|
|
|
|
2478
|
9
|
|
|
|
|
18
|
my $LastIndex = $this->length() - 1; |
|
2479
|
|
|
|
|
|
|
|
|
2480
|
|
|
|
|
|
|
## Restrict the range to meaningful values. |
|
2481
|
9
|
|
|
|
|
23
|
$First = max($First, 0 ); ## First could be very high, to indicate a null range. |
|
2482
|
9
|
|
|
|
|
23
|
$Last = min($Last, $LastIndex); ## Last could be negative, like -1, to indicate null range. |
|
2483
|
|
|
|
|
|
|
|
|
2484
|
|
|
|
|
|
|
## Nothing to do if the range is empty. |
|
2485
|
9
|
100
|
|
|
|
23
|
return() if $Last < $First; |
|
2486
|
|
|
|
|
|
|
|
|
2487
|
8
|
|
|
|
|
17
|
my $Fields = $this->fieldlist(); |
|
2488
|
|
|
|
|
|
|
|
|
2489
|
8
|
|
|
|
|
13
|
my $RangeSize = ($Last - $First + 1); |
|
2490
|
|
|
|
|
|
|
|
|
2491
|
8
|
|
|
|
|
18
|
foreach (@$Fields) {splice @{$this->col($_)}, $First, $RangeSize}; |
|
|
32
|
|
|
|
|
39
|
|
|
|
32
|
|
|
|
|
58
|
|
|
2492
|
|
|
|
|
|
|
|
|
2493
|
|
|
|
|
|
|
## Here we could have trapped all the list segments we spliced out |
|
2494
|
|
|
|
|
|
|
## and return them in nice CTable-ish hash. Maybe we will some |
|
2495
|
|
|
|
|
|
|
## day. This would be a way to split a range of rows out of a |
|
2496
|
|
|
|
|
|
|
## table object to create another table object. We could even |
|
2497
|
|
|
|
|
|
|
## call it the "split" method... |
|
2498
|
|
|
|
|
|
|
|
|
2499
|
|
|
|
|
|
|
## After deleting the rows, we need to adjust the _Selection if |
|
2500
|
|
|
|
|
|
|
## present. Deleted row numbers in the selection need to be |
|
2501
|
|
|
|
|
|
|
## omitted; row numbers greater than the range of deleted ones |
|
2502
|
|
|
|
|
|
|
## need to be decreased by the size of the range reduction, and |
|
2503
|
|
|
|
|
|
|
## others need to be left untouched. |
|
2504
|
|
|
|
|
|
|
|
|
2505
|
0
|
0
|
|
|
|
0
|
$this->{_Selection} = |
|
|
|
0
|
|
|
|
|
|
|
2506
|
|
|
|
|
|
|
[map |
|
2507
|
|
|
|
|
|
|
{ |
|
2508
|
0
|
|
|
|
|
0
|
($_ < $First ? $_ : ## Before range: pass through. |
|
2509
|
|
|
|
|
|
|
($_ <= $Last ? () : ## In range: omit. |
|
2510
|
|
|
|
|
|
|
$_ - $RangeSize)) ## After range: reduce by range size |
|
2511
|
8
|
50
|
|
|
|
33
|
} @{ $this->{_Selection}}] |
|
2512
|
|
|
|
|
|
|
if $this->{_Selection}; |
|
2513
|
|
|
|
|
|
|
} |
|
2514
|
|
|
|
|
|
|
|
|
2515
|
|
|
|
|
|
|
sub row_move |
|
2516
|
|
|
|
|
|
|
{ |
|
2517
|
8
|
|
|
8
|
0
|
13
|
my $this = shift; |
|
2518
|
8
|
|
|
|
|
12
|
my ($Old, $New) = @_; |
|
2519
|
|
|
|
|
|
|
|
|
2520
|
|
|
|
|
|
|
## $Old and $New are required params and must not be undef. |
|
2521
|
8
|
50
|
33
|
|
|
41
|
goto done unless defined($Old) && defined($New); |
|
2522
|
|
|
|
|
|
|
|
|
2523
|
|
|
|
|
|
|
## If $Old and $New are the same or one apart, there's nothing to do. |
|
2524
|
8
|
100
|
|
|
|
23
|
goto done if ($New == $Old); ## This would mean a no-op. |
|
2525
|
7
|
100
|
|
|
|
20
|
goto done if ($New == $Old + 1); ## This would mean a no-op. |
|
2526
|
|
|
|
|
|
|
|
|
2527
|
5
|
|
|
|
|
11
|
my $Length = $this->length(); |
|
2528
|
|
|
|
|
|
|
|
|
2529
|
|
|
|
|
|
|
## Ensure both $Old and $New are legal indices. |
|
2530
|
5
|
50
|
33
|
|
|
28
|
goto done if (($Old < 0) || ($Old > $Length - 1)); |
|
2531
|
5
|
50
|
33
|
|
|
26
|
goto done if (($New < 0) || ($New > $Length )); ## New has a range up to $Length, meaning move to end. |
|
2532
|
|
|
|
|
|
|
|
|
2533
|
5
|
|
|
|
|
11
|
my $Fields = $this->fieldlist_all(); |
|
2534
|
|
|
|
|
|
|
|
|
2535
|
5
|
100
|
|
|
|
14
|
if ($Old < $New) ## Move forward (to higher / later row num) |
|
2536
|
|
|
|
|
|
|
{ |
|
2537
|
|
|
|
|
|
|
## Delete from the lower position and insert into higher -- MINUS ONE to account for prior shortening. |
|
2538
|
2
|
|
|
|
|
3
|
my $Row = {}; |
|
2539
|
2
|
|
|
|
|
4
|
foreach (@$Fields) {$Row->{$_} = splice(@{$this->col($_)}, $Old , 1 )}; |
|
|
8
|
|
|
|
|
95
|
|
|
|
8
|
|
|
|
|
19
|
|
|
2540
|
2
|
|
|
|
|
6
|
foreach (@$Fields) { splice(@{$this->col($_)}, $New - 1, 0, $Row->{$_})}; |
|
|
8
|
|
|
|
|
11
|
|
|
|
8
|
|
|
|
|
16
|
|
|
2541
|
|
|
|
|
|
|
|
|
2542
|
|
|
|
|
|
|
} |
|
2543
|
|
|
|
|
|
|
else ## Move backward (to lower / earlier row num) |
|
2544
|
|
|
|
|
|
|
{ |
|
2545
|
|
|
|
|
|
|
## Delete from the higher position and insert into lower. |
|
2546
|
3
|
|
|
|
|
5
|
my $Row = {}; |
|
2547
|
3
|
|
|
|
|
7
|
foreach (@$Fields) {$Row->{$_} = splice(@{$this->col($_)}, $Old , 1 )}; |
|
|
12
|
|
|
|
|
12
|
|
|
|
12
|
|
|
|
|
26
|
|
|
2548
|
3
|
|
|
|
|
6
|
foreach (@$Fields) { splice(@{$this->col($_)}, $New , 0, $Row->{$_})}; |
|
|
12
|
|
|
|
|
14
|
|
|
|
12
|
|
|
|
|
24
|
|
|
2549
|
|
|
|
|
|
|
|
|
2550
|
|
|
|
|
|
|
} |
|
2551
|
|
|
|
|
|
|
|
|
2552
|
|
|
|
|
|
|
## After moving the rows, we need to adjust the _Selection if |
|
2553
|
|
|
|
|
|
|
## present. Row numbers outside the shuffled range stay the same; |
|
2554
|
|
|
|
|
|
|
## the moved row number(s) change; others (inside the range) get |
|
2555
|
|
|
|
|
|
|
## shifted down or up by 1. |
|
2556
|
|
|
|
|
|
|
|
|
2557
|
5
|
100
|
|
|
|
17
|
if ($Old < $New) ## Move forward / higher / later |
|
2558
|
|
|
|
|
|
|
{ |
|
2559
|
6
|
100
|
|
|
|
25
|
$this->{_Selection} = |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
2560
|
|
|
|
|
|
|
[map |
|
2561
|
|
|
|
|
|
|
{ |
|
2562
|
2
|
|
|
|
|
5
|
($_ == $Old ? $New - 1 : ## Moved row: change num to new - 1 |
|
2563
|
|
|
|
|
|
|
($_ < $Old ? $_ : ## Less than $Old: no change |
|
2564
|
|
|
|
|
|
|
($_ >= $New ? $_ : ## Grtr= than $New: no change |
|
2565
|
|
|
|
|
|
|
$_ - 1))) ## In range: shift down by 1. |
|
2566
|
2
|
50
|
|
|
|
7
|
} @{ $this->{_Selection}}] |
|
2567
|
|
|
|
|
|
|
if $this->{_Selection}; |
|
2568
|
|
|
|
|
|
|
|
|
2569
|
|
|
|
|
|
|
} |
|
2570
|
|
|
|
|
|
|
else ## Move backward / lower / earlier |
|
2571
|
|
|
|
|
|
|
{ |
|
2572
|
9
|
100
|
|
|
|
34
|
$this->{_Selection} = |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
2573
|
|
|
|
|
|
|
[map |
|
2574
|
|
|
|
|
|
|
{ |
|
2575
|
3
|
|
|
|
|
7
|
($_ == $Old ? $New : ## Moved row: change num to new |
|
2576
|
|
|
|
|
|
|
($_ >= $Old ? $_ : ## Grtr= than $Old: no change |
|
2577
|
|
|
|
|
|
|
($_ < $New ? $_ : ## Less than $New: no change |
|
2578
|
|
|
|
|
|
|
$_ + 1))) ## In range: shift up by 1. |
|
2579
|
3
|
50
|
|
|
|
10
|
} @{ $this->{_Selection}}] |
|
2580
|
|
|
|
|
|
|
if $this->{_Selection}; |
|
2581
|
|
|
|
|
|
|
} |
|
2582
|
|
|
|
|
|
|
|
|
2583
|
8
|
|
|
|
|
25
|
done: |
|
2584
|
|
|
|
|
|
|
return; |
|
2585
|
|
|
|
|
|
|
} |
|
2586
|
|
|
|
|
|
|
|
|
2587
|
|
|
|
|
|
|
sub row_empty |
|
2588
|
|
|
|
|
|
|
{ |
|
2589
|
2
|
|
|
2
|
0
|
9
|
my $this = shift; |
|
2590
|
|
|
|
|
|
|
|
|
2591
|
2
|
|
|
|
|
5
|
my $Fields = $this->fieldlist(); |
|
2592
|
2
|
|
|
|
|
4
|
my $Row = {}; @$Row{@$Fields} = undef; |
|
|
2
|
|
|
|
|
10
|
|
|
2593
|
|
|
|
|
|
|
|
|
2594
|
2
|
|
|
|
|
12
|
return($Row); |
|
2595
|
|
|
|
|
|
|
} |
|
2596
|
|
|
|
|
|
|
|
|
2597
|
|
|
|
|
|
|
sub row_exists |
|
2598
|
|
|
|
|
|
|
{ |
|
2599
|
14
|
|
|
14
|
0
|
38
|
my $this = shift; |
|
2600
|
14
|
|
|
|
|
16
|
my ($Num) = @_; |
|
2601
|
|
|
|
|
|
|
|
|
2602
|
14
|
|
100
|
|
|
46
|
return(($Num >= 0) && ($Num < $this->length())); |
|
2603
|
|
|
|
|
|
|
} |
|
2604
|
|
|
|
|
|
|
|
|
2605
|
|
|
|
|
|
|
sub rows |
|
2606
|
|
|
|
|
|
|
{ |
|
2607
|
4
|
|
|
4
|
0
|
13
|
my $this = shift; |
|
2608
|
4
|
|
|
|
|
6
|
my ($Nums) = @_; |
|
2609
|
|
|
|
|
|
|
|
|
2610
|
4
|
|
|
|
|
7
|
return([map {$this->row($_)} @$Nums]); |
|
|
8
|
|
|
|
|
20
|
|
|
2611
|
|
|
|
|
|
|
} |
|
2612
|
|
|
|
|
|
|
|
|
2613
|
|
|
|
|
|
|
sub row_list |
|
2614
|
|
|
|
|
|
|
{ |
|
2615
|
19
|
|
|
19
|
0
|
33
|
my $this = shift; |
|
2616
|
19
|
|
|
|
|
26
|
my ($Num, $Fields) = @_; |
|
2617
|
|
|
|
|
|
|
|
|
2618
|
|
|
|
|
|
|
## $Fields argument is optional and defaults to fieldlist(); |
|
2619
|
19
|
|
66
|
|
|
54
|
$Fields ||= $this->fieldlist(); |
|
2620
|
|
|
|
|
|
|
|
|
2621
|
19
|
|
|
|
|
32
|
my $Row = [map {$this->col($_)->[$Num]} @$Fields]; |
|
|
105
|
|
|
|
|
193
|
|
|
2622
|
19
|
|
|
|
|
193
|
return($Row); |
|
2623
|
|
|
|
|
|
|
} |
|
2624
|
|
|
|
|
|
|
|
|
2625
|
|
|
|
|
|
|
sub row_list_set |
|
2626
|
|
|
|
|
|
|
{ |
|
2627
|
6
|
|
|
6
|
0
|
39
|
my $this = shift; |
|
2628
|
6
|
|
|
|
|
9
|
my ($Num, $Fields, $Vals) = @_; |
|
2629
|
|
|
|
|
|
|
|
|
2630
|
|
|
|
|
|
|
## $Fields argument is optional and defaults to fieldlist(); |
|
2631
|
6
|
|
66
|
|
|
19
|
$Fields ||= $this->fieldlist(); |
|
2632
|
|
|
|
|
|
|
|
|
2633
|
|
|
|
|
|
|
## $Vals is optional and defaults to []. |
|
2634
|
6
|
|
50
|
|
|
10
|
$Vals ||= []; |
|
2635
|
|
|
|
|
|
|
|
|
2636
|
|
|
|
|
|
|
## Pre-extend the table to accommodate row $Num if necessary. |
|
2637
|
6
|
50
|
|
|
|
14
|
$this->length($Num + 1) unless $this->length() >= $Num + 1; |
|
2638
|
|
|
|
|
|
|
|
|
2639
|
|
|
|
|
|
|
## Set the $Vals in row $Num in the order given by $Fields. |
|
2640
|
6
|
|
|
|
|
15
|
foreach (0..$#$Fields) {$this->col($Fields->[$_])->[$Num] = $Vals->[$_]} |
|
|
18
|
|
|
|
|
35
|
|
|
2641
|
|
|
|
|
|
|
} |
|
2642
|
|
|
|
|
|
|
|
|
2643
|
|
|
|
|
|
|
=pod |
|
2644
|
|
|
|
|
|
|
|
|
2645
|
|
|
|
|
|
|
=head1 ROW / RECORD COUNT (TABLE LENGTH) |
|
2646
|
|
|
|
|
|
|
|
|
2647
|
|
|
|
|
|
|
## Getting or setting table length |
|
2648
|
|
|
|
|
|
|
|
|
2649
|
|
|
|
|
|
|
$t->length() ## Get length |
|
2650
|
|
|
|
|
|
|
$t->length_get() |
|
2651
|
|
|
|
|
|
|
|
|
2652
|
|
|
|
|
|
|
$t->length(22) ## Set length (truncate or pre-extend) |
|
2653
|
|
|
|
|
|
|
$t->length_set(22) |
|
2654
|
|
|
|
|
|
|
|
|
2655
|
|
|
|
|
|
|
$t->extend() ## Set length of all columns to match longest |
|
2656
|
|
|
|
|
|
|
|
|
2657
|
|
|
|
|
|
|
The length* methods assume the table already has columns of equal |
|
2658
|
|
|
|
|
|
|
length. So the length of the table is the length of any field taken |
|
2659
|
|
|
|
|
|
|
at random. We choose the first one in the field list. |
|
2660
|
|
|
|
|
|
|
|
|
2661
|
|
|
|
|
|
|
Setting the length will truncate or pre-extend every column in the |
|
2662
|
|
|
|
|
|
|
table to a given length as required. |
|
2663
|
|
|
|
|
|
|
|
|
2664
|
|
|
|
|
|
|
(Pre-extending means setting each column's length via $# so that it |
|
2665
|
|
|
|
|
|
|
has the correct number of entries already allocated (and filled with |
|
2666
|
|
|
|
|
|
|
undef) so that operations that fill up the table can be done much more |
|
2667
|
|
|
|
|
|
|
quickly than with push(). |
|
2668
|
|
|
|
|
|
|
|
|
2669
|
|
|
|
|
|
|
However, if a new column has been added directly, or a table has been |
|
2670
|
|
|
|
|
|
|
constructed out of columns whose length may not initially match, the |
|
2671
|
|
|
|
|
|
|
extend() method may be (should be) called to inspect all columns and |
|
2672
|
|
|
|
|
|
|
extend them all to match the longest one. Note that extend() operates |
|
2673
|
|
|
|
|
|
|
on all fields in the object, ignoring the custom _FieldList if any. |
|
2674
|
|
|
|
|
|
|
|
|
2675
|
|
|
|
|
|
|
The length of a table with no columns is zero. |
|
2676
|
|
|
|
|
|
|
|
|
2677
|
|
|
|
|
|
|
=cut |
|
2678
|
|
|
|
|
|
|
|
|
2679
|
|
|
|
|
|
|
sub length |
|
2680
|
|
|
|
|
|
|
{ |
|
2681
|
470
|
|
|
470
|
0
|
743
|
my $this = shift; |
|
2682
|
470
|
|
|
|
|
560
|
my ($Length) = @_; |
|
2683
|
|
|
|
|
|
|
|
|
2684
|
470
|
100
|
|
|
|
1172
|
return(defined($Length) ? |
|
2685
|
|
|
|
|
|
|
$this->length_set($Length) : |
|
2686
|
|
|
|
|
|
|
$this->length_get()); |
|
2687
|
|
|
|
|
|
|
} |
|
2688
|
|
|
|
|
|
|
|
|
2689
|
|
|
|
|
|
|
sub length_get |
|
2690
|
|
|
|
|
|
|
{ |
|
2691
|
468
|
|
|
468
|
0
|
581
|
my $this = shift; |
|
2692
|
|
|
|
|
|
|
|
|
2693
|
468
|
|
|
|
|
784
|
my $FieldList = $this->fieldlist(); |
|
2694
|
468
|
|
|
|
|
722
|
my $FirstField = $FieldList->[0]; |
|
2695
|
468
|
|
|
|
|
869
|
my $Col = $this->{$FirstField}; |
|
2696
|
468
|
100
|
|
|
|
1119
|
my $Length = (ref($Col) eq 'ARRAY' ? @$Col+0 : 0); |
|
2697
|
|
|
|
|
|
|
|
|
2698
|
468
|
|
|
|
|
1835
|
return($Length); |
|
2699
|
|
|
|
|
|
|
} |
|
2700
|
|
|
|
|
|
|
|
|
2701
|
|
|
|
|
|
|
sub length_set |
|
2702
|
|
|
|
|
|
|
{ |
|
2703
|
26
|
|
|
26
|
0
|
36
|
my $this = shift; |
|
2704
|
26
|
|
|
|
|
35
|
my ($Length) = @_; |
|
2705
|
|
|
|
|
|
|
|
|
2706
|
|
|
|
|
|
|
## Apply the length-setting logic to any field found in the hash |
|
2707
|
|
|
|
|
|
|
## OR listed in the field list. They will all be created if not |
|
2708
|
|
|
|
|
|
|
## already present. |
|
2709
|
|
|
|
|
|
|
|
|
2710
|
26
|
|
|
|
|
27
|
my $FieldList = [@{$this->fieldlist_all()}, @{$this->fieldlist()}]; |
|
|
26
|
|
|
|
|
49
|
|
|
|
26
|
|
|
|
|
51
|
|
|
2711
|
|
|
|
|
|
|
|
|
2712
|
26
|
|
|
|
|
60
|
foreach my $FieldName (@$FieldList) |
|
2713
|
|
|
|
|
|
|
{ |
|
2714
|
223
|
|
|
|
|
254
|
$#{$this->col($FieldName)} = ($Length - 1); ## $Length = 0 => $# = -1 => empty list. |
|
|
223
|
|
|
|
|
357
|
|
|
2715
|
|
|
|
|
|
|
}; |
|
2716
|
|
|
|
|
|
|
|
|
2717
|
|
|
|
|
|
|
## Since records might have been deleted, re-validate the |
|
2718
|
|
|
|
|
|
|
## _Selection, if it is present. |
|
2719
|
|
|
|
|
|
|
|
|
2720
|
26
|
|
|
|
|
62
|
$this->selection_validate(); |
|
2721
|
|
|
|
|
|
|
|
|
2722
|
26
|
|
|
|
|
65
|
return($Length); |
|
2723
|
|
|
|
|
|
|
} |
|
2724
|
|
|
|
|
|
|
|
|
2725
|
|
|
|
|
|
|
sub extend |
|
2726
|
|
|
|
|
|
|
{ |
|
2727
|
22
|
|
|
22
|
0
|
32
|
my $this = shift; |
|
2728
|
22
|
|
|
|
|
29
|
my $Length = 0; |
|
2729
|
|
|
|
|
|
|
|
|
2730
|
|
|
|
|
|
|
## Find the length of the longest vector... |
|
2731
|
|
|
|
|
|
|
|
|
2732
|
22
|
|
|
|
|
28
|
foreach (@{$this->fieldlist_all()}) {$Length = max($Length, $#{$this->{$_}} + 1)}; |
|
|
22
|
|
|
|
|
41
|
|
|
|
114
|
|
|
|
|
132
|
|
|
|
114
|
|
|
|
|
265
|
|
|
2733
|
|
|
|
|
|
|
|
|
2734
|
|
|
|
|
|
|
## ...and set them all to be that length. |
|
2735
|
|
|
|
|
|
|
|
|
2736
|
22
|
|
|
|
|
64
|
$this->length_set($Length); |
|
2737
|
|
|
|
|
|
|
} |
|
2738
|
|
|
|
|
|
|
|
|
2739
|
|
|
|
|
|
|
=pod |
|
2740
|
|
|
|
|
|
|
|
|
2741
|
|
|
|
|
|
|
=head1 SELECTIONS |
|
2742
|
|
|
|
|
|
|
|
|
2743
|
|
|
|
|
|
|
## Getting or setting the custom selection list itself (_Selection) |
|
2744
|
|
|
|
|
|
|
|
|
2745
|
|
|
|
|
|
|
$t->selection() ## Get sel if any; else all() |
|
2746
|
|
|
|
|
|
|
$t->selection_get() |
|
2747
|
|
|
|
|
|
|
|
|
2748
|
|
|
|
|
|
|
$t->selection($List) ## Set sel (list of rec nums) |
|
2749
|
|
|
|
|
|
|
$t->selection_set($List) |
|
2750
|
|
|
|
|
|
|
|
|
2751
|
|
|
|
|
|
|
$t->selection(0) ## Remove sel (select all) |
|
2752
|
|
|
|
|
|
|
$t->selection_set(undef) |
|
2753
|
|
|
|
|
|
|
$t->selection_delete() |
|
2754
|
|
|
|
|
|
|
$t->select_all() |
|
2755
|
|
|
|
|
|
|
|
|
2756
|
|
|
|
|
|
|
$t->selection_inverse() ## Get inverse copy of selection |
|
2757
|
|
|
|
|
|
|
$t->select_inverse() ## Invert the selection |
|
2758
|
|
|
|
|
|
|
|
|
2759
|
|
|
|
|
|
|
$t->selection_validate() ## Remove invalid #s from sel |
|
2760
|
|
|
|
|
|
|
|
|
2761
|
|
|
|
|
|
|
## List of all rec nums present (regardless of selection) |
|
2762
|
|
|
|
|
|
|
|
|
2763
|
|
|
|
|
|
|
$t->all() |
|
2764
|
|
|
|
|
|
|
|
|
2765
|
|
|
|
|
|
|
## Getting or setting just selected fields in columns |
|
2766
|
|
|
|
|
|
|
## (as contrasted with col() and friends). |
|
2767
|
|
|
|
|
|
|
|
|
2768
|
|
|
|
|
|
|
$t->sel($ColName) ## Get col but only records in sel |
|
2769
|
|
|
|
|
|
|
$t->sel_get($ColName) |
|
2770
|
|
|
|
|
|
|
|
|
2771
|
|
|
|
|
|
|
$t->sel($ColName, $ListRef) ## Set selected fields in col... |
|
2772
|
|
|
|
|
|
|
$t->sel_set($ColName, $ListRef) ##... in selection order |
|
2773
|
|
|
|
|
|
|
|
|
2774
|
|
|
|
|
|
|
$t->sel_set($ColName) ## Set selected fields to undef |
|
2775
|
|
|
|
|
|
|
$t->sel_clear($ColName) |
|
2776
|
|
|
|
|
|
|
|
|
2777
|
|
|
|
|
|
|
$t->sels($ColList) ## Like cols, but selected fields |
|
2778
|
|
|
|
|
|
|
$t->sels_hash($ColList) ## " " cols_hash()... " " " " |
|
2779
|
|
|
|
|
|
|
|
|
2780
|
|
|
|
|
|
|
## Finding out size of selection (number of rows) |
|
2781
|
|
|
|
|
|
|
|
|
2782
|
|
|
|
|
|
|
$t->sel_len() ## Get number of selected rows. |
|
2783
|
|
|
|
|
|
|
|
|
2784
|
|
|
|
|
|
|
A selection is an ordered list of record numbers. The record numbers |
|
2785
|
|
|
|
|
|
|
in the selection may be a subset of available records. Furthermore, |
|
2786
|
|
|
|
|
|
|
they may be in non-record-number order, indicating that the records |
|
2787
|
|
|
|
|
|
|
have been sorted. |
|
2788
|
|
|
|
|
|
|
|
|
2789
|
|
|
|
|
|
|
Record numbers are numeric array indices into the columns in the |
|
2790
|
|
|
|
|
|
|
table. It is an error for any selection list to contain an index less |
|
2791
|
|
|
|
|
|
|
than zero or greater than (length() - 1), so if you set a selection |
|
2792
|
|
|
|
|
|
|
explicitly, be careful. |
|
2793
|
|
|
|
|
|
|
|
|
2794
|
|
|
|
|
|
|
Any selection list you get or set belongs to the object. Be careful |
|
2795
|
|
|
|
|
|
|
of modifying its contents. |
|
2796
|
|
|
|
|
|
|
|
|
2797
|
|
|
|
|
|
|
The custom selection, if any, is stored internally in the _Selection |
|
2798
|
|
|
|
|
|
|
parameter. If this parameter is absent, the selection defaults to |
|
2799
|
|
|
|
|
|
|
all() -- i.e. a list of all record numbers, in order: |
|
2800
|
|
|
|
|
|
|
[0..($this->length() - 1)] (which becomes [] if length() is 0). |
|
2801
|
|
|
|
|
|
|
|
|
2802
|
|
|
|
|
|
|
REMEMBER: length() is one-based, but record numbers are zero-based. |
|
2803
|
|
|
|
|
|
|
|
|
2804
|
|
|
|
|
|
|
Removing the selection (that is, removing the LIST itself of which |
|
2805
|
|
|
|
|
|
|
records are selected), is the same as selecting all records. |
|
2806
|
|
|
|
|
|
|
consequently, selection(0), selection_delete(), and select_all() are |
|
2807
|
|
|
|
|
|
|
all synonymous. |
|
2808
|
|
|
|
|
|
|
|
|
2809
|
|
|
|
|
|
|
selection_validate() removes any entries from the current _Selection |
|
2810
|
|
|
|
|
|
|
list (if any) that are not valid record numbers -- i.e. it removes any |
|
2811
|
|
|
|
|
|
|
record whose integer value is < 0 or greater than length() - 1. This |
|
2812
|
|
|
|
|
|
|
routine is mainly used by other methods that might delete records, |
|
2813
|
|
|
|
|
|
|
such as length_set(). |
|
2814
|
|
|
|
|
|
|
|
|
2815
|
|
|
|
|
|
|
Getting or setting just selected data from columns |
|
2816
|
|
|
|
|
|
|
|
|
2817
|
|
|
|
|
|
|
Sometimes, you don't want to get/set entire columns, you instead want |
|
2818
|
|
|
|
|
|
|
to get or set data in just the selected fields in a column. |
|
2819
|
|
|
|
|
|
|
|
|
2820
|
|
|
|
|
|
|
The sel(), sel_get(), sel_set(), sels() and sels_hash() methods are |
|
2821
|
|
|
|
|
|
|
analagous to the corresponding col(), ... cols_hash() methods except |
|
2822
|
|
|
|
|
|
|
in these two ways: |
|
2823
|
|
|
|
|
|
|
|
|
2824
|
|
|
|
|
|
|
- the 'sels' variants get or set just selected data, as determined by |
|
2825
|
|
|
|
|
|
|
the current selection(), which gives an ordered list of the selected / |
|
2826
|
|
|
|
|
|
|
sorted records. |
|
2827
|
|
|
|
|
|
|
|
|
2828
|
|
|
|
|
|
|
- the 'sels' variants all make COPIES of the data you request or |
|
2829
|
|
|
|
|
|
|
supply -- the data is copied out of or into the correspnding column. |
|
2830
|
|
|
|
|
|
|
So, you "own" any vector you pass or receive in reply. |
|
2831
|
|
|
|
|
|
|
|
|
2832
|
|
|
|
|
|
|
So, for example, imagine you have just set selection() to only list |
|
2833
|
|
|
|
|
|
|
record numbers where the LastName field is not empty. Then you have |
|
2834
|
|
|
|
|
|
|
called sort() to sort those record numbers by the LastName field. You |
|
2835
|
|
|
|
|
|
|
could then call $t->sel('LastName') to get a sorted list of all |
|
2836
|
|
|
|
|
|
|
non-empty last names. |
|
2837
|
|
|
|
|
|
|
|
|
2838
|
|
|
|
|
|
|
It might be helpful to think of "sel" as short for "selected". So |
|
2839
|
|
|
|
|
|
|
$t->sel('LastName') would mean "get the selected field values from the |
|
2840
|
|
|
|
|
|
|
LastName field". |
|
2841
|
|
|
|
|
|
|
|
|
2842
|
|
|
|
|
|
|
=cut |
|
2843
|
|
|
|
|
|
|
|
|
2844
|
|
|
|
|
|
|
sub selection |
|
2845
|
|
|
|
|
|
|
{ |
|
2846
|
477
|
|
|
477
|
1
|
673
|
my $this = shift; |
|
2847
|
477
|
|
|
|
|
597
|
my ($Selection) = @_; |
|
2848
|
|
|
|
|
|
|
|
|
2849
|
|
|
|
|
|
|
## Set if specified. |
|
2850
|
477
|
100
|
|
|
|
842
|
$this->selection_set($Selection) if defined($Selection); |
|
2851
|
|
|
|
|
|
|
|
|
2852
|
|
|
|
|
|
|
## Get and return. |
|
2853
|
477
|
|
|
|
|
770
|
$Selection = $this->selection_get(); |
|
2854
|
|
|
|
|
|
|
|
|
2855
|
477
|
|
|
|
|
1178
|
return($Selection); |
|
2856
|
|
|
|
|
|
|
} |
|
2857
|
|
|
|
|
|
|
|
|
2858
|
|
|
|
|
|
|
sub selection_get |
|
2859
|
|
|
|
|
|
|
{ |
|
2860
|
478
|
|
|
478
|
0
|
546
|
my $this = shift; |
|
2861
|
|
|
|
|
|
|
|
|
2862
|
478
|
|
66
|
|
|
1353
|
my $Selection = $this->{_Selection} || $this->selection_default(); |
|
2863
|
|
|
|
|
|
|
|
|
2864
|
478
|
|
|
|
|
784
|
return($Selection); |
|
2865
|
|
|
|
|
|
|
} |
|
2866
|
|
|
|
|
|
|
|
|
2867
|
|
|
|
|
|
|
sub selection_set |
|
2868
|
|
|
|
|
|
|
{ |
|
2869
|
159
|
|
|
159
|
0
|
186
|
my $this = shift; |
|
2870
|
159
|
|
|
|
|
185
|
my ($Selection) = @_; |
|
2871
|
|
|
|
|
|
|
|
|
2872
|
159
|
100
|
|
|
|
295
|
if (ref($Selection) eq 'ARRAY') |
|
2873
|
|
|
|
|
|
|
{ |
|
2874
|
|
|
|
|
|
|
## Set if specified... |
|
2875
|
36
|
|
|
|
|
69
|
$this->{_Selection} = $Selection; |
|
2876
|
|
|
|
|
|
|
} |
|
2877
|
|
|
|
|
|
|
else |
|
2878
|
|
|
|
|
|
|
{ |
|
2879
|
|
|
|
|
|
|
## Otherwise, delete and return original if any. |
|
2880
|
123
|
|
|
|
|
246
|
$Selection = delete $this->{_Selection}; |
|
2881
|
|
|
|
|
|
|
} |
|
2882
|
|
|
|
|
|
|
|
|
2883
|
159
|
|
|
|
|
288
|
return($Selection); |
|
2884
|
|
|
|
|
|
|
} |
|
2885
|
|
|
|
|
|
|
|
|
2886
|
|
|
|
|
|
|
sub selection_delete |
|
2887
|
|
|
|
|
|
|
{ |
|
2888
|
2
|
|
|
2
|
0
|
5
|
my $this = shift; |
|
2889
|
2
|
|
|
|
|
6
|
$this->selection_set(); |
|
2890
|
|
|
|
|
|
|
} |
|
2891
|
|
|
|
|
|
|
|
|
2892
|
|
|
|
|
|
|
sub select_all |
|
2893
|
|
|
|
|
|
|
{ |
|
2894
|
70
|
|
|
70
|
0
|
115
|
my $this = shift; |
|
2895
|
70
|
|
|
|
|
167
|
$this->selection_set(); |
|
2896
|
|
|
|
|
|
|
} |
|
2897
|
|
|
|
|
|
|
|
|
2898
|
|
|
|
|
|
|
sub select_none |
|
2899
|
|
|
|
|
|
|
{ |
|
2900
|
4
|
|
|
4
|
0
|
22
|
my $this = shift; |
|
2901
|
4
|
|
|
|
|
10
|
$this->selection_set([]); |
|
2902
|
|
|
|
|
|
|
} |
|
2903
|
|
|
|
|
|
|
|
|
2904
|
|
|
|
|
|
|
sub selection_default |
|
2905
|
|
|
|
|
|
|
{ |
|
2906
|
322
|
|
|
322
|
0
|
344
|
my $this = shift; |
|
2907
|
|
|
|
|
|
|
|
|
2908
|
322
|
|
|
|
|
8686
|
my $Selection = $this->all(); |
|
2909
|
|
|
|
|
|
|
|
|
2910
|
322
|
|
|
|
|
885
|
return($Selection); |
|
2911
|
|
|
|
|
|
|
} |
|
2912
|
|
|
|
|
|
|
|
|
2913
|
|
|
|
|
|
|
sub all |
|
2914
|
|
|
|
|
|
|
{ |
|
2915
|
343
|
|
|
343
|
0
|
384
|
my $this = shift; |
|
2916
|
343
|
|
|
|
|
580
|
my $RowNums = [0..($this->length() - 1)]; |
|
2917
|
|
|
|
|
|
|
|
|
2918
|
343
|
|
|
|
|
587
|
return($RowNums); |
|
2919
|
|
|
|
|
|
|
} |
|
2920
|
|
|
|
|
|
|
|
|
2921
|
|
|
|
|
|
|
sub selection_inverse |
|
2922
|
|
|
|
|
|
|
{ |
|
2923
|
9
|
|
|
9
|
0
|
14
|
my $this = shift; |
|
2924
|
9
|
|
|
|
|
15
|
my $Sel = $this->selection(); |
|
2925
|
9
|
|
|
|
|
18
|
my $All = $this->all(); |
|
2926
|
|
|
|
|
|
|
|
|
2927
|
9
|
|
|
|
|
18
|
@$All[@$Sel] = undef; |
|
2928
|
9
|
|
|
|
|
17
|
$All = [grep {defined} @$All]; |
|
|
27
|
|
|
|
|
54
|
|
|
2929
|
|
|
|
|
|
|
|
|
2930
|
9
|
|
|
|
|
34
|
return($All); |
|
2931
|
|
|
|
|
|
|
} |
|
2932
|
|
|
|
|
|
|
|
|
2933
|
|
|
|
|
|
|
sub select_inverse |
|
2934
|
|
|
|
|
|
|
{ |
|
2935
|
1
|
|
|
1
|
0
|
2
|
my $this = shift; |
|
2936
|
|
|
|
|
|
|
|
|
2937
|
1
|
|
|
|
|
4
|
return($this->{_Selection} = $this->selection_inverse()); |
|
2938
|
|
|
|
|
|
|
} |
|
2939
|
|
|
|
|
|
|
|
|
2940
|
|
|
|
|
|
|
sub selection_validate |
|
2941
|
|
|
|
|
|
|
{ |
|
2942
|
27
|
|
|
27
|
0
|
29
|
my $this = shift; |
|
2943
|
|
|
|
|
|
|
|
|
2944
|
27
|
100
|
|
|
|
84
|
if (ref($this->{_Selection}) eq 'ARRAY') |
|
2945
|
|
|
|
|
|
|
{ |
|
2946
|
1
|
|
|
|
|
4
|
$this->{_Selection} = $this->selection_validate_internal($this->{_Selection}); |
|
2947
|
|
|
|
|
|
|
} |
|
2948
|
|
|
|
|
|
|
} |
|
2949
|
|
|
|
|
|
|
|
|
2950
|
|
|
|
|
|
|
sub selection_validate_internal |
|
2951
|
|
|
|
|
|
|
{ |
|
2952
|
19
|
|
|
19
|
0
|
31
|
my $this = shift; |
|
2953
|
19
|
|
|
|
|
29
|
my ($Selection) = @_; |
|
2954
|
|
|
|
|
|
|
|
|
2955
|
19
|
|
|
|
|
37
|
my $Length = $this->length(); |
|
2956
|
|
|
|
|
|
|
|
|
2957
|
19
|
100
|
|
|
|
39
|
$Selection = [grep {(($_ >= 0) && ($_ < $Length))} @$Selection]; |
|
|
71
|
|
|
|
|
299
|
|
|
2958
|
|
|
|
|
|
|
|
|
2959
|
19
|
|
|
|
|
43
|
return($Selection); |
|
2960
|
|
|
|
|
|
|
} |
|
2961
|
|
|
|
|
|
|
|
|
2962
|
|
|
|
|
|
|
sub sel ## ($ColName, [$Vector]) |
|
2963
|
|
|
|
|
|
|
{ |
|
2964
|
284
|
|
|
284
|
0
|
474
|
my $this = shift; |
|
2965
|
284
|
|
|
|
|
380
|
my ($ColName, $Vector) = @_; |
|
2966
|
|
|
|
|
|
|
|
|
2967
|
|
|
|
|
|
|
## Set if specified. |
|
2968
|
284
|
50
|
|
|
|
447
|
if (defined($Vector)) |
|
2969
|
|
|
|
|
|
|
{ |
|
2970
|
0
|
|
|
|
|
0
|
$this->sel_set($ColName, $Vector); |
|
2971
|
|
|
|
|
|
|
## Nothing to return. |
|
2972
|
|
|
|
|
|
|
} |
|
2973
|
|
|
|
|
|
|
## Get and return. |
|
2974
|
|
|
|
|
|
|
else |
|
2975
|
|
|
|
|
|
|
{ |
|
2976
|
284
|
|
|
|
|
521
|
my $Sel = $this->sel_get($ColName); |
|
2977
|
284
|
|
|
|
|
798
|
return($Sel); |
|
2978
|
|
|
|
|
|
|
} |
|
2979
|
|
|
|
|
|
|
} |
|
2980
|
|
|
|
|
|
|
|
|
2981
|
|
|
|
|
|
|
sub sel_get |
|
2982
|
|
|
|
|
|
|
{ |
|
2983
|
318
|
|
|
318
|
0
|
500
|
my $this = shift; |
|
2984
|
318
|
|
|
|
|
473
|
my ($ColName, $Selection) = @_; |
|
2985
|
|
|
|
|
|
|
|
|
2986
|
318
|
|
|
|
|
646
|
my $Col = $this->col($ColName); |
|
2987
|
318
|
|
66
|
|
|
1098
|
$Selection ||= $this->selection(); |
|
2988
|
318
|
|
|
|
|
1083
|
my $Sel = [@$Col[@$Selection]]; |
|
2989
|
|
|
|
|
|
|
|
|
2990
|
318
|
|
|
|
|
601
|
return($Sel); |
|
2991
|
|
|
|
|
|
|
} |
|
2992
|
|
|
|
|
|
|
|
|
2993
|
|
|
|
|
|
|
sub sel_set ## ($ColName, [$Vector]) |
|
2994
|
|
|
|
|
|
|
{ |
|
2995
|
5
|
|
|
5
|
0
|
18
|
my $this = shift; |
|
2996
|
5
|
|
|
|
|
7
|
my ($ColName, $Vector) = @_; |
|
2997
|
|
|
|
|
|
|
|
|
2998
|
5
|
|
|
|
|
11
|
my $Col = $this->col($ColName); |
|
2999
|
5
|
|
|
|
|
11
|
my $Selection = $this->selection(); |
|
3000
|
|
|
|
|
|
|
|
|
3001
|
5
|
100
|
66
|
|
|
24
|
if (defined($Vector) && (ref($Vector) eq 'ARRAY')) |
|
3002
|
|
|
|
|
|
|
{ |
|
3003
|
3
|
|
|
|
|
13
|
@$Col[@$Selection] = @$Vector; |
|
3004
|
|
|
|
|
|
|
} |
|
3005
|
|
|
|
|
|
|
else |
|
3006
|
|
|
|
|
|
|
{ |
|
3007
|
2
|
|
|
|
|
6
|
@$Col[@$Selection] = undef; |
|
3008
|
|
|
|
|
|
|
} |
|
3009
|
|
|
|
|
|
|
} |
|
3010
|
|
|
|
|
|
|
|
|
3011
|
|
|
|
|
|
|
sub sel_clear ## ($ColName) |
|
3012
|
|
|
|
|
|
|
{ |
|
3013
|
2
|
|
|
2
|
0
|
8
|
my $this = shift; |
|
3014
|
2
|
|
|
|
|
3
|
my ($ColName) = @_; |
|
3015
|
|
|
|
|
|
|
|
|
3016
|
2
|
|
|
|
|
6
|
$this->sel_set($ColName); |
|
3017
|
|
|
|
|
|
|
} |
|
3018
|
|
|
|
|
|
|
|
|
3019
|
|
|
|
|
|
|
sub sel_len |
|
3020
|
|
|
|
|
|
|
{ |
|
3021
|
20
|
|
|
20
|
0
|
86
|
my $this = shift; |
|
3022
|
|
|
|
|
|
|
|
|
3023
|
16
|
|
|
|
|
50
|
return(ref($this->{_Selection}) eq 'ARRAY' ? |
|
3024
|
20
|
100
|
|
|
|
51
|
@{$this->{_Selection}}+0 : |
|
3025
|
|
|
|
|
|
|
$this->length()); |
|
3026
|
|
|
|
|
|
|
} |
|
3027
|
|
|
|
|
|
|
|
|
3028
|
|
|
|
|
|
|
sub sels ## ($ColNames) |
|
3029
|
|
|
|
|
|
|
{ |
|
3030
|
56
|
|
|
56
|
0
|
71
|
my $this = shift; |
|
3031
|
56
|
|
|
|
|
76
|
my ($ColNames) = @_; |
|
3032
|
56
|
|
66
|
|
|
128
|
$ColNames ||= $this->fieldlist(); |
|
3033
|
56
|
|
|
|
|
196
|
my $Sels = [map {$this->sel($_)} @$ColNames]; |
|
|
216
|
|
|
|
|
427
|
|
|
3034
|
|
|
|
|
|
|
|
|
3035
|
56
|
|
|
|
|
268
|
return($Sels); |
|
3036
|
|
|
|
|
|
|
} |
|
3037
|
|
|
|
|
|
|
|
|
3038
|
|
|
|
|
|
|
sub sels_hash ## ($ColNames) |
|
3039
|
|
|
|
|
|
|
{ |
|
3040
|
4
|
|
|
4
|
0
|
8
|
my $this = shift; |
|
3041
|
4
|
|
|
|
|
7
|
my ($ColNames) = @_; |
|
3042
|
4
|
|
66
|
|
|
18
|
$ColNames ||= $this->fieldlist(); |
|
3043
|
4
|
|
|
|
|
9
|
my $Sels = $this->sels($ColNames); |
|
3044
|
4
|
|
|
|
|
8
|
my $SelsHash = {}; @$SelsHash{@$ColNames} = @$Sels; |
|
|
4
|
|
|
|
|
16
|
|
|
3045
|
|
|
|
|
|
|
|
|
3046
|
4
|
|
|
|
|
35
|
return($SelsHash); |
|
3047
|
|
|
|
|
|
|
} |
|
3048
|
|
|
|
|
|
|
|
|
3049
|
|
|
|
|
|
|
=pod |
|
3050
|
|
|
|
|
|
|
|
|
3051
|
|
|
|
|
|
|
=head1 SEARCHING / SELECTING RECORDS |
|
3052
|
|
|
|
|
|
|
|
|
3053
|
|
|
|
|
|
|
## Modifying the table's custom selection (_Selection) |
|
3054
|
|
|
|
|
|
|
|
|
3055
|
|
|
|
|
|
|
$t->select_all() ## Set _Selection = $t->all() or undef |
|
3056
|
|
|
|
|
|
|
$t->select_none() ## Set _Selection = [] |
|
3057
|
|
|
|
|
|
|
$t->select_inverse() ## Invert the curr. sel. (and get it) |
|
3058
|
|
|
|
|
|
|
|
|
3059
|
|
|
|
|
|
|
## Specific searches: "the select() methods" |
|
3060
|
|
|
|
|
|
|
|
|
3061
|
|
|
|
|
|
|
$t->select($Field1=>$Sub1, ## Del nonmatching recs from sel. |
|
3062
|
|
|
|
|
|
|
$Field2=>$Sub2, ## i.e. narrow sel. to match |
|
3063
|
|
|
|
|
|
|
...); |
|
3064
|
|
|
|
|
|
|
|
|
3065
|
|
|
|
|
|
|
$t->omit ($Field1=>$Sub1, ## Del matching recs from sel. |
|
3066
|
|
|
|
|
|
|
$Field2=>$Sub2, |
|
3067
|
|
|
|
|
|
|
...); |
|
3068
|
|
|
|
|
|
|
|
|
3069
|
|
|
|
|
|
|
$t->add ($Field1=>$Sub1, ## Add matching recs to sel. |
|
3070
|
|
|
|
|
|
|
$Field2=>$Sub2, |
|
3071
|
|
|
|
|
|
|
...); |
|
3072
|
|
|
|
|
|
|
|
|
3073
|
|
|
|
|
|
|
$t->but ($Field1=>$Sub1, ## Add nonmatching recs to sel. |
|
3074
|
|
|
|
|
|
|
$Field2=>$Sub2, |
|
3075
|
|
|
|
|
|
|
...); |
|
3076
|
|
|
|
|
|
|
|
|
3077
|
|
|
|
|
|
|
## Getting useful lists of record numbers... |
|
3078
|
|
|
|
|
|
|
|
|
3079
|
|
|
|
|
|
|
$t->all() ## Get "full" sel. (all record #s) |
|
3080
|
|
|
|
|
|
|
$t->selection() ## Get current selection |
|
3081
|
|
|
|
|
|
|
$t->selection_inverse() ## Get inverse copy of curr. sel. |
|
3082
|
|
|
|
|
|
|
|
|
3083
|
|
|
|
|
|
|
## Example 1: Refine a selection by narrowing down... |
|
3084
|
|
|
|
|
|
|
|
|
3085
|
|
|
|
|
|
|
$t->select_all() |
|
3086
|
|
|
|
|
|
|
$t->select(Field1 => sub {$_}); |
|
3087
|
|
|
|
|
|
|
$t->select(Field2 => sub {$_}); |
|
3088
|
|
|
|
|
|
|
$t->select(Field3 => sub {$_}); |
|
3089
|
|
|
|
|
|
|
|
|
3090
|
|
|
|
|
|
|
## Example 2: Manually refine and set the selection... |
|
3091
|
|
|
|
|
|
|
|
|
3092
|
|
|
|
|
|
|
$Sel = [grep {$t->col($Field1)->[$_]} @{$t->all ()}]; |
|
3093
|
|
|
|
|
|
|
$Sel = [grep {$t->col($Field2)->[$_]} @$Sel]; |
|
3094
|
|
|
|
|
|
|
$Sel = [grep {$t->col($Field3)->[$_]} @$Sel]; |
|
3095
|
|
|
|
|
|
|
$t->selection($Sel); ## Set the selection when done. |
|
3096
|
|
|
|
|
|
|
|
|
3097
|
|
|
|
|
|
|
## Example 3: Complex manual search using calculated value |
|
3098
|
|
|
|
|
|
|
|
|
3099
|
|
|
|
|
|
|
my $A = $t->col('A'); |
|
3100
|
|
|
|
|
|
|
my $B = $t->col('B'); |
|
3101
|
|
|
|
|
|
|
my $S = [grep |
|
3102
|
|
|
|
|
|
|
{my $X = $A->[$_] + $B->[$_]; ($X > 100 && $X < 200);} |
|
3103
|
|
|
|
|
|
|
@{$t->all()}]; ## Or could start with $t->selection(). |
|
3104
|
|
|
|
|
|
|
$t->selection($S); ## Set the selection when done. |
|
3105
|
|
|
|
|
|
|
|
|
3106
|
|
|
|
|
|
|
## Example 4: Refine a selection by building up... |
|
3107
|
|
|
|
|
|
|
|
|
3108
|
|
|
|
|
|
|
$t->select_none() |
|
3109
|
|
|
|
|
|
|
$t->add(Field1 => sub {$_}); |
|
3110
|
|
|
|
|
|
|
$t->add(Field2 => sub {$_}); |
|
3111
|
|
|
|
|
|
|
$t->add(Field3 => sub {$_}); |
|
3112
|
|
|
|
|
|
|
|
|
3113
|
|
|
|
|
|
|
## Example 5: Combining the select() methods to build a query... |
|
3114
|
|
|
|
|
|
|
|
|
3115
|
|
|
|
|
|
|
$t->select_all() |
|
3116
|
|
|
|
|
|
|
$t->select(Status => sub {/prime/i }); |
|
3117
|
|
|
|
|
|
|
$t->omit (DueDate => sub {$_ > $Today}); |
|
3118
|
|
|
|
|
|
|
$t->add (Force => sub {$_ }); |
|
3119
|
|
|
|
|
|
|
|
|
3120
|
|
|
|
|
|
|
select() and its friends omit(), add(), and but(), known collectively |
|
3121
|
|
|
|
|
|
|
as "the select() methods," all work similarly: they take a series of |
|
3122
|
|
|
|
|
|
|
one or more pairs indicating matches to be done, where each match is |
|
3123
|
|
|
|
|
|
|
specified as (FieldName => Subroutine). |
|
3124
|
|
|
|
|
|
|
|
|
3125
|
|
|
|
|
|
|
In addition to the field names already present in the table, the |
|
3126
|
|
|
|
|
|
|
FieldName in any Spec may also be one of these two special |
|
3127
|
|
|
|
|
|
|
pseudo-fields: |
|
3128
|
|
|
|
|
|
|
|
|
3129
|
|
|
|
|
|
|
=over 4 |
|
3130
|
|
|
|
|
|
|
|
|
3131
|
|
|
|
|
|
|
=item _RecNum |
|
3132
|
|
|
|
|
|
|
|
|
3133
|
|
|
|
|
|
|
the record number of the record being compared |
|
3134
|
|
|
|
|
|
|
|
|
3135
|
|
|
|
|
|
|
=item _SelNum |
|
3136
|
|
|
|
|
|
|
|
|
3137
|
|
|
|
|
|
|
the numerical position of the record being compared within the |
|
3138
|
|
|
|
|
|
|
previous selection (only usable with select() and omit() since add() |
|
3139
|
|
|
|
|
|
|
and but() by definition operate on non-selected records). |
|
3140
|
|
|
|
|
|
|
|
|
3141
|
|
|
|
|
|
|
=back |
|
3142
|
|
|
|
|
|
|
|
|
3143
|
|
|
|
|
|
|
For example: |
|
3144
|
|
|
|
|
|
|
|
|
3145
|
|
|
|
|
|
|
## Match 2nd 100 rec numbers |
|
3146
|
|
|
|
|
|
|
$t->select(_RecNum => sub {$_ >= 100 && $_ <= 199}); |
|
3147
|
|
|
|
|
|
|
|
|
3148
|
|
|
|
|
|
|
## Match 2nd 100 currently selected/sorted items |
|
3149
|
|
|
|
|
|
|
$t->select(_SelNum => sub {$_ >= 100 && $_ <= 199}); |
|
3150
|
|
|
|
|
|
|
|
|
3151
|
|
|
|
|
|
|
Be careful when using _SelNum in a search. In the above _SelNum search |
|
3152
|
|
|
|
|
|
|
example, since the selection itself will be modified by select(), the |
|
3153
|
|
|
|
|
|
|
items that were formerly selection items 100 - 199 will now be _SelNum |
|
3154
|
|
|
|
|
|
|
0 - 99 in the new selection. |
|
3155
|
|
|
|
|
|
|
|
|
3156
|
|
|
|
|
|
|
The Subroutine is an anonymous grep-style predicate that operates on |
|
3157
|
|
|
|
|
|
|
$_ and should return true/false to indicate a match with an element of |
|
3158
|
|
|
|
|
|
|
the field FieldName. |
|
3159
|
|
|
|
|
|
|
|
|
3160
|
|
|
|
|
|
|
The order of multiple matches in a single method call is significant |
|
3161
|
|
|
|
|
|
|
only in that the searches can be faster if the field that will match |
|
3162
|
|
|
|
|
|
|
the fewest records is listed first. |
|
3163
|
|
|
|
|
|
|
|
|
3164
|
|
|
|
|
|
|
A given FieldName may be listed in the specs more than once if it has |
|
3165
|
|
|
|
|
|
|
multiple search criteria that you prefer to execute as multiple |
|
3166
|
|
|
|
|
|
|
subroutines (though it would be more efficient on very large tables to |
|
3167
|
|
|
|
|
|
|
combine their logic into one subroutine joined with "&&"). |
|
3168
|
|
|
|
|
|
|
|
|
3169
|
|
|
|
|
|
|
Each field match will be applied (with an implied AND joining them) to |
|
3170
|
|
|
|
|
|
|
determine whether the record itself matches. Then, based on whether |
|
3171
|
|
|
|
|
|
|
the record itself matches, it will either be added or deleted from the |
|
3172
|
|
|
|
|
|
|
selection based on which method is being called: |
|
3173
|
|
|
|
|
|
|
|
|
3174
|
|
|
|
|
|
|
method... operates on... action.... |
|
3175
|
|
|
|
|
|
|
------------------------------------------------------------------ |
|
3176
|
|
|
|
|
|
|
select() selected records Keep only recs that DO match |
|
3177
|
|
|
|
|
|
|
omit() selected records Keep only recs that DO NOT match |
|
3178
|
|
|
|
|
|
|
add() non-selected recs Add recs that DO match |
|
3179
|
|
|
|
|
|
|
but() non-selected recs Add recs that DO NOT match |
|
3180
|
|
|
|
|
|
|
|
|
3181
|
|
|
|
|
|
|
Here's how to think about what's going on: |
|
3182
|
|
|
|
|
|
|
|
|
3183
|
|
|
|
|
|
|
methods... think... |
|
3184
|
|
|
|
|
|
|
------------------------------------------------------------------ |
|
3185
|
|
|
|
|
|
|
select() "SELECT things matching this"... |
|
3186
|
|
|
|
|
|
|
omit() "... then OMIT those matching this." |
|
3187
|
|
|
|
|
|
|
|
|
3188
|
|
|
|
|
|
|
select() "SELECT things matching this"... |
|
3189
|
|
|
|
|
|
|
add() "... and ADD any others matching this." |
|
3190
|
|
|
|
|
|
|
|
|
3191
|
|
|
|
|
|
|
select() "SELECT things matching this"... |
|
3192
|
|
|
|
|
|
|
but() "... and add any others BUT those matching this." |
|
3193
|
|
|
|
|
|
|
|
|
3194
|
|
|
|
|
|
|
select() and omit() both NARROW the selection. |
|
3195
|
|
|
|
|
|
|
|
|
3196
|
|
|
|
|
|
|
add() and but() both INCREASE the selection. |
|
3197
|
|
|
|
|
|
|
|
|
3198
|
|
|
|
|
|
|
IMPORTANT: You DO NOT need to use these select() routines to work with |
|
3199
|
|
|
|
|
|
|
selections. It may be much easier for you to clarify your logic, or |
|
3200
|
|
|
|
|
|
|
more efficient to express your search, using a single grep or series |
|
3201
|
|
|
|
|
|
|
of grep operations as in Examples 2 or 3 above. |
|
3202
|
|
|
|
|
|
|
|
|
3203
|
|
|
|
|
|
|
Building the selection manually is required if you want to filter |
|
3204
|
|
|
|
|
|
|
based on any COMPLEX RELATIONSHIPS BETWEEN FIELDS. For example, if |
|
3205
|
|
|
|
|
|
|
you want to add two fields and match or reject the record based on the |
|
3206
|
|
|
|
|
|
|
sum of the fields. |
|
3207
|
|
|
|
|
|
|
|
|
3208
|
|
|
|
|
|
|
In Example 3 above, we add the values in fields "A" and "B" and then |
|
3209
|
|
|
|
|
|
|
match the record only if the SUM is between 100 and 199. By grepping |
|
3210
|
|
|
|
|
|
|
to produce a subset of @{$t->all()}, you end up with a Selection -- a |
|
3211
|
|
|
|
|
|
|
list of record numbers you want "selected". Then you call |
|
3212
|
|
|
|
|
|
|
$t->selection() to put the selection you built into the object. |
|
3213
|
|
|
|
|
|
|
|
|
3214
|
|
|
|
|
|
|
If you had instead wanted to narrow an existing selection in the above |
|
3215
|
|
|
|
|
|
|
example, you would start with $t->selection() (which defaults to |
|
3216
|
|
|
|
|
|
|
$t->all()) instead of starting with $t->all(). |
|
3217
|
|
|
|
|
|
|
|
|
3218
|
|
|
|
|
|
|
Each of the select() methods returns $this->selection() as a |
|
3219
|
|
|
|
|
|
|
convenience. |
|
3220
|
|
|
|
|
|
|
|
|
3221
|
|
|
|
|
|
|
=head2 The effects of modifying a sorted selection |
|
3222
|
|
|
|
|
|
|
|
|
3223
|
|
|
|
|
|
|
Generally, you should sort AFTER finding, and you should not generally |
|
3224
|
|
|
|
|
|
|
rely on sort order after doing a find. But in case you want to know, |
|
3225
|
|
|
|
|
|
|
the following chart explains what happens to the sort order after the |
|
3226
|
|
|
|
|
|
|
various select() commands are called (at least in the current |
|
3227
|
|
|
|
|
|
|
implementation, which may change without notice): |
|
3228
|
|
|
|
|
|
|
|
|
3229
|
|
|
|
|
|
|
method... effect on an existing sort order... |
|
3230
|
|
|
|
|
|
|
------------------------------------------------------------------ |
|
3231
|
|
|
|
|
|
|
select() relative sort order is preserved (stay sorted) |
|
3232
|
|
|
|
|
|
|
omit() all selected recs restored to "natural" order (unsorted) |
|
3233
|
|
|
|
|
|
|
add() orig. recs preserved; new recs appended: "natural" order |
|
3234
|
|
|
|
|
|
|
but() orig. recs preserved; new recs appended: "natural" order |
|
3235
|
|
|
|
|
|
|
|
|
3236
|
|
|
|
|
|
|
In other words, you could sort() first and then call select() to |
|
3237
|
|
|
|
|
|
|
narrow down the selection repeatedly without disrupting the sort |
|
3238
|
|
|
|
|
|
|
order. However, any of the other methods will disrupt the sort order |
|
3239
|
|
|
|
|
|
|
and you would need to re-sort. The preservation of order when using |
|
3240
|
|
|
|
|
|
|
select(), and other sort order effects, are likely but not guaranteed |
|
3241
|
|
|
|
|
|
|
to be preserved in future implementations. |
|
3242
|
|
|
|
|
|
|
|
|
3243
|
|
|
|
|
|
|
=head2 Hints about Boolean logic |
|
3244
|
|
|
|
|
|
|
|
|
3245
|
|
|
|
|
|
|
Consider the following example and the alternative below it. You |
|
3246
|
|
|
|
|
|
|
might initially think these are equivalent, but they're not: |
|
3247
|
|
|
|
|
|
|
|
|
3248
|
|
|
|
|
|
|
## Case 1: |
|
3249
|
|
|
|
|
|
|
|
|
3250
|
|
|
|
|
|
|
my $Sel = $t->add(Force => sub {$_ == 1 }); |
|
3251
|
|
|
|
|
|
|
my $Sel = $t->add(Status => sub {$_ eq 'Prime'}); |
|
3252
|
|
|
|
|
|
|
|
|
3253
|
|
|
|
|
|
|
## Case 2: |
|
3254
|
|
|
|
|
|
|
|
|
3255
|
|
|
|
|
|
|
my $Sel = $t->add(Force => sub {$_ == 1 }, |
|
3256
|
|
|
|
|
|
|
Status => sub {$_ eq 'Prime'}); |
|
3257
|
|
|
|
|
|
|
|
|
3258
|
|
|
|
|
|
|
Case 1 extends the selection by adding all records where Force == 1, |
|
3259
|
|
|
|
|
|
|
and then extends it again by adding all additional records where |
|
3260
|
|
|
|
|
|
|
Status eq 'Prime'. |
|
3261
|
|
|
|
|
|
|
|
|
3262
|
|
|
|
|
|
|
Case 2 adds only those records where: Force == 1 AND ALSO, IN THE SAME |
|
3263
|
|
|
|
|
|
|
RECORD, Status eq 'Prime'. |
|
3264
|
|
|
|
|
|
|
|
|
3265
|
|
|
|
|
|
|
One final note about logic. This is not SQL and these select() etc. |
|
3266
|
|
|
|
|
|
|
routines are not meant to replace the full power of a programming |
|
3267
|
|
|
|
|
|
|
language. |
|
3268
|
|
|
|
|
|
|
|
|
3269
|
|
|
|
|
|
|
If you want full Boolean expressions, use the power of Perl to form |
|
3270
|
|
|
|
|
|
|
your own arbitrarily complex query using grep as in Example 3 above. |
|
3271
|
|
|
|
|
|
|
|
|
3272
|
|
|
|
|
|
|
Writing your own grep is also almost always faster than chaining the |
|
3273
|
|
|
|
|
|
|
builtin select() methods or using multiple Field / Sub specifications, |
|
3274
|
|
|
|
|
|
|
so keep that in mind when working with extremely large data sets. |
|
3275
|
|
|
|
|
|
|
|
|
3276
|
|
|
|
|
|
|
With tables of only a few thousand records or so, you probably won't |
|
3277
|
|
|
|
|
|
|
notice the difference in efficiency. |
|
3278
|
|
|
|
|
|
|
|
|
3279
|
|
|
|
|
|
|
=cut |
|
3280
|
|
|
|
|
|
|
|
|
3281
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
|
3282
|
|
|
|
|
|
|
|
|
3283
|
4
|
|
|
4
|
0
|
35
|
sub select {my $this = shift; return($this->select_internal(!'Add', !'Not', @_))} |
|
|
4
|
|
|
|
|
13
|
|
|
3284
|
5
|
|
|
5
|
0
|
29
|
sub omit {my $this = shift; return($this->select_internal(!'Add', 'Not', @_))} |
|
|
5
|
|
|
|
|
19
|
|
|
3285
|
3
|
|
|
3
|
0
|
23
|
sub add {my $this = shift; return($this->select_internal( 'Add', !'Not', @_))} |
|
|
3
|
|
|
|
|
8
|
|
|
3286
|
1
|
|
|
1
|
0
|
8
|
sub but {my $this = shift; return($this->select_internal( 'Add', 'Not', @_))} |
|
|
1
|
|
|
|
|
4
|
|
|
3287
|
|
|
|
|
|
|
|
|
3288
|
|
|
|
|
|
|
sub select_internal ## Implements all 4 "select() methods" |
|
3289
|
|
|
|
|
|
|
{ |
|
3290
|
13
|
|
|
13
|
0
|
23
|
my $this = shift; |
|
3291
|
13
|
|
|
|
|
34
|
my ($Add, $Not, @Specs) = @_; |
|
3292
|
|
|
|
|
|
|
|
|
3293
|
|
|
|
|
|
|
## In "Add" mode, we only operate on not-yet-selected records. |
|
3294
|
|
|
|
|
|
|
## Otherwise, we operate on the current selection. |
|
3295
|
|
|
|
|
|
|
|
|
3296
|
|
|
|
|
|
|
## Either way, start out with all of one or the other. |
|
3297
|
|
|
|
|
|
|
|
|
3298
|
13
|
100
|
|
|
|
40
|
my $Start = ($Add ? $this->selection_inverse() : $this->selection()); |
|
3299
|
|
|
|
|
|
|
|
|
3300
|
|
|
|
|
|
|
## Then grep repeatedly for each Field/Sub spec we were given, in |
|
3301
|
|
|
|
|
|
|
## order. For a record to match, ALL specs must match -- i.e. the |
|
3302
|
|
|
|
|
|
|
## record number must make it through the grep gauntlet once for |
|
3303
|
|
|
|
|
|
|
## each Field/Sub in the Specs. |
|
3304
|
|
|
|
|
|
|
|
|
3305
|
13
|
|
|
|
|
22
|
my $Pseudo = {}; ## hold pseudo-columns _RecNum, _SelNum if requested. |
|
3306
|
|
|
|
|
|
|
|
|
3307
|
13
|
|
|
|
|
19
|
my $Matches = $Start; |
|
3308
|
|
|
|
|
|
|
|
|
3309
|
13
|
|
|
|
|
18
|
my $i = 0; |
|
3310
|
13
|
|
|
|
|
36
|
while ($i < (@Specs - 1)) |
|
3311
|
|
|
|
|
|
|
{ |
|
3312
|
13
|
|
|
|
|
31
|
my ($Field, $Sub) = @Specs[$i++, $i++]; |
|
3313
|
|
|
|
|
|
|
|
|
3314
|
13
|
50
|
33
|
|
|
87
|
next unless ((length($Field)) && (ref($Sub) eq 'CODE')); |
|
3315
|
|
|
|
|
|
|
|
|
3316
|
|
|
|
|
|
|
## Create pseudo-fields _RecNum / _SelNum if needed, but at |
|
3317
|
|
|
|
|
|
|
## most once per invocation. |
|
3318
|
|
|
|
|
|
|
|
|
3319
|
13
|
50
|
0
|
|
|
33
|
$Pseudo->{_RecNum} ||= $this->all() if ($Field eq '_RecNum'); |
|
3320
|
13
|
50
|
0
|
|
|
28
|
$Pseudo->{_SelNum} ||= $this->selnum_mask() if ($Field eq '_SelNum'); |
|
3321
|
|
|
|
|
|
|
|
|
3322
|
|
|
|
|
|
|
## Narrow down $Matches using this Field/Spec, then move on to next. |
|
3323
|
36
|
50
|
33
|
|
|
142
|
$Matches = |
|
3324
|
|
|
|
|
|
|
[grep |
|
3325
|
|
|
|
|
|
|
{ |
|
3326
|
|
|
|
|
|
|
## Locally bind $_ to value of field in this column / this record. |
|
3327
|
13
|
|
|
|
|
23
|
local $_ = $ {($this ->{$Field} || ## 98% of time: good field name |
|
|
36
|
|
|
|
|
178
|
|
|
3328
|
|
|
|
|
|
|
$Pseudo->{$Field} || ## 1% of time: _RecNum or _SelNum |
|
3329
|
|
|
|
|
|
|
($this ->warn("Bad field name: $Field"), |
|
3330
|
|
|
|
|
|
|
$this ->col_empty()) |
|
3331
|
|
|
|
|
|
|
)}[$_]; ## look up value in record $_ of column |
|
3332
|
|
|
|
|
|
|
|
|
3333
|
|
|
|
|
|
|
## Call the sub & let it yield the Boolean value. |
|
3334
|
36
|
|
|
|
|
73
|
&$Sub(); |
|
3335
|
|
|
|
|
|
|
} |
|
3336
|
|
|
|
|
|
|
@$Matches]; |
|
3337
|
|
|
|
|
|
|
} |
|
3338
|
|
|
|
|
|
|
|
|
3339
|
|
|
|
|
|
|
## IMPLEMENTATION NOTE: |
|
3340
|
|
|
|
|
|
|
|
|
3341
|
|
|
|
|
|
|
## The logic below to support "Not" looks complicated, and indeed |
|
3342
|
|
|
|
|
|
|
## it could be made cleaner if we were to process the "Not" logic |
|
3343
|
|
|
|
|
|
|
## during the previous step. However, doing that would make the |
|
3344
|
|
|
|
|
|
|
## above nested loop(s) above much less efficient because we'd |
|
3345
|
|
|
|
|
|
|
## have to move the while() loop inside the grep -- repeating that |
|
3346
|
|
|
|
|
|
|
## loop up to several times for each record instead of just a few |
|
3347
|
|
|
|
|
|
|
## times total. So the logic below will actually save execution |
|
3348
|
|
|
|
|
|
|
## time. Besides, using array-slicing to achieve the selection |
|
3349
|
|
|
|
|
|
|
## masking is quite fast. |
|
3350
|
|
|
|
|
|
|
|
|
3351
|
|
|
|
|
|
|
## "Add" means append the matching record numbers to the existing |
|
3352
|
|
|
|
|
|
|
## selection, if any. |
|
3353
|
|
|
|
|
|
|
|
|
3354
|
|
|
|
|
|
|
## sub select() |
|
3355
|
13
|
100
|
100
|
|
|
173
|
if (!$Not && !$Add) ## ... i.e. remove unfound (i.e. keep (only) the ones we found)... |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
3356
|
|
|
|
|
|
|
{ |
|
3357
|
4
|
|
|
|
|
11
|
$this->{_Selection} = $Matches; ## sort order is preserved... |
|
3358
|
|
|
|
|
|
|
} |
|
3359
|
|
|
|
|
|
|
|
|
3360
|
|
|
|
|
|
|
## sub add() ## ... i.e. add the ones we found... |
|
3361
|
|
|
|
|
|
|
elsif (!$Not && $Add) |
|
3362
|
|
|
|
|
|
|
{ |
|
3363
|
3
|
|
|
|
|
5
|
push @{$this->selection()}, @$Matches; ## order preserved in first part only |
|
|
3
|
|
|
|
|
8
|
|
|
3364
|
|
|
|
|
|
|
} |
|
3365
|
|
|
|
|
|
|
|
|
3366
|
|
|
|
|
|
|
## ! "Add" means replace the existing selection with those that |
|
3367
|
|
|
|
|
|
|
## matched (effectively removing any non-matching ones). |
|
3368
|
|
|
|
|
|
|
|
|
3369
|
|
|
|
|
|
|
## "Not" means select the opposite of the set of records we just |
|
3370
|
|
|
|
|
|
|
## matched. |
|
3371
|
|
|
|
|
|
|
|
|
3372
|
|
|
|
|
|
|
## sub omit() |
|
3373
|
|
|
|
|
|
|
elsif ( $Not && !$Add) ## ... i.e. remove the opposite of the ones we found... |
|
3374
|
|
|
|
|
|
|
{ |
|
3375
|
5
|
|
|
|
|
20
|
my $Sel = $this->col_empty(); ## Start with empty mask (all entries undef). |
|
3376
|
5
|
|
|
|
|
17
|
@$Sel[@$Start] = @$Start; ## Mask in those in the original selection. |
|
3377
|
5
|
|
|
|
|
14
|
@$Sel[@$Matches] = undef; ## Mask out those we found. |
|
3378
|
5
|
|
|
|
|
13
|
my $NonMatches = [grep {defined} @$Sel]; ## The remaining ones are the non-matches. |
|
|
15
|
|
|
|
|
32
|
|
|
3379
|
|
|
|
|
|
|
|
|
3380
|
5
|
|
|
|
|
19
|
$this->{_Selection} = $NonMatches; ## The new selection IS the non-matches. |
|
3381
|
|
|
|
|
|
|
|
|
3382
|
|
|
|
|
|
|
## selection order not preserved |
|
3383
|
|
|
|
|
|
|
} |
|
3384
|
|
|
|
|
|
|
|
|
3385
|
|
|
|
|
|
|
## sub but() |
|
3386
|
|
|
|
|
|
|
elsif ( $Not && $Add) ## ... i.e. add the opposite of the ones we found... |
|
3387
|
|
|
|
|
|
|
{ |
|
3388
|
1
|
|
|
|
|
5
|
my $Sel = $this->all(); ## Start with a full selection mask. |
|
3389
|
1
|
|
|
|
|
2
|
@$Sel[@{$this->selection()}] = undef; ## Mask out those in original selection. |
|
|
1
|
|
|
|
|
5
|
|
|
3390
|
1
|
|
|
|
|
4
|
@$Sel[@$Matches ] = undef; ## Mask out those we found. |
|
3391
|
1
|
|
|
|
|
3
|
my $NonMatches = [grep {defined} @$Sel]; ## The remaining ones are the non-matches. |
|
|
3
|
|
|
|
|
8
|
|
|
3392
|
|
|
|
|
|
|
|
|
3393
|
1
|
|
|
|
|
3
|
push @{$this->selection()}, @$NonMatches; ## Add the non-matches to the selection. |
|
|
1
|
|
|
|
|
4
|
|
|
3394
|
|
|
|
|
|
|
|
|
3395
|
|
|
|
|
|
|
## order preserved in first part only |
|
3396
|
|
|
|
|
|
|
} |
|
3397
|
|
|
|
|
|
|
|
|
3398
|
13
|
|
|
|
|
28
|
return($this->selection()); |
|
3399
|
|
|
|
|
|
|
} |
|
3400
|
|
|
|
|
|
|
|
|
3401
|
|
|
|
|
|
|
sub selnum_mask ## Create mask mapping RecNum -> selected item num or undef if not selected |
|
3402
|
|
|
|
|
|
|
{ |
|
3403
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
|
3404
|
0
|
|
|
|
|
0
|
my $Mask = $this->col_empty(); |
|
3405
|
0
|
|
|
|
|
0
|
my $Sel = $this->selection(); |
|
3406
|
0
|
|
|
|
|
0
|
@$Mask[@$Sel] = [0..$#$Sel]; |
|
3407
|
|
|
|
|
|
|
|
|
3408
|
0
|
|
|
|
|
0
|
return($Mask); |
|
3409
|
|
|
|
|
|
|
} |
|
3410
|
|
|
|
|
|
|
|
|
3411
|
|
|
|
|
|
|
=pod |
|
3412
|
|
|
|
|
|
|
|
|
3413
|
|
|
|
|
|
|
=head1 SORTING |
|
3414
|
|
|
|
|
|
|
|
|
3415
|
|
|
|
|
|
|
## Sort the current table's _Selection |
|
3416
|
|
|
|
|
|
|
|
|
3417
|
|
|
|
|
|
|
$t->sort() ## Use existing/default params |
|
3418
|
|
|
|
|
|
|
$t->sort([qw(Last First Phone)]) ## Specify _SortOrder (fields) |
|
3419
|
|
|
|
|
|
|
$t->sort( ## Named-parameter call: |
|
3420
|
|
|
|
|
|
|
_SortOrder => [...], ## override sort-related params. |
|
3421
|
|
|
|
|
|
|
_Selection => [...], ## (See param lists above). |
|
3422
|
|
|
|
|
|
|
_SortSpecs => {...}, |
|
3423
|
|
|
|
|
|
|
_SRoutines => {...}, |
|
3424
|
|
|
|
|
|
|
_DefaultSortType=>'Integer', |
|
3425
|
|
|
|
|
|
|
_DefaultSortDirection=>-1, |
|
3426
|
|
|
|
|
|
|
); |
|
3427
|
|
|
|
|
|
|
|
|
3428
|
|
|
|
|
|
|
The sort() method modifies the _Selection (creating one with all |
|
3429
|
|
|
|
|
|
|
records if it was missing, undef, or not supplied by caller) so that |
|
3430
|
|
|
|
|
|
|
the record numbers listed there are sorted according to the criteria |
|
3431
|
|
|
|
|
|
|
implied by _SortOrder, _SortSpecs, _SRoutines, etc. |
|
3432
|
|
|
|
|
|
|
|
|
3433
|
|
|
|
|
|
|
For example, before sorting, a table's "natural" order might be: |
|
3434
|
|
|
|
|
|
|
|
|
3435
|
|
|
|
|
|
|
Rec# First Last Age State |
|
3436
|
|
|
|
|
|
|
0 Chris Zack 43 CA |
|
3437
|
|
|
|
|
|
|
1 Marco Bart 22 NV |
|
3438
|
|
|
|
|
|
|
2 Pearl Muth 15 HI |
|
3439
|
|
|
|
|
|
|
|
|
3440
|
|
|
|
|
|
|
... and its selection() method would yield: [0, 1, 2] -- which is a |
|
3441
|
|
|
|
|
|
|
list of all the records, in order. |
|
3442
|
|
|
|
|
|
|
|
|
3443
|
|
|
|
|
|
|
After calling $t->sort([Last]), selection() would yield [1, 2, 0]. So |
|
3444
|
|
|
|
|
|
|
displaying the table in "selection" order would yield: |
|
3445
|
|
|
|
|
|
|
|
|
3446
|
|
|
|
|
|
|
Rec# First Last Age State |
|
3447
|
|
|
|
|
|
|
1 Marco Bart 22 NV |
|
3448
|
|
|
|
|
|
|
2 Pearl Muth 15 HI |
|
3449
|
|
|
|
|
|
|
0 Chris Zack 43 CA |
|
3450
|
|
|
|
|
|
|
|
|
3451
|
|
|
|
|
|
|
IMPORTANT: sorting does not alter any data in the table. It merely |
|
3452
|
|
|
|
|
|
|
alters the _Selection parameter (which you can then get and set using |
|
3453
|
|
|
|
|
|
|
the selection() methods described above). |
|
3454
|
|
|
|
|
|
|
|
|
3455
|
|
|
|
|
|
|
If you want to permanently alter the table's data in memory so that |
|
3456
|
|
|
|
|
|
|
the new sorted order becomes the "natural" order, you can use the |
|
3457
|
|
|
|
|
|
|
cull() method to modify the original object, the snapshot() method to |
|
3458
|
|
|
|
|
|
|
make a new object, or use the write() method to write the data to disk |
|
3459
|
|
|
|
|
|
|
in selected/sorted order and then read() it back again. |
|
3460
|
|
|
|
|
|
|
|
|
3461
|
|
|
|
|
|
|
=head2 Using the Named-parameter calling convention with sort() |
|
3462
|
|
|
|
|
|
|
|
|
3463
|
|
|
|
|
|
|
You may specify any combination of the parameters listed above when |
|
3464
|
|
|
|
|
|
|
calling sort(). Any you specify will be used IN PLACE OF the |
|
3465
|
|
|
|
|
|
|
corresponding parameters already found in the object. |
|
3466
|
|
|
|
|
|
|
|
|
3467
|
|
|
|
|
|
|
If you specify _Selection using the named-parameter calling, the |
|
3468
|
|
|
|
|
|
|
sort() method reserves the right to "own" the list you provide, and |
|
3469
|
|
|
|
|
|
|
use it as the object's new _Selection, possibly discarding the |
|
3470
|
|
|
|
|
|
|
previous _Selection, if any and modifying the one you provided. So |
|
3471
|
|
|
|
|
|
|
don't make any assumptions about ownership of that list object after |
|
3472
|
|
|
|
|
|
|
calling sort(). Luckily, you will rarely need to provide _Selection |
|
3473
|
|
|
|
|
|
|
explicitly since generally you'll want to be sorting the selection() |
|
3474
|
|
|
|
|
|
|
already inherent in the object. |
|
3475
|
|
|
|
|
|
|
|
|
3476
|
|
|
|
|
|
|
sort() returns the _Selection list owned by the object (the same list |
|
3477
|
|
|
|
|
|
|
that would be returned if you called the selection() method |
|
3478
|
|
|
|
|
|
|
immediately after calling sort()). |
|
3479
|
|
|
|
|
|
|
|
|
3480
|
|
|
|
|
|
|
See the next sections for complete descriptions of _SortOrder and |
|
3481
|
|
|
|
|
|
|
other sorting parameters. |
|
3482
|
|
|
|
|
|
|
|
|
3483
|
|
|
|
|
|
|
=cut |
|
3484
|
|
|
|
|
|
|
|
|
3485
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
|
3486
|
|
|
|
|
|
|
|
|
3487
|
|
|
|
|
|
|
sub sort |
|
3488
|
|
|
|
|
|
|
{ |
|
3489
|
10
|
|
|
10
|
1
|
64
|
my $this = shift; |
|
3490
|
10
|
100
|
|
|
|
43
|
my $Params = (@_ == 1 ? {_SortOrder => $_[0]} : {@_}); |
|
3491
|
|
|
|
|
|
|
|
|
3492
|
60
|
|
|
|
|
111
|
my($SortOrder, $Selection, $SortSpecs, $SRoutines, $DefaultSortType, $DefaultSortDirection) |
|
3493
|
10
|
|
|
|
|
23
|
= map {$this->getparam($Params, $_)} |
|
3494
|
|
|
|
|
|
|
qw(_SortOrder _Selection _SortSpecs _SRoutines _DefaultSortType _DefaultSortDirection); |
|
3495
|
|
|
|
|
|
|
|
|
3496
|
|
|
|
|
|
|
## Validate / rectify all parameters... |
|
3497
|
|
|
|
|
|
|
|
|
3498
|
|
|
|
|
|
|
## Default sort order is Record Number |
|
3499
|
|
|
|
|
|
|
|
|
3500
|
10
|
50
|
33
|
|
|
69
|
$SortOrder = [qw(_RecNum)] unless ((ref($SortOrder) eq 'ARRAY') && @$SortOrder); |
|
3501
|
|
|
|
|
|
|
|
|
3502
|
|
|
|
|
|
|
## Note if we're going to sort on _RecNum ( requires extra work). |
|
3503
|
|
|
|
|
|
|
|
|
3504
|
10
|
|
|
|
|
18
|
my $NeedRecNum = grep {$_ eq '_RecNum'} @$SortOrder; |
|
|
14
|
|
|
|
|
38
|
|
|
3505
|
|
|
|
|
|
|
|
|
3506
|
|
|
|
|
|
|
## Default list of record numbers is all of them. |
|
3507
|
|
|
|
|
|
|
|
|
3508
|
10
|
100
|
|
|
|
34
|
$Selection = $this->selection() unless (ref($Selection) eq 'ARRAY'); |
|
3509
|
10
|
|
|
|
|
29
|
$Selection = $this->selection_validate_internal($Selection); |
|
3510
|
|
|
|
|
|
|
|
|
3511
|
|
|
|
|
|
|
## Our private copy of SortSpecs includes a spec for _RecNum |
|
3512
|
|
|
|
|
|
|
|
|
3513
|
10
|
50
|
|
|
|
32
|
$SortSpecs = {} unless (ref($SortSpecs) eq 'HASH'); |
|
3514
|
10
|
|
|
|
|
24
|
$SortSpecs = {%$SortSpecs}; |
|
3515
|
10
|
50
|
0
|
|
|
24
|
$SortSpecs ->{_RecNum} ||= {SortType => '_RecNum', SortDirection => 1} if $NeedRecNum; |
|
3516
|
|
|
|
|
|
|
|
|
3517
|
|
|
|
|
|
|
## Our private copy of SRoutines also has the builtin entries |
|
3518
|
|
|
|
|
|
|
## added in (including one for _RecNum) |
|
3519
|
|
|
|
|
|
|
|
|
3520
|
10
|
50
|
|
|
|
30
|
$SRoutines = {} unless (ref($SRoutines) eq 'HASH'); |
|
3521
|
10
|
|
|
|
|
20
|
$SRoutines = {%$SRoutines, %{$this->sortroutines_builtin()}}; |
|
|
10
|
|
|
|
|
26
|
|
|
3522
|
|
|
|
|
|
|
|
|
3523
|
|
|
|
|
|
|
## Ensure that DefaultSortType has a reasonable value for which we |
|
3524
|
|
|
|
|
|
|
## have a sort routine. |
|
3525
|
|
|
|
|
|
|
|
|
3526
|
10
|
50
|
33
|
|
|
77
|
$DefaultSortType = 'String' unless (length($DefaultSortType) && |
|
3527
|
|
|
|
|
|
|
exists($SRoutines->{$DefaultSortType})); |
|
3528
|
|
|
|
|
|
|
|
|
3529
|
|
|
|
|
|
|
## Ensure that DefaultSortDirection has a legal value (1 or -1; |
|
3530
|
|
|
|
|
|
|
## undef/0 will be treated as -1 (descending)) |
|
3531
|
|
|
|
|
|
|
|
|
3532
|
10
|
|
50
|
|
|
37
|
$DefaultSortDirection = (max(min(int($DefaultSortDirection), 1), -1) || -1); |
|
3533
|
|
|
|
|
|
|
|
|
3534
|
|
|
|
|
|
|
## Make some optimized lists of things to speed sorting. |
|
3535
|
|
|
|
|
|
|
|
|
3536
|
|
|
|
|
|
|
## Get a hash of all data columns in $this plus a temporary one |
|
3537
|
|
|
|
|
|
|
## for _RecNum if needed. |
|
3538
|
10
|
50
|
|
|
|
13
|
my $Cols = {%{$this->cols_hash($this->fieldlist_all())}, |
|
|
10
|
|
|
|
|
26
|
|
|
3539
|
|
|
|
|
|
|
($NeedRecNum ? (_RecNum => $this->all()) : ())}; |
|
3540
|
|
|
|
|
|
|
|
|
3541
|
|
|
|
|
|
|
## Get a list mapping field numbers in $SortOrder to data columns |
|
3542
|
|
|
|
|
|
|
## Get a list mapping field numbers in $SortOrder to sort directions |
|
3543
|
|
|
|
|
|
|
## Get a list mapping field numbers in $SortOrder to sort types |
|
3544
|
|
|
|
|
|
|
## Get a list mapping field numbers in $SortOrder to sort routines |
|
3545
|
|
|
|
|
|
|
## Get a list of the field numbers in $SortOrder |
|
3546
|
|
|
|
|
|
|
|
|
3547
|
10
|
50
|
|
|
|
43
|
my $SortCols = [map { $Cols->{$_} || $this->col($_) } @$SortOrder]; |
|
|
14
|
|
|
|
|
53
|
|
|
3548
|
10
|
100
|
|
|
|
21
|
my $SortDirs = [map {$ {$SortSpecs->{$_}||{}}{SortDirection} || $DefaultSortDirection} @$SortOrder]; |
|
|
14
|
100
|
|
|
|
17
|
|
|
|
14
|
|
|
|
|
108
|
|
|
3549
|
10
|
100
|
|
|
|
18
|
my $SortTypes = [map {$ {$SortSpecs->{$_}||{}}{SortType } || $DefaultSortType } @$SortOrder]; |
|
|
14
|
100
|
|
|
|
20
|
|
|
|
14
|
|
|
|
|
104
|
|
|
3550
|
10
|
50
|
33
|
0
|
|
17
|
my $SortSubs = [map { $SRoutines->{$_} || $SRoutines->{'String'} || sub {0} } @$SortTypes]; |
|
|
14
|
|
|
|
|
58
|
|
|
|
0
|
|
|
|
|
0
|
|
|
3551
|
10
|
|
|
|
|
29
|
my $FieldNums = [0 .. $#$SortOrder]; |
|
3552
|
|
|
|
|
|
|
|
|
3553
|
|
|
|
|
|
|
## Construct a sort subroutine that sorts record numbers by |
|
3554
|
|
|
|
|
|
|
## examining values in given fields in the table, in the order |
|
3555
|
|
|
|
|
|
|
## specified in $SortOrder. |
|
3556
|
|
|
|
|
|
|
|
|
3557
|
|
|
|
|
|
|
## If a given field's sort routine produces a zero $CmpVal, it |
|
3558
|
|
|
|
|
|
|
## means that the values are considered equal, and so to |
|
3559
|
|
|
|
|
|
|
## disambiguate, we keep trying the next fields in the sort order, |
|
3560
|
|
|
|
|
|
|
## until we've found one that compares non-zero or exhausted all |
|
3561
|
|
|
|
|
|
|
## the fields. If we get through all the specified sort fields |
|
3562
|
|
|
|
|
|
|
## and still get zeroes, the values must be equal in all the |
|
3563
|
|
|
|
|
|
|
## fields, and so the records are considered equal, so return 0. |
|
3564
|
|
|
|
|
|
|
|
|
3565
|
10
|
|
|
|
|
18
|
my $ProgCount; |
|
3566
|
|
|
|
|
|
|
my $ShowedProgress; |
|
3567
|
58
|
|
|
|
|
60
|
$Selection = |
|
3568
|
|
|
|
|
|
|
[sort |
|
3569
|
|
|
|
|
|
|
{ |
|
3570
|
10
|
|
|
|
|
30
|
my $CmpVal; |
|
3571
|
58
|
|
|
|
|
89
|
foreach (@$FieldNums) |
|
3572
|
|
|
|
|
|
|
{ |
|
3573
|
|
|
|
|
|
|
## $a and $b are record numbers to be compared. |
|
3574
|
|
|
|
|
|
|
## $_ is the number of a field in the above lists. |
|
3575
|
|
|
|
|
|
|
|
|
3576
|
62
|
|
|
|
|
107
|
$CmpVal = (&{ $SortSubs->[$_] } ## Call the sort routine for field $_ with... |
|
|
62
|
|
|
|
|
114
|
|
|
3577
|
|
|
|
|
|
|
(\ $SortCols->[$_]->[$a], ## 1st arg: ref to value in field $_, record $a |
|
3578
|
|
|
|
|
|
|
\ $SortCols->[$_]->[$b]) ## 2nd arg: ref to value in field $_, record $b. |
|
3579
|
|
|
|
|
|
|
* $SortDirs->[$_] ); ## Then invert cmp value if descending (-1) |
|
3580
|
|
|
|
|
|
|
|
|
3581
|
|
|
|
|
|
|
# print "($_, $SortCols->[$_]->[$a], $SortCols->[$_]->[$b]) ==> $CmpVal\n"; |
|
3582
|
|
|
|
|
|
|
|
|
3583
|
62
|
100
|
|
|
|
143
|
last if $CmpVal; ## Keep going if $CmpVal == 0 (same) |
|
3584
|
|
|
|
|
|
|
} |
|
3585
|
|
|
|
|
|
|
|
|
3586
|
|
|
|
|
|
|
## Maybe show timed progress (only after 2 seconds have elapsed) |
|
3587
|
58
|
100
|
|
|
|
142
|
my $Did = $this->progress_timed("Sorting", $ProgCount, undef, undef, 1) |
|
3588
|
|
|
|
|
|
|
if ((($ProgCount++) % 200) == 0); |
|
3589
|
58
|
|
33
|
|
|
165
|
$ShowedProgress ||= $Did; |
|
3590
|
|
|
|
|
|
|
|
|
3591
|
58
|
|
|
|
|
87
|
$CmpVal; |
|
3592
|
|
|
|
|
|
|
} |
|
3593
|
|
|
|
|
|
|
@$Selection]; |
|
3594
|
|
|
|
|
|
|
|
|
3595
|
|
|
|
|
|
|
## If no progress shown yet (sort took less than 2 seconds or 200 |
|
3596
|
|
|
|
|
|
|
## operations), show a message now. |
|
3597
|
|
|
|
|
|
|
|
|
3598
|
10
|
50
|
|
|
|
47
|
$this->progress("Sorted.") unless $ShowedProgress; |
|
3599
|
|
|
|
|
|
|
|
|
3600
|
|
|
|
|
|
|
## Replace any existing selection with the new, sorted, one. |
|
3601
|
10
|
|
|
|
|
17
|
$this->{_Selection} = $Selection; |
|
3602
|
|
|
|
|
|
|
|
|
3603
|
10
|
|
|
|
|
70
|
done: |
|
3604
|
|
|
|
|
|
|
return($Selection); |
|
3605
|
|
|
|
|
|
|
} |
|
3606
|
|
|
|
|
|
|
|
|
3607
|
|
|
|
|
|
|
=pod |
|
3608
|
|
|
|
|
|
|
|
|
3609
|
|
|
|
|
|
|
=head1 SORT ORDER |
|
3610
|
|
|
|
|
|
|
|
|
3611
|
|
|
|
|
|
|
## Getting / Setting table's default _SortOrder |
|
3612
|
|
|
|
|
|
|
|
|
3613
|
|
|
|
|
|
|
$t->sortorder() ## Get sortorder (default is []) |
|
3614
|
|
|
|
|
|
|
|
|
3615
|
|
|
|
|
|
|
my $Order = [qw(Last First State Zip)]; |
|
3616
|
|
|
|
|
|
|
$t->sortorder($Order) ## Set sortorder (use [] for none) |
|
3617
|
|
|
|
|
|
|
|
|
3618
|
|
|
|
|
|
|
$t->sortorder_default() ## Get the object's default sort order ([]) |
|
3619
|
|
|
|
|
|
|
|
|
3620
|
|
|
|
|
|
|
The sort order is an optional list of field names on which to sort and |
|
3621
|
|
|
|
|
|
|
sub-sort the data when sorting is requested. The field names must be |
|
3622
|
|
|
|
|
|
|
the names of actual columns in the table. The names in the sort order |
|
3623
|
|
|
|
|
|
|
do not necessarily need to coincide with the custom fieldlist if any. |
|
3624
|
|
|
|
|
|
|
|
|
3625
|
|
|
|
|
|
|
There is one special value that can be included: _RecNum. This sorts |
|
3626
|
|
|
|
|
|
|
on the imaginary "record number" field. So for example, you could |
|
3627
|
|
|
|
|
|
|
specify a sort order this way: |
|
3628
|
|
|
|
|
|
|
|
|
3629
|
|
|
|
|
|
|
[qw(Last First _RecNum)] |
|
3630
|
|
|
|
|
|
|
|
|
3631
|
|
|
|
|
|
|
(There is no point in putting _RecNum anywhere except at the end of |
|
3632
|
|
|
|
|
|
|
the sort order because no two records will ever have the same record |
|
3633
|
|
|
|
|
|
|
number so there will be no further need to disambiguate by referring |
|
3634
|
|
|
|
|
|
|
to additional fields.) |
|
3635
|
|
|
|
|
|
|
|
|
3636
|
|
|
|
|
|
|
Sorting by _RecNum adds a bit of computational overhead because sort() |
|
3637
|
|
|
|
|
|
|
first builds a record number vector for use in sorting, so for very |
|
3638
|
|
|
|
|
|
|
large tables, don't do it unless you really need it. |
|
3639
|
|
|
|
|
|
|
|
|
3640
|
|
|
|
|
|
|
A sort order can be specified each time the object is sorted (see the |
|
3641
|
|
|
|
|
|
|
sort() method for details). |
|
3642
|
|
|
|
|
|
|
|
|
3643
|
|
|
|
|
|
|
Or, the object's sort order can be set once, and then sort() will use |
|
3644
|
|
|
|
|
|
|
that sort order when no other sort order is specified. |
|
3645
|
|
|
|
|
|
|
|
|
3646
|
|
|
|
|
|
|
If sorting is done when there is no sort order present in the object |
|
3647
|
|
|
|
|
|
|
or specifed for the sort() method, the selection is sorted by record |
|
3648
|
|
|
|
|
|
|
number (i.e. it is "unsorted" or returned to its "natural" order). |
|
3649
|
|
|
|
|
|
|
|
|
3650
|
|
|
|
|
|
|
In order words, a sortorder that is undef or [] is considered the same |
|
3651
|
|
|
|
|
|
|
as: [qw(_RecNum)]. This is sometimes called "unsorting". |
|
3652
|
|
|
|
|
|
|
|
|
3653
|
|
|
|
|
|
|
In order to decide how values in each field should be compared, sort() |
|
3654
|
|
|
|
|
|
|
is informed by SortSpecs (specifying SortType and SortDirection for |
|
3655
|
|
|
|
|
|
|
each field) and by SortRoutines, each of which may similarly either be |
|
3656
|
|
|
|
|
|
|
pre-set for the object or specified when calling sort() -- see below |
|
3657
|
|
|
|
|
|
|
for further details. |
|
3658
|
|
|
|
|
|
|
|
|
3659
|
|
|
|
|
|
|
=cut |
|
3660
|
|
|
|
|
|
|
|
|
3661
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
|
3662
|
|
|
|
|
|
|
|
|
3663
|
|
|
|
|
|
|
sub sortorder |
|
3664
|
|
|
|
|
|
|
{ |
|
3665
|
8
|
|
|
8
|
0
|
21
|
my $this = shift; |
|
3666
|
8
|
|
|
|
|
13
|
my ($SortOrder) = @_; |
|
3667
|
|
|
|
|
|
|
|
|
3668
|
8
|
|
100
|
|
|
32
|
my $Valid = ((ref($SortOrder) eq 'ARRAY') && (@$SortOrder > 0)); |
|
3669
|
|
|
|
|
|
|
|
|
3670
|
|
|
|
|
|
|
## Set if specified. |
|
3671
|
8
|
100
|
100
|
|
|
37
|
if (defined($SortOrder) && $Valid) |
|
|
|
100
|
|
|
|
|
|
|
3672
|
|
|
|
|
|
|
{ |
|
3673
|
2
|
|
|
|
|
4
|
$this->{_SortOrder} = $SortOrder; |
|
3674
|
|
|
|
|
|
|
} |
|
3675
|
|
|
|
|
|
|
elsif (defined($SortOrder)) |
|
3676
|
|
|
|
|
|
|
{ |
|
3677
|
1
|
|
|
|
|
4
|
$this->{_SortOrder} = undef; ## Store undef instead of [] |
|
3678
|
|
|
|
|
|
|
} |
|
3679
|
|
|
|
|
|
|
|
|
3680
|
|
|
|
|
|
|
## Get and return. |
|
3681
|
8
|
|
66
|
|
|
33
|
$SortOrder = $this->{_SortOrder} || $this->sortorder_default(); |
|
3682
|
|
|
|
|
|
|
|
|
3683
|
8
|
|
|
|
|
31
|
return($SortOrder); |
|
3684
|
|
|
|
|
|
|
} |
|
3685
|
|
|
|
|
|
|
|
|
3686
|
|
|
|
|
|
|
sub sortorder_default |
|
3687
|
|
|
|
|
|
|
{ |
|
3688
|
5
|
|
|
5
|
0
|
13
|
my $this = shift; |
|
3689
|
|
|
|
|
|
|
|
|
3690
|
5
|
|
|
|
|
8
|
my $SortOrder = []; |
|
3691
|
|
|
|
|
|
|
|
|
3692
|
5
|
|
|
|
|
19
|
return($SortOrder); |
|
3693
|
|
|
|
|
|
|
} |
|
3694
|
|
|
|
|
|
|
|
|
3695
|
|
|
|
|
|
|
sub sortorder_check |
|
3696
|
|
|
|
|
|
|
{ |
|
3697
|
63
|
|
|
63
|
0
|
80
|
my $this = shift; |
|
3698
|
63
|
|
|
|
|
149
|
my $FieldsHash = $this->fieldlist_hash(); |
|
3699
|
|
|
|
|
|
|
|
|
3700
|
|
|
|
|
|
|
## Remove any bogus field names from the sort order, if any. |
|
3701
|
|
|
|
|
|
|
|
|
3702
|
4
|
|
|
|
|
13
|
$this->{_SortOrder} = [grep {exists($FieldsHash->{$_})} |
|
|
1
|
|
|
|
|
3
|
|
|
3703
|
63
|
100
|
|
|
|
244
|
@{$this->{_SortOrder}}] if defined $this->{_SortOrder}; |
|
3704
|
|
|
|
|
|
|
} |
|
3705
|
|
|
|
|
|
|
|
|
3706
|
|
|
|
|
|
|
=pod |
|
3707
|
|
|
|
|
|
|
|
|
3708
|
|
|
|
|
|
|
=head1 SORT SPECIFICATIONS |
|
3709
|
|
|
|
|
|
|
|
|
3710
|
|
|
|
|
|
|
## Getting / Setting table's default _SortSpecs |
|
3711
|
|
|
|
|
|
|
|
|
3712
|
|
|
|
|
|
|
$t->sortspecs() ## Get sortspecs (default is {} -- none) |
|
3713
|
|
|
|
|
|
|
|
|
3714
|
|
|
|
|
|
|
my $Specs = {Last => {SortType => 'String' , |
|
3715
|
|
|
|
|
|
|
SortDirection => -1 }, |
|
3716
|
|
|
|
|
|
|
Zip => {SortType => 'Integer' }}; |
|
3717
|
|
|
|
|
|
|
|
|
3718
|
|
|
|
|
|
|
$t->sortspecs($Specs) ## Set sortspecs |
|
3719
|
|
|
|
|
|
|
|
|
3720
|
|
|
|
|
|
|
$t->sortspecs_default() ## Get the object's default sort specs ({}) |
|
3721
|
|
|
|
|
|
|
|
|
3722
|
|
|
|
|
|
|
The sortspecs are an optional hash mapping field names to "sort |
|
3723
|
|
|
|
|
|
|
specifications". |
|
3724
|
|
|
|
|
|
|
|
|
3725
|
|
|
|
|
|
|
Each field's sort specification may specify zero or more of these |
|
3726
|
|
|
|
|
|
|
values: |
|
3727
|
|
|
|
|
|
|
|
|
3728
|
|
|
|
|
|
|
=over 4 |
|
3729
|
|
|
|
|
|
|
|
|
3730
|
|
|
|
|
|
|
=item SortType |
|
3731
|
|
|
|
|
|
|
|
|
3732
|
|
|
|
|
|
|
the sort type to use (For example: String, Integer) |
|
3733
|
|
|
|
|
|
|
|
|
3734
|
|
|
|
|
|
|
=item SortDirection |
|
3735
|
|
|
|
|
|
|
|
|
3736
|
|
|
|
|
|
|
the sort direction (1: ascending, -1: descending) |
|
3737
|
|
|
|
|
|
|
|
|
3738
|
|
|
|
|
|
|
=back |
|
3739
|
|
|
|
|
|
|
|
|
3740
|
|
|
|
|
|
|
Sortspecs can be specified when calling the sort() routine, or, a set |
|
3741
|
|
|
|
|
|
|
of specs can be placed beforehand into the object itself and those |
|
3742
|
|
|
|
|
|
|
will be used by sort() if no other specs are given. |
|
3743
|
|
|
|
|
|
|
|
|
3744
|
|
|
|
|
|
|
For any field listed in the sort order at the time of sorting, but |
|
3745
|
|
|
|
|
|
|
lacking a sort spec or any component of the sort spec, the object's |
|
3746
|
|
|
|
|
|
|
default sort type (see sorttype_default()) and default sort direction |
|
3747
|
|
|
|
|
|
|
(see sortdirection_default()) will be used. |
|
3748
|
|
|
|
|
|
|
|
|
3749
|
|
|
|
|
|
|
In addition to getting/setting sort specs as a whole, they may be |
|
3750
|
|
|
|
|
|
|
gotten/set on a per-field basis, too: |
|
3751
|
|
|
|
|
|
|
|
|
3752
|
|
|
|
|
|
|
sortspec($Field) ## Get sortspec for $Field or default spec |
|
3753
|
|
|
|
|
|
|
|
|
3754
|
|
|
|
|
|
|
my $Spec = {SortType => 'Integer', SortDirection => -1}; |
|
3755
|
|
|
|
|
|
|
sortspec('Zip', $Spec) ## Set sortspec |
|
3756
|
|
|
|
|
|
|
|
|
3757
|
|
|
|
|
|
|
sortspec_default() ## Get a sortspec with all defaults filled in |
|
3758
|
|
|
|
|
|
|
|
|
3759
|
|
|
|
|
|
|
For any $Field not found in the object's sortspecs, sortspec($Field) |
|
3760
|
|
|
|
|
|
|
returns the same thing returned by sortspec_default(), which is a |
|
3761
|
|
|
|
|
|
|
sortspec filled in with the default sort type and sort direction (see |
|
3762
|
|
|
|
|
|
|
below). |
|
3763
|
|
|
|
|
|
|
|
|
3764
|
|
|
|
|
|
|
For a list of available built-in SortTypes, and instructions for how |
|
3765
|
|
|
|
|
|
|
to define your own, see SORT ROUTINES, below. |
|
3766
|
|
|
|
|
|
|
|
|
3767
|
|
|
|
|
|
|
=cut |
|
3768
|
|
|
|
|
|
|
|
|
3769
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
|
3770
|
|
|
|
|
|
|
|
|
3771
|
|
|
|
|
|
|
sub sortspecs |
|
3772
|
|
|
|
|
|
|
{ |
|
3773
|
46
|
|
|
46
|
0
|
62
|
my $this = shift; |
|
3774
|
46
|
|
|
|
|
55
|
my ($SortSpecs) = @_; |
|
3775
|
|
|
|
|
|
|
|
|
3776
|
|
|
|
|
|
|
## Set if specified. |
|
3777
|
46
|
100
|
|
|
|
86
|
$this->{_SortSpecs} = $SortSpecs if $SortSpecs; |
|
3778
|
|
|
|
|
|
|
|
|
3779
|
|
|
|
|
|
|
## Get and return |
|
3780
|
46
|
|
33
|
|
|
101
|
$SortSpecs = $this->{_SortSpecs} || $this->sortspecs_default(); |
|
3781
|
|
|
|
|
|
|
|
|
3782
|
46
|
|
|
|
|
93
|
return($SortSpecs); |
|
3783
|
|
|
|
|
|
|
} |
|
3784
|
|
|
|
|
|
|
|
|
3785
|
|
|
|
|
|
|
sub sortspecs_default |
|
3786
|
|
|
|
|
|
|
{ |
|
3787
|
1
|
|
|
1
|
0
|
2
|
my $this = shift; |
|
3788
|
|
|
|
|
|
|
|
|
3789
|
1
|
|
|
|
|
3
|
my $SortSpecs = {}; |
|
3790
|
|
|
|
|
|
|
|
|
3791
|
1
|
|
|
|
|
6
|
return($SortSpecs); |
|
3792
|
|
|
|
|
|
|
} |
|
3793
|
|
|
|
|
|
|
|
|
3794
|
|
|
|
|
|
|
sub sortspec |
|
3795
|
|
|
|
|
|
|
{ |
|
3796
|
4
|
|
|
4
|
0
|
17
|
my $this = shift; |
|
3797
|
4
|
|
|
|
|
9
|
my ($FieldName, $SortSpec) = @_; |
|
3798
|
|
|
|
|
|
|
|
|
3799
|
|
|
|
|
|
|
## Set if specified. |
|
3800
|
4
|
100
|
|
|
|
18
|
$this->{_SortSpecs}->{$FieldName} = $SortSpec if $SortSpec; |
|
3801
|
|
|
|
|
|
|
|
|
3802
|
|
|
|
|
|
|
## Get and return. |
|
3803
|
4
|
|
33
|
|
|
19
|
my $SortSpec = ($this->{_SortSpecs}->{$FieldName} || |
|
3804
|
|
|
|
|
|
|
$this->sortspec_default($FieldName)); |
|
3805
|
|
|
|
|
|
|
|
|
3806
|
|
|
|
|
|
|
## Provide defaults for needed fields of sort spec. |
|
3807
|
4
|
|
33
|
|
|
14
|
$SortSpec->{SortType} ||= $this->sorttype_default (); |
|
3808
|
4
|
50
|
|
|
|
15
|
$SortSpec->{SortDirection} = $this->sortdirection_default() |
|
3809
|
|
|
|
|
|
|
unless defined($SortSpec->{SortDirection}); |
|
3810
|
|
|
|
|
|
|
|
|
3811
|
4
|
|
|
|
|
14
|
return($SortSpec); |
|
3812
|
|
|
|
|
|
|
} |
|
3813
|
|
|
|
|
|
|
|
|
3814
|
|
|
|
|
|
|
sub sortspec_default |
|
3815
|
|
|
|
|
|
|
{ |
|
3816
|
2
|
|
|
2
|
0
|
17
|
my $this = shift; |
|
3817
|
2
|
|
|
|
|
4
|
my ($FieldName) = @_; |
|
3818
|
|
|
|
|
|
|
|
|
3819
|
|
|
|
|
|
|
## Default sortspec for a field |
|
3820
|
|
|
|
|
|
|
|
|
3821
|
2
|
|
|
|
|
8
|
my $SortType = $this->sorttype_default (); |
|
3822
|
2
|
|
|
|
|
7
|
my $SortDir = $this->sortdirection_default(); |
|
3823
|
|
|
|
|
|
|
|
|
3824
|
2
|
|
|
|
|
8
|
my $Spec = {SortType => $SortType, SortDirection => $SortDir}; |
|
3825
|
|
|
|
|
|
|
|
|
3826
|
2
|
|
|
|
|
8
|
return($Spec); |
|
3827
|
|
|
|
|
|
|
} |
|
3828
|
|
|
|
|
|
|
|
|
3829
|
|
|
|
|
|
|
|
|
3830
|
|
|
|
|
|
|
=pod |
|
3831
|
|
|
|
|
|
|
|
|
3832
|
|
|
|
|
|
|
=head1 DEFAULT SORT DIRECTION |
|
3833
|
|
|
|
|
|
|
|
|
3834
|
|
|
|
|
|
|
## Getting / Setting table's default _DefaultSortDirection |
|
3835
|
|
|
|
|
|
|
|
|
3836
|
|
|
|
|
|
|
$t->sortdirection_default() ## Get default sort direction |
|
3837
|
|
|
|
|
|
|
|
|
3838
|
|
|
|
|
|
|
$t->sortdirection_default(-1) ## Set default sort direction |
|
3839
|
|
|
|
|
|
|
|
|
3840
|
|
|
|
|
|
|
Each element in a sort specification can optionally specify a sort |
|
3841
|
|
|
|
|
|
|
direction. |
|
3842
|
|
|
|
|
|
|
|
|
3843
|
|
|
|
|
|
|
1 = ascending, -1 = descending |
|
3844
|
|
|
|
|
|
|
|
|
3845
|
|
|
|
|
|
|
For any sort specs that don't specify a direction, the object's |
|
3846
|
|
|
|
|
|
|
default sort direction will be used. Use these routines to get/set |
|
3847
|
|
|
|
|
|
|
the default sort direction. |
|
3848
|
|
|
|
|
|
|
|
|
3849
|
|
|
|
|
|
|
=cut |
|
3850
|
|
|
|
|
|
|
|
|
3851
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
|
3852
|
|
|
|
|
|
|
|
|
3853
|
|
|
|
|
|
|
sub sortdirection_default |
|
3854
|
|
|
|
|
|
|
{ |
|
3855
|
6
|
|
|
6
|
0
|
14
|
my $this = shift; |
|
3856
|
6
|
|
|
|
|
10
|
my ($DefaultSortDir) = @_; |
|
3857
|
|
|
|
|
|
|
|
|
3858
|
6
|
100
|
|
|
|
17
|
if (defined($DefaultSortDir)) |
|
3859
|
|
|
|
|
|
|
{ |
|
3860
|
|
|
|
|
|
|
## Set if specified. Force to 1 or -1. Treat 0 as -1 (descending). |
|
3861
|
1
|
|
50
|
|
|
6
|
$this->{_DefaultSortDirection} = |
|
3862
|
|
|
|
|
|
|
(max(min(int($DefaultSortDir), 1), -1) || -1); |
|
3863
|
|
|
|
|
|
|
} |
|
3864
|
|
|
|
|
|
|
|
|
3865
|
|
|
|
|
|
|
## Get and return. If not present, then 1 (ascending) is the default. |
|
3866
|
6
|
50
|
|
|
|
16
|
my $SortDir = (defined($this->{_DefaultSortDirection}) ? |
|
3867
|
|
|
|
|
|
|
$this->{_DefaultSortDirection} : |
|
3868
|
|
|
|
|
|
|
1); |
|
3869
|
6
|
|
|
|
|
21
|
return($SortDir); |
|
3870
|
|
|
|
|
|
|
} |
|
3871
|
|
|
|
|
|
|
|
|
3872
|
|
|
|
|
|
|
=pod |
|
3873
|
|
|
|
|
|
|
|
|
3874
|
|
|
|
|
|
|
=head1 DEFAULT SORT TYPE |
|
3875
|
|
|
|
|
|
|
|
|
3876
|
|
|
|
|
|
|
## Getting / Setting table's default _DefaultSortType |
|
3877
|
|
|
|
|
|
|
|
|
3878
|
|
|
|
|
|
|
$t->sorttype_default() ## Get default sort type |
|
3879
|
|
|
|
|
|
|
|
|
3880
|
|
|
|
|
|
|
$t->sorttype_default('Integer') ## Set default sort type |
|
3881
|
|
|
|
|
|
|
|
|
3882
|
|
|
|
|
|
|
Each element in a sort specification can optionally specify a sort |
|
3883
|
|
|
|
|
|
|
type. The sort type is a string (like 'String' or 'Integer' or |
|
3884
|
|
|
|
|
|
|
'Date') that selects from one or more sort routines. (See Sort |
|
3885
|
|
|
|
|
|
|
Routines, below). |
|
3886
|
|
|
|
|
|
|
|
|
3887
|
|
|
|
|
|
|
There are several sort routines built into the CTable object, and you |
|
3888
|
|
|
|
|
|
|
can also add as many of your own routines (and hence Sort Types) as |
|
3889
|
|
|
|
|
|
|
you like or need. This allows for very flexible sorting. |
|
3890
|
|
|
|
|
|
|
|
|
3891
|
|
|
|
|
|
|
For any sort specs that don't specify a type, the object's default |
|
3892
|
|
|
|
|
|
|
sort type will be used. Use these routines to get/set the default |
|
3893
|
|
|
|
|
|
|
sort type, which initially is 'String'. |
|
3894
|
|
|
|
|
|
|
|
|
3895
|
|
|
|
|
|
|
=cut |
|
3896
|
|
|
|
|
|
|
|
|
3897
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
|
3898
|
|
|
|
|
|
|
|
|
3899
|
|
|
|
|
|
|
sub sorttype_default |
|
3900
|
|
|
|
|
|
|
{ |
|
3901
|
6
|
|
|
6
|
0
|
10
|
my $this = shift; |
|
3902
|
6
|
|
|
|
|
9
|
my ($DefaultSortType) = @_; |
|
3903
|
|
|
|
|
|
|
|
|
3904
|
6
|
100
|
|
|
|
15
|
if (defined($DefaultSortType)) |
|
3905
|
|
|
|
|
|
|
{ |
|
3906
|
|
|
|
|
|
|
## Set if specified. |
|
3907
|
1
|
|
|
|
|
4
|
$this->{_DefaultSortType} = "$DefaultSortType"; |
|
3908
|
|
|
|
|
|
|
} |
|
3909
|
|
|
|
|
|
|
|
|
3910
|
|
|
|
|
|
|
## Get and return. If not present, then 'String' is the default. |
|
3911
|
6
|
50
|
|
|
|
21
|
my $SortDir = (defined($this->{_DefaultSortType}) ? |
|
3912
|
|
|
|
|
|
|
$this->{_DefaultSortType} : |
|
3913
|
|
|
|
|
|
|
'String'); |
|
3914
|
6
|
|
|
|
|
18
|
return($SortDir); |
|
3915
|
|
|
|
|
|
|
} |
|
3916
|
|
|
|
|
|
|
|
|
3917
|
|
|
|
|
|
|
=pod |
|
3918
|
|
|
|
|
|
|
|
|
3919
|
|
|
|
|
|
|
=head1 SORT ROUTINES: BUILTIN AND CUSTOM |
|
3920
|
|
|
|
|
|
|
|
|
3921
|
|
|
|
|
|
|
## Getting / Setting table's custom sort routines (_SRoutines) |
|
3922
|
|
|
|
|
|
|
|
|
3923
|
|
|
|
|
|
|
$t->sortroutine($Type) ## Get a sort routine for $Type |
|
3924
|
|
|
|
|
|
|
|
|
3925
|
|
|
|
|
|
|
$t->sortroutine($Type, $Sub) ## Set a sort routine for $Type |
|
3926
|
|
|
|
|
|
|
|
|
3927
|
|
|
|
|
|
|
$t->sortroutine($Type, 0 ) ## Remove sort routine for $Type |
|
3928
|
|
|
|
|
|
|
$t->sortroutine_set($Type) |
|
3929
|
|
|
|
|
|
|
|
|
3930
|
|
|
|
|
|
|
$t->sortroutines() ## Get hash of any sort routines |
|
3931
|
|
|
|
|
|
|
|
|
3932
|
|
|
|
|
|
|
$t->sortroutines_builtin() ## Get hash of builtin routines |
|
3933
|
|
|
|
|
|
|
|
|
3934
|
|
|
|
|
|
|
Each SortType in the sortspecs should have a corresponding sort |
|
3935
|
|
|
|
|
|
|
routine (any unrecognized type will be sorted using the 'String' sort |
|
3936
|
|
|
|
|
|
|
routine). |
|
3937
|
|
|
|
|
|
|
|
|
3938
|
|
|
|
|
|
|
The sort() command looks up the appropriate sort routine for each |
|
3939
|
|
|
|
|
|
|
field it is asked to sort, based on the SortType for that field, as |
|
3940
|
|
|
|
|
|
|
given in the sortspecs, as described above. |
|
3941
|
|
|
|
|
|
|
|
|
3942
|
|
|
|
|
|
|
Builtin sort types, recognized and implemented natively by this |
|
3943
|
|
|
|
|
|
|
module, are: |
|
3944
|
|
|
|
|
|
|
|
|
3945
|
|
|
|
|
|
|
String ## Fastest case-sensitive compare (data is string) |
|
3946
|
|
|
|
|
|
|
Text ## Insensitive compare (lowercases, then compares) |
|
3947
|
|
|
|
|
|
|
Number ## Number works for floats or integers |
|
3948
|
|
|
|
|
|
|
Integer ## Faster than floats. Uses "use integer" |
|
3949
|
|
|
|
|
|
|
DateSecs ## Same as integer; assumes date in seconds |
|
3950
|
|
|
|
|
|
|
Boolean ## Treats item as a Perlish boolean (empty/undef = false) |
|
3951
|
|
|
|
|
|
|
|
|
3952
|
|
|
|
|
|
|
The above sort types are always recognized. Additional sort types may |
|
3953
|
|
|
|
|
|
|
be added by subclasses (and could shadow the builtin implementations |
|
3954
|
|
|
|
|
|
|
of the above types if desired) and/or may be added to instances (and |
|
3955
|
|
|
|
|
|
|
again could shadow the above implementations), and/or may be specified |
|
3956
|
|
|
|
|
|
|
when the sort() method is called, once again optionally shadowing any |
|
3957
|
|
|
|
|
|
|
deeper definitions. |
|
3958
|
|
|
|
|
|
|
|
|
3959
|
|
|
|
|
|
|
=head1 CUSTOM SORT ROUTINE INTERFACE |
|
3960
|
|
|
|
|
|
|
|
|
3961
|
|
|
|
|
|
|
A custom sort routine is called with two arguments, each of which is a |
|
3962
|
|
|
|
|
|
|
pointer to a scalar. The sort routine should dereference each pointer |
|
3963
|
|
|
|
|
|
|
and compare the resulting scalars, returning -1 if the first scalar is |
|
3964
|
|
|
|
|
|
|
smaller than the second, 1 if it is larger, and 0 if they are |
|
3965
|
|
|
|
|
|
|
considered equal. |
|
3966
|
|
|
|
|
|
|
|
|
3967
|
|
|
|
|
|
|
For example, here is the built-in comparison routine for 'String': |
|
3968
|
|
|
|
|
|
|
|
|
3969
|
|
|
|
|
|
|
sub { $ {$_[0]} cmp $ {$_[1]} } |
|
3970
|
|
|
|
|
|
|
|
|
3971
|
|
|
|
|
|
|
NOTE: Your custom sort routines should NOT compare $a and $b as with |
|
3972
|
|
|
|
|
|
|
Perl's builtin sort() command. |
|
3973
|
|
|
|
|
|
|
|
|
3974
|
|
|
|
|
|
|
Examine the variable $BuiltinSortRoutines in this module's |
|
3975
|
|
|
|
|
|
|
implementation to see some additional examples of sort routines. |
|
3976
|
|
|
|
|
|
|
|
|
3977
|
|
|
|
|
|
|
Internally, sort() calls the sortroutines() method to get a hash that |
|
3978
|
|
|
|
|
|
|
should consist of all builtin sort routines with the per-object sort |
|
3979
|
|
|
|
|
|
|
routines, if any, overlaid. sortroutines() in turn calls the |
|
3980
|
|
|
|
|
|
|
sortroutines_builtin() method to get a copy of the hash of all builtin |
|
3981
|
|
|
|
|
|
|
sort routines for the object. (So a subclass could easily add |
|
3982
|
|
|
|
|
|
|
additional SortTypes or reimplement them by just overriding |
|
3983
|
|
|
|
|
|
|
sortroutines_builtin() and adding its own additional routines to the |
|
3984
|
|
|
|
|
|
|
resulting hash.) |
|
3985
|
|
|
|
|
|
|
|
|
3986
|
|
|
|
|
|
|
sortroutine() may be called to get or set a custom sort routine for a |
|
3987
|
|
|
|
|
|
|
given type in the given object. |
|
3988
|
|
|
|
|
|
|
|
|
3989
|
|
|
|
|
|
|
There is no way to directly manipulate the builtin sort routines for |
|
3990
|
|
|
|
|
|
|
the entire class. To accomplish that, you should define and use a |
|
3991
|
|
|
|
|
|
|
subclass that extends sortroutines_builtin() to add its own routines. |
|
3992
|
|
|
|
|
|
|
|
|
3993
|
|
|
|
|
|
|
For example: |
|
3994
|
|
|
|
|
|
|
|
|
3995
|
|
|
|
|
|
|
BEGIN |
|
3996
|
|
|
|
|
|
|
{ ## A subclass of Data::CTable with an INetAddr SortType. |
|
3997
|
|
|
|
|
|
|
package IATable; use vars qw(@ISA); @ISA = qw(Data::CTable); |
|
3998
|
|
|
|
|
|
|
|
|
3999
|
|
|
|
|
|
|
sub sortroutines_builtin |
|
4000
|
|
|
|
|
|
|
{ |
|
4001
|
|
|
|
|
|
|
my $this = shift; |
|
4002
|
|
|
|
|
|
|
my $CustomRoutines = |
|
4003
|
|
|
|
|
|
|
{INetAddr => |
|
4004
|
|
|
|
|
|
|
sub {use integer; ip2int($ {$_[0]}) <=> ip2int($ {$_[1]})}}; |
|
4005
|
|
|
|
|
|
|
my $AllRoutines = |
|
4006
|
|
|
|
|
|
|
{%{$this->SUPER::sortroutines_builtin()} %$CustomRoutines}; |
|
4007
|
|
|
|
|
|
|
return($AllRoutines); |
|
4008
|
|
|
|
|
|
|
}; |
|
4009
|
|
|
|
|
|
|
|
|
4010
|
|
|
|
|
|
|
sub ip2int {.....} $# Could memoize & inline for efficiency |
|
4011
|
|
|
|
|
|
|
} |
|
4012
|
|
|
|
|
|
|
|
|
4013
|
|
|
|
|
|
|
my $Table = IATable::new(......); |
|
4014
|
|
|
|
|
|
|
|
|
4015
|
|
|
|
|
|
|
The IATable class would then have all the same features of |
|
4016
|
|
|
|
|
|
|
Data::CTable but would then also support the INetAddr SortType. |
|
4017
|
|
|
|
|
|
|
|
|
4018
|
|
|
|
|
|
|
=cut |
|
4019
|
|
|
|
|
|
|
|
|
4020
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
|
4021
|
|
|
|
|
|
|
|
|
4022
|
|
|
|
|
|
|
BEGIN |
|
4023
|
|
|
|
|
|
|
{ |
|
4024
|
|
|
|
|
|
|
my $BuiltinSortRoutines = |
|
4025
|
|
|
|
|
|
|
{( |
|
4026
|
64
|
|
|
|
|
68
|
String => sub { $ {$_[0]} cmp $ {$_[1]} }, |
|
|
64
|
|
|
|
|
79
|
|
|
|
64
|
|
|
|
|
122
|
|
|
4027
|
5
|
|
|
|
|
6
|
Text => sub { lc($ {$_[0]}) cmp lc($ {$_[1]})}, |
|
|
5
|
|
|
|
|
8
|
|
|
|
5
|
|
|
|
|
15
|
|
|
4028
|
8
|
|
|
|
|
9
|
Number => sub { $ {$_[0]} <=> $ {$_[1]} }, |
|
|
8
|
|
|
|
|
11
|
|
|
|
8
|
|
|
|
|
21
|
|
|
4029
|
1
|
|
|
1
|
|
1074
|
Integer => sub {use integer; $ {$_[0]} <=> $ {$_[1]} }, |
|
|
1
|
|
|
|
|
11
|
|
|
|
1
|
|
|
|
|
4
|
|
|
|
8
|
|
|
|
|
10
|
|
|
|
8
|
|
|
|
|
16
|
|
|
|
8
|
|
|
|
|
21
|
|
|
4030
|
1
|
|
|
1
|
|
54
|
DateSecs => sub {use integer; $ {$_[0]} <=> $ {$_[1]} }, |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
51
|
|
|
|
4
|
|
|
|
|
6
|
|
|
|
4
|
|
|
|
|
7
|
|
|
|
4
|
|
|
|
|
9
|
|
|
4031
|
1
|
|
|
1
|
|
48
|
_RecNum => sub {use integer; $ {$_[0]} <=> $ {$_[1]} }, |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
4
|
|
|
|
4
|
|
|
|
|
6
|
|
|
|
4
|
|
|
|
|
7
|
|
|
|
4
|
|
|
|
|
9
|
|
|
4032
|
4
|
|
|
|
|
5
|
Boolean => sub { !!$ {$_[0]} <=> !!$ {$_[1]} }, |
|
|
4
|
|
|
|
|
7
|
|
|
|
4
|
|
|
|
|
12
|
|
|
4033
|
1
|
|
|
1
|
|
956
|
)}; |
|
4034
|
|
|
|
|
|
|
|
|
4035
|
|
|
|
|
|
|
sub sortroutines_builtin ## Class or instance method |
|
4036
|
|
|
|
|
|
|
{ |
|
4037
|
53
|
|
|
53
|
0
|
351
|
return({%$BuiltinSortRoutines}); ## Copy of above private hash. |
|
4038
|
|
|
|
|
|
|
} |
|
4039
|
|
|
|
|
|
|
} |
|
4040
|
|
|
|
|
|
|
|
|
4041
|
|
|
|
|
|
|
sub sortroutine |
|
4042
|
|
|
|
|
|
|
{ |
|
4043
|
38
|
|
|
38
|
0
|
179
|
my $this = shift; |
|
4044
|
38
|
|
|
|
|
59
|
my ($Type, $Routine) = @_; |
|
4045
|
|
|
|
|
|
|
|
|
4046
|
38
|
100
|
|
|
|
78
|
if (defined($Routine)) |
|
4047
|
|
|
|
|
|
|
{ |
|
4048
|
|
|
|
|
|
|
## Set if $Routine provided. |
|
4049
|
3
|
|
|
|
|
10
|
$this->sortroutine_set($Type, $Routine); |
|
4050
|
|
|
|
|
|
|
} |
|
4051
|
|
|
|
|
|
|
|
|
4052
|
|
|
|
|
|
|
## Get and return. |
|
4053
|
38
|
|
|
|
|
70
|
$Routine = $this->sortroutine_get($Type); |
|
4054
|
|
|
|
|
|
|
|
|
4055
|
38
|
|
|
|
|
123
|
return($Routine); |
|
4056
|
|
|
|
|
|
|
} |
|
4057
|
|
|
|
|
|
|
|
|
4058
|
|
|
|
|
|
|
sub sortroutine_get |
|
4059
|
|
|
|
|
|
|
{ |
|
4060
|
38
|
|
|
38
|
0
|
45
|
my $this = shift; |
|
4061
|
38
|
|
|
|
|
42
|
my ($Type) = @_; |
|
4062
|
38
|
|
|
|
|
72
|
my $Routines = $this->sortroutines(); |
|
4063
|
38
|
|
66
|
|
|
96
|
my $Routine = $Routines->{$Type} || $Routines->{'String'}; |
|
4064
|
|
|
|
|
|
|
|
|
4065
|
38
|
|
|
|
|
99
|
return($Routine); |
|
4066
|
|
|
|
|
|
|
} |
|
4067
|
|
|
|
|
|
|
|
|
4068
|
|
|
|
|
|
|
sub sortroutine_set |
|
4069
|
|
|
|
|
|
|
{ |
|
4070
|
4
|
|
|
4
|
0
|
5
|
my $this = shift; |
|
4071
|
4
|
|
|
|
|
6
|
my ($Type, $Routine) = @_; |
|
4072
|
|
|
|
|
|
|
|
|
4073
|
4
|
|
|
|
|
7
|
my $Valid = (ref($Routine) eq 'CODE'); |
|
4074
|
|
|
|
|
|
|
|
|
4075
|
4
|
100
|
|
|
|
10
|
if ($Valid) |
|
4076
|
|
|
|
|
|
|
{ |
|
4077
|
|
|
|
|
|
|
## Add / replace if a routine was supplied. |
|
4078
|
2
|
|
50
|
|
|
3
|
$ {$this->{_SRoutines} ||= {}}{$Type} = $Routine; |
|
|
2
|
|
|
|
|
8
|
|
|
4079
|
|
|
|
|
|
|
} |
|
4080
|
|
|
|
|
|
|
else |
|
4081
|
|
|
|
|
|
|
{ |
|
4082
|
|
|
|
|
|
|
## Otherwise delete. |
|
4083
|
2
|
|
50
|
|
|
3
|
$Routine = delete $ {$this->{_SRoutines} ||= {}}{$Type}; |
|
|
2
|
|
|
|
|
8
|
|
|
4084
|
|
|
|
|
|
|
} |
|
4085
|
|
|
|
|
|
|
|
|
4086
|
4
|
|
|
|
|
14
|
return($Routine); |
|
4087
|
|
|
|
|
|
|
} |
|
4088
|
|
|
|
|
|
|
|
|
4089
|
|
|
|
|
|
|
sub sortroutines |
|
4090
|
|
|
|
|
|
|
{ |
|
4091
|
42
|
|
|
42
|
0
|
54
|
my $this = shift; |
|
4092
|
|
|
|
|
|
|
|
|
4093
|
42
|
50
|
|
|
|
86
|
my $Routines = {%{$this->sortroutines_builtin()}, ## First builtin ones |
|
|
42
|
|
|
|
|
186
|
|
|
4094
|
42
|
|
|
|
|
46
|
%{$this->{_SRoutines} || {}}}; ## Shadow with object's own |
|
4095
|
|
|
|
|
|
|
|
|
4096
|
42
|
|
|
|
|
193
|
return($Routines); |
|
4097
|
|
|
|
|
|
|
} |
|
4098
|
|
|
|
|
|
|
|
|
4099
|
|
|
|
|
|
|
=pod |
|
4100
|
|
|
|
|
|
|
|
|
4101
|
|
|
|
|
|
|
=head1 FREEZING SELECTION & FIELD LIST |
|
4102
|
|
|
|
|
|
|
|
|
4103
|
|
|
|
|
|
|
## Freeze data layout: re-order columns; omit unused fields |
|
4104
|
|
|
|
|
|
|
|
|
4105
|
|
|
|
|
|
|
$t->cull(...params...) ## Rebuild table in order |
|
4106
|
|
|
|
|
|
|
my $s = $t->snapshot(...params...) ## Make copy as if rebuilt |
|
4107
|
|
|
|
|
|
|
|
|
4108
|
|
|
|
|
|
|
The cull() method re-writes all data in the table to be in the order |
|
4109
|
|
|
|
|
|
|
indicated in _Selection (if present). This will cause any records not |
|
4110
|
|
|
|
|
|
|
listed in _Selection to be omitted (unless selection is null in which |
|
4111
|
|
|
|
|
|
|
case all records are retained in original order). |
|
4112
|
|
|
|
|
|
|
|
|
4113
|
|
|
|
|
|
|
In addition, if there is a custom field list present, it removes any |
|
4114
|
|
|
|
|
|
|
fields NOT mentioned in _FieldList. |
|
4115
|
|
|
|
|
|
|
|
|
4116
|
|
|
|
|
|
|
The snapshot() method is similar, except instead of modifying the |
|
4117
|
|
|
|
|
|
|
object itself, it makes a copy of the object that's equivalent to what |
|
4118
|
|
|
|
|
|
|
cull() would have created, and returns that new object, leaving the |
|
4119
|
|
|
|
|
|
|
original untouched. (All data structures are deep-copied from the old |
|
4120
|
|
|
|
|
|
|
object to the new one, leaving the objects totally independent.) |
|
4121
|
|
|
|
|
|
|
|
|
4122
|
|
|
|
|
|
|
cull() and snapshot() both take two optional named parameters: |
|
4123
|
|
|
|
|
|
|
_FieldList and/or _Selection to be used in place of the corresponding |
|
4124
|
|
|
|
|
|
|
parameters found in the object. |
|
4125
|
|
|
|
|
|
|
|
|
4126
|
|
|
|
|
|
|
If only a single argument is supplied, it is assumed to be _Selection. |
|
4127
|
|
|
|
|
|
|
|
|
4128
|
|
|
|
|
|
|
=cut |
|
4129
|
|
|
|
|
|
|
|
|
4130
|
|
|
|
|
|
|
sub cull |
|
4131
|
|
|
|
|
|
|
{ |
|
4132
|
1
|
|
|
1
|
0
|
2
|
my $this = shift; |
|
4133
|
1
|
50
|
|
|
|
5
|
my $Params = (@_ == 1 ? {_Selection => $_[0]} : {@_}); |
|
4134
|
|
|
|
|
|
|
|
|
4135
|
1
|
|
|
|
|
2
|
my($Selection, $FieldList) = map {$this->getparam($Params, $_)} |
|
|
2
|
|
|
|
|
4
|
|
|
4136
|
|
|
|
|
|
|
qw(_Selection _FieldList); |
|
4137
|
|
|
|
|
|
|
|
|
4138
|
1
|
|
33
|
|
|
3
|
$FieldList ||= $this->{_FieldList}; |
|
4139
|
1
|
|
33
|
|
|
4
|
$Selection ||= $this->{_Selection}; |
|
4140
|
|
|
|
|
|
|
|
|
4141
|
|
|
|
|
|
|
## First cull any fields/columns not mentioned in _FieldList, if any. |
|
4142
|
1
|
50
|
|
|
|
3
|
if ($FieldList) |
|
4143
|
|
|
|
|
|
|
{ |
|
4144
|
1
|
|
|
|
|
2
|
my $FieldHash = {}; @$FieldHash{@$FieldList} = undef; |
|
|
1
|
|
|
|
|
4
|
|
|
4145
|
1
|
|
|
|
|
2
|
my $DeadFields = [grep {!exists($FieldHash->{$_})} @{$this->fieldlist_all()}]; |
|
|
4
|
|
|
|
|
8
|
|
|
|
1
|
|
|
|
|
4
|
|
|
4146
|
1
|
|
|
|
|
4
|
delete @$this{@$DeadFields}; |
|
4147
|
|
|
|
|
|
|
|
|
4148
|
|
|
|
|
|
|
## Set the (possibly) new field list in the object. |
|
4149
|
1
|
|
|
|
|
5
|
$this->fieldlist_set($FieldList); |
|
4150
|
|
|
|
|
|
|
} |
|
4151
|
|
|
|
|
|
|
|
|
4152
|
|
|
|
|
|
|
## Then cull / rearrange all the columns |
|
4153
|
1
|
50
|
|
|
|
5
|
if ($Selection) |
|
4154
|
|
|
|
|
|
|
{ |
|
4155
|
|
|
|
|
|
|
## Temporarily set the selection() to be the one we may have been given... |
|
4156
|
1
|
|
|
|
|
5
|
$this->selection($Selection); |
|
4157
|
|
|
|
|
|
|
|
|
4158
|
|
|
|
|
|
|
## Get the de-facto field list if we don't already have it explicitly |
|
4159
|
1
|
|
33
|
|
|
4
|
$FieldList ||= $this->fieldlist(); |
|
4160
|
|
|
|
|
|
|
|
|
4161
|
|
|
|
|
|
|
## Rearrange each column |
|
4162
|
1
|
|
|
|
|
2
|
foreach my $FieldName (@$FieldList) |
|
4163
|
|
|
|
|
|
|
{ |
|
4164
|
3
|
|
|
|
|
7
|
$this->{$FieldName} = $this->sel($FieldName); |
|
4165
|
|
|
|
|
|
|
} |
|
4166
|
|
|
|
|
|
|
|
|
4167
|
|
|
|
|
|
|
## Remove the _Selection since it is no longer valid. |
|
4168
|
1
|
|
|
|
|
3
|
$this->selection_delete(); |
|
4169
|
|
|
|
|
|
|
} |
|
4170
|
|
|
|
|
|
|
} |
|
4171
|
|
|
|
|
|
|
|
|
4172
|
|
|
|
|
|
|
sub snapshot |
|
4173
|
|
|
|
|
|
|
{ |
|
4174
|
50
|
|
|
50
|
0
|
1096
|
my $this = shift; |
|
4175
|
50
|
50
|
|
|
|
151
|
my $Params = (@_ == 1 ? {_Selection => $_[0]} : {@_}); |
|
4176
|
|
|
|
|
|
|
|
|
4177
|
50
|
|
|
|
|
85
|
my($Selection, $FieldList) = map {$this->getparam($Params, $_)} |
|
|
100
|
|
|
|
|
234
|
|
|
4178
|
|
|
|
|
|
|
qw(_Selection _FieldList); |
|
4179
|
|
|
|
|
|
|
|
|
4180
|
50
|
|
33
|
|
|
296
|
$FieldList ||= $this->{_FieldList}; |
|
4181
|
50
|
|
66
|
|
|
230
|
$Selection ||= $this->{_Selection}; |
|
4182
|
|
|
|
|
|
|
|
|
4183
|
|
|
|
|
|
|
## First make a shallow copy of $this |
|
4184
|
50
|
|
|
|
|
812
|
my $copy = {%$this}; |
|
4185
|
|
|
|
|
|
|
|
|
4186
|
|
|
|
|
|
|
## Then delete any (references to) data columns owned by $this... |
|
4187
|
50
|
|
|
|
|
144
|
delete @$copy{@{$this->fieldlist_all()}}; |
|
|
50
|
|
|
|
|
125
|
|
|
4188
|
|
|
|
|
|
|
|
|
4189
|
|
|
|
|
|
|
## Then deep-copy all other parameters from $this... |
|
4190
|
50
|
|
|
|
|
2619
|
$copy = dclone($copy); |
|
4191
|
|
|
|
|
|
|
|
|
4192
|
|
|
|
|
|
|
## Then new/bless/initialize the copy into the same class as $this... |
|
4193
|
50
|
|
|
|
|
401
|
$copy = ref($this)->new($copy); |
|
4194
|
|
|
|
|
|
|
|
|
4195
|
|
|
|
|
|
|
## Temporarily override selection if necessary. |
|
4196
|
50
|
|
|
|
|
240
|
my $OldSel = $this->{_Selection}; |
|
4197
|
50
|
|
|
|
|
122
|
$this->selection($Selection); |
|
4198
|
|
|
|
|
|
|
|
|
4199
|
|
|
|
|
|
|
## Then insert all the rearranged columns into $copy... |
|
4200
|
50
|
|
|
|
|
72
|
@$copy{@$FieldList} = @{$this->sels($FieldList)}; |
|
|
50
|
|
|
|
|
119
|
|
|
4201
|
|
|
|
|
|
|
|
|
4202
|
|
|
|
|
|
|
## Restore old selection, if any. |
|
4203
|
50
|
|
|
|
|
151
|
$this->selection_set($OldSel); |
|
4204
|
|
|
|
|
|
|
|
|
4205
|
|
|
|
|
|
|
## Remove the selection in copy. |
|
4206
|
50
|
|
|
|
|
86
|
delete $copy->{_Selection}; |
|
4207
|
|
|
|
|
|
|
|
|
4208
|
|
|
|
|
|
|
## Set copy's fieldlist to a copy of the one we used. |
|
4209
|
50
|
|
|
|
|
175
|
$copy->{_FieldList} = [@$FieldList]; |
|
4210
|
|
|
|
|
|
|
|
|
4211
|
50
|
|
|
|
|
204
|
return($copy); |
|
4212
|
|
|
|
|
|
|
} |
|
4213
|
|
|
|
|
|
|
|
|
4214
|
|
|
|
|
|
|
=pod |
|
4215
|
|
|
|
|
|
|
|
|
4216
|
|
|
|
|
|
|
=head1 LINE ENDINGS |
|
4217
|
|
|
|
|
|
|
|
|
4218
|
|
|
|
|
|
|
## Get current value |
|
4219
|
|
|
|
|
|
|
|
|
4220
|
|
|
|
|
|
|
$t->lineending() ## Get actual setting: string or symbol |
|
4221
|
|
|
|
|
|
|
$t->lineending_symbol() ## Get setting's symbolic value if possible |
|
4222
|
|
|
|
|
|
|
$t->lineending_string() ## Get setting's string value if possible |
|
4223
|
|
|
|
|
|
|
|
|
4224
|
|
|
|
|
|
|
## Set value |
|
4225
|
|
|
|
|
|
|
|
|
4226
|
|
|
|
|
|
|
$t->lineending($Ending) ## Will be converted internally to symbol |
|
4227
|
|
|
|
|
|
|
|
|
4228
|
|
|
|
|
|
|
## Convert a value to symbol or string form |
|
4229
|
|
|
|
|
|
|
|
|
4230
|
|
|
|
|
|
|
$t->lineending_symbol($L) ## Convert string form to symbolic form |
|
4231
|
|
|
|
|
|
|
$t->lineending_string($L) ## Convert symbol form to string form |
|
4232
|
|
|
|
|
|
|
|
|
4233
|
|
|
|
|
|
|
## Get internal conversion hash tables |
|
4234
|
|
|
|
|
|
|
|
|
4235
|
|
|
|
|
|
|
$t->lineending_symbols() ## Hash ref mapping known strings to symbols |
|
4236
|
|
|
|
|
|
|
$t->lineending_strings() ## Hash ref mapping known symbols to strings |
|
4237
|
|
|
|
|
|
|
|
|
4238
|
|
|
|
|
|
|
Use these accessor functions to get/set the _LineEnding parameter. |
|
4239
|
|
|
|
|
|
|
|
|
4240
|
|
|
|
|
|
|
You can set the parameter in either string or symbol form as you wish. |
|
4241
|
|
|
|
|
|
|
You can get it in its raw, as-stored, form, or, you can get it in |
|
4242
|
|
|
|
|
|
|
string form or symbol form as desired. |
|
4243
|
|
|
|
|
|
|
|
|
4244
|
|
|
|
|
|
|
Finally, some utility conversion calls allows you to convert a string |
|
4245
|
|
|
|
|
|
|
you have on hand to a symbolic form. For example: |
|
4246
|
|
|
|
|
|
|
|
|
4247
|
|
|
|
|
|
|
$L = "\x0D"; |
|
4248
|
|
|
|
|
|
|
print ("This file uses " . $t->lineending_symbol($L) . " endings."); |
|
4249
|
|
|
|
|
|
|
|
|
4250
|
|
|
|
|
|
|
This would print: |
|
4251
|
|
|
|
|
|
|
|
|
4252
|
|
|
|
|
|
|
This file uses mac endings. |
|
4253
|
|
|
|
|
|
|
|
|
4254
|
|
|
|
|
|
|
=cut |
|
4255
|
|
|
|
|
|
|
|
|
4256
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
|
4257
|
|
|
|
|
|
|
|
|
4258
|
|
|
|
|
|
|
BEGIN |
|
4259
|
|
|
|
|
|
|
{ |
|
4260
|
|
|
|
|
|
|
## Map any recognized _LineEnding value to its actual string |
|
4261
|
1
|
|
|
1
|
|
8
|
my $LineEnding_Strings = |
|
4262
|
|
|
|
|
|
|
{( |
|
4263
|
|
|
|
|
|
|
dos => "\x0D\x0A", |
|
4264
|
|
|
|
|
|
|
mac => "\x0D", |
|
4265
|
|
|
|
|
|
|
unix => "\x0A", |
|
4266
|
|
|
|
|
|
|
"\x0D\x0A" => "\x0D\x0A", |
|
4267
|
|
|
|
|
|
|
"\x0D" => "\x0D", |
|
4268
|
|
|
|
|
|
|
"\x0A" => "\x0A", |
|
4269
|
|
|
|
|
|
|
)}; |
|
4270
|
|
|
|
|
|
|
|
|
4271
|
|
|
|
|
|
|
## Map any recognized _LineEnding value to its logical form |
|
4272
|
1
|
|
|
|
|
781
|
my $LineEnding_Symbols = |
|
4273
|
|
|
|
|
|
|
{( |
|
4274
|
|
|
|
|
|
|
"\x0D\x0A" => "dos", |
|
4275
|
|
|
|
|
|
|
"\x0D" => "mac", |
|
4276
|
|
|
|
|
|
|
"\x0A" => "unix", |
|
4277
|
|
|
|
|
|
|
dos => "dos", |
|
4278
|
|
|
|
|
|
|
mac => "mac", |
|
4279
|
|
|
|
|
|
|
unix => "unix", |
|
4280
|
|
|
|
|
|
|
)}; |
|
4281
|
|
|
|
|
|
|
|
|
4282
|
|
|
|
|
|
|
sub lineending_strings ## Class or instance method |
|
4283
|
|
|
|
|
|
|
{ |
|
4284
|
33
|
|
|
33
|
0
|
311
|
return({%$LineEnding_Strings}); ## Copy of above private hash. |
|
4285
|
|
|
|
|
|
|
} |
|
4286
|
|
|
|
|
|
|
|
|
4287
|
|
|
|
|
|
|
sub lineending_symbols ## Class or instance method |
|
4288
|
|
|
|
|
|
|
{ |
|
4289
|
25
|
|
|
25
|
0
|
1388
|
return({%$LineEnding_Symbols}); ## Copy of above private hash. |
|
4290
|
|
|
|
|
|
|
} |
|
4291
|
|
|
|
|
|
|
} |
|
4292
|
|
|
|
|
|
|
|
|
4293
|
|
|
|
|
|
|
sub lineending() |
|
4294
|
|
|
|
|
|
|
{ |
|
4295
|
3
|
|
|
3
|
0
|
12
|
my $this = shift; |
|
4296
|
3
|
|
|
|
|
3
|
my ($LineEnding) = @_; |
|
4297
|
|
|
|
|
|
|
|
|
4298
|
|
|
|
|
|
|
## Set if specified. Try to convert to symbolic form if possible. |
|
4299
|
3
|
50
|
0
|
|
|
7
|
$this->{_LineEnding} = $this->lineending_symbol($LineEnding) || $LineEnding if $LineEnding; |
|
4300
|
|
|
|
|
|
|
|
|
4301
|
|
|
|
|
|
|
## Otherwise / either case, get whatever value we have and return it.... |
|
4302
|
3
|
|
|
|
|
4
|
my $LineEnding = $this->{_LineEnding}; |
|
4303
|
|
|
|
|
|
|
|
|
4304
|
3
|
|
|
|
|
7
|
return($LineEnding); |
|
4305
|
|
|
|
|
|
|
} |
|
4306
|
|
|
|
|
|
|
|
|
4307
|
|
|
|
|
|
|
sub lineending_symbol |
|
4308
|
|
|
|
|
|
|
{ |
|
4309
|
22
|
|
|
22
|
0
|
34
|
my $this = shift; |
|
4310
|
22
|
|
|
|
|
30
|
my ($LineEnding) = @_; |
|
4311
|
|
|
|
|
|
|
|
|
4312
|
22
|
|
66
|
|
|
57
|
$LineEnding ||= $this->{_LineEnding}; |
|
4313
|
|
|
|
|
|
|
|
|
4314
|
22
|
|
33
|
|
|
30
|
return($ {$this->lineending_symbols()}{$LineEnding} || $LineEnding); |
|
4315
|
|
|
|
|
|
|
} |
|
4316
|
|
|
|
|
|
|
|
|
4317
|
|
|
|
|
|
|
|
|
4318
|
|
|
|
|
|
|
sub lineending_string |
|
4319
|
|
|
|
|
|
|
{ |
|
4320
|
30
|
|
|
30
|
0
|
50
|
my $this = shift; |
|
4321
|
30
|
|
|
|
|
42
|
my ($LineEnding) = @_; |
|
4322
|
|
|
|
|
|
|
|
|
4323
|
30
|
|
100
|
|
|
125
|
$LineEnding ||= $this->{_LineEnding}; |
|
4324
|
|
|
|
|
|
|
|
|
4325
|
30
|
|
66
|
|
|
33
|
return($ {$this->lineending_strings()}{$LineEnding} || $LineEnding); |
|
4326
|
|
|
|
|
|
|
} |
|
4327
|
|
|
|
|
|
|
|
|
4328
|
|
|
|
|
|
|
=pod |
|
4329
|
|
|
|
|
|
|
|
|
4330
|
|
|
|
|
|
|
=head1 AUTOMATIC CACHEING |
|
4331
|
|
|
|
|
|
|
|
|
4332
|
|
|
|
|
|
|
By default, Data::CTable makes cached versions of files it reads so it |
|
4333
|
|
|
|
|
|
|
can read them much more quickly the next time. Optionally, it can |
|
4334
|
|
|
|
|
|
|
also cache any file it writes for quicker re-reading later. |
|
4335
|
|
|
|
|
|
|
|
|
4336
|
|
|
|
|
|
|
On Unix systems, cache files are always created with 0666 |
|
4337
|
|
|
|
|
|
|
(world-write) permissions for easy cleanup. |
|
4338
|
|
|
|
|
|
|
|
|
4339
|
|
|
|
|
|
|
When reading files, Data::CTable checks the _CacheOnRead parameter. |
|
4340
|
|
|
|
|
|
|
If that parameter is true, which it is by default, the module tries to |
|
4341
|
|
|
|
|
|
|
find an up-to-date cache file to read instead of the original. |
|
4342
|
|
|
|
|
|
|
Reading a cache file can be 10x faster than reading and parsing the |
|
4343
|
|
|
|
|
|
|
original text file. |
|
4344
|
|
|
|
|
|
|
|
|
4345
|
|
|
|
|
|
|
In order to look for the cache file, it must first calculate the path |
|
4346
|
|
|
|
|
|
|
where the cache file should be located, based on the _FileName of the |
|
4347
|
|
|
|
|
|
|
file to be read. |
|
4348
|
|
|
|
|
|
|
|
|
4349
|
|
|
|
|
|
|
The path of the cache file is calculated as follows: |
|
4350
|
|
|
|
|
|
|
|
|
4351
|
|
|
|
|
|
|
If the _CacheSubDir parameter is a RELATIVE PATH, then it is appended |
|
4352
|
|
|
|
|
|
|
to the directory component of _FileName to arrive at the directory to |
|
4353
|
|
|
|
|
|
|
use to store the cache file. If it is an ABSOLUTE PATH, then |
|
4354
|
|
|
|
|
|
|
_CacheSubDir is used by itself. (The trailing path separator is |
|
4355
|
|
|
|
|
|
|
optional and an appropriate one will be added by Data::CTable if it is |
|
4356
|
|
|
|
|
|
|
missing.) |
|
4357
|
|
|
|
|
|
|
|
|
4358
|
|
|
|
|
|
|
The file name of the cache file is calculated as follows: |
|
4359
|
|
|
|
|
|
|
|
|
4360
|
|
|
|
|
|
|
If the _CacheExtension parameter is specified, it is appended to the |
|
4361
|
|
|
|
|
|
|
base file name component from the _FileName parameter. If you want |
|
4362
|
|
|
|
|
|
|
the cached file name to be the same as the name of the original file, |
|
4363
|
|
|
|
|
|
|
you can set _CacheExtension to "", which is not recommended. |
|
4364
|
|
|
|
|
|
|
|
|
4365
|
|
|
|
|
|
|
Then, the cache path and cache file name are joined to arrive at the |
|
4366
|
|
|
|
|
|
|
name of the cache file. If both _CacheSubDir and _CacheExtension were |
|
4367
|
|
|
|
|
|
|
empty, then the cache file path will be the same as the _FileName, and |
|
4368
|
|
|
|
|
|
|
Data::CTable will refuse to either read or write a cache file, so |
|
4369
|
|
|
|
|
|
|
setting these fields both to empty is equivalent to setting |
|
4370
|
|
|
|
|
|
|
_CacheOnRead to false. |
|
4371
|
|
|
|
|
|
|
|
|
4372
|
|
|
|
|
|
|
The cache file contains a highly-efficient representation of all the |
|
4373
|
|
|
|
|
|
|
following data that would otherwise have to be determined by reading |
|
4374
|
|
|
|
|
|
|
and parsing the entire text file: |
|
4375
|
|
|
|
|
|
|
|
|
4376
|
|
|
|
|
|
|
- All the data columns (field values) |
|
4377
|
|
|
|
|
|
|
- _FieldList: The list of fields, in order |
|
4378
|
|
|
|
|
|
|
- _HeaderRow: Whether a header row is / should be present |
|
4379
|
|
|
|
|
|
|
- _LineEnding: The line ending setting |
|
4380
|
|
|
|
|
|
|
- _FDelimiter: The field delimiter setting |
|
4381
|
|
|
|
|
|
|
|
|
4382
|
|
|
|
|
|
|
If found prior to a read(), AND, the date of the cache file is LATER |
|
4383
|
|
|
|
|
|
|
than the date of the original file, the cache file is used instead. |
|
4384
|
|
|
|
|
|
|
(If the date is EARLIER, then the cache file is ignored because it can |
|
4385
|
|
|
|
|
|
|
be presumed that the data inside the text file is newer.) |
|
4386
|
|
|
|
|
|
|
|
|
4387
|
|
|
|
|
|
|
If cacheing is ON, then after successfully reading the text file |
|
4388
|
|
|
|
|
|
|
(either because there was no cache file yet or the cache file was out |
|
4389
|
|
|
|
|
|
|
of date or corrupted or otherwise unusable), read() will then try to |
|
4390
|
|
|
|
|
|
|
create a cache file. This, of course, takes some time, but the time |
|
4391
|
|
|
|
|
|
|
taken will be more than made up in the speedup of the next read() |
|
4392
|
|
|
|
|
|
|
operation on the same file. |
|
4393
|
|
|
|
|
|
|
|
|
4394
|
|
|
|
|
|
|
If creating the cache file fails (for example, because file |
|
4395
|
|
|
|
|
|
|
permissions didn't allow the cache directory to be created or the |
|
4396
|
|
|
|
|
|
|
cache file to be written), read() generates a warning explaining why |
|
4397
|
|
|
|
|
|
|
cacheing failed, but the read() operation itself still succeeds. |
|
4398
|
|
|
|
|
|
|
|
|
4399
|
|
|
|
|
|
|
No parameters in the object itself are set or modified to indicate the |
|
4400
|
|
|
|
|
|
|
success or failure of writing the cache file. |
|
4401
|
|
|
|
|
|
|
|
|
4402
|
|
|
|
|
|
|
Similarly, there is no way to tell whether a successful read() |
|
4403
|
|
|
|
|
|
|
operation read from the cache or from the original data file. If you |
|
4404
|
|
|
|
|
|
|
want to be SURE the reading was from the data file, either turn off |
|
4405
|
|
|
|
|
|
|
_CacheOnRead, or call the read_file() method instead of read(). |
|
4406
|
|
|
|
|
|
|
|
|
4407
|
|
|
|
|
|
|
NOTE: because the name of the cache file to be used is calculated just |
|
4408
|
|
|
|
|
|
|
before the read() is actually done, the cache file can only be found |
|
4409
|
|
|
|
|
|
|
if the _CacheSubDir and _CacheExtension are the same as they were when |
|
4410
|
|
|
|
|
|
|
the cache was last created. If you change these parameters after |
|
4411
|
|
|
|
|
|
|
having previously cached a file, the older caches could be "orphaned" |
|
4412
|
|
|
|
|
|
|
and just sit around wasting disk space. |
|
4413
|
|
|
|
|
|
|
|
|
4414
|
|
|
|
|
|
|
=head2 Cacheing on write() |
|
4415
|
|
|
|
|
|
|
|
|
4416
|
|
|
|
|
|
|
You may optionally set _CacheOnWrite (default = false) to true. If |
|
4417
|
|
|
|
|
|
|
done, then a cache file will be saved for files written using the |
|
4418
|
|
|
|
|
|
|
write() command. Read about write() below for more about why you |
|
4419
|
|
|
|
|
|
|
might want to do this. |
|
4420
|
|
|
|
|
|
|
|
|
4421
|
|
|
|
|
|
|
=head1 AUTOMATIC DIRECTORY CREATION |
|
4422
|
|
|
|
|
|
|
|
|
4423
|
|
|
|
|
|
|
When Data::CTable needs to write a file, (a cache file or a data |
|
4424
|
|
|
|
|
|
|
file), it automatically tries to create any directories or |
|
4425
|
|
|
|
|
|
|
subdirectories you specify in the _FileName or _CacheSubDir |
|
4426
|
|
|
|
|
|
|
parameters. |
|
4427
|
|
|
|
|
|
|
|
|
4428
|
|
|
|
|
|
|
If it fails while writing a data file, write() will fail (and you will |
|
4429
|
|
|
|
|
|
|
be warned). If it fails to create a directory while writing a cache |
|
4430
|
|
|
|
|
|
|
file, a warning will be issued, but the overall read() or write() |
|
4431
|
|
|
|
|
|
|
operation will still return a result indicating success. |
|
4432
|
|
|
|
|
|
|
|
|
4433
|
|
|
|
|
|
|
Any directories created will have the permissions 0777 (world-write) |
|
4434
|
|
|
|
|
|
|
for easy cleanup. |
|
4435
|
|
|
|
|
|
|
|
|
4436
|
|
|
|
|
|
|
Generally, the only directory the module will have to create is a |
|
4437
|
|
|
|
|
|
|
subdirectory to hold cache files. |
|
4438
|
|
|
|
|
|
|
|
|
4439
|
|
|
|
|
|
|
However, since other directories could be created, be sure to exercise |
|
4440
|
|
|
|
|
|
|
caution when allowing the module to create any directories for you on |
|
4441
|
|
|
|
|
|
|
any system where security might be an issue. |
|
4442
|
|
|
|
|
|
|
|
|
4443
|
|
|
|
|
|
|
Also, if the 0666 permissions on the cache files themselves are too |
|
4444
|
|
|
|
|
|
|
liberal, you can either 1) turn off cacheing, or 2) call the |
|
4445
|
|
|
|
|
|
|
prep_cache_file() method to get the name of the cache file that would |
|
4446
|
|
|
|
|
|
|
have been written, if any, and then restrict its permissions: |
|
4447
|
|
|
|
|
|
|
|
|
4448
|
|
|
|
|
|
|
chmod (0600, $this->prep_cache_file()); |
|
4449
|
|
|
|
|
|
|
|
|
4450
|
|
|
|
|
|
|
=head1 READING DATA FILES |
|
4451
|
|
|
|
|
|
|
|
|
4452
|
|
|
|
|
|
|
## Replacing data in table with data read from a file |
|
4453
|
|
|
|
|
|
|
|
|
4454
|
|
|
|
|
|
|
$t->read($Path) ## Simple calling convention |
|
4455
|
|
|
|
|
|
|
|
|
4456
|
|
|
|
|
|
|
$t->read( ## Named-parameter convention |
|
4457
|
|
|
|
|
|
|
|
|
4458
|
|
|
|
|
|
|
## Params that override params in the object if supplied... |
|
4459
|
|
|
|
|
|
|
|
|
4460
|
|
|
|
|
|
|
_FileName => $Path, ## Full or partial path of file to read |
|
4461
|
|
|
|
|
|
|
|
|
4462
|
|
|
|
|
|
|
_FieldList => [...], ## Fields to read; others to be discarded |
|
4463
|
|
|
|
|
|
|
|
|
4464
|
|
|
|
|
|
|
_HeaderRow => 0, ## No header row (_FieldList required!) |
|
4465
|
|
|
|
|
|
|
|
|
4466
|
|
|
|
|
|
|
_LineEnding => undef, ## Text line ending (undef means guess) |
|
4467
|
|
|
|
|
|
|
_FDelimiter => undef, ## Field delimiter (undef means guess) |
|
4468
|
|
|
|
|
|
|
|
|
4469
|
|
|
|
|
|
|
_ReturnMap => 1, ## Whether to decode internal returns |
|
4470
|
|
|
|
|
|
|
_ReturnEncoding=>"\x0B", ## How to decode returns. |
|
4471
|
|
|
|
|
|
|
_MacRomanMap => undef, ## Whether/when to read Mac char set |
|
4472
|
|
|
|
|
|
|
|
|
4473
|
|
|
|
|
|
|
_CacheOnRead => 0, ## Enable/disable cacheing behavior |
|
4474
|
|
|
|
|
|
|
_CacheExtension=> ".x", ## Extension to add to cache file name |
|
4475
|
|
|
|
|
|
|
_CacheSubDir => "", ## (Sub-)dir, if any, for cache files |
|
4476
|
|
|
|
|
|
|
|
|
4477
|
|
|
|
|
|
|
## Params specific to the read()/write() methods... |
|
4478
|
|
|
|
|
|
|
|
|
4479
|
|
|
|
|
|
|
_MaxRecords => 200, ## Limit on how many records to read |
|
4480
|
|
|
|
|
|
|
) |
|
4481
|
|
|
|
|
|
|
|
|
4482
|
|
|
|
|
|
|
$t->read_file() ## Internal: same as read(); ignores cacheing |
|
4483
|
|
|
|
|
|
|
|
|
4484
|
|
|
|
|
|
|
read() opens a Merge, CSV, or Tab-delimited file and reads in all or |
|
4485
|
|
|
|
|
|
|
some fields, and all or some records, REPLACING ANY EXISTING DATA in |
|
4486
|
|
|
|
|
|
|
the CTable object. |
|
4487
|
|
|
|
|
|
|
|
|
4488
|
|
|
|
|
|
|
Using the simple calling convention, just pass it a file name. All |
|
4489
|
|
|
|
|
|
|
other parameters will come from the object (or will be defaulted if |
|
4490
|
|
|
|
|
|
|
absent). To specify additional parameters or override any parameters |
|
4491
|
|
|
|
|
|
|
in the object while reading, use the named-parameter calling |
|
4492
|
|
|
|
|
|
|
convention. |
|
4493
|
|
|
|
|
|
|
|
|
4494
|
|
|
|
|
|
|
See the full PARAMETER LIST, above, or read on for some extra details: |
|
4495
|
|
|
|
|
|
|
|
|
4496
|
|
|
|
|
|
|
_ReturnMap controls whether return characters encoded as ASCII 11 |
|
4497
|
|
|
|
|
|
|
should be mapped back to real newlines (C<"\n">) when read into memory. |
|
4498
|
|
|
|
|
|
|
If false, they are left as ASCII 11 characters. (default is "true") |
|
4499
|
|
|
|
|
|
|
|
|
4500
|
|
|
|
|
|
|
_ReturnEncoding controls the character that returns are encoded as, if |
|
4501
|
|
|
|
|
|
|
different from ASCII 11. |
|
4502
|
|
|
|
|
|
|
|
|
4503
|
|
|
|
|
|
|
_FieldList is an array (reference) listing the names of fields to |
|
4504
|
|
|
|
|
|
|
import, in order (and will become the object's _FieldList upon |
|
4505
|
|
|
|
|
|
|
successful completion of the read() operation). If not provided and |
|
4506
|
|
|
|
|
|
|
not found in the object, or empty, then all fields found in the file |
|
4507
|
|
|
|
|
|
|
are imported and the object's field list will be set from those found |
|
4508
|
|
|
|
|
|
|
in the file, in the order found there. If _HeaderRow is false, then |
|
4509
|
|
|
|
|
|
|
this parameter is required (either in the object or as a formal |
|
4510
|
|
|
|
|
|
|
parameter) and is assumed to give the correct names for the fields as |
|
4511
|
|
|
|
|
|
|
they actually occur in the file. If _HeaderRow is true and _FieldList |
|
4512
|
|
|
|
|
|
|
is provided, then _FieldList specifies the (sub-)set of fields to be |
|
4513
|
|
|
|
|
|
|
read from the file and others will be ignored. |
|
4514
|
|
|
|
|
|
|
|
|
4515
|
|
|
|
|
|
|
_HeaderRow, which defaults to true, if set to false, tells read() to |
|
4516
|
|
|
|
|
|
|
not expect a header row showing the field names in the file. Instead, |
|
4517
|
|
|
|
|
|
|
it assumes that the _FieldList gives those (and _FieldList must |
|
4518
|
|
|
|
|
|
|
therefore be specified either as a parameter or an existing parameter |
|
4519
|
|
|
|
|
|
|
in the object). |
|
4520
|
|
|
|
|
|
|
|
|
4521
|
|
|
|
|
|
|
_MaxRecords (optional) is an upper limit on the number of fields to |
|
4522
|
|
|
|
|
|
|
import. If not specified, or zero, or undef, then there is no limit; |
|
4523
|
|
|
|
|
|
|
all records will be imported or memory will be exhausted. |
|
4524
|
|
|
|
|
|
|
|
|
4525
|
|
|
|
|
|
|
read() returns a Boolean "success" code. |
|
4526
|
|
|
|
|
|
|
|
|
4527
|
|
|
|
|
|
|
If read() returns false, then it will also have set the _ErrorMsg |
|
4528
|
|
|
|
|
|
|
parameter in the object. It may or may not have partially altered |
|
4529
|
|
|
|
|
|
|
data in the object if an error is encountered. |
|
4530
|
|
|
|
|
|
|
|
|
4531
|
|
|
|
|
|
|
After a successful read: |
|
4532
|
|
|
|
|
|
|
|
|
4533
|
|
|
|
|
|
|
fieldlist() (the object's _FieldList parameter) tells which |
|
4534
|
|
|
|
|
|
|
fields were actually read, in what order. It may omit any fields |
|
4535
|
|
|
|
|
|
|
requested in _FieldList that were not actually found in the file for |
|
4536
|
|
|
|
|
|
|
whatever reason. |
|
4537
|
|
|
|
|
|
|
|
|
4538
|
|
|
|
|
|
|
length() tells how many fields were read. |
|
4539
|
|
|
|
|
|
|
|
|
4540
|
|
|
|
|
|
|
The selection() is reset to no selection (all selected / unsorted) |
|
4541
|
|
|
|
|
|
|
|
|
4542
|
|
|
|
|
|
|
The object's _FileName parameter contains the path to the file |
|
4543
|
|
|
|
|
|
|
that was read. If the _FileName you specified did not have a path, |
|
4544
|
|
|
|
|
|
|
then _FileName will be prepended with a path component indicating |
|
4545
|
|
|
|
|
|
|
"current directory" (e.g. "./" on Unix). |
|
4546
|
|
|
|
|
|
|
|
|
4547
|
|
|
|
|
|
|
_FDelimiter will contain the actual delimiter character that was |
|
4548
|
|
|
|
|
|
|
used to read the file (either tab or comma if the delimiter was |
|
4549
|
|
|
|
|
|
|
guessed, or whatever delimiter you specified). |
|
4550
|
|
|
|
|
|
|
|
|
4551
|
|
|
|
|
|
|
_LineEnding will contain the actual line-ending setting used to |
|
4552
|
|
|
|
|
|
|
read the file. This will be either "mac" ("\x0D"), "unix" ("\x0D"), |
|
4553
|
|
|
|
|
|
|
or "dos" ("\x0D\x0A") if the line endings were guessed by read(). |
|
4554
|
|
|
|
|
|
|
Otherwise it will be whatever _LineEnding you specified. |
|
4555
|
|
|
|
|
|
|
|
|
4556
|
|
|
|
|
|
|
|
|
4557
|
|
|
|
|
|
|
=head1 FILE FORMAT NOTES |
|
4558
|
|
|
|
|
|
|
|
|
4559
|
|
|
|
|
|
|
As mentioned, read() allows the following flexibilities in reading |
|
4560
|
|
|
|
|
|
|
text-based tabular data files: |
|
4561
|
|
|
|
|
|
|
|
|
4562
|
|
|
|
|
|
|
You may specify the line endings (record delimiters), or it can |
|
4563
|
|
|
|
|
|
|
guess them (mac, unix, dos are supported). |
|
4564
|
|
|
|
|
|
|
|
|
4565
|
|
|
|
|
|
|
You may specify the field delimiters, or it can guess them (tab |
|
4566
|
|
|
|
|
|
|
and comma are supported). |
|
4567
|
|
|
|
|
|
|
|
|
4568
|
|
|
|
|
|
|
It can get field names from a header row, or, if there is no |
|
4569
|
|
|
|
|
|
|
header row, you can tell it the field names, in order. |
|
4570
|
|
|
|
|
|
|
|
|
4571
|
|
|
|
|
|
|
You can tell it whether or not to decode embedded returns in |
|
4572
|
|
|
|
|
|
|
data fields, and if so, which character they were encoded as. |
|
4573
|
|
|
|
|
|
|
|
|
4574
|
|
|
|
|
|
|
Beyond supporting the above flexible options, read() makes the |
|
4575
|
|
|
|
|
|
|
following non-flexible assumptions: |
|
4576
|
|
|
|
|
|
|
|
|
4577
|
|
|
|
|
|
|
Fields must NOT contain unencoded returns -- that is: whatever |
|
4578
|
|
|
|
|
|
|
character sequence is specified for _LineEnding will NEVER occur |
|
4579
|
|
|
|
|
|
|
inside a field in the text file; in addition, the current platform's |
|
4580
|
|
|
|
|
|
|
definition of C<"\n"> will NEVER occur; these characters if present in |
|
4581
|
|
|
|
|
|
|
field data, MUST have been encoded to some safe character string |
|
4582
|
|
|
|
|
|
|
before the file was created. |
|
4583
|
|
|
|
|
|
|
|
|
4584
|
|
|
|
|
|
|
Each field may OPTIONALLY be surrounded with double-quote marks. |
|
4585
|
|
|
|
|
|
|
However, if the field data itself contains either a double-quote |
|
4586
|
|
|
|
|
|
|
character (C<">) or the current file's field delimiter (such as tab or |
|
4587
|
|
|
|
|
|
|
comma), then the field MUST be surrounded with double-quotes. |
|
4588
|
|
|
|
|
|
|
(Currently, all data written by Data::CTable have all field values |
|
4589
|
|
|
|
|
|
|
surrounded by double-quotes, but a more selective policy may be used |
|
4590
|
|
|
|
|
|
|
in the future.) |
|
4591
|
|
|
|
|
|
|
|
|
4592
|
|
|
|
|
|
|
If a field contains a double-quote character, then each double-quote |
|
4593
|
|
|
|
|
|
|
character in the field must be encoded as C<""> -- i.e. each C<"> in the |
|
4594
|
|
|
|
|
|
|
original data becomes C<""> in the text file. |
|
4595
|
|
|
|
|
|
|
|
|
4596
|
|
|
|
|
|
|
Data files may not mix line-ending types or field delimiter types. |
|
4597
|
|
|
|
|
|
|
Once determined, the same endings and delimiters will be used to read |
|
4598
|
|
|
|
|
|
|
the entire file. |
|
4599
|
|
|
|
|
|
|
|
|
4600
|
|
|
|
|
|
|
The fields recognized on each line will either be determined by |
|
4601
|
|
|
|
|
|
|
the header row or the _FieldList provided by the caller. Any extra |
|
4602
|
|
|
|
|
|
|
fields on any given line will be ignored. Any missing fields will be |
|
4603
|
|
|
|
|
|
|
treated as undef/empty. |
|
4604
|
|
|
|
|
|
|
|
|
4605
|
|
|
|
|
|
|
If you are having trouble reading a delimited text file, check that |
|
4606
|
|
|
|
|
|
|
all data in the file obeys these assumptions. |
|
4607
|
|
|
|
|
|
|
|
|
4608
|
|
|
|
|
|
|
=cut |
|
4609
|
|
|
|
|
|
|
|
|
4610
|
|
|
|
|
|
|
sub read ## Read, cacheing if possible |
|
4611
|
|
|
|
|
|
|
{ |
|
4612
|
62
|
|
|
62
|
1
|
99
|
my $this = shift; |
|
4613
|
62
|
|
|
|
|
191
|
return($this->read_file_or_cache(@_)); |
|
4614
|
|
|
|
|
|
|
} |
|
4615
|
|
|
|
|
|
|
|
|
4616
|
|
|
|
|
|
|
sub read_file ## Read, ignoring cacheing |
|
4617
|
|
|
|
|
|
|
{ |
|
4618
|
19
|
|
|
19
|
0
|
32
|
my $this = shift; |
|
4619
|
19
|
50
|
|
|
|
82
|
my $Params = (@_ == 1 ? {_FileName => $_[0]} : {@_}); |
|
4620
|
|
|
|
|
|
|
|
|
4621
|
171
|
|
|
|
|
312
|
my($FileName, $FieldList, $MaxRecords, $LineEnding, $FDelimiter, $ReturnMap, $ReturnEncoding, $MacRomanMap, $HeaderRow) |
|
4622
|
19
|
|
|
|
|
54
|
= map {$this->getparam($Params, $_)} |
|
4623
|
|
|
|
|
|
|
qw(_FileName _FieldList _MaxRecords _LineEnding _FDelimiter _ReturnMap _ReturnEncoding _MacRomanMap _HeaderRow); |
|
4624
|
|
|
|
|
|
|
|
|
4625
|
19
|
|
|
|
|
34
|
my $Success; |
|
4626
|
|
|
|
|
|
|
|
|
4627
|
|
|
|
|
|
|
## Default error message is none. |
|
4628
|
19
|
|
|
|
|
38
|
$this->{_ErrorMsg} = ""; |
|
4629
|
|
|
|
|
|
|
|
|
4630
|
|
|
|
|
|
|
## Default for HeaderRow is true. |
|
4631
|
19
|
50
|
|
|
|
45
|
$HeaderRow = 1 unless defined($HeaderRow); |
|
4632
|
|
|
|
|
|
|
|
|
4633
|
|
|
|
|
|
|
## Default for ReturnEncoding is "\x0B" (control-K; ASCII 11) |
|
4634
|
19
|
50
|
|
|
|
51
|
$ReturnEncoding = "\x0B" unless length($ReturnEncoding); |
|
4635
|
|
|
|
|
|
|
|
|
4636
|
|
|
|
|
|
|
## Default for ReturnMap is true. |
|
4637
|
19
|
50
|
|
|
|
42
|
$ReturnMap = 1 unless defined($ReturnMap); |
|
4638
|
|
|
|
|
|
|
|
|
4639
|
|
|
|
|
|
|
## Default for MacRomanMap is undef ("Auto"); |
|
4640
|
19
|
100
|
|
|
|
49
|
$MacRomanMap = undef unless defined($MacRomanMap); |
|
4641
|
|
|
|
|
|
|
|
|
4642
|
|
|
|
|
|
|
## Default for MaxRecords is 0 (meaning import all records) |
|
4643
|
19
|
50
|
|
|
|
69
|
$MaxRecords = 0 unless (int($MaxRecords) == $MaxRecords); |
|
4644
|
|
|
|
|
|
|
|
|
4645
|
|
|
|
|
|
|
## Precompile a regex for the return encoding since we'll call it often (on each field!) later. |
|
4646
|
19
|
|
|
|
|
101
|
my $RetRegex = qr/$ReturnEncoding/; |
|
4647
|
|
|
|
|
|
|
|
|
4648
|
19
|
|
|
|
|
70
|
$this->progress("Reading $FileName..."); |
|
4649
|
|
|
|
|
|
|
|
|
4650
|
|
|
|
|
|
|
## Open the data file. |
|
4651
|
0
|
|
|
|
|
0
|
my $File = IO::File->new("<$FileName") or |
|
4652
|
19
|
50
|
|
|
|
135
|
do {$this->{_ErrorMsg} = "Failed to open $FileName: $!"; goto done}; |
|
|
0
|
|
|
|
|
0
|
|
|
4653
|
|
|
|
|
|
|
|
|
4654
|
|
|
|
|
|
|
## Get its total file size (useful for estimating table size later on). |
|
4655
|
19
|
50
|
|
|
|
1437
|
my $FileSize = (stat($File))[7] or |
|
4656
|
|
|
|
|
|
|
$this->{_ErrorMsg} = "File $FileName contains no data.", goto done; |
|
4657
|
|
|
|
|
|
|
|
|
4658
|
|
|
|
|
|
|
## Convert from optional "dos", "mac", "unix" symbolic values. |
|
4659
|
19
|
|
|
|
|
68
|
$LineEnding = $this->lineending_string($LineEnding); |
|
4660
|
|
|
|
|
|
|
|
|
4661
|
|
|
|
|
|
|
## Default for LineEnding is found by inspecting data in the file. |
|
4662
|
19
|
50
|
33
|
|
|
114
|
$LineEnding ||= guess_endings($File) or |
|
4663
|
|
|
|
|
|
|
$this->{_ErrorMsg} = "Could not find any line endings in the file $FileName.", goto done; |
|
4664
|
|
|
|
|
|
|
|
|
4665
|
|
|
|
|
|
|
## DoMacMapping is the actual setting for auto charset mapping |
|
4666
|
19
|
|
66
|
|
|
110
|
my $DoMacMapping = |
|
4667
|
|
|
|
|
|
|
((!defined($MacRomanMap) && ($LineEnding eq "\x0D")) || ## Auto |
|
4668
|
|
|
|
|
|
|
($MacRomanMap)); ## On |
|
4669
|
|
|
|
|
|
|
|
|
4670
|
19
|
100
|
|
|
|
146
|
$this->progress("Will convert upper-ascii characters if any, from Mac Roman to ISO 8859-1.") if $DoMacMapping; |
|
4671
|
|
|
|
|
|
|
|
|
4672
|
|
|
|
|
|
|
## FieldList is usable is it is a list and has at least one entry. |
|
4673
|
19
|
|
66
|
|
|
73
|
my $FieldListValid = ((ref($FieldList) eq 'ARRAY') && @$FieldList); |
|
4674
|
|
|
|
|
|
|
|
|
4675
|
|
|
|
|
|
|
## Set <$File> to use the line ending sequence we no known we are looking for. |
|
4676
|
19
|
|
|
|
|
91
|
local $/ = $LineEnding; |
|
4677
|
|
|
|
|
|
|
|
|
4678
|
|
|
|
|
|
|
## We use $_ explicitly, so must localize. |
|
4679
|
19
|
|
|
|
|
27
|
local $_; |
|
4680
|
|
|
|
|
|
|
|
|
4681
|
19
|
|
|
|
|
29
|
my $IncomingFields; |
|
4682
|
|
|
|
|
|
|
|
|
4683
|
19
|
50
|
|
|
|
48
|
if ($HeaderRow) |
|
4684
|
|
|
|
|
|
|
{ |
|
4685
|
|
|
|
|
|
|
## Get the list of fields available in the file (first line of file). |
|
4686
|
|
|
|
|
|
|
|
|
4687
|
19
|
50
|
|
|
|
190
|
$_ = <$File> or |
|
4688
|
|
|
|
|
|
|
$this->{_ErrorMsg} = "Could not find a first line with field names in $FileName.", goto done; |
|
4689
|
|
|
|
|
|
|
|
|
4690
|
|
|
|
|
|
|
## Try to guess file delimiter from the header row if not yet specified. |
|
4691
|
19
|
50
|
33
|
|
|
93
|
$FDelimiter ||= guess_delimiter($_) or |
|
4692
|
|
|
|
|
|
|
$this->{_ErrorMsg} = "Could not find comma or tab delimiters in $FileName.", goto done; |
|
4693
|
|
|
|
|
|
|
|
|
4694
|
|
|
|
|
|
|
## Maybe convert entire line (all records) Mac to ISO before splitting. |
|
4695
|
19
|
100
|
|
|
|
78
|
&MacRomanToISORoman8859_1(\ $_) if $DoMacMapping; |
|
4696
|
|
|
|
|
|
|
|
|
4697
|
19
|
|
|
|
|
48
|
chomp; |
|
4698
|
|
|
|
|
|
|
|
|
4699
|
19
|
|
|
|
|
42
|
s/^\"//; s/\"$//; ## remove possible leading, trailing quotes surrounding header row (rare) |
|
|
19
|
|
|
|
|
31
|
|
|
4700
|
|
|
|
|
|
|
|
|
4701
|
|
|
|
|
|
|
## Split header row into field names, removing optional "" around each at the same time. |
|
4702
|
19
|
|
|
|
|
306
|
$IncomingFields = [split(/\"?$FDelimiter\"?/, $_)]; |
|
4703
|
|
|
|
|
|
|
|
|
4704
|
|
|
|
|
|
|
} |
|
4705
|
|
|
|
|
|
|
else |
|
4706
|
|
|
|
|
|
|
{ |
|
4707
|
|
|
|
|
|
|
## Otherwise, require that the caller specifies it in _FieldList |
|
4708
|
|
|
|
|
|
|
|
|
4709
|
0
|
0
|
|
|
|
0
|
$this->{_ErrorMsg} = "Must specify a _FieldList if _HeaderRow says no header row is present.", goto done |
|
4710
|
|
|
|
|
|
|
unless $FieldListValid; |
|
4711
|
|
|
|
|
|
|
|
|
4712
|
0
|
|
|
|
|
0
|
$IncomingFields = [@$FieldList]; |
|
4713
|
|
|
|
|
|
|
} |
|
4714
|
|
|
|
|
|
|
|
|
4715
|
|
|
|
|
|
|
## Remove any leading underscores in the names of the incoming |
|
4716
|
|
|
|
|
|
|
## fields (not allowed because such field names are reserved for |
|
4717
|
|
|
|
|
|
|
## other object data). Note: this could result in |
|
4718
|
|
|
|
|
|
|
## duplicate/overwritten field names that were otherwise |
|
4719
|
|
|
|
|
|
|
## apparently unique in the incoming data file. |
|
4720
|
|
|
|
|
|
|
|
|
4721
|
19
|
|
|
|
|
55
|
$IncomingFields = [map {(/^_*(.*)/)[0]} @$IncomingFields]; |
|
|
79
|
|
|
|
|
259
|
|
|
4722
|
|
|
|
|
|
|
|
|
4723
|
|
|
|
|
|
|
## Make a hash that can be used to map these fields' names to their numbers. |
|
4724
|
1
|
|
|
1
|
|
725
|
my $IncomingFieldNameToNum = {}; @$IncomingFieldNameToNum{@$IncomingFields} = ($[ .. $#$IncomingFields); |
|
|
1
|
|
|
|
|
409
|
|
|
|
1
|
|
|
|
|
2533
|
|
|
|
19
|
|
|
|
|
59
|
|
|
|
19
|
|
|
|
|
188
|
|
|
4725
|
|
|
|
|
|
|
|
|
4726
|
|
|
|
|
|
|
## Make a list of the fields we'll be importing (by taking the |
|
4727
|
|
|
|
|
|
|
## list the caller requested, and paring it down to only those |
|
4728
|
|
|
|
|
|
|
## fields that are actually available in the table.) |
|
4729
|
|
|
|
|
|
|
|
|
4730
|
71
|
|
|
|
|
172
|
my $FieldsToGet = |
|
4731
|
19
|
100
|
|
|
|
61
|
[grep {exists($IncomingFieldNameToNum->{$_})} |
|
4732
|
|
|
|
|
|
|
($FieldListValid ? @$FieldList : @$IncomingFields)]; |
|
4733
|
|
|
|
|
|
|
|
|
4734
|
|
|
|
|
|
|
## Make a note of whether we're getting a subset of available |
|
4735
|
|
|
|
|
|
|
## fields because the caller requested such. If we are, we'll add |
|
4736
|
|
|
|
|
|
|
## a _Subset => 1 marker to the data for use later in ensuring the |
|
4737
|
|
|
|
|
|
|
## cache is OK. |
|
4738
|
|
|
|
|
|
|
|
|
4739
|
19
|
|
100
|
|
|
59
|
my $GettingSubset = ($FieldListValid && ("@{[sort @$IncomingFields]}" ne |
|
4740
|
|
|
|
|
|
|
"@{[sort @$FieldList ]}")); |
|
4741
|
|
|
|
|
|
|
|
|
4742
|
|
|
|
|
|
|
## Make an array of the incoming indices of these fields. |
|
4743
|
|
|
|
|
|
|
|
|
4744
|
|
|
|
|
|
|
## Allocate a list of empty arrays into which we can import the |
|
4745
|
|
|
|
|
|
|
## data. Initially they'll each have 100 empty slots for data; |
|
4746
|
|
|
|
|
|
|
## after we have imported 100 records, we'll re-consider the size |
|
4747
|
|
|
|
|
|
|
## estimate. When we're all done, we'll prune them back. |
|
4748
|
|
|
|
|
|
|
|
|
4749
|
19
|
|
|
|
|
69
|
my $FieldNums = [@$IncomingFieldNameToNum{@$FieldsToGet}]; |
|
4750
|
19
|
|
|
|
|
29
|
my $FieldVectors = []; foreach (@$FieldNums) {$#{$FieldVectors->[$_] = []} = 100}; |
|
|
19
|
|
|
|
|
95
|
|
|
|
71
|
|
|
|
|
78
|
|
|
|
71
|
|
|
|
|
431
|
|
|
4751
|
|
|
|
|
|
|
|
|
4752
|
|
|
|
|
|
|
## We want to be cool and support any embedded NULL (ascii zero) |
|
4753
|
|
|
|
|
|
|
## characters should they exist in the data, even though we are |
|
4754
|
|
|
|
|
|
|
## going to use NULL chars to encode embedded delimiters before we |
|
4755
|
|
|
|
|
|
|
## split.... |
|
4756
|
|
|
|
|
|
|
|
|
4757
|
|
|
|
|
|
|
## First we create a sufficiently obscure placeholder for any |
|
4758
|
|
|
|
|
|
|
## ascii zero characters in the input text (a rare occurrence |
|
4759
|
|
|
|
|
|
|
## anyway). |
|
4760
|
|
|
|
|
|
|
|
|
4761
|
19
|
|
|
|
|
78
|
my $ZeroMarker = "\001ASCII_ZERO" . time() . "\001"; |
|
4762
|
|
|
|
|
|
|
|
|
4763
|
|
|
|
|
|
|
## Now ready to go through the file line-by-line (record-by-record) |
|
4764
|
|
|
|
|
|
|
|
|
4765
|
19
|
|
|
|
|
26
|
my $WroteProg; |
|
4766
|
19
|
|
|
|
|
23
|
my $RecordsRead = 0; |
|
4767
|
19
|
|
|
|
|
86
|
while (<$File>) |
|
4768
|
|
|
|
|
|
|
{ |
|
4769
|
|
|
|
|
|
|
## Try to guess file delimiter from the header row if not yet specified. |
|
4770
|
57
|
50
|
33
|
|
|
164
|
$FDelimiter ||= guess_delimiter($_) or |
|
4771
|
|
|
|
|
|
|
$this->{_ErrorMsg} = "Could not find comma or tab delimiters in $FileName.", goto done; |
|
4772
|
|
|
|
|
|
|
|
|
4773
|
|
|
|
|
|
|
## Maybe convert entire line (all records) ISO to Mac before splitting. |
|
4774
|
57
|
100
|
|
|
|
152
|
&MacRomanToISORoman8859_1(\ $_) if $DoMacMapping; |
|
4775
|
|
|
|
|
|
|
|
|
4776
|
|
|
|
|
|
|
## Manipulate the single line of data fields into a splittable format. |
|
4777
|
|
|
|
|
|
|
|
|
4778
|
57
|
|
|
|
|
197
|
chomp; |
|
4779
|
|
|
|
|
|
|
|
|
4780
|
|
|
|
|
|
|
## Replace any delimiters inside quotes with ASCII 0. |
|
4781
|
|
|
|
|
|
|
## Split fields on delimiters. |
|
4782
|
|
|
|
|
|
|
## Delete leading or trailing quote marks from each field. |
|
4783
|
|
|
|
|
|
|
## Restore delimiters ASCII 0 back to delimiters. |
|
4784
|
|
|
|
|
|
|
|
|
4785
|
|
|
|
|
|
|
## Protect delimiters inside fields. |
|
4786
|
57
|
|
|
|
|
88
|
s/\000/$ZeroMarker/go; ## Preserve genuine ASCII 0 chars. |
|
4787
|
57
|
|
|
|
|
75
|
my $InQuote = 0; ## Initialize InQuote flag to zero. |
|
4788
|
57
|
|
|
|
|
368
|
s/(\")|($FDelimiter)/ ## Replace delimiters inside quotes with ASCII 0 ... |
|
4789
|
180
|
50
|
|
|
|
1239
|
($1 ? do{$InQuote^=1; $1} : ## ... if quote char, toggle InQuote flag |
|
|
0
|
50
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
4790
|
|
|
|
|
|
|
($InQuote ? "\000" : $2))/eg; ## ... if delimiter, InQuote sez whether to replace or retain. |
|
4791
|
|
|
|
|
|
|
|
|
4792
|
|
|
|
|
|
|
## Split record into fields, then clean each field. |
|
4793
|
|
|
|
|
|
|
|
|
4794
|
57
|
|
|
|
|
119
|
s/^\"//; s/\"$//; ## Kill leading, trailing quotes surrounding each record |
|
|
57
|
|
|
|
|
90
|
|
|
4795
|
|
|
|
|
|
|
my @FieldVals = |
|
4796
|
|
|
|
|
|
|
map |
|
4797
|
57
|
50
|
|
|
|
439
|
{if (length($_)) |
|
|
237
|
|
|
|
|
473
|
|
|
4798
|
|
|
|
|
|
|
{ |
|
4799
|
237
|
|
|
|
|
290
|
s/\"\"/\"/g; ## Restore Merge format's quoted double-quotes. ("" ==> ") |
|
4800
|
237
|
|
|
|
|
259
|
s/\000/$FDelimiter/g; ## Restore delimiters inside fields |
|
4801
|
237
|
|
|
|
|
262
|
s/$ZeroMarker/\000/go; ## Restore preserved ASCII 0 chars. |
|
4802
|
237
|
50
|
|
|
|
836
|
s/$RetRegex/\n/g if $ReturnMap;## Restore return characters that were coded as ASCII 11 (^K) |
|
4803
|
|
|
|
|
|
|
} |
|
4804
|
237
|
|
|
|
|
504
|
$_;} ## Return field val after above mods. |
|
4805
|
|
|
|
|
|
|
split(/\"?$FDelimiter\"?/, $_); ## Split on delimiters, killing optional surrounding quotes at same time. |
|
4806
|
|
|
|
|
|
|
|
|
4807
|
|
|
|
|
|
|
## Put the data into the vectors |
|
4808
|
57
|
|
|
|
|
140
|
foreach (@$FieldNums) |
|
4809
|
|
|
|
|
|
|
{ |
|
4810
|
213
|
50
|
|
|
|
623
|
$FieldVectors->[$_]->[$RecordsRead] = $FieldVals[$_] if (length($FieldVals[$_])); |
|
4811
|
|
|
|
|
|
|
} |
|
4812
|
57
|
|
|
|
|
80
|
$RecordsRead++; |
|
4813
|
|
|
|
|
|
|
|
|
4814
|
|
|
|
|
|
|
## Stop if we've read all the records we wanted. |
|
4815
|
57
|
50
|
33
|
|
|
175
|
last if ($MaxRecords && ($RecordsRead >= $MaxRecords)); |
|
4816
|
|
|
|
|
|
|
|
|
4817
|
|
|
|
|
|
|
## Optimization: |
|
4818
|
|
|
|
|
|
|
|
|
4819
|
|
|
|
|
|
|
## After importing 100, 200, 300, 400, etc. records, we |
|
4820
|
|
|
|
|
|
|
## re-estimate the size of the table. To help make the field |
|
4821
|
|
|
|
|
|
|
## insertion more efficient (by avoiding frequent |
|
4822
|
|
|
|
|
|
|
## array-resizing), we can set the sizes of the field vectors |
|
4823
|
|
|
|
|
|
|
## to hold at least our estimated number of records). |
|
4824
|
|
|
|
|
|
|
## Ideally, this estimation/resize step will happen at most 2 |
|
4825
|
|
|
|
|
|
|
## or 3 times no matter how big the incoming data file is. |
|
4826
|
|
|
|
|
|
|
|
|
4827
|
57
|
|
|
|
|
59
|
my $EstTotalRecords; |
|
4828
|
57
|
50
|
33
|
|
|
140
|
if ((($RecordsRead % 100) == 0) && ## If we're on a record divisble by 100... |
|
|
0
|
|
|
|
|
0
|
|
|
4829
|
|
|
|
|
|
|
(($RecordsRead + 100 > $#{$FieldVectors->[$FieldNums->[0]]}))) ## ... and we're getting close to max size... |
|
4830
|
|
|
|
|
|
|
{ |
|
4831
|
|
|
|
|
|
|
## Then estimate the size we'd like to resize it to. |
|
4832
|
0
|
|
|
|
|
0
|
$EstTotalRecords = (100 + int($RecordsRead * ($FileSize / tell($File)))); |
|
4833
|
0
|
0
|
0
|
|
|
0
|
$EstTotalRecords = $MaxRecords if ($MaxRecords && ($MaxRecords < $EstTotalRecords)); |
|
4834
|
|
|
|
|
|
|
|
|
4835
|
|
|
|
|
|
|
## If this size is greater than the actual size... |
|
4836
|
0
|
0
|
|
|
|
0
|
if ($EstTotalRecords > $#{$FieldVectors->[$FieldNums->[0]]}) |
|
|
0
|
|
|
|
|
0
|
|
|
4837
|
|
|
|
|
|
|
{ |
|
4838
|
|
|
|
|
|
|
## Then resize all the vectors. |
|
4839
|
|
|
|
|
|
|
## $this->progress("$RecordsRead: Resizing to $EstTotalRecords...\n"); ## Debugging |
|
4840
|
0
|
|
|
|
|
0
|
foreach (@$FieldNums) {$#{$FieldVectors->[$_]} = $EstTotalRecords}; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
4841
|
|
|
|
|
|
|
} |
|
4842
|
|
|
|
|
|
|
} |
|
4843
|
|
|
|
|
|
|
|
|
4844
|
|
|
|
|
|
|
## Try doing timed (throttled to 1 per 2 secs) progress at |
|
4845
|
|
|
|
|
|
|
## most every 100th record. |
|
4846
|
57
|
0
|
|
|
|
111
|
my $Did = ($EstTotalRecords ? |
|
|
|
50
|
|
|
|
|
|
|
4847
|
|
|
|
|
|
|
$this->progress_timed("Reading", "$RecordsRead of $EstTotalRecords (est.)", tell($File), $FileSize, 1) : |
|
4848
|
|
|
|
|
|
|
$this->progress_timed("Reading", "$RecordsRead" , tell($File), $FileSize, 1)) |
|
4849
|
|
|
|
|
|
|
if (($RecordsRead % 100) == 0); |
|
4850
|
57
|
|
33
|
|
|
477
|
$WroteProg ||= $Did; |
|
4851
|
|
|
|
|
|
|
} |
|
4852
|
|
|
|
|
|
|
|
|
4853
|
|
|
|
|
|
|
## If we wrote timed progress but didn't get to give the 100% |
|
4854
|
|
|
|
|
|
|
## message yet, print the 100% message now. |
|
4855
|
|
|
|
|
|
|
|
|
4856
|
19
|
50
|
|
|
|
48
|
if ($WroteProg) |
|
4857
|
|
|
|
|
|
|
{ |
|
4858
|
0
|
0
|
|
|
|
0
|
$this->progress_timed("Reading", "$RecordsRead of $RecordsRead", $FileSize, $FileSize, 1) |
|
4859
|
|
|
|
|
|
|
unless (($RecordsRead % 100) == 0); |
|
4860
|
|
|
|
|
|
|
} |
|
4861
|
|
|
|
|
|
|
|
|
4862
|
|
|
|
|
|
|
## Print the regular Done message. |
|
4863
|
19
|
|
|
|
|
78
|
$this->progress("Read $FileName."); |
|
4864
|
|
|
|
|
|
|
|
|
4865
|
|
|
|
|
|
|
## Set the field vectors' length to the exact length we really |
|
4866
|
|
|
|
|
|
|
## read. |
|
4867
|
|
|
|
|
|
|
|
|
4868
|
|
|
|
|
|
|
## $this->progress("$RecordsRead: Truncating to @{[$RecordsRead - 1]}... \n"); ## Debugging |
|
4869
|
19
|
|
|
|
|
41
|
foreach (@$FieldNums) {$#{$FieldVectors->[$_]} = ($RecordsRead - 1)}; |
|
|
71
|
|
|
|
|
79
|
|
|
|
71
|
|
|
|
|
160
|
|
|
4870
|
|
|
|
|
|
|
|
|
4871
|
|
|
|
|
|
|
## Delete any existing columns in the object. |
|
4872
|
19
|
|
|
|
|
25
|
delete @$this{@{$this->fieldlist_all()}}; |
|
|
19
|
|
|
|
|
64
|
|
|
4873
|
|
|
|
|
|
|
|
|
4874
|
|
|
|
|
|
|
## Put the new columns into the object. |
|
4875
|
19
|
|
|
|
|
96
|
@$this{@$FieldsToGet} = @$FieldVectors[@$FieldNums]; |
|
4876
|
|
|
|
|
|
|
|
|
4877
|
|
|
|
|
|
|
## Set fieldlist to fields we actually read, in order. |
|
4878
|
19
|
|
|
|
|
60
|
$this->fieldlist($FieldsToGet); |
|
4879
|
|
|
|
|
|
|
|
|
4880
|
|
|
|
|
|
|
## Remember the line ending char or chars that were successfully |
|
4881
|
|
|
|
|
|
|
## used to read the file. The same ending will be used by default |
|
4882
|
|
|
|
|
|
|
## to write any file based on this object. |
|
4883
|
19
|
|
|
|
|
57
|
$this->{_LineEnding} = $this->lineending_symbol($LineEnding); |
|
4884
|
|
|
|
|
|
|
|
|
4885
|
|
|
|
|
|
|
## Remember the field delimiter that was successfully used to read |
|
4886
|
|
|
|
|
|
|
## the file. The same delimiter will be used by default to write |
|
4887
|
|
|
|
|
|
|
## any file based on this object. |
|
4888
|
19
|
|
|
|
|
66
|
$this->{_FDelimiter} = $FDelimiter; |
|
4889
|
|
|
|
|
|
|
|
|
4890
|
|
|
|
|
|
|
## Remember the header row setting. |
|
4891
|
19
|
|
|
|
|
29
|
$this->{_HeaderRow} = $HeaderRow; |
|
4892
|
|
|
|
|
|
|
|
|
4893
|
|
|
|
|
|
|
## Remember the filename used for reading. |
|
4894
|
19
|
|
|
|
|
30
|
$this->{_FileName} = $FileName; |
|
4895
|
|
|
|
|
|
|
|
|
4896
|
|
|
|
|
|
|
## Remember whether we read (and maybe will cache) a subset of available fields. |
|
4897
|
19
|
|
100
|
|
|
76
|
$this->{_Subset} = $GettingSubset || 0; |
|
4898
|
|
|
|
|
|
|
|
|
4899
|
|
|
|
|
|
|
## Clean out _Selection and verify _SortOrder to ensure compatibility |
|
4900
|
|
|
|
|
|
|
## with current _FieldList. |
|
4901
|
19
|
|
|
|
|
49
|
$this->read_postcheck(); |
|
4902
|
|
|
|
|
|
|
|
|
4903
|
|
|
|
|
|
|
## Other informational data and options, like sort specs, sort |
|
4904
|
|
|
|
|
|
|
## routines and so on, need not be changed or replaced when data |
|
4905
|
|
|
|
|
|
|
## changes. |
|
4906
|
|
|
|
|
|
|
|
|
4907
|
19
|
|
|
|
|
26
|
$Success = 1; |
|
4908
|
|
|
|
|
|
|
|
|
4909
|
19
|
50
|
|
|
|
40
|
done: |
|
4910
|
|
|
|
|
|
|
|
|
4911
|
|
|
|
|
|
|
$this->warn("FAILURE: $this->{_ErrorMsg}") unless $Success; |
|
4912
|
|
|
|
|
|
|
|
|
4913
|
19
|
50
|
|
|
|
281
|
close $File if $File; |
|
4914
|
19
|
|
|
|
|
258
|
return($Success); |
|
4915
|
|
|
|
|
|
|
} |
|
4916
|
|
|
|
|
|
|
|
|
4917
|
|
|
|
|
|
|
sub read_postcheck ## Called to clean up after a successful read |
|
4918
|
|
|
|
|
|
|
{ |
|
4919
|
62
|
|
|
62
|
0
|
92
|
my $this = shift; |
|
4920
|
|
|
|
|
|
|
|
|
4921
|
|
|
|
|
|
|
## Run select_all to empty out the _Selection. |
|
4922
|
62
|
|
|
|
|
180
|
$this->select_all(); |
|
4923
|
|
|
|
|
|
|
|
|
4924
|
|
|
|
|
|
|
## Remove any bogus field names from the sort order, if any. |
|
4925
|
62
|
|
|
|
|
167
|
$this->sortorder_check(); |
|
4926
|
|
|
|
|
|
|
} |
|
4927
|
|
|
|
|
|
|
|
|
4928
|
|
|
|
|
|
|
sub read_file_or_cache ## Read, cacheing if possible |
|
4929
|
|
|
|
|
|
|
{ |
|
4930
|
62
|
|
|
62
|
0
|
91
|
my $this = shift; |
|
4931
|
62
|
100
|
|
|
|
219
|
my $Params = (@_ == 1 ? {_FileName => $_[0]} : {@_}); |
|
4932
|
|
|
|
|
|
|
|
|
4933
|
62
|
|
|
|
|
117
|
my($FileName, $FieldList, $CacheOnRead, $CacheExtension, $CacheSubDir) = map {$this->getparam($Params, $_)} |
|
|
310
|
|
|
|
|
784
|
|
|
4934
|
|
|
|
|
|
|
qw(_FileName _FieldList _CacheOnRead _CacheExtension _CacheSubDir); |
|
4935
|
|
|
|
|
|
|
|
|
4936
|
62
|
|
|
|
|
106
|
my $Success; |
|
4937
|
|
|
|
|
|
|
|
|
4938
|
|
|
|
|
|
|
## If cacheing is turned off, just bail prematurely and treat this |
|
4939
|
|
|
|
|
|
|
## as a call to read_file(). |
|
4940
|
|
|
|
|
|
|
|
|
4941
|
62
|
100
|
|
|
|
163
|
return($this->read_file(%$Params)) unless $CacheOnRead; |
|
4942
|
|
|
|
|
|
|
|
|
4943
|
|
|
|
|
|
|
## Otherwise... check if cacheing is possible. |
|
4944
|
|
|
|
|
|
|
|
|
4945
|
|
|
|
|
|
|
## Calculate the cache file name. If it comes back empty, it |
|
4946
|
|
|
|
|
|
|
## means the cache directory probably could not be created, or the |
|
4947
|
|
|
|
|
|
|
## cache file itself either does not exist or could not be |
|
4948
|
|
|
|
|
|
|
## preflighted (either read or touched/deleted). |
|
4949
|
|
|
|
|
|
|
|
|
4950
|
51
|
|
|
|
|
185
|
my $CacheFileName = $this->prep_cache_file($FileName, $CacheExtension, $CacheSubDir); |
|
4951
|
|
|
|
|
|
|
|
|
4952
|
|
|
|
|
|
|
## If the cache file preflight failed, treat this as a regular |
|
4953
|
|
|
|
|
|
|
## read_file() without cacheing. |
|
4954
|
|
|
|
|
|
|
|
|
4955
|
51
|
50
|
|
|
|
141
|
return($this->read_file(%$Params)) unless length($CacheFileName); |
|
4956
|
|
|
|
|
|
|
|
|
4957
|
|
|
|
|
|
|
## At this point we believe we'll either be able to read or write |
|
4958
|
|
|
|
|
|
|
## the cache file as needed. |
|
4959
|
|
|
|
|
|
|
|
|
4960
|
|
|
|
|
|
|
## Try to read the cache if both files exist and the mod date is |
|
4961
|
|
|
|
|
|
|
## later. |
|
4962
|
|
|
|
|
|
|
|
|
4963
|
51
|
|
|
|
|
63
|
my $Data; |
|
4964
|
51
|
100
|
66
|
|
|
2501
|
if ((-e $FileName ) && |
|
|
|
|
100
|
|
|
|
|
|
4965
|
|
|
|
|
|
|
(-e $CacheFileName ) && |
|
4966
|
|
|
|
|
|
|
(((stat($CacheFileName))[9]) > ((stat($FileName))[9])) ) |
|
4967
|
|
|
|
|
|
|
{ |
|
4968
|
46
|
|
|
|
|
205
|
$this->progress("Thawing $CacheFileName..."); |
|
4969
|
46
|
|
|
|
|
237
|
$Data = &retrieve($CacheFileName); |
|
4970
|
|
|
|
|
|
|
|
|
4971
|
46
|
50
|
|
|
|
4512
|
$this-warn("Cache restore from $CacheFileName failed: $!"), unlink($CacheFileName) |
|
4972
|
|
|
|
|
|
|
unless defined ($Data); |
|
4973
|
|
|
|
|
|
|
} |
|
4974
|
|
|
|
|
|
|
|
|
4975
|
51
|
100
|
|
|
|
217
|
if (ref($Data) eq 'HASH') |
|
4976
|
|
|
|
|
|
|
{ |
|
4977
|
|
|
|
|
|
|
## Retrieval succeeded. |
|
4978
|
|
|
|
|
|
|
|
|
4979
|
|
|
|
|
|
|
## Verify that the data in the cache is usable. |
|
4980
|
|
|
|
|
|
|
|
|
4981
|
|
|
|
|
|
|
## First, check newline-encoding compatibility. |
|
4982
|
|
|
|
|
|
|
( |
|
4983
|
46
|
50
|
|
|
|
135
|
$this->warn("Abandoning cache due to incompatible newline encoding"), |
|
4984
|
|
|
|
|
|
|
unlink $CacheFileName, goto cache_failed) unless $Data->{_Newline} eq "\n"; |
|
4985
|
|
|
|
|
|
|
|
|
4986
|
|
|
|
|
|
|
## Simulate an actual read_file() using data |
|
4987
|
|
|
|
|
|
|
## from the cache instead of the original file. |
|
4988
|
|
|
|
|
|
|
|
|
4989
|
46
|
100
|
66
|
|
|
218
|
if ((ref($FieldList) eq 'ARRAY') && @$FieldList) |
|
|
|
100
|
|
|
|
|
|
|
4990
|
|
|
|
|
|
|
{ |
|
4991
|
|
|
|
|
|
|
## If $FieldList requests fields not found in the cache, |
|
4992
|
|
|
|
|
|
|
## we abandon (delete and maybe rewrite) the cache: maybe |
|
4993
|
|
|
|
|
|
|
## we previously cached a different subset of fields from |
|
4994
|
|
|
|
|
|
|
## a previous request and so the cache is no longer |
|
4995
|
|
|
|
|
|
|
## adequate. |
|
4996
|
|
|
|
|
|
|
|
|
4997
|
18
|
|
|
|
|
33
|
my $MissingFields = [grep {!exists($Data->{$_})} @$FieldList]; |
|
|
48
|
|
|
|
|
134
|
|
|
4998
|
|
|
|
|
|
|
( |
|
4999
|
|
|
|
|
|
|
## $this->warn("Abandoning cache due to change in requested field list"), |
|
5000
|
18
|
100
|
|
|
|
79
|
unlink $CacheFileName, goto cache_failed) if @$MissingFields; |
|
5001
|
|
|
|
|
|
|
|
|
5002
|
|
|
|
|
|
|
## If there was a _FieldList supplied in $Params or $this, |
|
5003
|
|
|
|
|
|
|
## we might need to omit any fields read from cache but |
|
5004
|
|
|
|
|
|
|
## not mentioned (just as read_file() would have done). |
|
5005
|
|
|
|
|
|
|
|
|
5006
|
16
|
|
|
|
|
113
|
my $FieldHash = {}; @$FieldHash{@$FieldList} = undef; |
|
|
16
|
|
|
|
|
53
|
|
|
5007
|
16
|
|
|
|
|
61
|
my $OmitFields = [grep {!exists($FieldHash->{$_})} grep {!/^_/} keys %$Data]; |
|
|
64
|
|
|
|
|
120
|
|
|
|
160
|
|
|
|
|
327
|
|
|
5008
|
|
|
|
|
|
|
|
|
5009
|
16
|
|
|
|
|
52
|
delete @$Data{@$OmitFields}; |
|
5010
|
|
|
|
|
|
|
|
|
5011
|
|
|
|
|
|
|
## Finally, also pare down _FieldList to only mention |
|
5012
|
|
|
|
|
|
|
## those fields that were desired.... |
|
5013
|
|
|
|
|
|
|
|
|
5014
|
16
|
|
|
|
|
26
|
my $AvailFields = $Data->{_FieldList}; |
|
5015
|
16
|
|
|
|
|
27
|
$Data->{_FieldList} = [grep {exists($Data->{$_})} @$FieldList]; |
|
|
42
|
|
|
|
|
105
|
|
|
5016
|
|
|
|
|
|
|
|
|
5017
|
|
|
|
|
|
|
## We might have ended up reading a subset of the |
|
5018
|
|
|
|
|
|
|
## available fields in the cache. (This is logically |
|
5019
|
|
|
|
|
|
|
## equivalent to reading a subset of the available fields |
|
5020
|
|
|
|
|
|
|
## in the real file.) If so, then change the value of |
|
5021
|
|
|
|
|
|
|
## $Data->{_Subset} to indicate that. |
|
5022
|
|
|
|
|
|
|
|
|
5023
|
16
|
|
100
|
|
|
80
|
my $GettingSubset = (@$OmitFields && 1 || 0); |
|
5024
|
16
|
|
66
|
|
|
92
|
$Data->{_Subset} ||= $GettingSubset; |
|
5025
|
|
|
|
|
|
|
} |
|
5026
|
|
|
|
|
|
|
elsif ($Data->{_Subset}) |
|
5027
|
|
|
|
|
|
|
{ |
|
5028
|
|
|
|
|
|
|
|
|
5029
|
|
|
|
|
|
|
## Conversely, if no field list was specified (hence all |
|
5030
|
|
|
|
|
|
|
## fields are desired), but a subset of fields has |
|
5031
|
|
|
|
|
|
|
## previously been cached, we have to delete / abandon the |
|
5032
|
|
|
|
|
|
|
## cache and re-read the file so we are getting all |
|
5033
|
|
|
|
|
|
|
## fields. |
|
5034
|
|
|
|
|
|
|
|
|
5035
|
|
|
|
|
|
|
( |
|
5036
|
|
|
|
|
|
|
## $this->warn("Abandoning partial cache due to request of full field list"), |
|
5037
|
1
|
|
|
|
|
20
|
unlink $CacheFileName, goto cache_failed); |
|
5038
|
|
|
|
|
|
|
} |
|
5039
|
|
|
|
|
|
|
|
|
5040
|
|
|
|
|
|
|
## Copy all elements from $Data, including possibly overridden |
|
5041
|
|
|
|
|
|
|
## _FieldList element if any, but excepting the cache-only |
|
5042
|
|
|
|
|
|
|
## _Newline element, into $this. |
|
5043
|
|
|
|
|
|
|
|
|
5044
|
43
|
|
|
|
|
102
|
delete $Data->{_Newline}; |
|
5045
|
43
|
|
|
|
|
312
|
@$this{keys %$Data} = values %$Data; |
|
5046
|
|
|
|
|
|
|
|
|
5047
|
|
|
|
|
|
|
## Set the file name to the name of the original (not the |
|
5048
|
|
|
|
|
|
|
## cache) file, just as read() would have done. |
|
5049
|
|
|
|
|
|
|
|
|
5050
|
43
|
|
|
|
|
125
|
$this->{_FileName} = $FileName; |
|
5051
|
|
|
|
|
|
|
|
|
5052
|
|
|
|
|
|
|
## Run the "read post-check" -- the same things we do in |
|
5053
|
|
|
|
|
|
|
## read_file() after completing the read process: Clean out |
|
5054
|
|
|
|
|
|
|
## _Selection and verify _SortOrder to ensure compatibility |
|
5055
|
|
|
|
|
|
|
## with current _FieldList. |
|
5056
|
|
|
|
|
|
|
|
|
5057
|
43
|
|
|
|
|
176
|
$this->read_postcheck(); |
|
5058
|
|
|
|
|
|
|
|
|
5059
|
43
|
|
|
|
|
148
|
$this->progress("Thawed $FileName."); |
|
5060
|
|
|
|
|
|
|
|
|
5061
|
43
|
|
|
|
|
100
|
$Success = 1; |
|
5062
|
43
|
|
|
|
|
621
|
goto done; ## Successful completion: we read from the cache. |
|
5063
|
|
|
|
|
|
|
} |
|
5064
|
|
|
|
|
|
|
|
|
5065
|
|
|
|
|
|
|
## Could not retrieve for whatever reason (maybe cache did not |
|
5066
|
|
|
|
|
|
|
## exist yet or was out of date or had to be abandoned). So just |
|
5067
|
|
|
|
|
|
|
## read normally and possibly write the cache. |
|
5068
|
|
|
|
|
|
|
|
|
5069
|
|
|
|
|
|
|
cache_failed: |
|
5070
|
|
|
|
|
|
|
{ |
|
5071
|
8
|
50
|
|
|
|
15
|
$Success = $this->read_file(%$Params) or goto done; |
|
|
8
|
|
|
|
|
70
|
|
|
5072
|
|
|
|
|
|
|
|
|
5073
|
|
|
|
|
|
|
## Now, having read successfully, we try to write the cache |
|
5074
|
|
|
|
|
|
|
## for next time. Writing the cache is optional; failing to |
|
5075
|
|
|
|
|
|
|
## write it is not a failure of the method. |
|
5076
|
|
|
|
|
|
|
|
|
5077
|
|
|
|
|
|
|
{ ## Code in this block may fail and that's OK. |
|
5078
|
|
|
|
|
|
|
|
|
5079
|
|
|
|
|
|
|
## First, pre-flight. |
|
5080
|
8
|
50
|
|
|
|
19
|
$this->warn("Cache file $CacheFileName cannot be created/overwritten: $!"), |
|
|
8
|
|
|
|
|
37
|
|
|
5081
|
|
|
|
|
|
|
goto done ## Successful completion. |
|
5082
|
|
|
|
|
|
|
unless $this->try_file_write($CacheFileName); |
|
5083
|
|
|
|
|
|
|
|
|
5084
|
|
|
|
|
|
|
## The data to be stored is: |
|
5085
|
|
|
|
|
|
|
|
|
5086
|
|
|
|
|
|
|
## 1) All data columns read by read_file() |
|
5087
|
|
|
|
|
|
|
## 2) Any parameters set by read_file() |
|
5088
|
|
|
|
|
|
|
## 3) _Subset param indicating partial fieldlist was read from file. |
|
5089
|
|
|
|
|
|
|
## 4) _Newline setting so we know if it's compatabile when read back. |
|
5090
|
|
|
|
|
|
|
|
|
5091
|
|
|
|
|
|
|
## No other parameters should be cached because we want a |
|
5092
|
|
|
|
|
|
|
## read from the cache to produce exactly the same result |
|
5093
|
|
|
|
|
|
|
## as a read from the file itself would have produced. |
|
5094
|
|
|
|
|
|
|
|
|
5095
|
|
|
|
|
|
|
## After a read, fieldlist() will contain the fields |
|
5096
|
|
|
|
|
|
|
## actually read, so cols_hash() WILL yield all the |
|
5097
|
|
|
|
|
|
|
## columns. |
|
5098
|
|
|
|
|
|
|
|
|
5099
|
8
|
|
|
|
|
39
|
my $Data = {( |
|
5100
|
|
|
|
|
|
|
## Refs to each column read by read_file() |
|
5101
|
|
|
|
|
|
|
%{ $this->cols_hash() }, |
|
5102
|
|
|
|
|
|
|
|
|
5103
|
|
|
|
|
|
|
## Other parameters set by read_file() |
|
5104
|
|
|
|
|
|
|
_FieldList => $this->{_FieldList }, |
|
5105
|
|
|
|
|
|
|
_LineEnding => $this->{_LineEnding}, |
|
5106
|
|
|
|
|
|
|
_FDelimiter => $this->{_FDelimiter}, |
|
5107
|
|
|
|
|
|
|
_HeaderRow => $this->{_HeaderRow }, |
|
5108
|
|
|
|
|
|
|
_Subset => $this->{_Subset }, |
|
5109
|
8
|
|
|
|
|
23
|
_Newline => "\n", |
|
5110
|
|
|
|
|
|
|
)}; |
|
5111
|
|
|
|
|
|
|
|
|
5112
|
8
|
50
|
|
|
|
48
|
$this->warn("Failed to cache $CacheFileName"), |
|
5113
|
|
|
|
|
|
|
unlink($CacheFileName), |
|
5114
|
|
|
|
|
|
|
goto done ## Successful completion. |
|
5115
|
|
|
|
|
|
|
unless $this->write_cache($Data, $CacheFileName); |
|
5116
|
8
|
|
|
|
|
235
|
chmod 0666, $CacheFileName; ## Liberal perms if possible. |
|
5117
|
|
|
|
|
|
|
} |
|
5118
|
|
|
|
|
|
|
|
|
5119
|
8
|
|
|
|
|
73
|
goto done; ## Successful completion: we read from the file & maybe saved cache. |
|
5120
|
|
|
|
|
|
|
} |
|
5121
|
|
|
|
|
|
|
|
|
5122
|
|
|
|
|
|
|
done: |
|
5123
|
51
|
|
|
|
|
317
|
return ($Success); |
|
5124
|
|
|
|
|
|
|
} |
|
5125
|
|
|
|
|
|
|
|
|
5126
|
|
|
|
|
|
|
=pod |
|
5127
|
|
|
|
|
|
|
|
|
5128
|
|
|
|
|
|
|
=head1 WRITING DATA FILES |
|
5129
|
|
|
|
|
|
|
|
|
5130
|
|
|
|
|
|
|
## Writing some or all data from table into a data file |
|
5131
|
|
|
|
|
|
|
|
|
5132
|
|
|
|
|
|
|
$t->write($Path) ## Simple calling convention |
|
5133
|
|
|
|
|
|
|
|
|
5134
|
|
|
|
|
|
|
$t->write( ## Named-parameter convention |
|
5135
|
|
|
|
|
|
|
|
|
5136
|
|
|
|
|
|
|
## Params that override params in the object if supplied... |
|
5137
|
|
|
|
|
|
|
|
|
5138
|
|
|
|
|
|
|
_FileName => $Path, ## "Base path"; see _WriteExtension |
|
5139
|
|
|
|
|
|
|
|
|
5140
|
|
|
|
|
|
|
_WriteExtension=> ".out",## Insert/append extension to _FileName |
|
5141
|
|
|
|
|
|
|
|
|
5142
|
|
|
|
|
|
|
_FieldList => [...], ## Fields to write; others ignored |
|
5143
|
|
|
|
|
|
|
_Selection => [...], ## Record (#s) to write; others ignored |
|
5144
|
|
|
|
|
|
|
|
|
5145
|
|
|
|
|
|
|
_HeaderRow => 0, ## Include header row in file |
|
5146
|
|
|
|
|
|
|
|
|
5147
|
|
|
|
|
|
|
_LineEnding => undef, ## Record delimiter (default is "\n") |
|
5148
|
|
|
|
|
|
|
_FDelimiter => undef, ## Field delimiter (default is comma) |
|
5149
|
|
|
|
|
|
|
|
|
5150
|
|
|
|
|
|
|
_ReturnMap => 1, ## Whether to encode internal returns |
|
5151
|
|
|
|
|
|
|
_ReturnEncoding=>"\x0B", ## How to encode returns |
|
5152
|
|
|
|
|
|
|
_MacRomanMap => undef, ## Whether/when to write Mac char set |
|
5153
|
|
|
|
|
|
|
|
|
5154
|
|
|
|
|
|
|
|
|
5155
|
|
|
|
|
|
|
_CacheOnWrite => 1, ## Enable saving cache after write() |
|
5156
|
|
|
|
|
|
|
_CacheExtension=> ".x", ## Extension to add to cache file name |
|
5157
|
|
|
|
|
|
|
_CacheSubDir => "", ## (Sub-)dir, if any, for cache files |
|
5158
|
|
|
|
|
|
|
|
|
5159
|
|
|
|
|
|
|
## Params specific to the read()/write() methods... |
|
5160
|
|
|
|
|
|
|
|
|
5161
|
|
|
|
|
|
|
_MaxRecords => 200, ## Limit on how many records to write |
|
5162
|
|
|
|
|
|
|
) |
|
5163
|
|
|
|
|
|
|
|
|
5164
|
|
|
|
|
|
|
$t->write_file() ## Internal: same as write(); ignores cacheing |
|
5165
|
|
|
|
|
|
|
|
|
5166
|
|
|
|
|
|
|
write() writes a Merge, CSV, or Tab-delimited file. |
|
5167
|
|
|
|
|
|
|
|
|
5168
|
|
|
|
|
|
|
It uses parameters as described above. Any parameters not supplied |
|
5169
|
|
|
|
|
|
|
will be gotten from the object. |
|
5170
|
|
|
|
|
|
|
|
|
5171
|
|
|
|
|
|
|
Using the simple calling convention, just pass it a path which will |
|
5172
|
|
|
|
|
|
|
override the _FileName parameter in the object, if any. |
|
5173
|
|
|
|
|
|
|
|
|
5174
|
|
|
|
|
|
|
All other parameters will come from the object (or will be defaulted |
|
5175
|
|
|
|
|
|
|
if absent). |
|
5176
|
|
|
|
|
|
|
|
|
5177
|
|
|
|
|
|
|
If no _FileName or path is specified, or it is the special string "-" |
|
5178
|
|
|
|
|
|
|
(dash), then the file handle \ * STDOUT will be used by default (and |
|
5179
|
|
|
|
|
|
|
you could redirect it to a file). You can supply any open file handle |
|
5180
|
|
|
|
|
|
|
or IO::File object of your own for the _FileName parameter. |
|
5181
|
|
|
|
|
|
|
|
|
5182
|
|
|
|
|
|
|
If write() is writing to a file handle by default or because you |
|
5183
|
|
|
|
|
|
|
specified one, then no write-cacheing will occur. |
|
5184
|
|
|
|
|
|
|
|
|
5185
|
|
|
|
|
|
|
To specify additional parameters or override any parameters in the |
|
5186
|
|
|
|
|
|
|
object while reading, use the named-parameter calling convention. |
|
5187
|
|
|
|
|
|
|
|
|
5188
|
|
|
|
|
|
|
If the object's data was previously filled in using new() or read(), |
|
5189
|
|
|
|
|
|
|
then the file format parameters from the previous read() method will |
|
5190
|
|
|
|
|
|
|
still be in the object, so the format of the written file will |
|
5191
|
|
|
|
|
|
|
correspond as much as possible to the file that was read(). |
|
5192
|
|
|
|
|
|
|
|
|
5193
|
|
|
|
|
|
|
write() returns the path name of the file actually written, or the |
|
5194
|
|
|
|
|
|
|
empty string if a supplied file handle or STDOUT was written to, or |
|
5195
|
|
|
|
|
|
|
undef if there was a failure. |
|
5196
|
|
|
|
|
|
|
|
|
5197
|
|
|
|
|
|
|
If write() returns undef, then it will also have set the _ErrorMsg |
|
5198
|
|
|
|
|
|
|
parameter in the object. |
|
5199
|
|
|
|
|
|
|
|
|
5200
|
|
|
|
|
|
|
write() never modifies any data in the object itself. |
|
5201
|
|
|
|
|
|
|
|
|
5202
|
|
|
|
|
|
|
Consequently, if you specify a _FieldList or a _Selection, only those |
|
5203
|
|
|
|
|
|
|
fields or records will be written, but the corresponding parameters in |
|
5204
|
|
|
|
|
|
|
the object itself will be left untouched. |
|
5205
|
|
|
|
|
|
|
|
|
5206
|
|
|
|
|
|
|
=head2 How write() calculates the Path |
|
5207
|
|
|
|
|
|
|
|
|
5208
|
|
|
|
|
|
|
The _FileName parameter is shared with the read() method. This |
|
5209
|
|
|
|
|
|
|
parameter is set by read() and may be overridden when calling write(). |
|
5210
|
|
|
|
|
|
|
|
|
5211
|
|
|
|
|
|
|
In the base implementation of Data::CTable, write() will try not to |
|
5212
|
|
|
|
|
|
|
overwrite the same file that was read, which could possibly cause data |
|
5213
|
|
|
|
|
|
|
loss. |
|
5214
|
|
|
|
|
|
|
|
|
5215
|
|
|
|
|
|
|
To avoid this, it does not use the _FileName parameter directly. |
|
5216
|
|
|
|
|
|
|
Instead, it starts with _FileName and inserts or appends the value of |
|
5217
|
|
|
|
|
|
|
the _WriteExtension parameter (which defaults to ".out") into the file |
|
5218
|
|
|
|
|
|
|
name before writing. |
|
5219
|
|
|
|
|
|
|
|
|
5220
|
|
|
|
|
|
|
If the _FileName already has an extension at the end, write() will |
|
5221
|
|
|
|
|
|
|
place the _WriteExtension BEFORE the final extension; otherwise the |
|
5222
|
|
|
|
|
|
|
_WriteExtension will be placed at the end of the _FileName. |
|
5223
|
|
|
|
|
|
|
|
|
5224
|
|
|
|
|
|
|
For example: |
|
5225
|
|
|
|
|
|
|
|
|
5226
|
|
|
|
|
|
|
Foobar.txt ==> Foobar.out.txt |
|
5227
|
|
|
|
|
|
|
Foobar.merge.txt ==> Foobar.merge.out.txt |
|
5228
|
|
|
|
|
|
|
My_Merge_Data ==> My_Merge_Data.out |
|
5229
|
|
|
|
|
|
|
|
|
5230
|
|
|
|
|
|
|
If you DON'T want write() to add a _WriteExtension to _FileName before |
|
5231
|
|
|
|
|
|
|
it writes the file, then you must set _WriteExtension to empty/undef |
|
5232
|
|
|
|
|
|
|
either in the object or when calling write(). Or, you could make a |
|
5233
|
|
|
|
|
|
|
subclass that initializes _WriteExtension to be empty. If |
|
5234
|
|
|
|
|
|
|
_WriteExtension is empty, then _FileName will be used exactly, which |
|
5235
|
|
|
|
|
|
|
may result in overwriting the original data file. |
|
5236
|
|
|
|
|
|
|
|
|
5237
|
|
|
|
|
|
|
Remember: write() returns the path name it actually used to |
|
5238
|
|
|
|
|
|
|
successfully write the file. Just as with read(), if the _FileName |
|
5239
|
|
|
|
|
|
|
you specified did not have a path, then write() will prepend a path |
|
5240
|
|
|
|
|
|
|
component indicating "current directory" (e.g. "./" on Unix) and this |
|
5241
|
|
|
|
|
|
|
will be part of the return value. |
|
5242
|
|
|
|
|
|
|
|
|
5243
|
|
|
|
|
|
|
|
|
5244
|
|
|
|
|
|
|
=head2 Cacheing with write() |
|
5245
|
|
|
|
|
|
|
|
|
5246
|
|
|
|
|
|
|
By default, Data::CTable only creates a cached version of a file when |
|
5247
|
|
|
|
|
|
|
it reads that file for the first time (on the assumption that it will |
|
5248
|
|
|
|
|
|
|
need to read the file again more often than the file's data will |
|
5249
|
|
|
|
|
|
|
change.) |
|
5250
|
|
|
|
|
|
|
|
|
5251
|
|
|
|
|
|
|
But by default, it does not create a cached version of a file when |
|
5252
|
|
|
|
|
|
|
writing it, on the assumption that the current program probably will |
|
5253
|
|
|
|
|
|
|
not be re-reading the written file and any other program that wants to |
|
5254
|
|
|
|
|
|
|
read it can cache it at that time. |
|
5255
|
|
|
|
|
|
|
|
|
5256
|
|
|
|
|
|
|
However, if you want write() to create a cache for its output file, it |
|
5257
|
|
|
|
|
|
|
is much faster to create it on write() than waiting for the next |
|
5258
|
|
|
|
|
|
|
read() because the next read() will be able to use the cache the very |
|
5259
|
|
|
|
|
|
|
first time. |
|
5260
|
|
|
|
|
|
|
|
|
5261
|
|
|
|
|
|
|
To enable write-cacheing, set _CacheOnWrite to true. Then, after the |
|
5262
|
|
|
|
|
|
|
write() successfully completes (and only if it does), the cached |
|
5263
|
|
|
|
|
|
|
version will be written. |
|
5264
|
|
|
|
|
|
|
|
|
5265
|
|
|
|
|
|
|
=head1 FORMATTED TABLES (Using Data::ShowTable) |
|
5266
|
|
|
|
|
|
|
|
|
5267
|
|
|
|
|
|
|
## Get formatted data in memory |
|
5268
|
|
|
|
|
|
|
|
|
5269
|
|
|
|
|
|
|
my $StringRef = $t->format(); ## Format same data as write() |
|
5270
|
|
|
|
|
|
|
my $StringRef = $t->format(10); ## Limit records to 10 |
|
5271
|
|
|
|
|
|
|
my $StringRef = $t->format(...); ## Specify arbitrary params |
|
5272
|
|
|
|
|
|
|
print $$StringRef; |
|
5273
|
|
|
|
|
|
|
|
|
5274
|
|
|
|
|
|
|
## Write formatted table to file or terminal |
|
5275
|
|
|
|
|
|
|
|
|
5276
|
|
|
|
|
|
|
$t->out($Dest, ....);## $Dest as follows; other params to format() |
|
5277
|
|
|
|
|
|
|
$t->out($Dest, 10, ....) ## Limit recs to 10; params to format() |
|
5278
|
|
|
|
|
|
|
|
|
5279
|
|
|
|
|
|
|
$t->out() ## print formatted data to STDOUT |
|
5280
|
|
|
|
|
|
|
$t->out(\*STDERR) ## print to STDERR (or any named handle) |
|
5281
|
|
|
|
|
|
|
$t->out("Foo.txt") ## print to any path (file to be overwritten) |
|
5282
|
|
|
|
|
|
|
$t->out($FileObj) ## print to any object with a print() method |
|
5283
|
|
|
|
|
|
|
|
|
5284
|
|
|
|
|
|
|
out() takes a first argument specifying a destination for the output, |
|
5285
|
|
|
|
|
|
|
then passes all other arguments to format() to create a nice-looking |
|
5286
|
|
|
|
|
|
|
table designed to be human-readable; it takes the resulting buffer and |
|
5287
|
|
|
|
|
|
|
print()s it to the destination you specified. |
|
5288
|
|
|
|
|
|
|
|
|
5289
|
|
|
|
|
|
|
Sample output: |
|
5290
|
|
|
|
|
|
|
|
|
5291
|
|
|
|
|
|
|
+-------+------+-----+-------+ |
|
5292
|
|
|
|
|
|
|
| First | Last | Age | State | |
|
5293
|
|
|
|
|
|
|
+-------+------+-----+-------+ |
|
5294
|
|
|
|
|
|
|
| Chris | Zack | 43 | CA | |
|
5295
|
|
|
|
|
|
|
| Marco | Bart | 22 | NV | |
|
5296
|
|
|
|
|
|
|
| Pearl | Muth | 15 | HI | |
|
5297
|
|
|
|
|
|
|
+-------+------+-----+-------+ |
|
5298
|
|
|
|
|
|
|
|
|
5299
|
|
|
|
|
|
|
(Note extra space character before each line.) |
|
5300
|
|
|
|
|
|
|
|
|
5301
|
|
|
|
|
|
|
The destination may be a file handle (default if undef is \*STDOUT), a |
|
5302
|
|
|
|
|
|
|
string (treated as a path to be overwritten), or any object that has a |
|
5303
|
|
|
|
|
|
|
print() method, especially an object of type IO::File. |
|
5304
|
|
|
|
|
|
|
|
|
5305
|
|
|
|
|
|
|
The main purpose of out() is to give you a quick way to dump a table |
|
5306
|
|
|
|
|
|
|
when debugging. out() calls format() to create the output, so read |
|
5307
|
|
|
|
|
|
|
on... |
|
5308
|
|
|
|
|
|
|
|
|
5309
|
|
|
|
|
|
|
format() produces a human-readable version of a table, in the form of |
|
5310
|
|
|
|
|
|
|
a reference to a string buffer (which could be very large), and |
|
5311
|
|
|
|
|
|
|
returns the buffer to you. Dereference the resulting string reference |
|
5312
|
|
|
|
|
|
|
before using. |
|
5313
|
|
|
|
|
|
|
|
|
5314
|
|
|
|
|
|
|
If format() is given one argument, that argument is the _MaxRecords |
|
5315
|
|
|
|
|
|
|
parameter, which limits the length of the output. |
|
5316
|
|
|
|
|
|
|
|
|
5317
|
|
|
|
|
|
|
Otherwise, format() takes the following named-parameter arguments, |
|
5318
|
|
|
|
|
|
|
which can optionally override the corresponding parameters, if any, in |
|
5319
|
|
|
|
|
|
|
the object: |
|
5320
|
|
|
|
|
|
|
|
|
5321
|
|
|
|
|
|
|
_FieldList ## Fields to include in table |
|
5322
|
|
|
|
|
|
|
_Selection ## Records to be included, in order |
|
5323
|
|
|
|
|
|
|
|
|
5324
|
|
|
|
|
|
|
_SortSpecs ## SortType controls number formatting |
|
5325
|
|
|
|
|
|
|
_DefaultSortType |
|
5326
|
|
|
|
|
|
|
|
|
5327
|
|
|
|
|
|
|
_MaxRecords ## Limit number of records output |
|
5328
|
|
|
|
|
|
|
|
|
5329
|
|
|
|
|
|
|
_MaxWidth ## Limit width of per-col. data in printout |
|
5330
|
|
|
|
|
|
|
|
|
5331
|
|
|
|
|
|
|
format() will obey _MaxRecords, if you'd like to limit the number of |
|
5332
|
|
|
|
|
|
|
rows to be output. _MaxRecords can also be a single argument to |
|
5333
|
|
|
|
|
|
|
format(), or a second argument to out() if no other parameters are |
|
5334
|
|
|
|
|
|
|
passed. |
|
5335
|
|
|
|
|
|
|
|
|
5336
|
|
|
|
|
|
|
format() also recognizes the _SortSpecs->{SortType} and |
|
5337
|
|
|
|
|
|
|
_DefaultSortType parameters to help it determine the data types of the |
|
5338
|
|
|
|
|
|
|
fields being formatted. Fields of type "Number" are output as |
|
5339
|
|
|
|
|
|
|
right-justified floats; "Integer" or "Boolean" are output as |
|
5340
|
|
|
|
|
|
|
right-justified integers, and all others (including the default: |
|
5341
|
|
|
|
|
|
|
String) are output as left-justified strings. |
|
5342
|
|
|
|
|
|
|
|
|
5343
|
|
|
|
|
|
|
In addition, there is one parameter uniquely supported by format() and |
|
5344
|
|
|
|
|
|
|
out(): |
|
5345
|
|
|
|
|
|
|
|
|
5346
|
|
|
|
|
|
|
=over 4 |
|
5347
|
|
|
|
|
|
|
|
|
5348
|
|
|
|
|
|
|
=item _MaxWidth ||= 15; |
|
5349
|
|
|
|
|
|
|
|
|
5350
|
|
|
|
|
|
|
=back |
|
5351
|
|
|
|
|
|
|
|
|
5352
|
|
|
|
|
|
|
_MaxWidth specifies the maximum width of columns. If unspecifed, this |
|
5353
|
|
|
|
|
|
|
will be 15; the minimum legal value is 2. Each column may actually |
|
5354
|
|
|
|
|
|
|
take up 3 more characters than _MaxWidth due to divider characters. |
|
5355
|
|
|
|
|
|
|
|
|
5356
|
|
|
|
|
|
|
The data to be output will be examined, and only the necessary width |
|
5357
|
|
|
|
|
|
|
will be used for each column. _MaxWidth just limits the upper bound, |
|
5358
|
|
|
|
|
|
|
not the lower. |
|
5359
|
|
|
|
|
|
|
|
|
5360
|
|
|
|
|
|
|
Data values that are too wide to fit in _MaxWidth spaces will be |
|
5361
|
|
|
|
|
|
|
truncated and the tilde character "~" will appear as the last |
|
5362
|
|
|
|
|
|
|
character to indicate the truncation. |
|
5363
|
|
|
|
|
|
|
|
|
5364
|
|
|
|
|
|
|
Data values with internal returns will have the return characters |
|
5365
|
|
|
|
|
|
|
mapped to slashes for display. |
|
5366
|
|
|
|
|
|
|
|
|
5367
|
|
|
|
|
|
|
format() and out() will NOT wrap entries onto a second line, |
|
5368
|
|
|
|
|
|
|
like you may have seen Data::ShowTable::ShowBoxTable do in some cases. |
|
5369
|
|
|
|
|
|
|
Each record will get exactly one line. |
|
5370
|
|
|
|
|
|
|
|
|
5371
|
|
|
|
|
|
|
format() and out() ignore the _HeaderRow parameter. A header |
|
5372
|
|
|
|
|
|
|
row showing the field names is always printed. |
|
5373
|
|
|
|
|
|
|
|
|
5374
|
|
|
|
|
|
|
format() and out() make no attempt to map upper-ascii characters from |
|
5375
|
|
|
|
|
|
|
or to any particular dataset. The encoding used in memory (generally |
|
5376
|
|
|
|
|
|
|
ISO 8859-1 by default) is the encoding used in the output. If you |
|
5377
|
|
|
|
|
|
|
want to manipulate the encoding, first call format(), then change the |
|
5378
|
|
|
|
|
|
|
encoding, then format the resulting table. |
|
5379
|
|
|
|
|
|
|
|
|
5380
|
|
|
|
|
|
|
=cut |
|
5381
|
|
|
|
|
|
|
|
|
5382
|
|
|
|
|
|
|
sub write ## Write, cacheing afterward if possible |
|
5383
|
|
|
|
|
|
|
{ |
|
5384
|
8
|
|
|
8
|
1
|
122
|
my $this = shift; |
|
5385
|
8
|
|
|
|
|
26
|
return($this->write_file_and_cache(@_)); |
|
5386
|
|
|
|
|
|
|
} |
|
5387
|
|
|
|
|
|
|
|
|
5388
|
|
|
|
|
|
|
sub write_file_and_cache ## Write, cacheing afterward if possible |
|
5389
|
|
|
|
|
|
|
{ |
|
5390
|
8
|
|
|
8
|
0
|
14
|
my $this = shift; |
|
5391
|
|
|
|
|
|
|
|
|
5392
|
8
|
50
|
|
|
|
34
|
my $Params = (@_ == 1 ? {_FileName => $_[0]} : {@_}); |
|
5393
|
|
|
|
|
|
|
|
|
5394
|
8
|
|
|
|
|
20
|
my($FieldList, $LineEnding, $FDelimiter, $HeaderRow, $CacheOnWrite, $CacheExtension, $CacheSubDir) = map {$this->getparam($Params, $_)} |
|
|
56
|
|
|
|
|
99
|
|
|
5395
|
|
|
|
|
|
|
qw($FieldList _LineEnding _FDelimiter _HeaderRow _CacheOnWrite _CacheExtension _CacheSubDir); |
|
5396
|
|
|
|
|
|
|
|
|
5397
|
|
|
|
|
|
|
## First write the file and go to done if it failed. |
|
5398
|
8
|
50
|
|
|
|
36
|
my $WriteFileName = $this->write_file(@_) or goto done; |
|
5399
|
|
|
|
|
|
|
|
|
5400
|
|
|
|
|
|
|
## Only try to cache if we got a non-empty $WriteFileName back. |
|
5401
|
|
|
|
|
|
|
## We won't get a name back in the case where we wrote directly to |
|
5402
|
|
|
|
|
|
|
## an open file handle. |
|
5403
|
|
|
|
|
|
|
|
|
5404
|
8
|
50
|
|
|
|
21
|
goto done unless $WriteFileName; |
|
5405
|
|
|
|
|
|
|
|
|
5406
|
|
|
|
|
|
|
## Only try to cache if $CacheOnWrite has been turned ON. |
|
5407
|
8
|
50
|
|
|
|
566
|
goto done unless $CacheOnWrite; |
|
5408
|
|
|
|
|
|
|
|
|
5409
|
|
|
|
|
|
|
## Now, having written successfully, we try to write the cache for |
|
5410
|
|
|
|
|
|
|
## next time. Writing the cache is always optional; failing to |
|
5411
|
|
|
|
|
|
|
## write it is not a failure of this method. Consequently, any |
|
5412
|
|
|
|
|
|
|
## "goto done" statements beyond this point will still result in a |
|
5413
|
|
|
|
|
|
|
## successful outcome since $WriteFileName will have a value. |
|
5414
|
|
|
|
|
|
|
|
|
5415
|
|
|
|
|
|
|
## Calculate the name of the cache file and fail the directory |
|
5416
|
|
|
|
|
|
|
## creation fails. prep_cache_file will have generated a warning |
|
5417
|
|
|
|
|
|
|
## if an attempt to create needed subdirectories has failed. |
|
5418
|
|
|
|
|
|
|
|
|
5419
|
0
|
0
|
|
|
|
0
|
my $CacheFileName = $this->prep_cache_file($WriteFileName, $CacheExtension, $CacheSubDir) |
|
5420
|
|
|
|
|
|
|
or goto done; |
|
5421
|
|
|
|
|
|
|
|
|
5422
|
|
|
|
|
|
|
## Pre-flight the cache file for writing. |
|
5423
|
0
|
0
|
|
|
|
0
|
$this->warn("Cache file $CacheFileName cannot be created/overwritten: $!"), |
|
5424
|
|
|
|
|
|
|
goto done ## Successful completion. |
|
5425
|
|
|
|
|
|
|
unless $this->try_file_write($CacheFileName); |
|
5426
|
|
|
|
|
|
|
|
|
5427
|
|
|
|
|
|
|
## The data to be stored is: |
|
5428
|
|
|
|
|
|
|
|
|
5429
|
|
|
|
|
|
|
## 1) All data columns written by write_file() |
|
5430
|
|
|
|
|
|
|
## 2) Any file format parameters used by write_file() |
|
5431
|
|
|
|
|
|
|
|
|
5432
|
|
|
|
|
|
|
## Calculate the main writing-related parameters using the same |
|
5433
|
|
|
|
|
|
|
## logic that write_file() uses... |
|
5434
|
|
|
|
|
|
|
|
|
5435
|
|
|
|
|
|
|
## Default for FieldList is all fields. |
|
5436
|
0
|
|
0
|
|
|
0
|
$FieldList ||= $this->fieldlist(); |
|
5437
|
|
|
|
|
|
|
|
|
5438
|
|
|
|
|
|
|
## Convert from optional "dos", "mac", "unix" symbolic values. |
|
5439
|
0
|
|
|
|
|
0
|
$LineEnding = $this->lineending_string($LineEnding); |
|
5440
|
|
|
|
|
|
|
|
|
5441
|
|
|
|
|
|
|
## Default for LineEnding is "\n" (CR on Mac; LF on Unix; CR/LF on DOS) |
|
5442
|
0
|
0
|
|
|
|
0
|
$LineEnding = "\n" unless length($LineEnding); |
|
5443
|
|
|
|
|
|
|
|
|
5444
|
|
|
|
|
|
|
## Default for FDelimiter is comma |
|
5445
|
0
|
0
|
|
|
|
0
|
$FDelimiter = ',' unless length($FDelimiter); |
|
5446
|
|
|
|
|
|
|
|
|
5447
|
|
|
|
|
|
|
## Default for HeaderRow is true. |
|
5448
|
0
|
0
|
|
|
|
0
|
$HeaderRow = 1 unless defined($HeaderRow); |
|
5449
|
|
|
|
|
|
|
|
|
5450
|
|
|
|
|
|
|
## No other parameters should be cached because we want a |
|
5451
|
|
|
|
|
|
|
## read from the cache to produce exactly the same result |
|
5452
|
|
|
|
|
|
|
## as a read from the file itself would have produced. |
|
5453
|
|
|
|
|
|
|
|
|
5454
|
0
|
|
|
|
|
0
|
my $Data = {( |
|
5455
|
|
|
|
|
|
|
## Refs to each column written |
|
5456
|
0
|
|
0
|
|
|
0
|
%{ $this->cols_hash($FieldList)}, |
|
5457
|
|
|
|
|
|
|
|
|
5458
|
|
|
|
|
|
|
## Other relevant file-format parameters |
|
5459
|
|
|
|
|
|
|
_FieldList => $FieldList, |
|
5460
|
|
|
|
|
|
|
_LineEnding => $LineEnding, |
|
5461
|
|
|
|
|
|
|
_FDelimiter => $FDelimiter, |
|
5462
|
|
|
|
|
|
|
_HeaderRow => $HeaderRow, |
|
5463
|
|
|
|
|
|
|
_Subset => $this->{_Subset} || 0, |
|
5464
|
|
|
|
|
|
|
_Newline => "\n", |
|
5465
|
|
|
|
|
|
|
|
|
5466
|
|
|
|
|
|
|
## We don't need to save _ReturnMap and |
|
5467
|
|
|
|
|
|
|
## _ReturnEncoding because those only are relevant |
|
5468
|
|
|
|
|
|
|
## when reading physical files. Cached data has the |
|
5469
|
|
|
|
|
|
|
## return chars already encoded as returns. |
|
5470
|
|
|
|
|
|
|
|
|
5471
|
|
|
|
|
|
|
)}; |
|
5472
|
|
|
|
|
|
|
|
|
5473
|
0
|
0
|
|
|
|
0
|
$this->warn("Failed to cache $CacheFileName"), |
|
5474
|
|
|
|
|
|
|
unlink($CacheFileName), ## Delete cache if failure |
|
5475
|
|
|
|
|
|
|
goto done ## Successful completion. |
|
5476
|
|
|
|
|
|
|
unless $this->write_cache($Data, $CacheFileName); |
|
5477
|
0
|
|
|
|
|
0
|
chmod 0666, $CacheFileName; ## Liberal perms if possible. |
|
5478
|
|
|
|
|
|
|
|
|
5479
|
8
|
|
|
|
|
36
|
done: |
|
5480
|
|
|
|
|
|
|
return($WriteFileName); |
|
5481
|
|
|
|
|
|
|
} |
|
5482
|
|
|
|
|
|
|
|
|
5483
|
|
|
|
|
|
|
sub write_cache |
|
5484
|
|
|
|
|
|
|
{ |
|
5485
|
8
|
|
|
8
|
0
|
17
|
my $this = shift; |
|
5486
|
8
|
|
|
|
|
15
|
my ($Data, $CacheFileName) = @_; |
|
5487
|
|
|
|
|
|
|
|
|
5488
|
8
|
|
|
|
|
34
|
$this->progress("Storing $CacheFileName..."); |
|
5489
|
|
|
|
|
|
|
|
|
5490
|
8
|
|
|
|
|
42
|
my $Success = nstore($Data, $CacheFileName); |
|
5491
|
|
|
|
|
|
|
|
|
5492
|
8
|
50
|
|
|
|
1641
|
$this->progress("Stored $CacheFileName.") if $Success; |
|
5493
|
|
|
|
|
|
|
|
|
5494
|
8
|
|
|
|
|
36
|
done: |
|
5495
|
|
|
|
|
|
|
return($Success); |
|
5496
|
|
|
|
|
|
|
} |
|
5497
|
|
|
|
|
|
|
|
|
5498
|
|
|
|
|
|
|
sub write_file ## Just write; don't worry about cacheing |
|
5499
|
|
|
|
|
|
|
{ |
|
5500
|
8
|
|
|
8
|
0
|
14
|
my $this = shift; |
|
5501
|
8
|
50
|
|
|
|
31
|
my $Params = (@_ == 1 ? {_FileName => $_[0]} : {@_}); |
|
5502
|
|
|
|
|
|
|
|
|
5503
|
96
|
|
|
|
|
267
|
my($FileName, $FieldList, $Selection, $MaxRecords, $LineEnding, $FDelimiter, $QuoteFields, $ReturnMap, $ReturnEncoding, $MacRomanMap, $HeaderRow, $WriteExtension) |
|
5504
|
8
|
|
|
|
|
23
|
= map {$this->getparam($Params, $_)} |
|
5505
|
|
|
|
|
|
|
qw(_FileName _FieldList _Selection _MaxRecords _LineEnding _FDelimiter _QuoteFields _ReturnMap _ReturnEncoding _MacRomanMap _HeaderRow _WriteExtension); |
|
5506
|
|
|
|
|
|
|
|
|
5507
|
8
|
|
|
|
|
17
|
my $Success; |
|
5508
|
|
|
|
|
|
|
|
|
5509
|
8
|
|
|
|
|
27
|
$this->{_ErrorMsg} = ""; |
|
5510
|
|
|
|
|
|
|
|
|
5511
|
|
|
|
|
|
|
## if FileName is unspecified, or is the single character "-", |
|
5512
|
|
|
|
|
|
|
## then default to STDOUT. |
|
5513
|
8
|
50
|
|
|
|
42
|
$FileName = \ *STDOUT if ($FileName =~ /^-?$/); |
|
5514
|
|
|
|
|
|
|
|
|
5515
|
|
|
|
|
|
|
## If we have a regular file handle, bless it into IO::File. |
|
5516
|
8
|
50
|
|
|
|
30
|
$FileName = bless ($FileName, 'IO::File') if ref($FileName) =~ /(HANDLE)|(GLOB)/; |
|
5517
|
|
|
|
|
|
|
|
|
5518
|
|
|
|
|
|
|
## If we have a file handle either passed or constructed, make note of that fact. |
|
5519
|
8
|
|
|
|
|
18
|
my $GotHandle = ref($FileName) eq 'IO::File'; |
|
5520
|
|
|
|
|
|
|
|
|
5521
|
8
|
50
|
33
|
|
|
6343
|
$this->{_ErrorMsg} = "FileName must be specified for write()", goto done |
|
5522
|
|
|
|
|
|
|
unless $GotHandle or length($FileName); |
|
5523
|
|
|
|
|
|
|
|
|
5524
|
|
|
|
|
|
|
## Default for FieldList is all fields. |
|
5525
|
8
|
|
33
|
|
|
20
|
$FieldList ||= $this->fieldlist(); |
|
5526
|
|
|
|
|
|
|
|
|
5527
|
|
|
|
|
|
|
## Default for Selection is all records. |
|
5528
|
8
|
|
33
|
|
|
38
|
$Selection ||= $this->selection(); |
|
5529
|
|
|
|
|
|
|
|
|
5530
|
|
|
|
|
|
|
## Default for MaxRecords is 0 (meaning write all records) |
|
5531
|
8
|
50
|
|
|
|
27
|
$MaxRecords = 0 unless (int($MaxRecords) == $MaxRecords); |
|
5532
|
|
|
|
|
|
|
|
|
5533
|
|
|
|
|
|
|
## Convert from optional "dos", "mac", "unix" symbolic values. |
|
5534
|
8
|
|
|
|
|
20
|
$LineEnding = $this->lineending_string($LineEnding); |
|
5535
|
|
|
|
|
|
|
|
|
5536
|
|
|
|
|
|
|
## Default for LineEnding is "\n" (CR on Mac; LF on Unix; CR/LF on DOS) |
|
5537
|
8
|
50
|
|
|
|
30
|
$LineEnding = "\n" unless length($LineEnding); |
|
5538
|
|
|
|
|
|
|
|
|
5539
|
|
|
|
|
|
|
## Default for FDelimiter is comma |
|
5540
|
8
|
50
|
|
|
|
22
|
$FDelimiter = ',' unless length($FDelimiter); |
|
5541
|
|
|
|
|
|
|
|
|
5542
|
|
|
|
|
|
|
## Default for QuoteFields is undef (auto) |
|
5543
|
8
|
50
|
|
|
|
81
|
$QuoteFields = undef unless defined($QuoteFields); |
|
5544
|
|
|
|
|
|
|
|
|
5545
|
|
|
|
|
|
|
## "QuoteCheck" mode means check each field -- this is the "auto" |
|
5546
|
|
|
|
|
|
|
## mode that kicks in when _QuoteFields is undef. |
|
5547
|
8
|
|
|
|
|
13
|
my $QuoteCheck = (!defined($QuoteFields)); |
|
5548
|
|
|
|
|
|
|
|
|
5549
|
|
|
|
|
|
|
## Default for ReturnMap is true. |
|
5550
|
8
|
50
|
|
|
|
22
|
$ReturnMap = 1 unless defined($ReturnMap); |
|
5551
|
|
|
|
|
|
|
|
|
5552
|
|
|
|
|
|
|
## Default for MacRomanMap is undef ("Auto"); |
|
5553
|
8
|
50
|
|
|
|
17
|
$MacRomanMap = undef unless defined($MacRomanMap); |
|
5554
|
|
|
|
|
|
|
|
|
5555
|
|
|
|
|
|
|
## DoMacMapping is the actual setting for auto charset mapping |
|
5556
|
8
|
|
66
|
|
|
54
|
my $DoMacMapping = |
|
5557
|
|
|
|
|
|
|
((!defined($MacRomanMap) && ($LineEnding eq "\x0D")) || ## Auto |
|
5558
|
|
|
|
|
|
|
($MacRomanMap)); ## On |
|
5559
|
|
|
|
|
|
|
|
|
5560
|
8
|
100
|
|
|
|
28
|
$this->progress("Will convert upper-ascii characters if any, from ISO-8859-1 to Mac Roman.") if $DoMacMapping; |
|
5561
|
|
|
|
|
|
|
|
|
5562
|
|
|
|
|
|
|
## Default for ReturnEncoding is "\x0B" (control-K; ASCII 11) |
|
5563
|
8
|
50
|
|
|
|
22
|
$ReturnEncoding = "\x0B" unless length($ReturnEncoding); |
|
5564
|
|
|
|
|
|
|
|
|
5565
|
|
|
|
|
|
|
## Default for HeaderRow is true. |
|
5566
|
8
|
50
|
|
|
|
17
|
$HeaderRow = 1 unless defined($HeaderRow); |
|
5567
|
|
|
|
|
|
|
|
|
5568
|
|
|
|
|
|
|
## Default for $WriteExtension is "" (none) -- meaning use exact $FileName |
|
5569
|
8
|
50
|
|
|
|
18
|
$WriteExtension = "" unless defined($WriteExtension); |
|
5570
|
|
|
|
|
|
|
|
|
5571
|
|
|
|
|
|
|
## Get a hash of fields actually present... |
|
5572
|
|
|
|
|
|
|
|
|
5573
|
8
|
|
|
|
|
22
|
my $AllFields = $this->fieldlist_all(); |
|
5574
|
8
|
|
|
|
|
14
|
my $AllFieldsHash = {}; @$AllFieldsHash{@$AllFields} = undef; |
|
|
8
|
|
|
|
|
32
|
|
|
5575
|
|
|
|
|
|
|
|
|
5576
|
|
|
|
|
|
|
## Cull $FieldList to only include fields we have... |
|
5577
|
|
|
|
|
|
|
|
|
5578
|
8
|
|
|
|
|
18
|
$FieldList = [grep {exists($AllFieldsHash->{$_})} @$FieldList]; |
|
|
33
|
|
|
|
|
74
|
|
|
5579
|
|
|
|
|
|
|
|
|
5580
|
|
|
|
|
|
|
## Ensure $Selection contains only valid record numbers... |
|
5581
|
|
|
|
|
|
|
|
|
5582
|
8
|
|
|
|
|
31
|
$Selection = $this->selection_validate_internal($Selection); |
|
5583
|
|
|
|
|
|
|
|
|
5584
|
|
|
|
|
|
|
## Get an ordered list of the columns. |
|
5585
|
8
|
|
|
|
|
34
|
my $Columns = [@$this{@$FieldList}]; |
|
5586
|
|
|
|
|
|
|
|
|
5587
|
|
|
|
|
|
|
## Calculate the name of the file we'll write to, if any, and get |
|
5588
|
|
|
|
|
|
|
## the file handle either from the one we were given or by opening |
|
5589
|
|
|
|
|
|
|
## the specified file for writing. |
|
5590
|
|
|
|
|
|
|
|
|
5591
|
8
|
|
|
|
|
12
|
my $WriteFileName; |
|
5592
|
|
|
|
|
|
|
my $OutFile; |
|
5593
|
|
|
|
|
|
|
|
|
5594
|
8
|
50
|
|
|
|
26
|
if ($GotHandle) |
|
5595
|
|
|
|
|
|
|
{ |
|
5596
|
0
|
|
|
|
|
0
|
$WriteFileName = ""; |
|
5597
|
0
|
|
|
|
|
0
|
$OutFile = $FileName; ## Actually a handle. |
|
5598
|
|
|
|
|
|
|
} |
|
5599
|
|
|
|
|
|
|
else |
|
5600
|
|
|
|
|
|
|
{ |
|
5601
|
|
|
|
|
|
|
## Calculate the name of the file we're going to try to write to. |
|
5602
|
8
|
50
|
|
|
|
36
|
$WriteFileName = |
|
5603
|
|
|
|
|
|
|
|
|
5604
|
|
|
|
|
|
|
## Use file name exactly if specified in write() call. |
|
5605
|
|
|
|
|
|
|
($Params->{_FileName} ? $FileName : |
|
5606
|
|
|
|
|
|
|
|
|
5607
|
|
|
|
|
|
|
## Otherwise, calculate the name by adding/appending the _WriteExtension. |
|
5608
|
|
|
|
|
|
|
$this->write_file_name($FileName, $WriteExtension)); |
|
5609
|
|
|
|
|
|
|
|
|
5610
|
|
|
|
|
|
|
## Ensure the directory that will hold the file actually exists. |
|
5611
|
1
|
|
|
1
|
|
7
|
use File::Basename qw(fileparse); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
212
|
|
|
5612
|
8
|
|
|
|
|
182
|
my ($Basename, $Path, $Ext) = fileparse($WriteFileName, '\.[^\.]+'); |
|
5613
|
8
|
|
|
|
|
18
|
my ($Sep, $Up, $Cur) = @{$this->path_info()}{qw(sep up cur)}; |
|
|
8
|
|
|
|
|
17
|
|
|
5614
|
8
|
|
33
|
|
|
26
|
$Path ||= $Cur; ## Once again, default $Path to cwd just in case. |
|
5615
|
|
|
|
|
|
|
|
|
5616
|
8
|
50
|
|
|
|
24
|
$this->{_ErrorMsg} = "Can't make directory $Path to save $WriteFileName: $!", goto done |
|
5617
|
|
|
|
|
|
|
unless $this->verify_or_create_path($Path, $Sep); |
|
5618
|
|
|
|
|
|
|
|
|
5619
|
|
|
|
|
|
|
## Ensure the directory is writeable and the file is overwriteable |
|
5620
|
|
|
|
|
|
|
## if it exists. |
|
5621
|
8
|
50
|
|
|
|
93
|
$this->{_ErrorMsg} = "Directory $Path is not writeable.", goto done |
|
5622
|
|
|
|
|
|
|
unless (-w $Path); |
|
5623
|
|
|
|
|
|
|
|
|
5624
|
8
|
50
|
33
|
|
|
206
|
$this->{_ErrorMsg} = "File $WriteFileName cannot be overwritten.", goto done |
|
5625
|
|
|
|
|
|
|
if (-e $WriteFileName && !(-w $WriteFileName)); |
|
5626
|
|
|
|
|
|
|
|
|
5627
|
|
|
|
|
|
|
## Open the file for write. |
|
5628
|
|
|
|
|
|
|
|
|
5629
|
1
|
|
|
1
|
|
6
|
use IO::File; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
1613
|
|
|
5630
|
8
|
|
|
|
|
60
|
$OutFile = IO::File->new(">$WriteFileName"); |
|
5631
|
8
|
50
|
|
|
|
983
|
$this->{_ErrorMsg} = "Failed to open $WriteFileName for writing: $!", goto done |
|
5632
|
|
|
|
|
|
|
unless $OutFile; |
|
5633
|
|
|
|
|
|
|
|
|
5634
|
8
|
|
|
|
|
37
|
$this->progress("Writing $WriteFileName..."); |
|
5635
|
|
|
|
|
|
|
} |
|
5636
|
|
|
|
|
|
|
|
|
5637
|
|
|
|
|
|
|
## Figure out the line initiator & ender strings, and delimiter sequence. |
|
5638
|
|
|
|
|
|
|
|
|
5639
|
8
|
|
|
|
|
12
|
my ($LineStartQuote, $Delim, $LineEndQuote); |
|
5640
|
8
|
50
|
|
|
|
17
|
if ($QuoteFields) |
|
5641
|
|
|
|
|
|
|
{ |
|
5642
|
|
|
|
|
|
|
## In "forced" quote mode, we just do the quoting by always |
|
5643
|
|
|
|
|
|
|
## putting them at the start and end of lines and in between |
|
5644
|
|
|
|
|
|
|
## each field. |
|
5645
|
|
|
|
|
|
|
|
|
5646
|
0
|
|
|
|
|
0
|
$LineStartQuote = "\""; |
|
5647
|
0
|
|
|
|
|
0
|
$LineEndQuote = "\""; |
|
5648
|
0
|
|
|
|
|
0
|
$Delim = "\"$FDelimiter\""; |
|
5649
|
|
|
|
|
|
|
} |
|
5650
|
|
|
|
|
|
|
else |
|
5651
|
|
|
|
|
|
|
{ |
|
5652
|
|
|
|
|
|
|
## In no-quote or auto-quote mode, we don't put the quotes in |
|
5653
|
|
|
|
|
|
|
## these places; they'll either be omitted entirely or |
|
5654
|
|
|
|
|
|
|
## inserted per-field. |
|
5655
|
|
|
|
|
|
|
|
|
5656
|
8
|
|
|
|
|
59
|
$LineStartQuote = ''; |
|
5657
|
8
|
|
|
|
|
82
|
$LineEndQuote = ''; |
|
5658
|
8
|
|
|
|
|
14
|
$Delim = $FDelimiter; |
|
5659
|
|
|
|
|
|
|
} |
|
5660
|
|
|
|
|
|
|
|
|
5661
|
|
|
|
|
|
|
## Precompile a regex that checks for quotes, the field delimiter |
|
5662
|
|
|
|
|
|
|
## sequence, the line ending sequence, or any line-ending-ish |
|
5663
|
|
|
|
|
|
|
## characters at all. In $QuoteCheck mode, we'll use this to |
|
5664
|
|
|
|
|
|
|
## identify fields needing double-quotes. |
|
5665
|
|
|
|
|
|
|
|
|
5666
|
8
|
50
|
|
|
|
211
|
my $QuoteOrDelimCheck = qr{(?:\Q$LineEnding\E)|(?:\Q$FDelimiter\E)|[\"\x0D\x0A]} if $QuoteCheck; |
|
5667
|
|
|
|
|
|
|
|
|
5668
|
|
|
|
|
|
|
## Precompile a return-map-checking regex that checks first for |
|
5669
|
|
|
|
|
|
|
## the line ending actively in use and then for the platform's |
|
5670
|
|
|
|
|
|
|
## "\n". These may be the same, but are not necessarily always |
|
5671
|
|
|
|
|
|
|
## the same. We don't return-map any old \x0D or \x0A because, |
|
5672
|
|
|
|
|
|
|
## for example in a pure Unix world, \x0D would not be interpreted |
|
5673
|
|
|
|
|
|
|
## as line-ending-related and hence is a valid character in a |
|
5674
|
|
|
|
|
|
|
## field. |
|
5675
|
|
|
|
|
|
|
|
|
5676
|
8
|
50
|
|
|
|
125
|
my $ReturnMapCheck = qr{(?:\Q$LineEnding\E)|(?:\n)} if $ReturnMap; |
|
5677
|
|
|
|
|
|
|
|
|
5678
|
|
|
|
|
|
|
## Print out the header row that lists the field names in order. |
|
5679
|
|
|
|
|
|
|
|
|
5680
|
8
|
50
|
|
|
|
27
|
if ($HeaderRow) |
|
5681
|
|
|
|
|
|
|
{ |
|
5682
|
33
|
|
|
|
|
62
|
my $Line = ($LineStartQuote . |
|
5683
|
|
|
|
|
|
|
join($Delim, |
|
5684
|
|
|
|
|
|
|
map |
|
5685
|
|
|
|
|
|
|
{ |
|
5686
|
|
|
|
|
|
|
## Quote any " character as "" |
|
5687
|
8
|
|
|
|
|
19
|
(my $X = $_) =~ s/\"/\"\"/g; |
|
5688
|
|
|
|
|
|
|
|
|
5689
|
|
|
|
|
|
|
## In QuoteCheck mode _QuoteFields => |
|
5690
|
|
|
|
|
|
|
## undef ("auto"): put quotes around |
|
5691
|
|
|
|
|
|
|
## field only if required. |
|
5692
|
|
|
|
|
|
|
|
|
5693
|
33
|
50
|
33
|
|
|
258
|
$X = "\"$X\"" if $QuoteCheck && $X =~ $QuoteOrDelimCheck; |
|
5694
|
|
|
|
|
|
|
|
|
5695
|
|
|
|
|
|
|
## Convert returns back to \x0B |
|
5696
|
33
|
50
|
|
|
|
142
|
$X =~ s/$ReturnMapCheck/$ReturnEncoding/g if $ReturnMap; |
|
5697
|
|
|
|
|
|
|
|
|
5698
|
33
|
|
|
|
|
105
|
$X; |
|
5699
|
|
|
|
|
|
|
} @$FieldList) . |
|
5700
|
|
|
|
|
|
|
$LineEndQuote . |
|
5701
|
|
|
|
|
|
|
$LineEnding); |
|
5702
|
|
|
|
|
|
|
|
|
5703
|
|
|
|
|
|
|
## Maybe convert entire line (all records) Mac to ISO before writing it. |
|
5704
|
8
|
100
|
|
|
|
41
|
&ISORoman8859_1ToMacRoman(\ $Line) if $DoMacMapping; |
|
5705
|
|
|
|
|
|
|
|
|
5706
|
8
|
50
|
|
|
|
55
|
$OutFile->print($Line) if $HeaderRow; |
|
5707
|
|
|
|
|
|
|
} |
|
5708
|
|
|
|
|
|
|
|
|
5709
|
|
|
|
|
|
|
## Print out each row (record). Fields are output in $FieldList |
|
5710
|
|
|
|
|
|
|
## order (same order as they were in the header row, if any). |
|
5711
|
|
|
|
|
|
|
## Records are printed in the order specified in $Selection. |
|
5712
|
|
|
|
|
|
|
|
|
5713
|
8
|
|
|
|
|
114
|
my $WroteProg; |
|
5714
|
8
|
|
|
|
|
16
|
my $TotalLen = @$Selection+0; |
|
5715
|
8
|
50
|
|
|
|
16
|
my $RecordsToWrite = ($MaxRecords ? min($this, $MaxRecords, $TotalLen) : $TotalLen); |
|
5716
|
8
|
|
|
|
|
9
|
my $RecordsWritten = 0; |
|
5717
|
|
|
|
|
|
|
|
|
5718
|
8
|
|
|
|
|
15
|
foreach my $i (@$Selection) |
|
5719
|
|
|
|
|
|
|
{ |
|
5720
|
99
|
|
|
|
|
175
|
my $Line = ($LineStartQuote . |
|
5721
|
|
|
|
|
|
|
join($Delim, |
|
5722
|
|
|
|
|
|
|
map |
|
5723
|
|
|
|
|
|
|
{ |
|
5724
|
|
|
|
|
|
|
## Quote any " character as "" |
|
5725
|
24
|
|
|
|
|
42
|
(my $X = $_->[$i]) =~ s/\"/\"\"/g; |
|
5726
|
|
|
|
|
|
|
|
|
5727
|
|
|
|
|
|
|
## In QuoteCheck mode _QuoteFields => |
|
5728
|
|
|
|
|
|
|
## undef ("auto"): put quotes around |
|
5729
|
|
|
|
|
|
|
## field only if required. |
|
5730
|
|
|
|
|
|
|
|
|
5731
|
99
|
50
|
33
|
|
|
713
|
$X = "\"$X\"" if $QuoteCheck && $X =~ $QuoteOrDelimCheck; |
|
5732
|
|
|
|
|
|
|
|
|
5733
|
|
|
|
|
|
|
## Convert returns back to \x0B |
|
5734
|
99
|
50
|
|
|
|
369
|
$X =~ s/$ReturnMapCheck/$ReturnEncoding/g if $ReturnMap; |
|
5735
|
|
|
|
|
|
|
|
|
5736
|
99
|
|
|
|
|
252
|
$X; |
|
5737
|
|
|
|
|
|
|
} @$Columns) . |
|
5738
|
|
|
|
|
|
|
$LineEndQuote . |
|
5739
|
|
|
|
|
|
|
$LineEnding); |
|
5740
|
|
|
|
|
|
|
|
|
5741
|
|
|
|
|
|
|
## Maybe convert entire line (all records) ISO to Mac before writing it. |
|
5742
|
24
|
100
|
|
|
|
79
|
&ISORoman8859_1ToMacRoman(\ $Line) if $DoMacMapping; |
|
5743
|
|
|
|
|
|
|
|
|
5744
|
24
|
|
|
|
|
70
|
$OutFile->print($Line); |
|
5745
|
|
|
|
|
|
|
|
|
5746
|
24
|
|
|
|
|
117
|
$RecordsWritten++; |
|
5747
|
|
|
|
|
|
|
|
|
5748
|
|
|
|
|
|
|
## Try doing timed (throttled to 1 per 2 secs) progress at |
|
5749
|
|
|
|
|
|
|
## most every 100th record. |
|
5750
|
24
|
50
|
|
|
|
54
|
my $Did = $this->progress_timed("Writing", $RecordsWritten, $RecordsWritten, $RecordsToWrite, 1) |
|
5751
|
|
|
|
|
|
|
if (($RecordsWritten % 100) == 0); |
|
5752
|
24
|
|
33
|
|
|
82
|
$WroteProg ||= $Did; |
|
5753
|
|
|
|
|
|
|
|
|
5754
|
|
|
|
|
|
|
## Stop if we have written all the records we wanted. |
|
5755
|
24
|
100
|
|
|
|
65
|
last if ($RecordsWritten >= $RecordsToWrite); |
|
5756
|
|
|
|
|
|
|
} |
|
5757
|
|
|
|
|
|
|
|
|
5758
|
|
|
|
|
|
|
## If we wrote timed progress but didn't get to give the 100% |
|
5759
|
|
|
|
|
|
|
## message yet, print the 100% message now. |
|
5760
|
8
|
50
|
|
|
|
18
|
if ($WroteProg) |
|
5761
|
|
|
|
|
|
|
{ |
|
5762
|
0
|
0
|
|
|
|
0
|
my $FinalProg = $this->progress_timed("Writing", $RecordsWritten, $RecordsWritten, $RecordsToWrite, 1) |
|
5763
|
|
|
|
|
|
|
unless (($RecordsWritten % 100) == 0); |
|
5764
|
|
|
|
|
|
|
} |
|
5765
|
|
|
|
|
|
|
|
|
5766
|
|
|
|
|
|
|
## Print the regular Done message. |
|
5767
|
8
|
50
|
|
|
|
20
|
if ($GotHandle) |
|
5768
|
|
|
|
|
|
|
{ |
|
5769
|
0
|
|
|
|
|
0
|
$this->progress("Done writing."); |
|
5770
|
0
|
|
|
|
|
0
|
$Success = 1; |
|
5771
|
|
|
|
|
|
|
} |
|
5772
|
|
|
|
|
|
|
else |
|
5773
|
|
|
|
|
|
|
{ |
|
5774
|
8
|
|
|
|
|
33
|
$this->progress("Wrote $WriteFileName."); |
|
5775
|
|
|
|
|
|
|
} |
|
5776
|
|
|
|
|
|
|
|
|
5777
|
8
|
50
|
|
|
|
29
|
if (!$GotHandle) |
|
5778
|
|
|
|
|
|
|
{ |
|
5779
|
|
|
|
|
|
|
## Close the file and check the exit code. |
|
5780
|
8
|
|
|
|
|
36
|
$OutFile->close(); |
|
5781
|
8
|
|
|
|
|
425
|
$Success = (($?>>8) == 0); |
|
5782
|
|
|
|
|
|
|
|
|
5783
|
8
|
50
|
|
|
|
27
|
$this->{_ErrorMsg} = "Unexpected failure writing $WriteFileName ($!)", goto done |
|
5784
|
|
|
|
|
|
|
unless $Success; |
|
5785
|
|
|
|
|
|
|
} |
|
5786
|
|
|
|
|
|
|
|
|
5787
|
|
|
|
|
|
|
done: |
|
5788
|
8
|
50
|
|
|
|
22
|
$this->warn("FAILURE: $this->{_ErrorMsg}") unless $Success; |
|
5789
|
|
|
|
|
|
|
|
|
5790
|
8
|
50
|
|
|
|
105
|
return($Success ? $WriteFileName : undef); |
|
5791
|
|
|
|
|
|
|
} |
|
5792
|
|
|
|
|
|
|
|
|
5793
|
|
|
|
|
|
|
sub write_file_name ## Calculate the name of the file to be written. |
|
5794
|
|
|
|
|
|
|
{ |
|
5795
|
8
|
|
|
8
|
0
|
15
|
my $this = shift; |
|
5796
|
8
|
|
|
|
|
14
|
my ($FileName, $WriteExtension) = @_; |
|
5797
|
|
|
|
|
|
|
|
|
5798
|
|
|
|
|
|
|
## Break the path into its parts... |
|
5799
|
1
|
|
|
1
|
|
6
|
use File::Basename qw(fileparse); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
4249
|
|
|
5800
|
8
|
|
|
|
|
246
|
my ($Basename, $Path, $Ext) = fileparse($FileName, '\.[^\.]+'); |
|
5801
|
|
|
|
|
|
|
|
|
5802
|
|
|
|
|
|
|
## If no directory is explicitly named, set $Path to be the |
|
5803
|
|
|
|
|
|
|
## implicit "current directory" (e.g. "./") |
|
5804
|
|
|
|
|
|
|
|
|
5805
|
8
|
|
|
|
|
19
|
my ($Sep, $Up, $Cur) = @{$this->path_info()}{qw(sep up cur)}; |
|
|
8
|
|
|
|
|
21
|
|
|
5806
|
8
|
|
33
|
|
|
35
|
$Path ||= $Cur; |
|
5807
|
|
|
|
|
|
|
|
|
5808
|
|
|
|
|
|
|
## If $WriteExtension is empty, which is allowed, then the result |
|
5809
|
|
|
|
|
|
|
## will be the same as $FileName, which could in some cases result |
|
5810
|
|
|
|
|
|
|
## in the overwriting of the same file that was read in (and may |
|
5811
|
|
|
|
|
|
|
## be what is intended). |
|
5812
|
|
|
|
|
|
|
|
|
5813
|
8
|
|
|
|
|
19
|
my $WriteFileName = "$Path$Basename$WriteExtension$Ext"; |
|
5814
|
|
|
|
|
|
|
|
|
5815
|
8
|
|
|
|
|
24
|
return($WriteFileName); |
|
5816
|
|
|
|
|
|
|
} |
|
5817
|
|
|
|
|
|
|
|
|
5818
|
|
|
|
|
|
|
sub out |
|
5819
|
|
|
|
|
|
|
{ |
|
5820
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
|
5821
|
0
|
|
|
|
|
0
|
my $Dest = shift; |
|
5822
|
|
|
|
|
|
|
|
|
5823
|
0
|
|
|
|
|
0
|
my $Success; |
|
5824
|
|
|
|
|
|
|
|
|
5825
|
|
|
|
|
|
|
## First do the formatting (or fail) -- format() will warn if needed. |
|
5826
|
0
|
0
|
|
|
|
0
|
my $Data = $this->format(@_) or goto done; |
|
5827
|
|
|
|
|
|
|
|
|
5828
|
|
|
|
|
|
|
## If $Dest is empty or not defined, use STDOUT. |
|
5829
|
0
|
|
0
|
|
|
0
|
$Dest ||= \*STDOUT; |
|
5830
|
|
|
|
|
|
|
|
|
5831
|
|
|
|
|
|
|
## If given an IO handle (such as \*STDERR), bless and use it. |
|
5832
|
0
|
0
|
|
|
|
0
|
if (ref($Dest) eq 'HANDLE') {$Dest = bless($Dest , 'IO::File')}; |
|
|
0
|
|
|
|
|
0
|
|
|
5833
|
|
|
|
|
|
|
|
|
5834
|
|
|
|
|
|
|
## If it is not an object, treat it as a file name to be opened. |
|
5835
|
0
|
0
|
0
|
|
|
0
|
if (!ref($Dest)) {$Dest = (IO::File->new(">$Dest") or |
|
|
0
|
|
|
|
|
0
|
|
|
5836
|
|
|
|
|
|
|
$this->warn("Can't open file $Dest: $!"), goto done)}; |
|
5837
|
|
|
|
|
|
|
|
|
5838
|
|
|
|
|
|
|
## At this point treat $Dest as an object with a print method and |
|
5839
|
|
|
|
|
|
|
## complain if the print method doesn't return a true value. |
|
5840
|
|
|
|
|
|
|
|
|
5841
|
0
|
0
|
|
|
|
0
|
$Dest->print($$Data) or $this->warn("Had trouble writing file: $!"), goto done; |
|
5842
|
|
|
|
|
|
|
|
|
5843
|
0
|
|
|
|
|
0
|
$Success = 1; |
|
5844
|
0
|
|
|
|
|
0
|
done: |
|
5845
|
|
|
|
|
|
|
return($Success); |
|
5846
|
|
|
|
|
|
|
} |
|
5847
|
|
|
|
|
|
|
|
|
5848
|
|
|
|
|
|
|
sub format ## use Data::ShowTable to format the table in a pretty way. |
|
5849
|
|
|
|
|
|
|
{ |
|
5850
|
1
|
|
|
1
|
1
|
3
|
my $this = shift; |
|
5851
|
1
|
50
|
|
|
|
7
|
my $Params = (@_ == 1 ? {_MaxRecords => $_[0]} : {@_}); |
|
5852
|
|
|
|
|
|
|
|
|
5853
|
1
|
|
|
|
|
3
|
my($Selection, $FieldList, $SortSpecs, $DefaultSortType, $MaxRecords, $MaxWidth) = map {$this->getparam($Params, $_)} |
|
|
6
|
|
|
|
|
24
|
|
|
5854
|
|
|
|
|
|
|
qw(_Selection _FieldList _SortSpecs _DefaultSortType _MaxRecords _MaxWidth); |
|
5855
|
|
|
|
|
|
|
|
|
5856
|
|
|
|
|
|
|
## This method relies on Data::ShowTable. |
|
5857
|
1
|
50
|
|
|
|
7
|
$this->warn("@{[__PACKAGE__]}::show() requires optional Data::ShowTable module."), goto done |
|
|
1
|
|
|
|
|
10
|
|
|
5858
|
|
|
|
|
|
|
unless $HaveShowTable; |
|
5859
|
|
|
|
|
|
|
|
|
5860
|
0
|
|
0
|
|
|
0
|
$FieldList ||= $this->fieldlist(); |
|
5861
|
0
|
|
0
|
|
|
0
|
$Selection ||= $this->selection(); |
|
5862
|
0
|
|
0
|
|
|
0
|
$SortSpecs ||= {}; |
|
5863
|
0
|
0
|
|
|
|
0
|
$DefaultSortType = 'String' unless (length($DefaultSortType)); |
|
5864
|
0
|
|
0
|
|
|
0
|
$MaxRecords ||= 0; ## Default is no maximum (all records). |
|
5865
|
0
|
|
0
|
|
|
0
|
$MaxWidth ||= 15; ## Zero or undef means use default. |
|
5866
|
0
|
|
|
|
|
0
|
$MaxWidth = max(2, $MaxWidth); ## MaxWidth must not be less than 2 |
|
5867
|
|
|
|
|
|
|
|
|
5868
|
0
|
|
|
|
|
0
|
my $TypeMap = {qw(string char |
|
5869
|
|
|
|
|
|
|
text text |
|
5870
|
|
|
|
|
|
|
integer int |
|
5871
|
|
|
|
|
|
|
number numeric |
|
5872
|
|
|
|
|
|
|
boolean int)}; |
|
5873
|
|
|
|
|
|
|
|
|
5874
|
0
|
0
|
0
|
|
|
0
|
my $Types = [map {$TypeMap->{lc(@{$SortSpecs->{$_} || {}}{SortType} || |
|
|
0
|
|
|
|
|
0
|
|
|
5875
|
|
|
|
|
|
|
$DefaultSortType) } || 'string'} |
|
5876
|
|
|
|
|
|
|
@$FieldList]; |
|
5877
|
|
|
|
|
|
|
|
|
5878
|
0
|
|
|
|
|
0
|
my $TotalLen = @$Selection+0; |
|
5879
|
0
|
0
|
|
|
|
0
|
my $RecordsToWrite = ($MaxRecords ? min($this, $MaxRecords, $TotalLen) : $TotalLen); |
|
5880
|
|
|
|
|
|
|
|
|
5881
|
|
|
|
|
|
|
## The row-yielder subroutine and its private state variables. |
|
5882
|
0
|
|
|
|
|
0
|
my $SelNum = 0; |
|
5883
|
|
|
|
|
|
|
my $RowSub = sub ## A closure over the local vars in this subroutine. |
|
5884
|
|
|
|
|
|
|
{ |
|
5885
|
|
|
|
|
|
|
## We might be asked to rewind. |
|
5886
|
0
|
|
|
0
|
|
0
|
my ($Rewind) = @_; |
|
5887
|
0
|
0
|
|
|
|
0
|
$SelNum = 0, return(1) if ($Rewind); |
|
5888
|
|
|
|
|
|
|
|
|
5889
|
|
|
|
|
|
|
## Done if we've written all rows. |
|
5890
|
0
|
0
|
|
|
|
0
|
return() if $SelNum >= $RecordsToWrite; |
|
5891
|
|
|
|
|
|
|
|
|
5892
|
|
|
|
|
|
|
## Otherwise, yield a row if we still can. |
|
5893
|
0
|
0
|
|
|
|
0
|
my $List = [map |
|
5894
|
|
|
|
|
|
|
{ |
|
5895
|
|
|
|
|
|
|
## Truncate as needed. |
|
5896
|
0
|
|
|
|
|
0
|
my $X = (length > $MaxWidth ? |
|
5897
|
|
|
|
|
|
|
(substr($_, 0, ($MaxWidth - 1)) . '>') : $_); |
|
5898
|
|
|
|
|
|
|
|
|
5899
|
|
|
|
|
|
|
## Encode returns, tabs as carets. |
|
5900
|
0
|
|
|
|
|
0
|
$X =~ s{(?:\x0D\x0A)|[\x0D\x0A\x09]}{^}g; |
|
5901
|
|
|
|
|
|
|
|
|
5902
|
0
|
|
|
|
|
0
|
$X; |
|
5903
|
|
|
|
|
|
|
} |
|
5904
|
0
|
|
|
|
|
0
|
@{$this->row_list($Selection->[$SelNum++], $FieldList)}]; |
|
5905
|
|
|
|
|
|
|
|
|
5906
|
0
|
|
|
|
|
0
|
return(@$List); |
|
5907
|
0
|
|
|
|
|
0
|
}; |
|
5908
|
|
|
|
|
|
|
|
|
5909
|
|
|
|
|
|
|
## Locally replace put() and out() in Data::ShowTable so we can |
|
5910
|
|
|
|
|
|
|
## gather the data into memory instead of having it go right out |
|
5911
|
|
|
|
|
|
|
## to STDOUT before we may want it to. |
|
5912
|
|
|
|
|
|
|
|
|
5913
|
|
|
|
|
|
|
## Too bad Data::ShowTable is not a subclassable object instead. |
|
5914
|
|
|
|
|
|
|
|
|
5915
|
0
|
|
|
|
|
0
|
my $Data = [""]; ## Array to hold output from out() and put() |
|
5916
|
|
|
|
|
|
|
{ |
|
5917
|
0
|
|
|
|
|
0
|
local *{Data::ShowTable::out} = sub ## See sub out in ShowTable.pm |
|
5918
|
|
|
|
|
|
|
{ |
|
5919
|
0
|
|
|
0
|
|
0
|
my $fmt = shift; |
|
5920
|
0
|
0
|
|
|
|
0
|
$fmt .= "\n" unless $fmt =~ /\n$/; |
|
5921
|
0
|
|
|
|
|
0
|
$Data->[-1] .= sprintf($fmt, @_); |
|
5922
|
0
|
|
|
|
|
0
|
push @$Data, ""; |
|
5923
|
0
|
|
|
|
|
0
|
}; |
|
5924
|
|
|
|
|
|
|
|
|
5925
|
|
|
|
|
|
|
local *{Data::ShowTable::put} = sub ## See sub put in ShowTable.pm |
|
5926
|
|
|
|
|
|
|
{ |
|
5927
|
0
|
|
|
0
|
|
0
|
my $fmt = shift(); |
|
5928
|
0
|
|
|
|
|
0
|
$Data->[-1] .= sprintf($fmt, @_); |
|
5929
|
0
|
|
|
|
|
0
|
}; |
|
5930
|
|
|
|
|
|
|
|
|
5931
|
0
|
|
|
|
|
0
|
&ShowBoxTable({titles => $FieldList, |
|
5932
|
|
|
|
|
|
|
types => $Types, |
|
5933
|
|
|
|
|
|
|
row_sub => $RowSub, |
|
5934
|
|
|
|
|
|
|
widths => [], ## Will calculate from the data |
|
5935
|
|
|
|
|
|
|
}); |
|
5936
|
|
|
|
|
|
|
} |
|
5937
|
|
|
|
|
|
|
|
|
5938
|
|
|
|
|
|
|
## Remove spurious extra newline entries at end of $Data |
|
5939
|
0
|
0
|
|
|
|
0
|
pop @$Data if $Data->[-1] =~ /^\s*$/s; |
|
5940
|
0
|
0
|
|
|
|
0
|
pop @$Data if $Data->[-1] =~ /^\s*$/s; |
|
5941
|
|
|
|
|
|
|
|
|
5942
|
0
|
|
|
|
|
0
|
my $Formatted = join("", @$Data); |
|
5943
|
|
|
|
|
|
|
|
|
5944
|
1
|
|
|
|
|
7
|
done: |
|
5945
|
|
|
|
|
|
|
return(\ $Formatted); |
|
5946
|
|
|
|
|
|
|
} |
|
5947
|
|
|
|
|
|
|
|
|
5948
|
|
|
|
|
|
|
|
|
5949
|
|
|
|
|
|
|
=pod |
|
5950
|
|
|
|
|
|
|
|
|
5951
|
|
|
|
|
|
|
=head1 APPENDING / MERGING / JOINING TABLES |
|
5952
|
|
|
|
|
|
|
|
|
5953
|
|
|
|
|
|
|
## Append all records from a second table |
|
5954
|
|
|
|
|
|
|
|
|
5955
|
|
|
|
|
|
|
$t->append($Other) ## Append records from $Other |
|
5956
|
|
|
|
|
|
|
$t->append_file($File, $Params) ## Append from new($Params, $File) |
|
5957
|
|
|
|
|
|
|
$t->append_files($Files, $Params) ## Call append_file for all files |
|
5958
|
|
|
|
|
|
|
$t->append_files_new($Files, $Params) ## Internal helper routine |
|
5959
|
|
|
|
|
|
|
|
|
5960
|
|
|
|
|
|
|
## Combine all fields from a second table |
|
5961
|
|
|
|
|
|
|
|
|
5962
|
|
|
|
|
|
|
$t->combine($Other) ## Combine fields from $Other |
|
5963
|
|
|
|
|
|
|
$t->combine_file($File, $Params) ## Combine new($Params, $File) |
|
5964
|
|
|
|
|
|
|
$t->combine_files($Files, $Params) ## combine_file on each file |
|
5965
|
|
|
|
|
|
|
|
|
5966
|
|
|
|
|
|
|
## Left-join records from a second table (lookup field vals) |
|
5967
|
|
|
|
|
|
|
|
|
5968
|
|
|
|
|
|
|
$t->join ($Other, $KeyField1, [$KeyField2, $Fields]) |
|
5969
|
|
|
|
|
|
|
$t->join_file ($File, $Params, $KeyField1, [$KeyField2, $Fields]) |
|
5970
|
|
|
|
|
|
|
$t->join_files($Files, $Params, $KeyField1, [$KeyField2, $Fields]) |
|
5971
|
|
|
|
|
|
|
|
|
5972
|
|
|
|
|
|
|
The append() method concatenates all the records from two CTable |
|
5973
|
|
|
|
|
|
|
objects together -- even if the two tables didn't start out with |
|
5974
|
|
|
|
|
|
|
exactly the same fields (or even any of the same fields). |
|
5975
|
|
|
|
|
|
|
|
|
5976
|
|
|
|
|
|
|
It takes all the data records from another CTable object and appends |
|
5977
|
|
|
|
|
|
|
them into the present table. Any columns present in the $Other table |
|
5978
|
|
|
|
|
|
|
but not in the first table, are created (and the corresponding field |
|
5979
|
|
|
|
|
|
|
values in the first table will all be empty/undef). Similarly, any |
|
5980
|
|
|
|
|
|
|
columns present in $t but not present in $Other will be extended |
|
5981
|
|
|
|
|
|
|
to the correct new length as necessary and the field values in the |
|
5982
|
|
|
|
|
|
|
original columns will be empty/undef. Columns present in both will, |
|
5983
|
|
|
|
|
|
|
of course, have all the data from both the original sets of data. |
|
5984
|
|
|
|
|
|
|
|
|
5985
|
|
|
|
|
|
|
All data from the second table is brought into the first one. No |
|
5986
|
|
|
|
|
|
|
attempt whatsoever is made to eliminate any duplicate records that |
|
5987
|
|
|
|
|
|
|
might result. |
|
5988
|
|
|
|
|
|
|
|
|
5989
|
|
|
|
|
|
|
The number of records (length()) after this call is the sum of the |
|
5990
|
|
|
|
|
|
|
length() of each of the tables before the operation. |
|
5991
|
|
|
|
|
|
|
|
|
5992
|
|
|
|
|
|
|
IMPORTANT NOTE: The data from the $Other object is COPIED in memory |
|
5993
|
|
|
|
|
|
|
into the new object. This could be hard on memory if $Other is big. |
|
5994
|
|
|
|
|
|
|
Might want to be sure to discard $Other when you're done with it. |
|
5995
|
|
|
|
|
|
|
|
|
5996
|
|
|
|
|
|
|
$Other is left untouched by the operation. |
|
5997
|
|
|
|
|
|
|
|
|
5998
|
|
|
|
|
|
|
All columns from both tables are combined whether or not they are |
|
5999
|
|
|
|
|
|
|
mentioned in the custom field list of either. |
|
6000
|
|
|
|
|
|
|
|
|
6001
|
|
|
|
|
|
|
The custom field lists, if present in either table object, are |
|
6002
|
|
|
|
|
|
|
concatenated into this object's custom field list, but with |
|
6003
|
|
|
|
|
|
|
duplications eliminated, and order retained. |
|
6004
|
|
|
|
|
|
|
|
|
6005
|
|
|
|
|
|
|
Any existing custom selections, custom sort order, sort specs, and/or |
|
6006
|
|
|
|
|
|
|
sort routines are also combined appropriately, with settings from this |
|
6007
|
|
|
|
|
|
|
object taking precedence over those from $Other anywhere the two have |
|
6008
|
|
|
|
|
|
|
conflicting settings. |
|
6009
|
|
|
|
|
|
|
|
|
6010
|
|
|
|
|
|
|
append_file() takes a file name and optional $Params hash. It uses |
|
6011
|
|
|
|
|
|
|
those to create a new() object with data read from the file. Then, |
|
6012
|
|
|
|
|
|
|
the new table is appended to $t using append() and then the new table |
|
6013
|
|
|
|
|
|
|
is discarded. |
|
6014
|
|
|
|
|
|
|
|
|
6015
|
|
|
|
|
|
|
append_files() is a convenience function that calls append_file() on |
|
6016
|
|
|
|
|
|
|
each file in a list, using the same optional $Params for each. |
|
6017
|
|
|
|
|
|
|
|
|
6018
|
|
|
|
|
|
|
append_files_new() is the internal routine that implements the |
|
6019
|
|
|
|
|
|
|
processing done by new() on the optional list of files to be read. It |
|
6020
|
|
|
|
|
|
|
does the following: It calls read() on the first file in the list. |
|
6021
|
|
|
|
|
|
|
Then, it calls append_files() to read the remaining into their own |
|
6022
|
|
|
|
|
|
|
new() objects of the same class as $t and using the same $Params to |
|
6023
|
|
|
|
|
|
|
new() (if any were supplied). Then each of these is append()-ed in |
|
6024
|
|
|
|
|
|
|
turn to $t and discarded. The final result will be that $t will hold |
|
6025
|
|
|
|
|
|
|
a concatenation of all the data in all the files mentioned. However, |
|
6026
|
|
|
|
|
|
|
consistent with the behavior of append(), the _FileName parameter and |
|
6027
|
|
|
|
|
|
|
other read()-controlled settings will correspond to the first file |
|
6028
|
|
|
|
|
|
|
read. The intermediate objects are discarded. |
|
6029
|
|
|
|
|
|
|
|
|
6030
|
|
|
|
|
|
|
NOTE: As with new() and read(), if a non-empty _FieldList Param is |
|
6031
|
|
|
|
|
|
|
specified, the read() methods called internally by the append_file*() |
|
6032
|
|
|
|
|
|
|
methods will read only the fields mentioned and will ignore any other |
|
6033
|
|
|
|
|
|
|
fields in the files. |
|
6034
|
|
|
|
|
|
|
|
|
6035
|
|
|
|
|
|
|
=head2 Combining tables |
|
6036
|
|
|
|
|
|
|
|
|
6037
|
|
|
|
|
|
|
combine() adds columns from a second table into the current one. |
|
6038
|
|
|
|
|
|
|
|
|
6039
|
|
|
|
|
|
|
CAUTION: You should only use combine() when you have two tables where |
|
6040
|
|
|
|
|
|
|
all the (possibly selected) records in the second table line up |
|
6041
|
|
|
|
|
|
|
perfectly with all the (unselected) records in the first table -- in |
|
6042
|
|
|
|
|
|
|
other words, each table before combine() should contain a few of the |
|
6043
|
|
|
|
|
|
|
columns of the new table -- for example, maybe one table contains a |
|
6044
|
|
|
|
|
|
|
column of file names, and the other contains columns of corresponding |
|
6045
|
|
|
|
|
|
|
file sizes and modification times. If you don't understand the |
|
6046
|
|
|
|
|
|
|
consequences of combine, don't use it or you could end up with some |
|
6047
|
|
|
|
|
|
|
records whose field values don't refer to the same object. (Maybe you |
|
6048
|
|
|
|
|
|
|
meant to use append() or join() instead.) |
|
6049
|
|
|
|
|
|
|
|
|
6050
|
|
|
|
|
|
|
If the second table has a custom field list, only those columns are |
|
6051
|
|
|
|
|
|
|
brought in. |
|
6052
|
|
|
|
|
|
|
|
|
6053
|
|
|
|
|
|
|
If any column in the second table has the same name as one in the |
|
6054
|
|
|
|
|
|
|
current table, the incoming column replaces the one by the same name. |
|
6055
|
|
|
|
|
|
|
|
|
6056
|
|
|
|
|
|
|
All columns are COPIED from the second table, so the first table owns |
|
6057
|
|
|
|
|
|
|
the new data exclusively. |
|
6058
|
|
|
|
|
|
|
|
|
6059
|
|
|
|
|
|
|
If the second table has a selection, only those records are copied, in |
|
6060
|
|
|
|
|
|
|
selection order. (select_all() first if that's not what you want.) |
|
6061
|
|
|
|
|
|
|
|
|
6062
|
|
|
|
|
|
|
The selection in the first table, if any, is ignored during the |
|
6063
|
|
|
|
|
|
|
combine. If this isn't what you want, then consider using cull() |
|
6064
|
|
|
|
|
|
|
before combine(). |
|
6065
|
|
|
|
|
|
|
|
|
6066
|
|
|
|
|
|
|
Field list and sort order are concatenated (but retaining uniqueness: |
|
6067
|
|
|
|
|
|
|
second mentions of a field in the combined lists are omitted). |
|
6068
|
|
|
|
|
|
|
|
|
6069
|
|
|
|
|
|
|
Custom sort routines and sort specs are combined, with those in the |
|
6070
|
|
|
|
|
|
|
first table taking precedence over any copied in with the same name. |
|
6071
|
|
|
|
|
|
|
|
|
6072
|
|
|
|
|
|
|
The custom _Selection from the first table, if any, is retained. (It |
|
6073
|
|
|
|
|
|
|
will initially omit any records added by extend()). |
|
6074
|
|
|
|
|
|
|
|
|
6075
|
|
|
|
|
|
|
All other parameters from the first table are retained, and from the |
|
6076
|
|
|
|
|
|
|
second table are ignored. |
|
6077
|
|
|
|
|
|
|
|
|
6078
|
|
|
|
|
|
|
combine() calls extend() after combining to ensure that all columns |
|
6079
|
|
|
|
|
|
|
have the same length: if either the older or newer columns were |
|
6080
|
|
|
|
|
|
|
shorter, they will all be set to the length of the longest columns in |
|
6081
|
|
|
|
|
|
|
the table -- creating some empty field values at the end of the |
|
6082
|
|
|
|
|
|
|
lengthened columns. |
|
6083
|
|
|
|
|
|
|
|
|
6084
|
|
|
|
|
|
|
combine_file() does the same as combine() except starting with a file |
|
6085
|
|
|
|
|
|
|
name, first creating the $Other object by creating it using |
|
6086
|
|
|
|
|
|
|
new($Params, $File), then discarding it after combining. |
|
6087
|
|
|
|
|
|
|
|
|
6088
|
|
|
|
|
|
|
=head2 Joining tables (Looking up data from another table) |
|
6089
|
|
|
|
|
|
|
|
|
6090
|
|
|
|
|
|
|
join() looks up field values from a second table, based on common |
|
6091
|
|
|
|
|
|
|
values in key fields which may have different or the same names in |
|
6092
|
|
|
|
|
|
|
each table. It adds columns to the current table if necessary to hold |
|
6093
|
|
|
|
|
|
|
any new field values that must be brought in. |
|
6094
|
|
|
|
|
|
|
|
|
6095
|
|
|
|
|
|
|
join() never adds any new or non-matching records to the table: |
|
6096
|
|
|
|
|
|
|
records where the lookup fails will simply have empty/undef values in |
|
6097
|
|
|
|
|
|
|
the corresponding columns. |
|
6098
|
|
|
|
|
|
|
|
|
6099
|
|
|
|
|
|
|
## Example: |
|
6100
|
|
|
|
|
|
|
|
|
6101
|
|
|
|
|
|
|
$t->join ($People, 'FullName', 'FirstAndLast'); ## or |
|
6102
|
|
|
|
|
|
|
$t->join_file("People.txt", {}, 'FullName', 'FirstAndLast'); |
|
6103
|
|
|
|
|
|
|
|
|
6104
|
|
|
|
|
|
|
|
|
6105
|
|
|
|
|
|
|
Here's how join() calculates the list of fields to bring in: |
|
6106
|
|
|
|
|
|
|
|
|
6107
|
|
|
|
|
|
|
- Legal field names from the optional $Fields list, if supplied |
|
6108
|
|
|
|
|
|
|
- Otherwise, the fieldlist() from second table |
|
6109
|
|
|
|
|
|
|
- ... minus any fields with same name as $KeyField1 or $KeyField2 |
|
6110
|
|
|
|
|
|
|
|
|
6111
|
|
|
|
|
|
|
Join starts by adding new empty columns in the first table for any |
|
6112
|
|
|
|
|
|
|
field to be brought in from the second but not yet present in the |
|
6113
|
|
|
|
|
|
|
first. |
|
6114
|
|
|
|
|
|
|
|
|
6115
|
|
|
|
|
|
|
Here's how join() calculates the records eligible for lookup: |
|
6116
|
|
|
|
|
|
|
|
|
6117
|
|
|
|
|
|
|
- Join only modifies the selected records in the first table |
|
6118
|
|
|
|
|
|
|
- Join only looks up values from selected records in second table |
|
6119
|
|
|
|
|
|
|
|
|
6120
|
|
|
|
|
|
|
(If you want all records to be used in both or either table, call the |
|
6121
|
|
|
|
|
|
|
table's select_all() method before calling join().) |
|
6122
|
|
|
|
|
|
|
|
|
6123
|
|
|
|
|
|
|
Then, for every selected record in $t (using the example above), join |
|
6124
|
|
|
|
|
|
|
examines the FullName field ($KeyField1), and looks up a corresponding |
|
6125
|
|
|
|
|
|
|
entry (must be 'eq') in the FirstAndLast field ($KeyField2) in the |
|
6126
|
|
|
|
|
|
|
second table. |
|
6127
|
|
|
|
|
|
|
|
|
6128
|
|
|
|
|
|
|
IMPORTANT NOTE ABOUT KEY LENGTH: To speed lookup, hash-based indices |
|
6129
|
|
|
|
|
|
|
are made. The strings in $Key1 and $Key2 fields should not be so long |
|
6130
|
|
|
|
|
|
|
that the hash lookups bog down or things could get ugly fast. There |
|
6131
|
|
|
|
|
|
|
is no fixed limit to hash key length in Perl, but fewer than 128 |
|
6132
|
|
|
|
|
|
|
characters in length is longer than customary for such things. (Many |
|
6133
|
|
|
|
|
|
|
systems require text-based keys to be no longer than 31 characters.) |
|
6134
|
|
|
|
|
|
|
So be judicious about the values in $Key1 and $Key2 fields. |
|
6135
|
|
|
|
|
|
|
|
|
6136
|
|
|
|
|
|
|
The first record found in the second table's selection with a matching |
|
6137
|
|
|
|
|
|
|
value in the key field is then copied over (but only the appropriate |
|
6138
|
|
|
|
|
|
|
fields are copied, as explained above). Any field values being |
|
6139
|
|
|
|
|
|
|
brought over will REPLACE corresponding field values in the first |
|
6140
|
|
|
|
|
|
|
table, possibly overwriting any previous values if the field being |
|
6141
|
|
|
|
|
|
|
looked up was already present in the first table and contained data. |
|
6142
|
|
|
|
|
|
|
|
|
6143
|
|
|
|
|
|
|
The first table's _FieldList is updated to reflect new fields added. |
|
6144
|
|
|
|
|
|
|
|
|
6145
|
|
|
|
|
|
|
Its _Selection is untouched. |
|
6146
|
|
|
|
|
|
|
|
|
6147
|
|
|
|
|
|
|
Its _SortOrder is untouched. |
|
6148
|
|
|
|
|
|
|
|
|
6149
|
|
|
|
|
|
|
Its _SortSpecs are augmented to include any entries from the second |
|
6150
|
|
|
|
|
|
|
table that should be brought over due to the field additions. |
|
6151
|
|
|
|
|
|
|
|
|
6152
|
|
|
|
|
|
|
Its _SRoutines are augmented to add new ones from the second table. |
|
6153
|
|
|
|
|
|
|
|
|
6154
|
|
|
|
|
|
|
All other parameters of table 1 are untouched. |
|
6155
|
|
|
|
|
|
|
|
|
6156
|
|
|
|
|
|
|
The second table is not modified. No data structures will be shared |
|
6157
|
|
|
|
|
|
|
between the tables. Data is only copied. |
|
6158
|
|
|
|
|
|
|
|
|
6159
|
|
|
|
|
|
|
join_file() calls join() after creating a seond table from your $File. |
|
6160
|
|
|
|
|
|
|
|
|
6161
|
|
|
|
|
|
|
join_files() calls join_file() repeatedly for each file in a list, but |
|
6162
|
|
|
|
|
|
|
it is important to note that each file in the list of files to be |
|
6163
|
|
|
|
|
|
|
joined must have a $Key2 field -- AND, that any values looked up from |
|
6164
|
|
|
|
|
|
|
the second file will overwrite any values of the same key found in the |
|
6165
|
|
|
|
|
|
|
first file, and so on. You probably will not ever need join_files(). |
|
6166
|
|
|
|
|
|
|
It is mainly here for completeness. |
|
6167
|
|
|
|
|
|
|
|
|
6168
|
|
|
|
|
|
|
=cut |
|
6169
|
|
|
|
|
|
|
|
|
6170
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
|
6171
|
|
|
|
|
|
|
|
|
6172
|
|
|
|
|
|
|
sub append ## ($this, $OtherCTable) |
|
6173
|
|
|
|
|
|
|
{ |
|
6174
|
10
|
|
|
10
|
0
|
41
|
my $this = shift; |
|
6175
|
10
|
|
|
|
|
16
|
my ($that) = @_; |
|
6176
|
|
|
|
|
|
|
|
|
6177
|
10
|
|
|
|
|
13
|
my $Success; |
|
6178
|
|
|
|
|
|
|
|
|
6179
|
|
|
|
|
|
|
## Get all fields in $this, but only selected ones in $that |
|
6180
|
10
|
|
|
|
|
28
|
my $ThisFieldsAll = $this->fieldlist_all(); |
|
6181
|
10
|
|
|
|
|
23
|
my $ThatFields = $that->fieldlist(); |
|
6182
|
|
|
|
|
|
|
|
|
6183
|
|
|
|
|
|
|
## Figure out how many data fields in each of the tables. |
|
6184
|
10
|
|
|
|
|
25
|
my $ThisFieldCount = @$ThisFieldsAll+0; |
|
6185
|
10
|
|
|
|
|
15
|
my $ThatFieldCount = @$ThatFields +0; |
|
6186
|
|
|
|
|
|
|
|
|
6187
|
|
|
|
|
|
|
## We're going to bring over only the selected records in $that. |
|
6188
|
10
|
|
|
|
|
21
|
my $ThisSel = $this->selection(); |
|
6189
|
10
|
|
|
|
|
20
|
my $ThatSel = $that->selection(); |
|
6190
|
|
|
|
|
|
|
|
|
6191
|
|
|
|
|
|
|
## Figure out how many records there were to start with. |
|
6192
|
10
|
|
|
|
|
22
|
my $ThisLength = $this->length(); |
|
6193
|
10
|
|
|
|
|
16
|
my $ThatLength = @$ThatSel+0; |
|
6194
|
|
|
|
|
|
|
|
|
6195
|
|
|
|
|
|
|
## New record count is sum of the other two. |
|
6196
|
10
|
|
|
|
|
13
|
my $NewLength = $ThisLength + $ThatLength; |
|
6197
|
|
|
|
|
|
|
|
|
6198
|
|
|
|
|
|
|
## Create any missing columns not yet present in $this and, |
|
6199
|
|
|
|
|
|
|
## whether new or not, presize all vectors to the new length, |
|
6200
|
|
|
|
|
|
|
## which will create empty/undef entries as necessary. |
|
6201
|
|
|
|
|
|
|
|
|
6202
|
10
|
|
50
|
|
|
179
|
foreach (@$ThisFieldsAll, @$ThatFields) {$#{$this->{$_} ||= []} = ($NewLength - 1)}; |
|
|
72
|
|
|
|
|
81
|
|
|
|
72
|
|
|
|
|
735
|
|
|
6203
|
|
|
|
|
|
|
|
|
6204
|
|
|
|
|
|
|
## Then copy the field data from the second table into the already |
|
6205
|
|
|
|
|
|
|
## pre-sized columns in this one. |
|
6206
|
|
|
|
|
|
|
|
|
6207
|
10
|
|
|
|
|
185
|
foreach my $FieldName (@$ThatFields) |
|
6208
|
|
|
|
|
|
|
{ |
|
6209
|
34
|
|
|
|
|
48
|
my $NewVector = $this->{$FieldName}; |
|
6210
|
34
|
|
|
|
|
71
|
my $OldVector = $that->sel_get($FieldName, $ThatSel); |
|
6211
|
|
|
|
|
|
|
|
|
6212
|
34
|
|
|
|
|
65
|
foreach my $RecordNum (0..$#$OldVector) |
|
|
86
|
|
|
|
|
750
|
|
|
6213
|
|
|
|
|
|
|
{($NewVector->[$ThisLength + $RecordNum] = |
|
6214
|
|
|
|
|
|
|
$OldVector->[ $RecordNum])}; |
|
6215
|
|
|
|
|
|
|
} |
|
6216
|
|
|
|
|
|
|
|
|
6217
|
|
|
|
|
|
|
## Now all the data columns have been combined. We just have to |
|
6218
|
|
|
|
|
|
|
## combine any custom metadata. |
|
6219
|
|
|
|
|
|
|
|
|
6220
|
|
|
|
|
|
|
## If either table had a custom fieldlist, then make a new custom |
|
6221
|
|
|
|
|
|
|
## field list which is the result of concatenating both field |
|
6222
|
|
|
|
|
|
|
## lists together, without duplicates, and of course preserving |
|
6223
|
|
|
|
|
|
|
## the original order as completely as possible (with the order |
|
6224
|
|
|
|
|
|
|
## given in the first table taking precedence). |
|
6225
|
|
|
|
|
|
|
|
|
6226
|
10
|
50
|
33
|
|
|
35
|
if (defined($this->{_FieldList}) || |
|
6227
|
|
|
|
|
|
|
defined($that->{_FieldList})) |
|
6228
|
|
|
|
|
|
|
|
|
6229
|
|
|
|
|
|
|
{ |
|
6230
|
10
|
|
|
|
|
23
|
my $ThisFields = $this->fieldlist(); |
|
6231
|
10
|
|
|
|
|
221
|
my $ThatFields = $that->fieldlist(); |
|
6232
|
|
|
|
|
|
|
|
|
6233
|
|
|
|
|
|
|
## Make a hash mapping field names from both tables to the |
|
6234
|
|
|
|
|
|
|
## order they should appear |
|
6235
|
|
|
|
|
|
|
|
|
6236
|
10
|
|
|
|
|
18
|
my $FieldOrderHash = {}; |
|
6237
|
10
|
|
66
|
|
|
21
|
foreach (@$ThisFields, @$ThatFields) {$FieldOrderHash->{$_} ||= (keys %$FieldOrderHash) + 1}; |
|
|
69
|
|
|
|
|
226
|
|
|
6238
|
|
|
|
|
|
|
|
|
6239
|
10
|
|
|
|
|
33
|
my $FieldList = [sort {$FieldOrderHash->{$a} <=> $FieldOrderHash->{$b}} keys %$FieldOrderHash]; |
|
|
42
|
|
|
|
|
71
|
|
|
6240
|
|
|
|
|
|
|
|
|
6241
|
10
|
|
|
|
|
37
|
$this->{_FieldList} = $FieldList; |
|
6242
|
|
|
|
|
|
|
} |
|
6243
|
|
|
|
|
|
|
|
|
6244
|
|
|
|
|
|
|
## If either table had a custom sortorder, then make a new custom |
|
6245
|
|
|
|
|
|
|
## sort order which is the result of concatenating both orders |
|
6246
|
|
|
|
|
|
|
## together, without duplicates, and of course preserving the |
|
6247
|
|
|
|
|
|
|
## original order as completely as possible (with the order given |
|
6248
|
|
|
|
|
|
|
## in the first table taking precedence). |
|
6249
|
|
|
|
|
|
|
|
|
6250
|
10
|
50
|
33
|
|
|
50
|
if (defined($this->{_SortOrder}) || |
|
6251
|
|
|
|
|
|
|
defined($that->{_SortOrder})) |
|
6252
|
|
|
|
|
|
|
{ |
|
6253
|
0
|
|
|
|
|
0
|
my $ThisOrder = $this->sortorder(); |
|
6254
|
0
|
|
|
|
|
0
|
my $ThatOrder = $that->sortorder(); |
|
6255
|
|
|
|
|
|
|
|
|
6256
|
|
|
|
|
|
|
## Make a hash mapping field names from both lists to the |
|
6257
|
|
|
|
|
|
|
## order they should appear |
|
6258
|
|
|
|
|
|
|
|
|
6259
|
0
|
|
|
|
|
0
|
my $OrderHash = {}; |
|
6260
|
0
|
|
0
|
|
|
0
|
foreach (@$ThisOrder, @$ThatOrder) {$OrderHash->{$_} ||= (keys %$OrderHash) + 1}; |
|
|
0
|
|
|
|
|
0
|
|
|
6261
|
|
|
|
|
|
|
|
|
6262
|
0
|
|
|
|
|
0
|
my $OrderList = [sort {$OrderHash->{$a} <=> $OrderHash->{$b}} keys %$OrderHash]; |
|
|
0
|
|
|
|
|
0
|
|
|
6263
|
|
|
|
|
|
|
|
|
6264
|
0
|
|
|
|
|
0
|
$this->{_SortOrder} = $OrderList; |
|
6265
|
|
|
|
|
|
|
} |
|
6266
|
|
|
|
|
|
|
|
|
6267
|
|
|
|
|
|
|
## If either table had a custom selection, then create a new |
|
6268
|
|
|
|
|
|
|
## selection which is the concatenation of the two selections. |
|
6269
|
|
|
|
|
|
|
|
|
6270
|
10
|
100
|
100
|
|
|
46
|
if (defined($this->{_Selection}) || |
|
6271
|
|
|
|
|
|
|
defined($that->{_Selection})) |
|
6272
|
|
|
|
|
|
|
{ |
|
6273
|
3
|
|
|
|
|
11
|
$this->{_Selection} = [@$ThisSel, ## Original selected records... |
|
6274
|
|
|
|
|
|
|
## Plus an adjusted entry for newly-added ones. |
|
6275
|
|
|
|
|
|
|
($ThisLength .. ($ThisLength + @$ThatSel - 1)) |
|
6276
|
|
|
|
|
|
|
]; |
|
6277
|
|
|
|
|
|
|
} |
|
6278
|
|
|
|
|
|
|
|
|
6279
|
|
|
|
|
|
|
## If either table had custom sortspecs, then create a new |
|
6280
|
|
|
|
|
|
|
## sortspecs hash by starting with all the entries from the other |
|
6281
|
|
|
|
|
|
|
## table and adding/overwriting with those from this table. |
|
6282
|
|
|
|
|
|
|
|
|
6283
|
10
|
50
|
33
|
|
|
34
|
if (defined($this->{_SortSpecs}) || |
|
6284
|
|
|
|
|
|
|
defined($that->{_SortSpecs})) |
|
6285
|
|
|
|
|
|
|
{ |
|
6286
|
10
|
|
|
|
|
24
|
my $ThisSpecs = $this->sortspecs(); |
|
6287
|
10
|
|
|
|
|
20
|
my $ThatSpecs = $that->sortspecs(); |
|
6288
|
|
|
|
|
|
|
|
|
6289
|
10
|
|
|
|
|
37
|
$this->{_SortSpecs} = {%$ThatSpecs, %$ThisSpecs}; |
|
6290
|
|
|
|
|
|
|
} |
|
6291
|
|
|
|
|
|
|
|
|
6292
|
|
|
|
|
|
|
## If either table had custom sortroutines, then create a new |
|
6293
|
|
|
|
|
|
|
## sortroutines hash by starting with all the entries from the |
|
6294
|
|
|
|
|
|
|
## other table and adding/overwriting with those from this table. |
|
6295
|
|
|
|
|
|
|
|
|
6296
|
10
|
50
|
33
|
|
|
32
|
if (defined($this->{_SRoutines}) || |
|
6297
|
|
|
|
|
|
|
defined($that->{_SRoutines})) |
|
6298
|
|
|
|
|
|
|
{ |
|
6299
|
10
|
|
50
|
|
|
24
|
my $ThisRoutines = $this->{_SRoutines} || {}; |
|
6300
|
10
|
|
50
|
|
|
22
|
my $ThatRoutines = $that->{_SRoutines} || {}; |
|
6301
|
|
|
|
|
|
|
|
|
6302
|
10
|
|
|
|
|
29
|
$this->{_SRoutines} = {%$ThatRoutines, %$ThisRoutines}; |
|
6303
|
|
|
|
|
|
|
} |
|
6304
|
|
|
|
|
|
|
|
|
6305
|
|
|
|
|
|
|
## All other settings are kept from $this and those from $that are |
|
6306
|
|
|
|
|
|
|
## ignored. |
|
6307
|
|
|
|
|
|
|
|
|
6308
|
10
|
|
|
|
|
15
|
$Success = 1; |
|
6309
|
10
|
|
|
|
|
41
|
done: |
|
6310
|
|
|
|
|
|
|
return($Success); |
|
6311
|
|
|
|
|
|
|
} |
|
6312
|
|
|
|
|
|
|
|
|
6313
|
|
|
|
|
|
|
sub append_file |
|
6314
|
|
|
|
|
|
|
{ |
|
6315
|
2
|
|
|
2
|
0
|
3
|
my $this = shift; |
|
6316
|
2
|
|
|
|
|
5
|
my ($FileName, $Params) = @_; |
|
6317
|
|
|
|
|
|
|
|
|
6318
|
2
|
|
|
|
|
2
|
my $Success; |
|
6319
|
|
|
|
|
|
|
|
|
6320
|
|
|
|
|
|
|
## $Params argument is optional. If supplied, it must be a hash. |
|
6321
|
2
|
|
100
|
|
|
8
|
$Params ||= {}; |
|
6322
|
|
|
|
|
|
|
|
|
6323
|
|
|
|
|
|
|
## Create a new empty table object of the same class as $this and |
|
6324
|
|
|
|
|
|
|
## read just the specified file into it. (note: this could be a |
|
6325
|
|
|
|
|
|
|
## recursive call here). |
|
6326
|
|
|
|
|
|
|
|
|
6327
|
2
|
50
|
|
|
|
13
|
my $that = ref($this)->new($Params, $FileName) or goto done; |
|
6328
|
|
|
|
|
|
|
|
|
6329
|
|
|
|
|
|
|
## Append the data from $that table into this one. |
|
6330
|
|
|
|
|
|
|
|
|
6331
|
2
|
50
|
|
|
|
8
|
$this->append($that) or goto done; |
|
6332
|
|
|
|
|
|
|
|
|
6333
|
2
|
|
|
|
|
4
|
$Success = 1; |
|
6334
|
2
|
|
|
|
|
17
|
done: |
|
6335
|
|
|
|
|
|
|
return($Success); |
|
6336
|
|
|
|
|
|
|
} |
|
6337
|
|
|
|
|
|
|
|
|
6338
|
|
|
|
|
|
|
sub append_files |
|
6339
|
|
|
|
|
|
|
{ |
|
6340
|
109
|
|
|
109
|
0
|
134
|
my $this = shift; |
|
6341
|
109
|
|
|
|
|
157
|
my ($FileNames, $Params) = @_; |
|
6342
|
|
|
|
|
|
|
|
|
6343
|
109
|
|
|
|
|
107
|
my $Success; |
|
6344
|
|
|
|
|
|
|
|
|
6345
|
109
|
|
|
|
|
255
|
foreach my $FileName (@$FileNames) |
|
6346
|
|
|
|
|
|
|
{ |
|
6347
|
2
|
50
|
|
|
|
7
|
goto done unless $this->append_file($FileName, $Params); |
|
6348
|
|
|
|
|
|
|
} |
|
6349
|
|
|
|
|
|
|
|
|
6350
|
109
|
|
|
|
|
128
|
$Success = 1; |
|
6351
|
109
|
|
|
|
|
274
|
done: |
|
6352
|
|
|
|
|
|
|
return($Success); |
|
6353
|
|
|
|
|
|
|
} |
|
6354
|
|
|
|
|
|
|
|
|
6355
|
|
|
|
|
|
|
sub append_files_new ## Called by new() to process its file name args. |
|
6356
|
|
|
|
|
|
|
{ |
|
6357
|
109
|
|
|
109
|
0
|
137
|
my $this = shift; |
|
6358
|
109
|
|
|
|
|
149
|
my ($FileNames, $Params) = @_; |
|
6359
|
|
|
|
|
|
|
|
|
6360
|
109
|
|
|
|
|
133
|
my $Success; |
|
6361
|
|
|
|
|
|
|
|
|
6362
|
|
|
|
|
|
|
## First we read the first file, if any, into this object using |
|
6363
|
|
|
|
|
|
|
## the read() method. |
|
6364
|
|
|
|
|
|
|
|
|
6365
|
109
|
|
|
|
|
155
|
my $FirstFile = shift @$FileNames; |
|
6366
|
109
|
100
|
|
|
|
233
|
if (defined($FirstFile)) |
|
6367
|
|
|
|
|
|
|
{ |
|
6368
|
57
|
100
|
|
|
|
63
|
goto done unless $this->read(%{$Params || {}}, _FileName => $FirstFile); |
|
|
57
|
50
|
|
|
|
305
|
|
|
6369
|
|
|
|
|
|
|
} |
|
6370
|
|
|
|
|
|
|
|
|
6371
|
109
|
50
|
|
|
|
366
|
goto done unless $this->append_files($FileNames, $Params); |
|
6372
|
|
|
|
|
|
|
|
|
6373
|
109
|
|
|
|
|
145
|
$Success = 1; |
|
6374
|
109
|
|
|
|
|
266
|
done: |
|
6375
|
|
|
|
|
|
|
return($Success); |
|
6376
|
|
|
|
|
|
|
} |
|
6377
|
|
|
|
|
|
|
|
|
6378
|
|
|
|
|
|
|
sub combine |
|
6379
|
|
|
|
|
|
|
{ |
|
6380
|
6
|
|
|
6
|
0
|
10
|
my $this = shift; |
|
6381
|
6
|
|
|
|
|
8
|
my ($that) = @_; |
|
6382
|
|
|
|
|
|
|
|
|
6383
|
6
|
|
|
|
|
6
|
my $Success; |
|
6384
|
|
|
|
|
|
|
|
|
6385
|
|
|
|
|
|
|
## Get a snapshot of field lists before any modifications. |
|
6386
|
6
|
|
|
|
|
9
|
my $ThisFields = $this->fieldlist(); |
|
6387
|
6
|
|
|
|
|
12
|
my $ThatFields = $that->fieldlist(); |
|
6388
|
|
|
|
|
|
|
|
|
6389
|
|
|
|
|
|
|
## Bring in all (listed) fields from other table. |
|
6390
|
6
|
|
|
|
|
8
|
my $IncomingFields = $ThatFields; |
|
6391
|
|
|
|
|
|
|
|
|
6392
|
|
|
|
|
|
|
## Preserve any previous non-selection and force one to be saved |
|
6393
|
|
|
|
|
|
|
## in the interim. This will prevent $that->sel() from |
|
6394
|
|
|
|
|
|
|
## recalculating the selection each time if there is none. |
|
6395
|
6
|
|
|
|
|
7
|
my $OldSel = $that->{_Selection}; |
|
6396
|
6
|
|
|
|
|
12
|
$that->{_Selection} = $that->selection(); ## Might be a no-op |
|
6397
|
|
|
|
|
|
|
|
|
6398
|
|
|
|
|
|
|
## Copy columns from $that in selection order; (re)place into $this |
|
6399
|
6
|
|
|
|
|
11
|
foreach (@$IncomingFields) {$this->col_set($_, $that->sel($_))}; |
|
|
12
|
|
|
|
|
22
|
|
|
6400
|
|
|
|
|
|
|
|
|
6401
|
|
|
|
|
|
|
## Restore the possibly-undef selection in other table. |
|
6402
|
6
|
|
|
|
|
12
|
$that->{_Selection} = $OldSel; ## Might be a no-op |
|
6403
|
|
|
|
|
|
|
|
|
6404
|
|
|
|
|
|
|
## Extend any short columns (whether originating from other table |
|
6405
|
|
|
|
|
|
|
## or from this one) to be the same length as all others. |
|
6406
|
|
|
|
|
|
|
|
|
6407
|
6
|
|
|
|
|
15
|
$this->extend(); |
|
6408
|
|
|
|
|
|
|
|
|
6409
|
|
|
|
|
|
|
## If either table had a custom fieldlist, then make a new custom |
|
6410
|
|
|
|
|
|
|
## field list which is the result of concatenating both field |
|
6411
|
|
|
|
|
|
|
## lists together, without duplicates, and of course preserving |
|
6412
|
|
|
|
|
|
|
## the original order as completely as possible (with the order |
|
6413
|
|
|
|
|
|
|
## given in the first table taking precedence). |
|
6414
|
|
|
|
|
|
|
|
|
6415
|
6
|
50
|
33
|
|
|
20
|
if (defined($this->{_FieldList}) || |
|
6416
|
|
|
|
|
|
|
defined($that->{_FieldList})) |
|
6417
|
|
|
|
|
|
|
{ |
|
6418
|
|
|
|
|
|
|
## Make a hash mapping field names from both tables to the |
|
6419
|
|
|
|
|
|
|
## order they should appear |
|
6420
|
6
|
|
|
|
|
9
|
my $FieldOrderHash = {}; |
|
6421
|
6
|
|
66
|
|
|
13
|
foreach (@$ThisFields, @$ThatFields) {$FieldOrderHash->{$_} ||= (keys %$FieldOrderHash) + 1}; |
|
|
47
|
|
|
|
|
148
|
|
|
6422
|
|
|
|
|
|
|
|
|
6423
|
6
|
|
|
|
|
20
|
my $FieldList = [sort {$FieldOrderHash->{$a} <=> $FieldOrderHash->{$b}} keys %$FieldOrderHash]; |
|
|
60
|
|
|
|
|
75
|
|
|
6424
|
|
|
|
|
|
|
|
|
6425
|
6
|
|
|
|
|
19
|
$this->{_FieldList} = $FieldList; |
|
6426
|
|
|
|
|
|
|
} |
|
6427
|
|
|
|
|
|
|
|
|
6428
|
|
|
|
|
|
|
## If either table had a custom sortorder, then make a new custom |
|
6429
|
|
|
|
|
|
|
## sort order which is the result of concatenating both orders |
|
6430
|
|
|
|
|
|
|
## together, without duplicates, and of course preserving the |
|
6431
|
|
|
|
|
|
|
## original order as completely as possible (with the order given |
|
6432
|
|
|
|
|
|
|
## in the first table taking precedence). |
|
6433
|
|
|
|
|
|
|
|
|
6434
|
6
|
50
|
33
|
|
|
30
|
if (defined($this->{_SortOrder}) || |
|
6435
|
|
|
|
|
|
|
defined($that->{_SortOrder})) |
|
6436
|
|
|
|
|
|
|
{ |
|
6437
|
0
|
|
|
|
|
0
|
my $ThisOrder = $this->sortorder(); |
|
6438
|
0
|
|
|
|
|
0
|
my $ThatOrder = $that->sortorder(); |
|
6439
|
|
|
|
|
|
|
|
|
6440
|
|
|
|
|
|
|
## Make a hash mapping field names from both lists to the |
|
6441
|
|
|
|
|
|
|
## order they should appear |
|
6442
|
|
|
|
|
|
|
|
|
6443
|
0
|
|
|
|
|
0
|
my $OrderHash = {}; |
|
6444
|
0
|
|
0
|
|
|
0
|
foreach (@$ThisOrder, @$ThatOrder) {$OrderHash->{$_} ||= (keys %$OrderHash) + 1}; |
|
|
0
|
|
|
|
|
0
|
|
|
6445
|
|
|
|
|
|
|
|
|
6446
|
0
|
|
|
|
|
0
|
my $OrderList = [sort {$OrderHash->{$a} <=> $OrderHash->{$b}} keys %$OrderHash]; |
|
|
0
|
|
|
|
|
0
|
|
|
6447
|
|
|
|
|
|
|
|
|
6448
|
0
|
|
|
|
|
0
|
$this->{_SortOrder} = $OrderList; |
|
6449
|
|
|
|
|
|
|
} |
|
6450
|
|
|
|
|
|
|
|
|
6451
|
|
|
|
|
|
|
## If either table had custom sortspecs, then create a new |
|
6452
|
|
|
|
|
|
|
## sortspecs hash by starting with all the entries from the other |
|
6453
|
|
|
|
|
|
|
## table and adding/overwriting with those from this table. |
|
6454
|
|
|
|
|
|
|
|
|
6455
|
6
|
50
|
33
|
|
|
19
|
if (defined($this->{_SortSpecs}) || |
|
6456
|
|
|
|
|
|
|
defined($that->{_SortSpecs})) |
|
6457
|
|
|
|
|
|
|
{ |
|
6458
|
6
|
|
|
|
|
193
|
my $ThisSpecs = $this->sortspecs(); |
|
6459
|
6
|
|
|
|
|
14
|
my $ThatSpecs = $that->sortspecs(); |
|
6460
|
|
|
|
|
|
|
|
|
6461
|
6
|
|
|
|
|
21
|
$this->{_SortSpecs} = {%$ThatSpecs, %$ThisSpecs}; |
|
6462
|
|
|
|
|
|
|
} |
|
6463
|
|
|
|
|
|
|
|
|
6464
|
|
|
|
|
|
|
## If either table had custom sortroutines, then create a new |
|
6465
|
|
|
|
|
|
|
## sortroutines hash by starting with all the entries from the |
|
6466
|
|
|
|
|
|
|
## other table and adding/overwriting with those from this table. |
|
6467
|
|
|
|
|
|
|
|
|
6468
|
6
|
50
|
33
|
|
|
19
|
if (defined($this->{_SRoutines}) || |
|
6469
|
|
|
|
|
|
|
defined($that->{_SRoutines})) |
|
6470
|
|
|
|
|
|
|
{ |
|
6471
|
6
|
|
50
|
|
|
16
|
my $ThisRoutines = $this->{_SRoutines} || {}; |
|
6472
|
6
|
|
50
|
|
|
12
|
my $ThatRoutines = $that->{_SRoutines} || {}; |
|
6473
|
|
|
|
|
|
|
|
|
6474
|
6
|
|
|
|
|
16
|
$this->{_SRoutines} = {%$ThatRoutines, %$ThisRoutines}; |
|
6475
|
|
|
|
|
|
|
} |
|
6476
|
|
|
|
|
|
|
|
|
6477
|
|
|
|
|
|
|
## All other settings are kept from $this and those from $that are |
|
6478
|
|
|
|
|
|
|
## ignored. |
|
6479
|
|
|
|
|
|
|
|
|
6480
|
6
|
|
|
|
|
9
|
$Success = 1; |
|
6481
|
6
|
|
|
|
|
21
|
done: |
|
6482
|
|
|
|
|
|
|
return($Success); |
|
6483
|
|
|
|
|
|
|
} |
|
6484
|
|
|
|
|
|
|
|
|
6485
|
|
|
|
|
|
|
sub combine_file |
|
6486
|
|
|
|
|
|
|
{ |
|
6487
|
5
|
|
|
5
|
0
|
29
|
my $this = shift; |
|
6488
|
5
|
|
|
|
|
7
|
my ($FileName, $Params) = @_; |
|
6489
|
|
|
|
|
|
|
|
|
6490
|
5
|
|
|
|
|
6
|
my $Success; |
|
6491
|
|
|
|
|
|
|
|
|
6492
|
|
|
|
|
|
|
## $Params argument is optional. If supplied, it must be a hash. |
|
6493
|
5
|
|
50
|
|
|
39
|
$Params ||= {}; |
|
6494
|
|
|
|
|
|
|
|
|
6495
|
|
|
|
|
|
|
## Create a new empty table object of the same class as $this and |
|
6496
|
|
|
|
|
|
|
## read just the specified file into it. |
|
6497
|
|
|
|
|
|
|
|
|
6498
|
5
|
50
|
|
|
|
17
|
my $that = ref($this)->new($Params, $FileName) or goto done; |
|
6499
|
|
|
|
|
|
|
|
|
6500
|
|
|
|
|
|
|
## Combine the data from $that table into this one. |
|
6501
|
|
|
|
|
|
|
|
|
6502
|
5
|
50
|
|
|
|
15
|
$this->combine($that) or goto done; |
|
6503
|
|
|
|
|
|
|
|
|
6504
|
5
|
|
|
|
|
8
|
$Success = 1; |
|
6505
|
5
|
|
|
|
|
35
|
done: |
|
6506
|
|
|
|
|
|
|
return($Success); |
|
6507
|
|
|
|
|
|
|
} |
|
6508
|
|
|
|
|
|
|
|
|
6509
|
|
|
|
|
|
|
sub combine_files |
|
6510
|
|
|
|
|
|
|
{ |
|
6511
|
1
|
|
|
1
|
0
|
13
|
my $this = shift; |
|
6512
|
1
|
|
|
|
|
3
|
my ($FileNames, $Params) = @_; |
|
6513
|
|
|
|
|
|
|
|
|
6514
|
1
|
|
|
|
|
2
|
my $Success; |
|
6515
|
|
|
|
|
|
|
|
|
6516
|
1
|
|
|
|
|
3
|
foreach my $FileName (@$FileNames) |
|
6517
|
|
|
|
|
|
|
{ |
|
6518
|
1
|
50
|
|
|
|
6
|
goto done unless $this->combine_file($FileName, $Params); |
|
6519
|
|
|
|
|
|
|
} |
|
6520
|
|
|
|
|
|
|
|
|
6521
|
1
|
|
|
|
|
3
|
$Success = 1; |
|
6522
|
1
|
|
|
|
|
4
|
done: |
|
6523
|
|
|
|
|
|
|
return($Success); |
|
6524
|
|
|
|
|
|
|
} |
|
6525
|
|
|
|
|
|
|
|
|
6526
|
|
|
|
|
|
|
sub join |
|
6527
|
|
|
|
|
|
|
{ |
|
6528
|
5
|
|
|
5
|
0
|
31
|
my $this = shift; |
|
6529
|
5
|
|
|
|
|
9
|
my ($that, $Key1, $Key2, $Fields) = @_; |
|
6530
|
|
|
|
|
|
|
|
|
6531
|
5
|
|
|
|
|
7
|
my $Success; |
|
6532
|
|
|
|
|
|
|
|
|
6533
|
|
|
|
|
|
|
## $Key1 is required. |
|
6534
|
5
|
50
|
|
|
|
12
|
$this->warn("Key1 is required for join()"), goto done |
|
6535
|
|
|
|
|
|
|
unless ($Key1); |
|
6536
|
|
|
|
|
|
|
|
|
6537
|
|
|
|
|
|
|
## $Key2 defaults to the same as $Key1. |
|
6538
|
5
|
|
33
|
|
|
10
|
$Key2 ||= $Key1; |
|
6539
|
|
|
|
|
|
|
|
|
6540
|
|
|
|
|
|
|
## The fields we'll be getting can optionally be overridden by |
|
6541
|
|
|
|
|
|
|
## caller; otherwise, they're the field list of the other table. |
|
6542
|
5
|
|
33
|
|
|
17
|
my $IncomingFields = $Fields || $that->fieldlist(); |
|
6543
|
|
|
|
|
|
|
|
|
6544
|
|
|
|
|
|
|
## Cull $Key1 and $Key2 from the incoming field list. |
|
6545
|
5
|
50
|
|
|
|
9
|
$IncomingFields = [grep {($_ ne $Key1) && ($_ ne $Key2)} @$IncomingFields]; |
|
|
26
|
|
|
|
|
89
|
|
|
6546
|
|
|
|
|
|
|
|
|
6547
|
|
|
|
|
|
|
## Preserve any previous non-selection and force one to be saved |
|
6548
|
|
|
|
|
|
|
## in the interim. This will prevent $that->sel() from |
|
6549
|
|
|
|
|
|
|
## recalculating the selection each time if there is none. |
|
6550
|
5
|
|
|
|
|
11
|
my $OldSel = $that->{_Selection}; |
|
6551
|
5
|
|
|
|
|
11
|
$that->{_Selection} = $that->selection(); ## Might be a no-op |
|
6552
|
|
|
|
|
|
|
|
|
6553
|
|
|
|
|
|
|
## Make an index mapping values in $Key2 to record numbers in |
|
6554
|
|
|
|
|
|
|
## $that. We reverse the order of insertion into the $Index |
|
6555
|
|
|
|
|
|
|
## because we want the items earliest in selection order to have |
|
6556
|
|
|
|
|
|
|
## precedence in case keys are not unique as they should be. |
|
6557
|
|
|
|
|
|
|
|
|
6558
|
5
|
|
|
|
|
9
|
my $Index = {}; @$Index{reverse @{$that->sel($Key2)}} = reverse @{$that->{_Selection}}; |
|
|
5
|
|
|
|
|
6
|
|
|
|
5
|
|
|
|
|
15
|
|
|
|
5
|
|
|
|
|
12
|
|
|
6559
|
|
|
|
|
|
|
|
|
6560
|
|
|
|
|
|
|
## Get a list of record numbers in $this that we'll be copying data into. |
|
6561
|
5
|
|
|
|
|
14
|
my $Recs1 = $this->selection(); |
|
6562
|
|
|
|
|
|
|
|
|
6563
|
|
|
|
|
|
|
## Get a corresponding list of keys we're going to look up. |
|
6564
|
5
|
|
|
|
|
9
|
my $Key1s = $this->sel($Key1); |
|
6565
|
|
|
|
|
|
|
|
|
6566
|
|
|
|
|
|
|
## The default "record number" in table 2 is $that->length() |
|
6567
|
|
|
|
|
|
|
## ... i.e. an invalid record number past the end of the table. |
|
6568
|
|
|
|
|
|
|
## This will ensure that failed lookups result in lookups to this |
|
6569
|
|
|
|
|
|
|
## illegal record number, correctly producing undef in the |
|
6570
|
|
|
|
|
|
|
## corresponding joined fields, whereas looking up "undef" would |
|
6571
|
|
|
|
|
|
|
## have produced record number zero. |
|
6572
|
|
|
|
|
|
|
|
|
6573
|
5
|
|
|
|
|
9
|
my $DefaultRecNum = $that->length(); |
|
6574
|
|
|
|
|
|
|
|
|
6575
|
|
|
|
|
|
|
## Look up @$Key1s in @$Index to get a list of data-source record |
|
6576
|
|
|
|
|
|
|
## numbers in $that. Failed lookups map to $DefaultRecNum. |
|
6577
|
|
|
|
|
|
|
|
|
6578
|
5
|
100
|
|
|
|
16
|
my $Recs2 = [map {defined() ? $_ : $DefaultRecNum} @$Index{@$Key1s}]; |
|
|
15
|
|
|
|
|
29
|
|
|
6579
|
|
|
|
|
|
|
|
|
6580
|
|
|
|
|
|
|
## Copy data into selected positions within columns of $this, one |
|
6581
|
|
|
|
|
|
|
## column at a time, creating pre-sized columns in $this as |
|
6582
|
|
|
|
|
|
|
## necessary (col()). These array slice operations are very, very |
|
6583
|
|
|
|
|
|
|
## fast. |
|
6584
|
|
|
|
|
|
|
|
|
6585
|
5
|
|
|
|
|
11
|
foreach my $Field (@$IncomingFields) |
|
6586
|
|
|
|
|
|
|
{ |
|
6587
|
21
|
|
|
|
|
36
|
(@{$this->col($Field)}[@$Recs1] = ## Put values into selected records of $this |
|
|
21
|
|
|
|
|
36
|
|
|
6588
|
21
|
|
|
|
|
26
|
@{$that->col($Field)}[@$Recs2]); ## Get values from looked-up records of $that |
|
6589
|
|
|
|
|
|
|
} |
|
6590
|
|
|
|
|
|
|
|
|
6591
|
|
|
|
|
|
|
## Restore the possibly-undef selection in other table. |
|
6592
|
5
|
|
|
|
|
8
|
$that->{_Selection} = $OldSel; ## Might be a no-op |
|
6593
|
|
|
|
|
|
|
|
|
6594
|
|
|
|
|
|
|
## If this table had a custom fieldlist, then make a new custom |
|
6595
|
|
|
|
|
|
|
## field list which is the result of concatenating both field |
|
6596
|
|
|
|
|
|
|
## lists together, without duplicates, and of course preserving |
|
6597
|
|
|
|
|
|
|
## the original order as completely as possible (with the order |
|
6598
|
|
|
|
|
|
|
## given in the first table taking precedence). |
|
6599
|
|
|
|
|
|
|
|
|
6600
|
5
|
50
|
|
|
|
13
|
if (defined($this->{_FieldList})) |
|
6601
|
|
|
|
|
|
|
{ |
|
6602
|
5
|
|
|
|
|
10
|
my $ThisFields = $this->fieldlist(); |
|
6603
|
5
|
|
|
|
|
8
|
my $ThatFields = $IncomingFields; |
|
6604
|
|
|
|
|
|
|
|
|
6605
|
|
|
|
|
|
|
## Make a hash mapping field names from both tables to the |
|
6606
|
|
|
|
|
|
|
## order they should appear |
|
6607
|
|
|
|
|
|
|
|
|
6608
|
5
|
|
|
|
|
7
|
my $FieldOrderHash = {}; |
|
6609
|
5
|
|
66
|
|
|
12
|
foreach (@$ThisFields, @$ThatFields) {$FieldOrderHash->{$_} ||= (keys %$FieldOrderHash) + 1}; |
|
|
56
|
|
|
|
|
223
|
|
|
6610
|
|
|
|
|
|
|
|
|
6611
|
5
|
|
|
|
|
21
|
my $FieldList = [sort {$FieldOrderHash->{$a} <=> $FieldOrderHash->{$b}} keys %$FieldOrderHash]; |
|
|
64
|
|
|
|
|
86
|
|
|
6612
|
|
|
|
|
|
|
|
|
6613
|
5
|
|
|
|
|
23
|
$this->{_FieldList} = $FieldList; |
|
6614
|
|
|
|
|
|
|
} |
|
6615
|
|
|
|
|
|
|
|
|
6616
|
|
|
|
|
|
|
## If either table had custom sortspecs, then create a new |
|
6617
|
|
|
|
|
|
|
## sortspecs hash by starting with all the entries from the other |
|
6618
|
|
|
|
|
|
|
## table and adding/overwriting with those from this table. |
|
6619
|
|
|
|
|
|
|
|
|
6620
|
5
|
50
|
33
|
|
|
19
|
if (defined($this->{_SortSpecs}) || |
|
6621
|
|
|
|
|
|
|
defined($that->{_SortSpecs})) |
|
6622
|
|
|
|
|
|
|
{ |
|
6623
|
5
|
|
|
|
|
15
|
my $ThisSpecs = $this->sortspecs(); |
|
6624
|
5
|
|
|
|
|
9
|
my $ThatSpecs = $that->sortspecs(); |
|
6625
|
|
|
|
|
|
|
|
|
6626
|
5
|
|
|
|
|
16
|
$this->{_SortSpecs} = {%$ThatSpecs, %$ThisSpecs}; |
|
6627
|
|
|
|
|
|
|
} |
|
6628
|
|
|
|
|
|
|
|
|
6629
|
|
|
|
|
|
|
## If either table had custom sortroutines, then create a new |
|
6630
|
|
|
|
|
|
|
## sortroutines hash by starting with all the entries from the |
|
6631
|
|
|
|
|
|
|
## other table and adding/overwriting with those from this table. |
|
6632
|
|
|
|
|
|
|
|
|
6633
|
5
|
50
|
33
|
|
|
24
|
if (defined($this->{_SRoutines}) || |
|
6634
|
|
|
|
|
|
|
defined($that->{_SRoutines})) |
|
6635
|
|
|
|
|
|
|
{ |
|
6636
|
5
|
|
50
|
|
|
12
|
my $ThisRoutines = $this->{_SRoutines} || {}; |
|
6637
|
5
|
|
50
|
|
|
11
|
my $ThatRoutines = $that->{_SRoutines} || {}; |
|
6638
|
|
|
|
|
|
|
|
|
6639
|
5
|
|
|
|
|
13
|
$this->{_SRoutines} = {%$ThatRoutines, %$ThisRoutines}; |
|
6640
|
|
|
|
|
|
|
} |
|
6641
|
|
|
|
|
|
|
|
|
6642
|
|
|
|
|
|
|
## All other settings are kept from $this and those from $that are |
|
6643
|
|
|
|
|
|
|
## ignored. |
|
6644
|
|
|
|
|
|
|
|
|
6645
|
5
|
|
|
|
|
7
|
$Success = 1; |
|
6646
|
5
|
|
|
|
|
24
|
done: |
|
6647
|
|
|
|
|
|
|
return($Success); |
|
6648
|
|
|
|
|
|
|
} |
|
6649
|
|
|
|
|
|
|
|
|
6650
|
|
|
|
|
|
|
sub join_file |
|
6651
|
|
|
|
|
|
|
{ |
|
6652
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
|
6653
|
0
|
|
|
|
|
0
|
my ($FileName, $Params, $Key1, $Key2, $Fields) = @_; |
|
6654
|
|
|
|
|
|
|
|
|
6655
|
0
|
|
|
|
|
0
|
my $Success; |
|
6656
|
|
|
|
|
|
|
|
|
6657
|
|
|
|
|
|
|
## $Params argument may be undef. If supplied, it must be a hash. |
|
6658
|
0
|
|
0
|
|
|
0
|
$Params ||= {}; |
|
6659
|
|
|
|
|
|
|
|
|
6660
|
|
|
|
|
|
|
## Create a new empty table object of the same class as $this and |
|
6661
|
|
|
|
|
|
|
## read just the specified file into it. |
|
6662
|
|
|
|
|
|
|
|
|
6663
|
0
|
0
|
|
|
|
0
|
my $that = ref($this)->new($Params, $FileName) or goto done; |
|
6664
|
|
|
|
|
|
|
|
|
6665
|
|
|
|
|
|
|
## Join the data from $that table into this one. |
|
6666
|
|
|
|
|
|
|
|
|
6667
|
0
|
0
|
|
|
|
0
|
$this->join($that, $Key1, $Key2, $Fields) or goto done; |
|
6668
|
|
|
|
|
|
|
|
|
6669
|
0
|
|
|
|
|
0
|
$Success = 1; |
|
6670
|
0
|
|
|
|
|
0
|
done: |
|
6671
|
|
|
|
|
|
|
return($Success); |
|
6672
|
|
|
|
|
|
|
} |
|
6673
|
|
|
|
|
|
|
|
|
6674
|
|
|
|
|
|
|
sub join_files |
|
6675
|
|
|
|
|
|
|
{ |
|
6676
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
|
6677
|
0
|
|
|
|
|
0
|
my ($FileNames, $Params, $Key1, $Key2, $Fields) = @_; |
|
6678
|
|
|
|
|
|
|
|
|
6679
|
0
|
|
|
|
|
0
|
my $Success; |
|
6680
|
|
|
|
|
|
|
|
|
6681
|
0
|
|
|
|
|
0
|
foreach my $FileName (@$FileNames) |
|
6682
|
|
|
|
|
|
|
{ |
|
6683
|
0
|
0
|
|
|
|
0
|
goto done unless $this->join_file($FileName, $Params, $Key1, $Key2, $Fields); |
|
6684
|
|
|
|
|
|
|
} |
|
6685
|
|
|
|
|
|
|
|
|
6686
|
0
|
|
|
|
|
0
|
$Success = 1; |
|
6687
|
0
|
|
|
|
|
0
|
done: |
|
6688
|
|
|
|
|
|
|
return($Success); |
|
6689
|
|
|
|
|
|
|
} |
|
6690
|
|
|
|
|
|
|
|
|
6691
|
|
|
|
|
|
|
=pod |
|
6692
|
|
|
|
|
|
|
|
|
6693
|
|
|
|
|
|
|
=head1 INVERTING A TABLE'S ROWS/COLUMNS |
|
6694
|
|
|
|
|
|
|
|
|
6695
|
|
|
|
|
|
|
## Re-orient table's data using vals from $ColName as field names... |
|
6696
|
|
|
|
|
|
|
$t-invert($ColName) |
|
6697
|
|
|
|
|
|
|
|
|
6698
|
|
|
|
|
|
|
Sometimes a situation gives you a table that's initially organized |
|
6699
|
|
|
|
|
|
|
with column data in rows, and field names in one of the columns, so |
|
6700
|
|
|
|
|
|
|
you need to flip the table in order to be able to work meaningfully |
|
6701
|
|
|
|
|
|
|
with it. |
|
6702
|
|
|
|
|
|
|
|
|
6703
|
|
|
|
|
|
|
"Inverting" a table means to rewrite each row as a column. One row is |
|
6704
|
|
|
|
|
|
|
designated to be used as the field names. |
|
6705
|
|
|
|
|
|
|
|
|
6706
|
|
|
|
|
|
|
For example, consider this table: |
|
6707
|
|
|
|
|
|
|
|
|
6708
|
|
|
|
|
|
|
F01 F02 F03 F04 |
|
6709
|
|
|
|
|
|
|
------------------------ |
|
6710
|
|
|
|
|
|
|
First Chris Agnes James |
|
6711
|
|
|
|
|
|
|
Last Bart Marco Nixon |
|
6712
|
|
|
|
|
|
|
Age 22 33 44 |
|
6713
|
|
|
|
|
|
|
|
|
6714
|
|
|
|
|
|
|
Calling invert() using field names from "F01"... |
|
6715
|
|
|
|
|
|
|
|
|
6716
|
|
|
|
|
|
|
$t->invert('F01'); |
|
6717
|
|
|
|
|
|
|
|
|
6718
|
|
|
|
|
|
|
... would change the table to look like this: |
|
6719
|
|
|
|
|
|
|
|
|
6720
|
|
|
|
|
|
|
First Last Age |
|
6721
|
|
|
|
|
|
|
---------------- |
|
6722
|
|
|
|
|
|
|
Chris Bart 22 |
|
6723
|
|
|
|
|
|
|
Agnes Marco 33 |
|
6724
|
|
|
|
|
|
|
James Nixon 44 |
|
6725
|
|
|
|
|
|
|
|
|
6726
|
|
|
|
|
|
|
The field F01 which formerly contained the field names, is now gone, |
|
6727
|
|
|
|
|
|
|
and the remaining data columns have been converted from their old row |
|
6728
|
|
|
|
|
|
|
orientation into a column orientation. |
|
6729
|
|
|
|
|
|
|
|
|
6730
|
|
|
|
|
|
|
=cut |
|
6731
|
|
|
|
|
|
|
|
|
6732
|
|
|
|
|
|
|
sub invert |
|
6733
|
|
|
|
|
|
|
{ |
|
6734
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
|
6735
|
0
|
|
|
|
|
0
|
my ($HeaderField) = @_; |
|
6736
|
|
|
|
|
|
|
|
|
6737
|
0
|
|
|
|
|
0
|
my $Success; |
|
6738
|
|
|
|
|
|
|
|
|
6739
|
|
|
|
|
|
|
## Get new field names from an existing column and delete it at the same time. |
|
6740
|
0
|
0
|
|
|
|
0
|
my $NewColNames = $this->col_delete($HeaderField) or |
|
6741
|
|
|
|
|
|
|
$this->warn("Invalid field name given to invert() method"), goto done; |
|
6742
|
|
|
|
|
|
|
|
|
6743
|
|
|
|
|
|
|
## Get a hash of all existing (remaining) data columns. |
|
6744
|
0
|
|
|
|
|
0
|
my $OldColNames = $this->fieldlist_all(); |
|
6745
|
0
|
|
|
|
|
0
|
my $OldCols = $this->cols_hash($OldColNames); |
|
6746
|
|
|
|
|
|
|
|
|
6747
|
|
|
|
|
|
|
## Make the new columns... |
|
6748
|
0
|
|
|
|
|
0
|
my $NewCols = [map {$this->row_list($_, $OldColNames)} (0..$#$NewColNames)]; |
|
|
0
|
|
|
|
|
0
|
|
|
6749
|
|
|
|
|
|
|
|
|
6750
|
|
|
|
|
|
|
## Delete old columns from the object. |
|
6751
|
0
|
|
|
|
|
0
|
delete @$this{@$OldColNames}; |
|
6752
|
|
|
|
|
|
|
|
|
6753
|
|
|
|
|
|
|
## Add new columns |
|
6754
|
0
|
|
|
|
|
0
|
@$this{@$NewColNames} = @$NewCols; |
|
6755
|
|
|
|
|
|
|
|
|
6756
|
|
|
|
|
|
|
## Set the field name list... |
|
6757
|
0
|
|
|
|
|
0
|
$this->{_FieldList} = $NewColNames; |
|
6758
|
|
|
|
|
|
|
|
|
6759
|
0
|
|
|
|
|
0
|
$Success = 1; |
|
6760
|
0
|
|
|
|
|
0
|
done: |
|
6761
|
|
|
|
|
|
|
return($Success); |
|
6762
|
|
|
|
|
|
|
} |
|
6763
|
|
|
|
|
|
|
|
|
6764
|
|
|
|
|
|
|
=pod |
|
6765
|
|
|
|
|
|
|
|
|
6766
|
|
|
|
|
|
|
=head1 PROGRESS MESSAGES |
|
6767
|
|
|
|
|
|
|
|
|
6768
|
|
|
|
|
|
|
## Printing a progress message.... |
|
6769
|
|
|
|
|
|
|
|
|
6770
|
|
|
|
|
|
|
$t->progress($Msg) ## Print a message per current settings |
|
6771
|
|
|
|
|
|
|
|
|
6772
|
|
|
|
|
|
|
## Progress settings applying to this object only... |
|
6773
|
|
|
|
|
|
|
|
|
6774
|
|
|
|
|
|
|
$t->progress_get() ## Get current progress setting |
|
6775
|
|
|
|
|
|
|
|
|
6776
|
|
|
|
|
|
|
$t->progress_set(1) ## Use progress_default() method |
|
6777
|
|
|
|
|
|
|
$t->progress_set($Sub) ## Set a custom progress routine |
|
6778
|
|
|
|
|
|
|
$t->progress_set(0) ## Disable progress |
|
6779
|
|
|
|
|
|
|
$t->progress_set(undef) ## Use class's settings (default)... |
|
6780
|
|
|
|
|
|
|
|
|
6781
|
|
|
|
|
|
|
## Class's settings (for instances with _Progress == undef) |
|
6782
|
|
|
|
|
|
|
|
|
6783
|
|
|
|
|
|
|
$t->progress_class() ## Get current setting. |
|
6784
|
|
|
|
|
|
|
|
|
6785
|
|
|
|
|
|
|
$t->progress_class(1) ## Use progress_default() method |
|
6786
|
|
|
|
|
|
|
$t->progress_class($Sub) ## Set shared custom prog routine |
|
6787
|
|
|
|
|
|
|
$t->progress_class(0) ## Disable class-default progress |
|
6788
|
|
|
|
|
|
|
|
|
6789
|
|
|
|
|
|
|
Data::CTable->progress_class(..) ## Call without an object |
|
6790
|
|
|
|
|
|
|
|
|
6791
|
|
|
|
|
|
|
## Call builtin default progress method regardless of settings |
|
6792
|
|
|
|
|
|
|
|
|
6793
|
|
|
|
|
|
|
$t->progress_default($Msg) ## Default prog. routine for class |
|
6794
|
|
|
|
|
|
|
|
|
6795
|
|
|
|
|
|
|
## Generate a warning (used internally by other methods) |
|
6796
|
|
|
|
|
|
|
|
|
6797
|
|
|
|
|
|
|
$t->warn($Msg) ## In this class, calls progress_default() |
|
6798
|
|
|
|
|
|
|
|
|
6799
|
|
|
|
|
|
|
## Timed progress: print msg to start, then at most once/2 sec |
|
6800
|
|
|
|
|
|
|
|
|
6801
|
|
|
|
|
|
|
$t->progress_timed($Op, $Item) ## Re-print msg every 2 sec |
|
6802
|
|
|
|
|
|
|
$t->progress_timed($Op, $Item, $Pos, $Tot) ##... with % readout |
|
6803
|
|
|
|
|
|
|
$t->progress_timed($Op, $Item, $Pos, $Tot, $Wait) ## Not 1st x |
|
6804
|
|
|
|
|
|
|
|
|
6805
|
|
|
|
|
|
|
$t->progress_timed_default($Msg) ## Called by progress_timed |
|
6806
|
|
|
|
|
|
|
|
|
6807
|
|
|
|
|
|
|
Data::CTable is especially useful in creating batch-oriented |
|
6808
|
|
|
|
|
|
|
applications for processing data. As such, routines that may perform |
|
6809
|
|
|
|
|
|
|
time-consuming tasks will, by default, generate helpful progress |
|
6810
|
|
|
|
|
|
|
messages. The progress mechanism is highly customizable, however, to |
|
6811
|
|
|
|
|
|
|
suit the needs of applications that don't require this output, or that |
|
6812
|
|
|
|
|
|
|
require the output to go somewhere other than STDERR or the console. |
|
6813
|
|
|
|
|
|
|
|
|
6814
|
|
|
|
|
|
|
The default progress routine is one that prints a message with a |
|
6815
|
|
|
|
|
|
|
date/time stamp to STDERR if and only if STDERR is an interactive |
|
6816
|
|
|
|
|
|
|
terminal, and otherwise is silent. |
|
6817
|
|
|
|
|
|
|
|
|
6818
|
|
|
|
|
|
|
You could write a custom progress routine that does something else or |
|
6819
|
|
|
|
|
|
|
something in addition (e.g. logs to a file or syslog). The custom |
|
6820
|
|
|
|
|
|
|
routine could either be implemented by overriding the |
|
6821
|
|
|
|
|
|
|
progress_default() method in a subclass, or by calling progress_set() |
|
6822
|
|
|
|
|
|
|
in any instance. |
|
6823
|
|
|
|
|
|
|
|
|
6824
|
|
|
|
|
|
|
The custom progress routine, if any, is stored in the _Progress |
|
6825
|
|
|
|
|
|
|
parameter of the object. But use progress_set() and progress_get() to |
|
6826
|
|
|
|
|
|
|
access it. |
|
6827
|
|
|
|
|
|
|
|
|
6828
|
|
|
|
|
|
|
The interface for your custom progress routine should be: |
|
6829
|
|
|
|
|
|
|
|
|
6830
|
|
|
|
|
|
|
sub MyProgress {my ($Obj, $Message) = @_; chomp $Message; .....} |
|
6831
|
|
|
|
|
|
|
|
|
6832
|
|
|
|
|
|
|
In other words, the routine takes a single message which may or may |
|
6833
|
|
|
|
|
|
|
not have a trailing newline. It should always chomp the newline if |
|
6834
|
|
|
|
|
|
|
present, and then do its business... which generally will include |
|
6835
|
|
|
|
|
|
|
printing or logging a message (usually with a newline added). |
|
6836
|
|
|
|
|
|
|
|
|
6837
|
|
|
|
|
|
|
The default, built-in progress routine for Data::CTable is: |
|
6838
|
|
|
|
|
|
|
|
|
6839
|
|
|
|
|
|
|
sub progress_default |
|
6840
|
|
|
|
|
|
|
{ |
|
6841
|
|
|
|
|
|
|
my ($this, $msg) = @_; |
|
6842
|
|
|
|
|
|
|
chomp $msg; |
|
6843
|
|
|
|
|
|
|
|
|
6844
|
|
|
|
|
|
|
print STDERR (localtime() . " $msg\n") if -t STDERR; |
|
6845
|
|
|
|
|
|
|
|
|
6846
|
|
|
|
|
|
|
return(1); ## Indicate progress actually completed |
|
6847
|
|
|
|
|
|
|
} |
|
6848
|
|
|
|
|
|
|
|
|
6849
|
|
|
|
|
|
|
Of course, you are free to call this method directly at any time, and |
|
6850
|
|
|
|
|
|
|
it will do its thing regardless of other progress-enabling settings. |
|
6851
|
|
|
|
|
|
|
But the preferred way is to first set the settings and then call |
|
6852
|
|
|
|
|
|
|
progress(). |
|
6853
|
|
|
|
|
|
|
|
|
6854
|
|
|
|
|
|
|
The warn() method always calls progress_default() -- i.e. warnings |
|
6855
|
|
|
|
|
|
|
will display even if progress is otherwise disabled or overridden at |
|
6856
|
|
|
|
|
|
|
the object or class level. However, you could create a subclass that |
|
6857
|
|
|
|
|
|
|
changes warn()'s behavior if desired. (For example, it could just |
|
6858
|
|
|
|
|
|
|
call perl's builtin warn function, or be even more forceful, |
|
6859
|
|
|
|
|
|
|
generating warnings even if STDERR is not a terminal, for example.) |
|
6860
|
|
|
|
|
|
|
|
|
6861
|
|
|
|
|
|
|
The progress_set() method may be used to override the progress routine |
|
6862
|
|
|
|
|
|
|
for an individual object (set to 1/true for default behavior, or |
|
6863
|
|
|
|
|
|
|
0/undef/false to disable progress for that object entirely). |
|
6864
|
|
|
|
|
|
|
|
|
6865
|
|
|
|
|
|
|
Call progress_class() to set similar values to control the global |
|
6866
|
|
|
|
|
|
|
default behavior (e.g. turning on/off default progress behavior for |
|
6867
|
|
|
|
|
|
|
all instances), but be cautious about using this approach in any |
|
6868
|
|
|
|
|
|
|
environment where other programs might be accessing the same loaded |
|
6869
|
|
|
|
|
|
|
class data, since the setting is stored in a class-owned global |
|
6870
|
|
|
|
|
|
|
($Data::CTable::DefaultProgress). |
|
6871
|
|
|
|
|
|
|
|
|
6872
|
|
|
|
|
|
|
Manipulating the class-default settings is only recommended in batch |
|
6873
|
|
|
|
|
|
|
or shell-script environments, not in mod_perl Web applications where |
|
6874
|
|
|
|
|
|
|
the module stays loaded into the Perl environment across multiple |
|
6875
|
|
|
|
|
|
|
invocations, for example. |
|
6876
|
|
|
|
|
|
|
|
|
6877
|
|
|
|
|
|
|
If you want a particular method (e.g. read() but not write()) to be |
|
6878
|
|
|
|
|
|
|
silent, you could make a subclass and could override that method with |
|
6879
|
|
|
|
|
|
|
an implementation that first disables progress, calls the SUPER:: |
|
6880
|
|
|
|
|
|
|
method, and then restores the progress setting to its original |
|
6881
|
|
|
|
|
|
|
setting. |
|
6882
|
|
|
|
|
|
|
|
|
6883
|
|
|
|
|
|
|
=head2 Timed progress |
|
6884
|
|
|
|
|
|
|
|
|
6885
|
|
|
|
|
|
|
Timed progress is a way of printing periodically-recurring progress |
|
6886
|
|
|
|
|
|
|
messages about potentially time-consuming processes to the terminal. |
|
6887
|
|
|
|
|
|
|
|
|
6888
|
|
|
|
|
|
|
For example, consider the following messages which might appear every |
|
6889
|
|
|
|
|
|
|
2 seconds during a lengthy read() operation: |
|
6890
|
|
|
|
|
|
|
|
|
6891
|
|
|
|
|
|
|
Reading... 0 (0%) |
|
6892
|
|
|
|
|
|
|
Reading... 2000 (4%) |
|
6893
|
|
|
|
|
|
|
... |
|
6894
|
|
|
|
|
|
|
Reading... 38000 (96%) |
|
6895
|
|
|
|
|
|
|
Reading... 40000 (100%) |
|
6896
|
|
|
|
|
|
|
|
|
6897
|
|
|
|
|
|
|
The progress_timed() method is called internally by potentially |
|
6898
|
|
|
|
|
|
|
time-consuming processes (read(), write(), and sort()), and you may |
|
6899
|
|
|
|
|
|
|
want to call it yourself from your own scripts, to produce |
|
6900
|
|
|
|
|
|
|
weary-programmer-soothing visual output during otherwise |
|
6901
|
|
|
|
|
|
|
panic-producing long delays. |
|
6902
|
|
|
|
|
|
|
|
|
6903
|
|
|
|
|
|
|
Generally, progress_timed() is called with the $Wait parameter set to |
|
6904
|
|
|
|
|
|
|
true, which delays the display of any messages until 2 seconds have |
|
6905
|
|
|
|
|
|
|
passed, so no messages will be displayed unless the process actually |
|
6906
|
|
|
|
|
|
|
does end up being slower than 2 seconds. |
|
6907
|
|
|
|
|
|
|
|
|
6908
|
|
|
|
|
|
|
Parameters are: |
|
6909
|
|
|
|
|
|
|
|
|
6910
|
|
|
|
|
|
|
$Op The string that identifies the "operation" taking place |
|
6911
|
|
|
|
|
|
|
$Item A milestone such as a number or datum to indicate progress |
|
6912
|
|
|
|
|
|
|
$Pos A numeric position against the (maybe estimated) baseline |
|
6913
|
|
|
|
|
|
|
$Tot The baseline. If estimated, don't re-estimate too often |
|
6914
|
|
|
|
|
|
|
$Wait If true, skip printing the first message for this $Op |
|
6915
|
|
|
|
|
|
|
|
|
6916
|
|
|
|
|
|
|
All parameters except $Op are optional. |
|
6917
|
|
|
|
|
|
|
|
|
6918
|
|
|
|
|
|
|
progress_timed() has a throttle that keeps it from re-triggering more |
|
6919
|
|
|
|
|
|
|
often than every 2 seconds for any given sequence of the same $Op. |
|
6920
|
|
|
|
|
|
|
The clock is restarted each time you call it with a different $Op or |
|
6921
|
|
|
|
|
|
|
$Tot from the previous call (on the assumption that if the operation |
|
6922
|
|
|
|
|
|
|
or the baseline changes then that fact should be noted). |
|
6923
|
|
|
|
|
|
|
|
|
6924
|
|
|
|
|
|
|
The messages printed will start with "$Op... ". |
|
6925
|
|
|
|
|
|
|
|
|
6926
|
|
|
|
|
|
|
If you supply $Item, which could be a number or a string, the messages |
|
6927
|
|
|
|
|
|
|
will then show the $Item after the $Op. |
|
6928
|
|
|
|
|
|
|
|
|
6929
|
|
|
|
|
|
|
If you supply BOTH $Pos and $Tot, then a percentage will be calculated |
|
6930
|
|
|
|
|
|
|
and added to the readout; otherwise omitted. |
|
6931
|
|
|
|
|
|
|
|
|
6932
|
|
|
|
|
|
|
If you supply $Wait, the first message (only) that uses this $Op will |
|
6933
|
|
|
|
|
|
|
be skipped, and the next one won't appear for at least 2 seconds. |
|
6934
|
|
|
|
|
|
|
|
|
6935
|
|
|
|
|
|
|
If using $Pos and $Tot to display percentages for your user, be sure |
|
6936
|
|
|
|
|
|
|
to call progress_timed() one final time when $Pos == $Tot so your user |
|
6937
|
|
|
|
|
|
|
sees the satisfying 100% milestone. This "completion" call will not |
|
6938
|
|
|
|
|
|
|
be skipped even if 2 seconds have not passed since the previous timed |
|
6939
|
|
|
|
|
|
|
progress message was printed. |
|
6940
|
|
|
|
|
|
|
|
|
6941
|
|
|
|
|
|
|
Althought progress_timed() is designed to cut down on too much visual |
|
6942
|
|
|
|
|
|
|
output when called often in a tight loop, remember that it still takes |
|
6943
|
|
|
|
|
|
|
some processing time to call it and so if you call it too frequently, |
|
6944
|
|
|
|
|
|
|
you're slowing down the very loop you wish were running faster. |
|
6945
|
|
|
|
|
|
|
|
|
6946
|
|
|
|
|
|
|
So, you might want to call it every tenth or 100th or even 1000th time |
|
6947
|
|
|
|
|
|
|
through a tight loop, instead of every time through, using the mod (%) |
|
6948
|
|
|
|
|
|
|
operator: |
|
6949
|
|
|
|
|
|
|
|
|
6950
|
|
|
|
|
|
|
$t->progress_timed(....) if ($LoopCount % 100) == 0; |
|
6951
|
|
|
|
|
|
|
|
|
6952
|
|
|
|
|
|
|
progress_timed_default() is the method called internally by |
|
6953
|
|
|
|
|
|
|
progress_timed() to actually print the messages it has prepared. In |
|
6954
|
|
|
|
|
|
|
this implementation, progress_timed_default() just calls |
|
6955
|
|
|
|
|
|
|
progress_default(). That is, it ignores all other progress-inhibiting |
|
6956
|
|
|
|
|
|
|
or -enhancing settings so delay-soothing messages will print on the |
|
6957
|
|
|
|
|
|
|
terminal even if other messages are turned off. |
|
6958
|
|
|
|
|
|
|
|
|
6959
|
|
|
|
|
|
|
This is because the author assumes that even if you don't want all |
|
6960
|
|
|
|
|
|
|
those other progress messages, you might still want these ones that |
|
6961
|
|
|
|
|
|
|
explain long delays. If you REALLY don't, then just make yourself a |
|
6962
|
|
|
|
|
|
|
lightweight subclass where progress_timed_default() is a no-op, or |
|
6963
|
|
|
|
|
|
|
maybe calls regular progress(). For example: |
|
6964
|
|
|
|
|
|
|
|
|
6965
|
|
|
|
|
|
|
BEGIN {package Data::CTable::Silent; use vars qw(@ISA); |
|
6966
|
|
|
|
|
|
|
@ISA=qw(Data::CTable); sub progress_timed_default{}} |
|
6967
|
|
|
|
|
|
|
|
|
6968
|
|
|
|
|
|
|
## Later... |
|
6969
|
|
|
|
|
|
|
my $t = Data::CTable::Silent->new(...); |
|
6970
|
|
|
|
|
|
|
|
|
6971
|
|
|
|
|
|
|
|
|
6972
|
|
|
|
|
|
|
=cut |
|
6973
|
|
|
|
|
|
|
|
|
6974
|
|
|
|
|
|
|
$Data::CTable::DefaultProgress = 1; |
|
6975
|
|
|
|
|
|
|
|
|
6976
|
|
|
|
|
|
|
sub progress_set |
|
6977
|
|
|
|
|
|
|
{ |
|
6978
|
5
|
|
|
5
|
0
|
30
|
my $this = shift; |
|
6979
|
5
|
|
|
|
|
8
|
my ($ProgSetting) = @_; |
|
6980
|
|
|
|
|
|
|
|
|
6981
|
5
|
|
|
|
|
14
|
$this->{_Progress} = $ProgSetting; |
|
6982
|
|
|
|
|
|
|
} |
|
6983
|
|
|
|
|
|
|
|
|
6984
|
|
|
|
|
|
|
sub progress_class |
|
6985
|
|
|
|
|
|
|
{ |
|
6986
|
199
|
|
|
199
|
0
|
366
|
my $Ignored = shift; |
|
6987
|
199
|
|
|
|
|
264
|
my ($ProgSetting) = @_; |
|
6988
|
|
|
|
|
|
|
|
|
6989
|
|
|
|
|
|
|
## Set if specified... |
|
6990
|
199
|
100
|
|
|
|
424
|
$Data::CTable::DefaultProgress = $ProgSetting if defined($ProgSetting); |
|
6991
|
|
|
|
|
|
|
|
|
6992
|
|
|
|
|
|
|
## Return.. |
|
6993
|
199
|
|
|
|
|
346
|
return($Data::CTable::DefaultProgress); |
|
6994
|
|
|
|
|
|
|
} |
|
6995
|
|
|
|
|
|
|
|
|
6996
|
|
|
|
|
|
|
sub progress_get |
|
6997
|
|
|
|
|
|
|
{ |
|
6998
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
|
6999
|
|
|
|
|
|
|
|
|
7000
|
0
|
|
|
|
|
0
|
my $ProgSetting = $this->{_Progress}; |
|
7001
|
|
|
|
|
|
|
|
|
7002
|
0
|
|
|
|
|
0
|
return($ProgSetting); |
|
7003
|
|
|
|
|
|
|
} |
|
7004
|
|
|
|
|
|
|
|
|
7005
|
|
|
|
|
|
|
sub progress_default |
|
7006
|
|
|
|
|
|
|
{ |
|
7007
|
7
|
|
|
7
|
0
|
18
|
my ($this, $msg) = @_; |
|
7008
|
7
|
|
|
|
|
23
|
chomp $msg; |
|
7009
|
|
|
|
|
|
|
|
|
7010
|
7
|
50
|
|
|
|
65
|
print STDERR (localtime() . " $msg\n") if -t STDERR; |
|
7011
|
|
|
|
|
|
|
|
|
7012
|
7
|
|
|
|
|
78
|
return(1); ## Indicate progress actually completed |
|
7013
|
|
|
|
|
|
|
} |
|
7014
|
|
|
|
|
|
|
|
|
7015
|
|
|
|
|
|
|
sub progress |
|
7016
|
|
|
|
|
|
|
{ |
|
7017
|
193
|
|
|
193
|
1
|
1965
|
my $this = shift; |
|
7018
|
193
|
|
|
|
|
262
|
my ($msg) = @_; |
|
7019
|
|
|
|
|
|
|
|
|
7020
|
193
|
|
|
|
|
326
|
my $Prog1 = $this->{_Progress}; ## First check object's progress setting |
|
7021
|
193
|
|
|
|
|
371
|
my $Prog2 = $this->progress_class(); ## Then check class's setting |
|
7022
|
|
|
|
|
|
|
|
|
7023
|
|
|
|
|
|
|
## Calling regular progress resets the timers & ops in |
|
7024
|
|
|
|
|
|
|
## progress_timed... |
|
7025
|
|
|
|
|
|
|
|
|
7026
|
193
|
|
|
|
|
472
|
delete $this->{_ProgTimeInfo}; |
|
7027
|
|
|
|
|
|
|
|
|
7028
|
|
|
|
|
|
|
## First examine object setting to find a progress routine... |
|
7029
|
|
|
|
|
|
|
|
|
7030
|
193
|
100
|
|
|
|
376
|
return(&$Prog1($this, $msg)) if ref($Prog1) eq 'CODE'; ## Code ref: return it. |
|
7031
|
191
|
100
|
|
|
|
389
|
return($this->progress_default($msg)) if $Prog1; ## true: use default progress. |
|
7032
|
185
|
100
|
|
|
|
321
|
return(undef) if defined($Prog1); ## false but defined: no progress. |
|
7033
|
|
|
|
|
|
|
|
|
7034
|
|
|
|
|
|
|
## undef: fall through to class settings... |
|
7035
|
|
|
|
|
|
|
|
|
7036
|
181
|
100
|
|
|
|
302
|
return(&$Prog2($this, $msg)) if ref($Prog2) eq 'CODE'; ## Code ref: return it. |
|
7037
|
179
|
50
|
|
|
|
303
|
return($this->progress_default($msg)) if $Prog2; ## true: use default progress. |
|
7038
|
179
|
|
|
|
|
268
|
return(undef); ## false/undef: no progress. |
|
7039
|
|
|
|
|
|
|
} |
|
7040
|
|
|
|
|
|
|
|
|
7041
|
|
|
|
|
|
|
sub progress_timed |
|
7042
|
|
|
|
|
|
|
{ |
|
7043
|
188159
|
|
|
188159
|
0
|
582041
|
my $this = shift; |
|
7044
|
188159
|
|
|
|
|
282096
|
my ($Op, $Item, $Pos, $Tot, $Wait) = @_; |
|
7045
|
|
|
|
|
|
|
|
|
7046
|
|
|
|
|
|
|
## Get params from previous call if any. |
|
7047
|
188159
|
100
|
|
|
|
200829
|
my ($LastOp, $LastItem, $LastPos, $LastTot, $LastTime) = @{$this->{_ProgTimeInfo} || []}; |
|
|
188159
|
|
|
|
|
555883
|
|
|
7048
|
|
|
|
|
|
|
|
|
7049
|
|
|
|
|
|
|
## print &Dumper([$Op, $Item, $Pos, $Tot, $Wait], [$LastOp, $LastItem, $LastPos, $LastTot, $LastTime]); |
|
7050
|
|
|
|
|
|
|
|
|
7051
|
|
|
|
|
|
|
## Get elapsed time. |
|
7052
|
188159
|
|
|
|
|
241832
|
my $Time = time(); |
|
7053
|
188159
|
|
66
|
|
|
369130
|
my $Elapsed = $Time - ($LastTime || $Time); |
|
7054
|
|
|
|
|
|
|
|
|
7055
|
|
|
|
|
|
|
## We're on the "same" operation if the $Op name is the same and |
|
7056
|
|
|
|
|
|
|
## the total (baseline) is the same. Otherwise treat as new op. |
|
7057
|
|
|
|
|
|
|
|
|
7058
|
188159
|
|
|
|
|
257075
|
my $SameOp = (($Op eq $LastOp)); |
|
7059
|
188159
|
|
66
|
|
|
641931
|
my $SameOpAndTot = ($SameOp && ($Tot == $LastTot)); |
|
7060
|
188159
|
|
100
|
|
|
593417
|
my $Finished = ($Tot && ($Pos == $Tot)); |
|
7061
|
|
|
|
|
|
|
|
|
7062
|
|
|
|
|
|
|
## We trigger a message to print if we've been on the same op for |
|
7063
|
|
|
|
|
|
|
## 2 seconds or more, OR this is a new op. |
|
7064
|
|
|
|
|
|
|
|
|
7065
|
188159
|
|
100
|
|
|
1554885
|
my $Trigger = (($SameOpAndTot && ($Elapsed >= 2)) || ## Yes if same op & time has passed... |
|
7066
|
|
|
|
|
|
|
($SameOpAndTot && $Finished) || ## Yes if we're finished (100%). |
|
7067
|
|
|
|
|
|
|
!$SameOpAndTot); ## Yes if new op |
|
7068
|
|
|
|
|
|
|
|
|
7069
|
|
|
|
|
|
|
## Quit now if nothing to do. |
|
7070
|
188159
|
100
|
|
|
|
471098
|
goto done unless $Trigger; |
|
7071
|
|
|
|
|
|
|
|
|
7072
|
|
|
|
|
|
|
## Otherwise print message and save details for next time around. |
|
7073
|
17
|
100
|
66
|
|
|
125
|
my $Percent = sprintf("(%2d\%)", int(($Pos * 100) / $Tot)) if (defined($Pos) && $Tot); |
|
7074
|
|
|
|
|
|
|
|
|
7075
|
|
|
|
|
|
|
## If we've been asked to "wait", we skip actually printing the |
|
7076
|
|
|
|
|
|
|
## message this the first time, but act as if we did (starting the timer). |
|
7077
|
|
|
|
|
|
|
|
|
7078
|
17
|
100
|
100
|
|
|
110
|
my $RetVal = $this->progress_timed_default("$Op... $Item $Percent") |
|
7079
|
|
|
|
|
|
|
unless (!$SameOp && $Wait); ## Skip first-time message if $Wait. |
|
7080
|
|
|
|
|
|
|
|
|
7081
|
17
|
|
|
|
|
65
|
$this->{_ProgTimeInfo} = [$Op, $Item, $Pos, $Tot, $Time]; |
|
7082
|
|
|
|
|
|
|
|
|
7083
|
188159
|
|
|
|
|
440607
|
done: |
|
7084
|
|
|
|
|
|
|
return($RetVal); |
|
7085
|
|
|
|
|
|
|
} |
|
7086
|
|
|
|
|
|
|
|
|
7087
|
|
|
|
|
|
|
sub progress_timed_default |
|
7088
|
|
|
|
|
|
|
{ |
|
7089
|
6
|
|
|
6
|
0
|
82
|
my $this = shift; |
|
7090
|
6
|
|
|
|
|
13
|
my ($msg) = @_; |
|
7091
|
|
|
|
|
|
|
|
|
7092
|
6
|
|
|
|
|
37
|
return($this->progress_default("$msg")); |
|
7093
|
|
|
|
|
|
|
} |
|
7094
|
|
|
|
|
|
|
|
|
7095
|
|
|
|
|
|
|
sub warn |
|
7096
|
|
|
|
|
|
|
{ |
|
7097
|
1
|
|
|
1
|
0
|
3
|
my $this = shift; |
|
7098
|
1
|
|
|
|
|
4
|
my ($msg) = @_; |
|
7099
|
|
|
|
|
|
|
|
|
7100
|
1
|
|
|
|
|
6
|
return($this->progress_default("WARNING: $msg")); |
|
7101
|
|
|
|
|
|
|
} |
|
7102
|
|
|
|
|
|
|
|
|
7103
|
|
|
|
|
|
|
=pod |
|
7104
|
|
|
|
|
|
|
|
|
7105
|
|
|
|
|
|
|
=head1 DEBUGGING / DUMPING |
|
7106
|
|
|
|
|
|
|
|
|
7107
|
|
|
|
|
|
|
## Print some debugging output... |
|
7108
|
|
|
|
|
|
|
|
|
7109
|
|
|
|
|
|
|
$t->out() ## Pretty-print $t using Data::ShowTable |
|
7110
|
|
|
|
|
|
|
|
|
7111
|
|
|
|
|
|
|
$t->dump() ## Dump $t using Data::Dumper |
|
7112
|
|
|
|
|
|
|
$t->dump($x, $y) ## Dump anything else using Data::Dumper |
|
7113
|
|
|
|
|
|
|
|
|
7114
|
|
|
|
|
|
|
## Print some debugging output and then die. |
|
7115
|
|
|
|
|
|
|
|
|
7116
|
|
|
|
|
|
|
die $t->out() ## Same but die afterwards. |
|
7117
|
|
|
|
|
|
|
|
|
7118
|
|
|
|
|
|
|
die $t->dump() ## Same but die afterwards. |
|
7119
|
|
|
|
|
|
|
die $t->dump($x, $y) ## Same but die afterwards. |
|
7120
|
|
|
|
|
|
|
|
|
7121
|
|
|
|
|
|
|
These two methods can be very helpful in debugging your scripts. |
|
7122
|
|
|
|
|
|
|
|
|
7123
|
|
|
|
|
|
|
The out() method, which has many options, is described in complete |
|
7124
|
|
|
|
|
|
|
detail in the section below titled "FORMATTED TABLES". In short, it |
|
7125
|
|
|
|
|
|
|
prints a nicely-formatted diagram of $t, obeying the custom field list |
|
7126
|
|
|
|
|
|
|
if any and custom selection if any. |
|
7127
|
|
|
|
|
|
|
|
|
7128
|
|
|
|
|
|
|
The dump() method uses the Data::Dumper module to call &Dumper() on |
|
7129
|
|
|
|
|
|
|
the table itself (by default) and prints the result to STDERR. If you |
|
7130
|
|
|
|
|
|
|
specify any number of other values, those will be dumped instead using |
|
7131
|
|
|
|
|
|
|
a single call to &Dumper (rather than individually). |
|
7132
|
|
|
|
|
|
|
|
|
7133
|
|
|
|
|
|
|
=head2 Optional module dependencies |
|
7134
|
|
|
|
|
|
|
|
|
7135
|
|
|
|
|
|
|
These methods require the otherwise-optional modules shown here: |
|
7136
|
|
|
|
|
|
|
|
|
7137
|
|
|
|
|
|
|
out() Data::ShowTable |
|
7138
|
|
|
|
|
|
|
dump() Data::Dumper |
|
7139
|
|
|
|
|
|
|
|
|
7140
|
|
|
|
|
|
|
You'll get a warning at runtime if you try to call either method |
|
7141
|
|
|
|
|
|
|
without the appropriate module installed on your system. |
|
7142
|
|
|
|
|
|
|
|
|
7143
|
|
|
|
|
|
|
=cut |
|
7144
|
|
|
|
|
|
|
|
|
7145
|
|
|
|
|
|
|
sub dump |
|
7146
|
|
|
|
|
|
|
{ |
|
7147
|
0
|
|
|
0
|
0
|
0
|
my $this = shift; |
|
7148
|
0
|
|
|
|
|
0
|
my (@Things) = @_; |
|
7149
|
|
|
|
|
|
|
|
|
7150
|
|
|
|
|
|
|
## Default is to dump the object. |
|
7151
|
0
|
0
|
|
|
|
0
|
@Things = ($this) unless @Things; |
|
7152
|
|
|
|
|
|
|
|
|
7153
|
0
|
0
|
|
|
|
0
|
if ($HaveDumper) |
|
7154
|
|
|
|
|
|
|
{ |
|
7155
|
0
|
|
|
|
|
0
|
print STDERR &Dumper(@Things); |
|
7156
|
|
|
|
|
|
|
} |
|
7157
|
|
|
|
|
|
|
else |
|
7158
|
|
|
|
|
|
|
{ |
|
7159
|
0
|
|
|
|
|
0
|
carp("Data::Dumper module is not installed. Can't dump."); |
|
7160
|
|
|
|
|
|
|
} |
|
7161
|
|
|
|
|
|
|
|
|
7162
|
0
|
|
|
|
|
0
|
return(1); |
|
7163
|
|
|
|
|
|
|
} |
|
7164
|
|
|
|
|
|
|
|
|
7165
|
|
|
|
|
|
|
=pod |
|
7166
|
|
|
|
|
|
|
|
|
7167
|
|
|
|
|
|
|
=head1 MISCELLANEOUS UTILITY METHODS |
|
7168
|
|
|
|
|
|
|
|
|
7169
|
|
|
|
|
|
|
The following utilities are methods of the Data::CTable object. They |
|
7170
|
|
|
|
|
|
|
may be called directly by clients, subclassed, or used by subclass |
|
7171
|
|
|
|
|
|
|
implementations as needed. |
|
7172
|
|
|
|
|
|
|
|
|
7173
|
|
|
|
|
|
|
## Get cache file path (all args optional: defaulted from $t) |
|
7174
|
|
|
|
|
|
|
|
|
7175
|
|
|
|
|
|
|
$f = $t->prep_cache_file($FileName, $CacheExtension, $CacheSubDir) |
|
7176
|
|
|
|
|
|
|
|
|
7177
|
|
|
|
|
|
|
## Verify all directories in a path, creating any needed ones. |
|
7178
|
|
|
|
|
|
|
|
|
7179
|
|
|
|
|
|
|
$ok = $t->verify_or_create_path($DirPath, $Sep) |
|
7180
|
|
|
|
|
|
|
|
|
7181
|
|
|
|
|
|
|
## Testing readability / writeability of a proposed file |
|
7182
|
|
|
|
|
|
|
|
|
7183
|
|
|
|
|
|
|
$ok = $t->try_file_read ($Path); ## Opens for read; then closes |
|
7184
|
|
|
|
|
|
|
$ok = $t->try_file_write($Path); ## Opens for write; then deletes |
|
7185
|
|
|
|
|
|
|
|
|
7186
|
|
|
|
|
|
|
## Getting parameters from object with optional overrides |
|
7187
|
|
|
|
|
|
|
|
|
7188
|
|
|
|
|
|
|
$param = $t->getparam($Params, $Param) |
|
7189
|
|
|
|
|
|
|
|
|
7190
|
|
|
|
|
|
|
prep_cache_file() is the internal method used by both read() and |
|
7191
|
|
|
|
|
|
|
write() to calculate the name of a cache file to be used for a given |
|
7192
|
|
|
|
|
|
|
$FileName. |
|
7193
|
|
|
|
|
|
|
|
|
7194
|
|
|
|
|
|
|
It calculates the path to the cache file that corresponds to the given |
|
7195
|
|
|
|
|
|
|
$FileName (which may be a bare file name, a relative path, or a |
|
7196
|
|
|
|
|
|
|
partial path, as long as it obeys the current platform's path format |
|
7197
|
|
|
|
|
|
|
rules). All arguments are optional and if absent (undef), will be |
|
7198
|
|
|
|
|
|
|
defaulted from the corresponding parameters in $t. |
|
7199
|
|
|
|
|
|
|
|
|
7200
|
|
|
|
|
|
|
In addition to calculating the path and file name, it also prepends |
|
7201
|
|
|
|
|
|
|
the "current directory" path if there was no path. Then it checks |
|
7202
|
|
|
|
|
|
|
that all directories mentioned in the path actually exist. If not, it |
|
7203
|
|
|
|
|
|
|
fails. Then, it checks that EITHER the file exists and is readable, |
|
7204
|
|
|
|
|
|
|
OR it does not exist but would be writeable in that directory. If any |
|
7205
|
|
|
|
|
|
|
of these directory creations or file checks fails, then undef is |
|
7206
|
|
|
|
|
|
|
returned (and there would be no cache file). |
|
7207
|
|
|
|
|
|
|
|
|
7208
|
|
|
|
|
|
|
You may call it with no arguments on a file that has been read() to |
|
7209
|
|
|
|
|
|
|
find the path to the cache file that may have been used and/or |
|
7210
|
|
|
|
|
|
|
created, if any. |
|
7211
|
|
|
|
|
|
|
|
|
7212
|
|
|
|
|
|
|
You may call it with a file name that was written to, to see what the |
|
7213
|
|
|
|
|
|
|
corresponding written cache file would be. |
|
7214
|
|
|
|
|
|
|
|
|
7215
|
|
|
|
|
|
|
For example: |
|
7216
|
|
|
|
|
|
|
|
|
7217
|
|
|
|
|
|
|
## Get name of cache file used or created by read and delete it. |
|
7218
|
|
|
|
|
|
|
|
|
7219
|
|
|
|
|
|
|
$RCache = $t->prep_cache_file() and unlink($RCache); |
|
7220
|
|
|
|
|
|
|
|
|
7221
|
|
|
|
|
|
|
## Cache on write() and get name of file and delete it. |
|
7222
|
|
|
|
|
|
|
|
|
7223
|
|
|
|
|
|
|
$Written = $t->write(_CacheOnWrite=>1, _FileName=>"Foo.txt"); |
|
7224
|
|
|
|
|
|
|
$WCache = $t->prep_cache_file($Written) and unlink($WCache); |
|
7225
|
|
|
|
|
|
|
|
|
7226
|
|
|
|
|
|
|
verify_or_create_path() is the internal routine used by read(), |
|
7227
|
|
|
|
|
|
|
write(), and the cache-writing logic, that makes sure a requested file |
|
7228
|
|
|
|
|
|
|
path exists (by creating it if necessary and possible) before any file |
|
7229
|
|
|
|
|
|
|
is written by this module. |
|
7230
|
|
|
|
|
|
|
|
|
7231
|
|
|
|
|
|
|
(If you don't like this module's tendency to try to create |
|
7232
|
|
|
|
|
|
|
directories, make yourself a subclass in which this routine simply |
|
7233
|
|
|
|
|
|
|
checks -d on its $Path argument and returns the result.) |
|
7234
|
|
|
|
|
|
|
|
|
7235
|
|
|
|
|
|
|
It must be called with a full or partial path TO A DIRECTORY, NOT A |
|
7236
|
|
|
|
|
|
|
FILE. You may supply $Sep, a platform-appropriate separator character |
|
7237
|
|
|
|
|
|
|
(which defaults correctly for the runtime platform if you don't). |
|
7238
|
|
|
|
|
|
|
|
|
7239
|
|
|
|
|
|
|
Returns true if the path verification and/or creation ultimately |
|
7240
|
|
|
|
|
|
|
succeeded, false otherwise (meaning that, after this call, there is no |
|
7241
|
|
|
|
|
|
|
such directory on the system and so you should not try to write a file |
|
7242
|
|
|
|
|
|
|
there). |
|
7243
|
|
|
|
|
|
|
|
|
7244
|
|
|
|
|
|
|
try_file_read() and try_file_write() are the internal methods called |
|
7245
|
|
|
|
|
|
|
by prep_cache_file() as well as by read() and write() to preflight |
|
7246
|
|
|
|
|
|
|
proposed file reading and writing locations. |
|
7247
|
|
|
|
|
|
|
|
|
7248
|
|
|
|
|
|
|
try_file_read() opens a file for read and closes it again; returns |
|
7249
|
|
|
|
|
|
|
true if the open was possible. |
|
7250
|
|
|
|
|
|
|
|
|
7251
|
|
|
|
|
|
|
try_file_write() opens a file for write and closes it again, deleting |
|
7252
|
|
|
|
|
|
|
it if successful. Returns true if the open for write and the delete |
|
7253
|
|
|
|
|
|
|
were successful. (Be aware that this call will actually delete any |
|
7254
|
|
|
|
|
|
|
existing file by this name.) |
|
7255
|
|
|
|
|
|
|
|
|
7256
|
|
|
|
|
|
|
The reason that failure to delete causes try_file_write() to fail is |
|
7257
|
|
|
|
|
|
|
that successful cacheing depends on the ability to delete cache files |
|
7258
|
|
|
|
|
|
|
as well as create them or write to them. A file in a location that |
|
7259
|
|
|
|
|
|
|
couldn't be deleted will not be used for cacheing. |
|
7260
|
|
|
|
|
|
|
|
|
7261
|
|
|
|
|
|
|
getparam() looks up a named parameter in a params hash if it exists |
|
7262
|
|
|
|
|
|
|
there, otherwise looks it up in the object, thereby allowing $Params |
|
7263
|
|
|
|
|
|
|
to shadow any parameters in $this. |
|
7264
|
|
|
|
|
|
|
|
|
7265
|
|
|
|
|
|
|
This internal routine is used by any methods that allow overriding of |
|
7266
|
|
|
|
|
|
|
parameters in the object when using a named-parameter calling |
|
7267
|
|
|
|
|
|
|
interface. It should be used by any subclasses that also wish to use |
|
7268
|
|
|
|
|
|
|
a named-parameter calling convention. For example: |
|
7269
|
|
|
|
|
|
|
|
|
7270
|
|
|
|
|
|
|
my $this = shift; |
|
7271
|
|
|
|
|
|
|
my $Params = (@_ == 1 ? {_FieldList => $_[0]} : {@_}); |
|
7272
|
|
|
|
|
|
|
|
|
7273
|
|
|
|
|
|
|
my($FieldList, $Selection) = map {$this->getparam($Params, $_)} |
|
7274
|
|
|
|
|
|
|
qw(_FieldList _Selection); |
|
7275
|
|
|
|
|
|
|
|
|
7276
|
|
|
|
|
|
|
=cut |
|
7277
|
|
|
|
|
|
|
|
|
7278
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
|
7279
|
|
|
|
|
|
|
|
|
7280
|
|
|
|
|
|
|
sub prep_cache_file |
|
7281
|
|
|
|
|
|
|
{ |
|
7282
|
53
|
|
|
53
|
0
|
90
|
my $this = shift; |
|
7283
|
53
|
|
|
|
|
89
|
my($FileName, $CacheExtension, $CacheSubDir) = @_; |
|
7284
|
|
|
|
|
|
|
|
|
7285
|
53
|
|
|
|
|
65
|
my $Success; |
|
7286
|
|
|
|
|
|
|
|
|
7287
|
53
|
|
66
|
|
|
115
|
$FileName ||= $this->{_FileName}; |
|
7288
|
53
|
|
66
|
|
|
102
|
$CacheExtension ||= $this->{_CacheExtension}; |
|
7289
|
53
|
|
66
|
|
|
93
|
$CacheSubDir ||= $this->{_CacheSubDir}; |
|
7290
|
|
|
|
|
|
|
|
|
7291
|
|
|
|
|
|
|
## Break the path into its parts... |
|
7292
|
1
|
|
|
1
|
|
10
|
use File::Basename qw(fileparse); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
506
|
|
|
7293
|
53
|
|
|
|
|
1923
|
my ($Basename, $Path, $Ext) = fileparse($FileName, '\.[^\.]+'); |
|
7294
|
|
|
|
|
|
|
|
|
7295
|
|
|
|
|
|
|
## Figure out what the path separator should be... |
|
7296
|
53
|
|
|
|
|
98
|
my ($Sep, $Up, $Cur) = @{$this->path_info()}{qw(sep up cur)}; |
|
|
53
|
|
|
|
|
187
|
|
|
7297
|
|
|
|
|
|
|
|
|
7298
|
|
|
|
|
|
|
## FileDir is guaranteed to either be empty or have a trailing |
|
7299
|
|
|
|
|
|
|
## separator (see: man File::Basename). If empty, we set it to |
|
7300
|
|
|
|
|
|
|
## $Cur (the current directory). |
|
7301
|
|
|
|
|
|
|
|
|
7302
|
53
|
50
|
|
|
|
187
|
my $FileDir = (length($Path) ? $Path : $Cur); |
|
7303
|
|
|
|
|
|
|
|
|
7304
|
|
|
|
|
|
|
## Ensure $CacheSubDir is either empty or has a trailing separator... |
|
7305
|
|
|
|
|
|
|
|
|
7306
|
53
|
|
|
|
|
414
|
$CacheSubDir =~ s/([^\Q$Sep\E])$/$1$Sep/; |
|
7307
|
|
|
|
|
|
|
|
|
7308
|
|
|
|
|
|
|
## Check whether it's an absolute path (and not really a "sub"-dir) |
|
7309
|
53
|
|
|
|
|
131
|
my $Absolute = &path_is_absolute($CacheSubDir); |
|
7310
|
|
|
|
|
|
|
|
|
7311
|
|
|
|
|
|
|
## $CacheDir is $FileDir with $CacheSubDir appended. $CacheSubDir |
|
7312
|
|
|
|
|
|
|
## may be empty, meaning $FileDir is to be used for the cache |
|
7313
|
|
|
|
|
|
|
## files. But if it's an absolute path, it stands alone. |
|
7314
|
|
|
|
|
|
|
|
|
7315
|
53
|
100
|
|
|
|
155
|
my $CacheDir = ($Absolute ? $CacheSubDir : "$FileDir$CacheSubDir"); |
|
7316
|
|
|
|
|
|
|
|
|
7317
|
|
|
|
|
|
|
## Now we need to make sure that CacheDir exists OR try to create |
|
7318
|
|
|
|
|
|
|
## it. (Warning will have already happened if necessary.) |
|
7319
|
53
|
50
|
|
|
|
152
|
goto done unless $this->verify_or_create_path($CacheDir, $Sep); |
|
7320
|
|
|
|
|
|
|
|
|
7321
|
|
|
|
|
|
|
## Verify that the dir is writeable. |
|
7322
|
53
|
50
|
|
|
|
673
|
$this->warn("Cache directory $CacheDir is read-only"), goto done |
|
7323
|
|
|
|
|
|
|
unless -r $CacheDir; |
|
7324
|
|
|
|
|
|
|
|
|
7325
|
|
|
|
|
|
|
## Full path: note $CacheExtension and $Ext may be empty. |
|
7326
|
53
|
|
|
|
|
136
|
my $CacheFilePath = "$CacheDir$Basename$Ext$CacheExtension"; |
|
7327
|
|
|
|
|
|
|
|
|
7328
|
|
|
|
|
|
|
## If the cache path and the full path end up being the same |
|
7329
|
|
|
|
|
|
|
## (probably because the _CacheSubDir and _CacheExtension are both |
|
7330
|
|
|
|
|
|
|
## empty), we bail. Obviously, we don't want to risk overwriting |
|
7331
|
|
|
|
|
|
|
## the original data with the cache (or trying to read cache data |
|
7332
|
|
|
|
|
|
|
## from a text file). |
|
7333
|
|
|
|
|
|
|
|
|
7334
|
53
|
50
|
|
|
|
117
|
$this->warn("Can't cache $FileName without either _CacheSubDir or _CacheExtension"), goto done |
|
7335
|
|
|
|
|
|
|
if $CacheFilePath eq $FileName; |
|
7336
|
|
|
|
|
|
|
|
|
7337
|
|
|
|
|
|
|
## Pre-flight the cache file: ensure we can either read it or |
|
7338
|
|
|
|
|
|
|
## touch and then delete it. |
|
7339
|
|
|
|
|
|
|
|
|
7340
|
53
|
50
|
66
|
|
|
173
|
$this->warn("Cache file $CacheFilePath cannot be created/overwritten: $!"), goto done |
|
7341
|
|
|
|
|
|
|
unless ($this->try_file_read ($CacheFilePath) || |
|
7342
|
|
|
|
|
|
|
$this->try_file_write($CacheFilePath)); |
|
7343
|
|
|
|
|
|
|
|
|
7344
|
53
|
|
|
|
|
83
|
$Success = 1; |
|
7345
|
53
|
50
|
|
|
|
522
|
done: |
|
7346
|
|
|
|
|
|
|
return($Success ? $CacheFilePath : undef); |
|
7347
|
|
|
|
|
|
|
} |
|
7348
|
|
|
|
|
|
|
|
|
7349
|
|
|
|
|
|
|
sub verify_or_create_path |
|
7350
|
|
|
|
|
|
|
{ |
|
7351
|
61
|
|
|
61
|
0
|
101
|
my $this = shift; |
|
7352
|
61
|
|
|
|
|
93
|
my ($Dir, $Sep) = @_; |
|
7353
|
|
|
|
|
|
|
|
|
7354
|
|
|
|
|
|
|
## Get default value for $Sep if not supplied. |
|
7355
|
61
|
|
33
|
|
|
135
|
$Sep ||= ${$this->path_info()}{sep}; |
|
|
0
|
|
|
|
|
0
|
|
|
7356
|
|
|
|
|
|
|
|
|
7357
|
|
|
|
|
|
|
## $Dir might end in $Sep; split strips trailing one if so. |
|
7358
|
61
|
|
|
|
|
292
|
my $Parts = [split(/\Q$Sep\E/, $Dir)]; |
|
7359
|
|
|
|
|
|
|
|
|
7360
|
61
|
|
|
|
|
138
|
my $WholePath = ""; |
|
7361
|
61
|
|
|
|
|
119
|
foreach (@$Parts) |
|
7362
|
|
|
|
|
|
|
{ |
|
7363
|
114
|
|
|
|
|
193
|
$WholePath = "$WholePath$_$Sep"; |
|
7364
|
114
|
50
|
|
|
|
1737
|
next if -d $WholePath; |
|
7365
|
|
|
|
|
|
|
|
|
7366
|
|
|
|
|
|
|
## Directory does not exist. We need to make it. |
|
7367
|
|
|
|
|
|
|
|
|
7368
|
|
|
|
|
|
|
## mkdir($WholePath, 0777) or |
|
7369
|
|
|
|
|
|
|
## $this->warn("Failed to create directory '$WholePath': $!"), last; |
|
7370
|
|
|
|
|
|
|
|
|
7371
|
|
|
|
|
|
|
## On some platforms (e.g. Darwin), perl's mkdir fails if |
|
7372
|
|
|
|
|
|
|
## there's a trailing separator. Others tolerate its absence, |
|
7373
|
|
|
|
|
|
|
## so we remove it. |
|
7374
|
|
|
|
|
|
|
|
|
7375
|
0
|
|
|
|
|
0
|
(my $TryDir = $WholePath) =~ s/\Q$Sep\E$//; |
|
7376
|
|
|
|
|
|
|
|
|
7377
|
0
|
0
|
|
|
|
0
|
mkdir($TryDir, 0777) or |
|
7378
|
|
|
|
|
|
|
$this->warn("Failed to create directory '$TryDir': $!"), last; |
|
7379
|
|
|
|
|
|
|
} |
|
7380
|
|
|
|
|
|
|
|
|
7381
|
61
|
|
|
|
|
777
|
return(-d $Dir); |
|
7382
|
|
|
|
|
|
|
} |
|
7383
|
|
|
|
|
|
|
|
|
7384
|
|
|
|
|
|
|
sub try_file_write ## like a "touch" but deletes the file if it succeeds. |
|
7385
|
|
|
|
|
|
|
{ |
|
7386
|
10
|
|
|
10
|
0
|
20
|
my $this = shift; |
|
7387
|
10
|
|
|
|
|
17
|
my ($Path) = @_; |
|
7388
|
|
|
|
|
|
|
|
|
7389
|
10
|
|
|
|
|
14
|
my $Success; |
|
7390
|
|
|
|
|
|
|
|
|
7391
|
|
|
|
|
|
|
## Try creating it. |
|
7392
|
1
|
|
|
1
|
|
8
|
use IO::File; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
314
|
|
|
7393
|
10
|
|
|
|
|
69
|
my $File = IO::File->new(">$Path"); |
|
7394
|
|
|
|
|
|
|
|
|
7395
|
|
|
|
|
|
|
## Created: close and delete it. |
|
7396
|
10
|
50
|
|
|
|
1304
|
$File->close(), unlink($Path) if $File; |
|
7397
|
|
|
|
|
|
|
|
|
7398
|
|
|
|
|
|
|
## Failed: bail. |
|
7399
|
10
|
50
|
|
|
|
810
|
goto done unless $File; |
|
7400
|
|
|
|
|
|
|
|
|
7401
|
|
|
|
|
|
|
## If we couldn't unlink, fail. The ability to delete a failed or |
|
7402
|
|
|
|
|
|
|
## part-written cache file is a critical part of cacheing. |
|
7403
|
|
|
|
|
|
|
|
|
7404
|
10
|
50
|
|
|
|
178
|
goto done if -e $Path; |
|
7405
|
|
|
|
|
|
|
|
|
7406
|
10
|
|
|
|
|
20
|
$Success = 1; |
|
7407
|
10
|
|
|
|
|
59
|
done: |
|
7408
|
|
|
|
|
|
|
return($Success); |
|
7409
|
|
|
|
|
|
|
} |
|
7410
|
|
|
|
|
|
|
|
|
7411
|
|
|
|
|
|
|
sub try_file_read ## Verifies that a file exists / can be read... |
|
7412
|
|
|
|
|
|
|
{ |
|
7413
|
53
|
|
|
53
|
0
|
73
|
my $this = shift; |
|
7414
|
53
|
|
|
|
|
76
|
my ($Path) = @_; |
|
7415
|
|
|
|
|
|
|
|
|
7416
|
53
|
|
|
|
|
71
|
my $Success; |
|
7417
|
|
|
|
|
|
|
|
|
7418
|
|
|
|
|
|
|
## Try opening it. |
|
7419
|
1
|
|
|
1
|
|
5
|
use IO::File; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
516
|
|
|
7420
|
53
|
100
|
|
|
|
394
|
my $File = IO::File->new("<$Path") or goto done; |
|
7421
|
|
|
|
|
|
|
|
|
7422
|
51
|
|
|
|
|
3867
|
$Success = 1; |
|
7423
|
53
|
|
|
|
|
966
|
done: |
|
7424
|
|
|
|
|
|
|
return($Success); |
|
7425
|
|
|
|
|
|
|
} |
|
7426
|
|
|
|
|
|
|
|
|
7427
|
|
|
|
|
|
|
sub getparam |
|
7428
|
|
|
|
|
|
|
{ |
|
7429
|
801
|
|
|
801
|
0
|
950
|
my $this = shift; |
|
7430
|
801
|
|
|
|
|
1036
|
my ($Params, $Param) = @_; |
|
7431
|
|
|
|
|
|
|
|
|
7432
|
801
|
100
|
|
|
|
2778
|
return(exists($Params->{$Param}) ? |
|
7433
|
|
|
|
|
|
|
( $Params->{$Param}) : |
|
7434
|
|
|
|
|
|
|
( $this->{$Param}) ); |
|
7435
|
|
|
|
|
|
|
} |
|
7436
|
|
|
|
|
|
|
|
|
7437
|
|
|
|
|
|
|
|
|
7438
|
|
|
|
|
|
|
|
|
7439
|
|
|
|
|
|
|
=pod |
|
7440
|
|
|
|
|
|
|
|
|
7441
|
|
|
|
|
|
|
=head1 GENERAL-PURPOSE UTILITY FUNCTIONS |
|
7442
|
|
|
|
|
|
|
|
|
7443
|
|
|
|
|
|
|
These general-purpose utility routines are defined in the Data::CTable |
|
7444
|
|
|
|
|
|
|
module but are not method calls. You may optionally import them or |
|
7445
|
|
|
|
|
|
|
call them by their fully-qualified name. |
|
7446
|
|
|
|
|
|
|
|
|
7447
|
|
|
|
|
|
|
use Data::CTable qw( |
|
7448
|
|
|
|
|
|
|
guess_endings |
|
7449
|
|
|
|
|
|
|
guess_delimiter |
|
7450
|
|
|
|
|
|
|
path_info |
|
7451
|
|
|
|
|
|
|
path_is_absolute |
|
7452
|
|
|
|
|
|
|
min |
|
7453
|
|
|
|
|
|
|
max |
|
7454
|
|
|
|
|
|
|
); |
|
7455
|
|
|
|
|
|
|
|
|
7456
|
|
|
|
|
|
|
## File-format guessing |
|
7457
|
|
|
|
|
|
|
|
|
7458
|
|
|
|
|
|
|
my $E = &guess_endings($IOHandle) ## Guess txt file line endings |
|
7459
|
|
|
|
|
|
|
my $D = &guess_delimiter($String) ## Tab if found, else comma |
|
7460
|
|
|
|
|
|
|
|
|
7461
|
|
|
|
|
|
|
## Cross-platform file path analysis |
|
7462
|
|
|
|
|
|
|
|
|
7463
|
|
|
|
|
|
|
my $Info = path_info(); ## Hash: 3 of platform's path values: |
|
7464
|
|
|
|
|
|
|
my ($Sep, ## ... path separator ( / on Unix) |
|
7465
|
|
|
|
|
|
|
$Up, ## ... "up" component (../ on Unix) |
|
7466
|
|
|
|
|
|
|
$Cur) = ## ... curr. dir path ( ./ on Unix) |
|
7467
|
|
|
|
|
|
|
@$Info{qw(sep up cur)}; |
|
7468
|
|
|
|
|
|
|
|
|
7469
|
|
|
|
|
|
|
my $Abs = path_is_absolute($Path) ## Check path type |
|
7470
|
|
|
|
|
|
|
|
|
7471
|
|
|
|
|
|
|
## Our old favorites min and max |
|
7472
|
|
|
|
|
|
|
|
|
7473
|
|
|
|
|
|
|
$x = max($x, 0); ## Should have been part of Perl... |
|
7474
|
|
|
|
|
|
|
$x = min($x, 100); |
|
7475
|
|
|
|
|
|
|
|
|
7476
|
|
|
|
|
|
|
guess_endings() tries to figure out whether an open IO::File handle |
|
7477
|
|
|
|
|
|
|
has DOS, Mac, or Unix file endings. It reads successively large |
|
7478
|
|
|
|
|
|
|
blocks of the file until it either finds evidence of at least two |
|
7479
|
|
|
|
|
|
|
separate line endings (of any type, but presumably they are the same), |
|
7480
|
|
|
|
|
|
|
or until it reaches the end of the file. Then, it takes the resulting |
|
7481
|
|
|
|
|
|
|
block and searches for the first qualifying line ending sequence it |
|
7482
|
|
|
|
|
|
|
finds, if any. This sequence is then returned to the caller. If it |
|
7483
|
|
|
|
|
|
|
returns undef, it was not able to find any evidence of line endings in |
|
7484
|
|
|
|
|
|
|
the file. |
|
7485
|
|
|
|
|
|
|
|
|
7486
|
|
|
|
|
|
|
guess_delimiter() takes a string buffer and returns a "," unless it |
|
7487
|
|
|
|
|
|
|
finds a tab character before the first comma in the $String, if any, |
|
7488
|
|
|
|
|
|
|
in which case a tab is returned. |
|
7489
|
|
|
|
|
|
|
|
|
7490
|
|
|
|
|
|
|
path_info() returns a hash of three helpful strings for building and |
|
7491
|
|
|
|
|
|
|
parsing paths on the current platform. Knows about Mac, Dos/Win, and |
|
7492
|
|
|
|
|
|
|
otherwise defaults to Unix. |
|
7493
|
|
|
|
|
|
|
|
|
7494
|
|
|
|
|
|
|
path_is_absolute($Path) returns true if it thinks the given path |
|
7495
|
|
|
|
|
|
|
string is an absolute path on the current platform. |
|
7496
|
|
|
|
|
|
|
|
|
7497
|
|
|
|
|
|
|
=cut |
|
7498
|
|
|
|
|
|
|
|
|
7499
|
|
|
|
|
|
|
{}; ## Get emacs to indent correctly. |
|
7500
|
|
|
|
|
|
|
|
|
7501
|
|
|
|
|
|
|
sub guess_endings |
|
7502
|
|
|
|
|
|
|
{ |
|
7503
|
19
|
|
|
19
|
0
|
29
|
my ($File) = @_; |
|
7504
|
|
|
|
|
|
|
|
|
7505
|
19
|
|
|
|
|
29
|
my $Ending = undef; |
|
7506
|
|
|
|
|
|
|
|
|
7507
|
19
|
|
|
|
|
25
|
my $ReadCount = 0; |
|
7508
|
19
|
|
|
|
|
31
|
my $BlockSize = 512; |
|
7509
|
|
|
|
|
|
|
|
|
7510
|
19
|
|
|
|
|
26
|
my $Buf; |
|
7511
|
|
|
|
|
|
|
my $Actual; |
|
7512
|
|
|
|
|
|
|
|
|
7513
|
19
|
|
|
|
|
104
|
while ($File->seek(0, 0), $Actual = $File->read($Buf, ($BlockSize * ++$ReadCount))) |
|
7514
|
|
|
|
|
|
|
{ |
|
7515
|
|
|
|
|
|
|
## Break out of the loop if it appears a line ending match is |
|
7516
|
|
|
|
|
|
|
## found (but disallow initial match at very end of buffer). |
|
7517
|
|
|
|
|
|
|
|
|
7518
|
19
|
50
|
|
|
|
792
|
last if $Buf =~ /((?:\x0D\x0A)|(?:\x0D)|(?:\x0A))[^\x0D\x0A]/; |
|
7519
|
|
|
|
|
|
|
|
|
7520
|
|
|
|
|
|
|
## Break out of the loop if we just read any less than |
|
7521
|
|
|
|
|
|
|
## attempted (we are probably at the end of a very short, |
|
7522
|
|
|
|
|
|
|
## maybe one-line or even zero-line, file). |
|
7523
|
|
|
|
|
|
|
|
|
7524
|
0
|
0
|
|
|
|
0
|
last if $Actual < ($BlockSize * $ReadCount); |
|
7525
|
|
|
|
|
|
|
} |
|
7526
|
|
|
|
|
|
|
|
|
7527
|
|
|
|
|
|
|
## We can presume that the buffer we now have must either have |
|
7528
|
|
|
|
|
|
|
## line endings in it, or there is no line ending in the file at |
|
7529
|
|
|
|
|
|
|
## all. So we extract the first one we come to, (trying the DOS |
|
7530
|
|
|
|
|
|
|
## ending first since it contains the other two), if any, and we |
|
7531
|
|
|
|
|
|
|
## return it. |
|
7532
|
|
|
|
|
|
|
|
|
7533
|
19
|
|
|
|
|
181
|
my $Ending = ($Buf =~ /((\x0D\x0A)|(\x0D)|(\x0A))/)[0]; |
|
7534
|
|
|
|
|
|
|
|
|
7535
|
|
|
|
|
|
|
## &progress_default(undef, "DOS line endings") if $2; ## Debugging. |
|
7536
|
|
|
|
|
|
|
## &progress_default(undef, "Mac line endings") if $3; ## Debugging. |
|
7537
|
|
|
|
|
|
|
## &progress_default(undef, "Unix line endings") if $4; ## Debugging. |
|
7538
|
|
|
|
|
|
|
|
|
7539
|
19
|
|
|
|
|
70
|
done: |
|
7540
|
|
|
|
|
|
|
|
|
7541
|
|
|
|
|
|
|
## We always seek back to zero when done. |
|
7542
|
|
|
|
|
|
|
$File->seek(0, 0); |
|
7543
|
|
|
|
|
|
|
|
|
7544
|
19
|
|
|
|
|
223
|
return($Ending); |
|
7545
|
|
|
|
|
|
|
} |
|
7546
|
|
|
|
|
|
|
|
|
7547
|
|
|
|
|
|
|
sub guess_delimiter |
|
7548
|
|
|
|
|
|
|
{ |
|
7549
|
19
|
|
|
19
|
0
|
37
|
my ($String) = @_; |
|
7550
|
|
|
|
|
|
|
|
|
7551
|
19
|
|
50
|
|
|
162
|
return(($String =~ /([,\t])/)[0] || ","); |
|
7552
|
|
|
|
|
|
|
} |
|
7553
|
|
|
|
|
|
|
|
|
7554
|
|
|
|
|
|
|
sub path_info |
|
7555
|
|
|
|
|
|
|
{ |
|
7556
|
1
|
|
|
1
|
|
5
|
use Config qw(%Config); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
145
|
|
|
7557
|
70
|
|
|
70
|
0
|
673
|
my $OSName = $Config{osname}; |
|
7558
|
|
|
|
|
|
|
|
|
7559
|
70
|
50
|
|
|
|
277
|
return({sep =>':' , up =>'::' , cur =>':' }) if $OSName =~ /mac /ix; |
|
7560
|
70
|
50
|
|
|
|
363
|
return({sep =>'\\', up =>'..\\', cur =>'.\\'}) if $OSName =~ /(?
|
|
7561
|
70
|
|
|
|
|
379
|
return({sep =>'/' , up =>'../' , cur =>'./' }) ; |
|
7562
|
|
|
|
|
|
|
} |
|
7563
|
|
|
|
|
|
|
|
|
7564
|
|
|
|
|
|
|
sub path_is_absolute |
|
7565
|
|
|
|
|
|
|
{ |
|
7566
|
53
|
|
|
53
|
0
|
86
|
my ($Path) = @_; |
|
7567
|
|
|
|
|
|
|
|
|
7568
|
1
|
|
|
1
|
|
6
|
use Config qw(%Config); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
494
|
|
|
7569
|
53
|
|
|
|
|
316
|
my $OSName = $Config{osname}; |
|
7570
|
|
|
|
|
|
|
|
|
7571
|
53
|
50
|
|
|
|
190
|
return($Path =~ /^[^:]/) if $OSName =~ /mac /ix; |
|
7572
|
53
|
50
|
|
|
|
216
|
return($Path =~ /^(([a-z][:])|(\\\\))/i) if $OSName =~ /(?
|
|
7573
|
53
|
|
|
|
|
182
|
return($Path =~ /^\//) ; |
|
7574
|
|
|
|
|
|
|
} |
|
7575
|
|
|
|
|
|
|
|
|
7576
|
|
|
|
|
|
|
### min and max |
|
7577
|
|
|
|
|
|
|
|
|
7578
|
188169
|
100
|
|
188169
|
0
|
865915
|
sub min {return($_[0] < $_[1] ? $_[0] : $_[1])} |
|
7579
|
134
|
100
|
|
134
|
0
|
323
|
sub max {return($_[0] > $_[1] ? $_[0] : $_[1])} |
|
7580
|
|
|
|
|
|
|
|
|
7581
|
|
|
|
|
|
|
|
|
7582
|
|
|
|
|
|
|
=pod |
|
7583
|
|
|
|
|
|
|
|
|
7584
|
|
|
|
|
|
|
=head1 IMPLEMENTATION LIMITATIONS |
|
7585
|
|
|
|
|
|
|
|
|
7586
|
|
|
|
|
|
|
=over 4 |
|
7587
|
|
|
|
|
|
|
|
|
7588
|
|
|
|
|
|
|
=item Column (field) names must not start with underscore |
|
7589
|
|
|
|
|
|
|
|
|
7590
|
|
|
|
|
|
|
This object is implemented as a blessed hash reference. By |
|
7591
|
|
|
|
|
|
|
convention, keys that do not start with underscore are data columns |
|
7592
|
|
|
|
|
|
|
and the key is the field name. Keys that do start with underscore |
|
7593
|
|
|
|
|
|
|
refer to parameters or other data structures stored in the object. |
|
7594
|
|
|
|
|
|
|
|
|
7595
|
|
|
|
|
|
|
Consequently, no field names may start with underscore. When a file |
|
7596
|
|
|
|
|
|
|
is read from disk, any field names that DO start with underscores will |
|
7597
|
|
|
|
|
|
|
have the leading underscores stripped off. Strange things could then |
|
7598
|
|
|
|
|
|
|
occur if the field names are then no longer unique. For example, |
|
7599
|
|
|
|
|
|
|
field "A" and "_A" in the data file would be treated as the single |
|
7600
|
|
|
|
|
|
|
field "A" after the file was read. |
|
7601
|
|
|
|
|
|
|
|
|
7602
|
|
|
|
|
|
|
=item Field values are always read as strings |
|
7603
|
|
|
|
|
|
|
|
|
7604
|
|
|
|
|
|
|
Field values when written to a file are necessarily converted to |
|
7605
|
|
|
|
|
|
|
strings. When read back in, they are read as strings, regardless of |
|
7606
|
|
|
|
|
|
|
original format. The sole exception is the empty string which is read |
|
7607
|
|
|
|
|
|
|
back in as undef for efficiency. |
|
7608
|
|
|
|
|
|
|
|
|
7609
|
|
|
|
|
|
|
An exception is when the _CacheOnWrite feature is used: field values |
|
7610
|
|
|
|
|
|
|
stored internally as integers or other scalar types may be saved and |
|
7611
|
|
|
|
|
|
|
later restored as such. However, you should not rely on this |
|
7612
|
|
|
|
|
|
|
behavior. |
|
7613
|
|
|
|
|
|
|
|
|
7614
|
|
|
|
|
|
|
=item Undef vs. empty |
|
7615
|
|
|
|
|
|
|
|
|
7616
|
|
|
|
|
|
|
Empty field values are stored as "undef" for efficiency. This means |
|
7617
|
|
|
|
|
|
|
that programs should generally not rely on any differences between "" |
|
7618
|
|
|
|
|
|
|
and undef in field values. However, when working with large but |
|
7619
|
|
|
|
|
|
|
sparse tables, programs should take care not to convert undef values |
|
7620
|
|
|
|
|
|
|
to empty strings unnecessarily since the separate string objects |
|
7621
|
|
|
|
|
|
|
consume considerably more memory than undef. |
|
7622
|
|
|
|
|
|
|
|
|
7623
|
|
|
|
|
|
|
=back |
|
7624
|
|
|
|
|
|
|
|
|
7625
|
|
|
|
|
|
|
=head1 CONTRIBUTIONS |
|
7626
|
|
|
|
|
|
|
|
|
7627
|
|
|
|
|
|
|
Corrections, bug reports, bug fixes, or feature additions are |
|
7628
|
|
|
|
|
|
|
encouraged. Please send additions or patches with a clear explanation |
|
7629
|
|
|
|
|
|
|
of their purpose. Consider making additions in the form of a subclass |
|
7630
|
|
|
|
|
|
|
if possible. |
|
7631
|
|
|
|
|
|
|
|
|
7632
|
|
|
|
|
|
|
I'm committed to bundling useful subclasses contributed by myself or |
|
7633
|
|
|
|
|
|
|
others with this main distribution. |
|
7634
|
|
|
|
|
|
|
|
|
7635
|
|
|
|
|
|
|
So, if you've got a subclass of Data::CTable (which should have a name |
|
7636
|
|
|
|
|
|
|
like Data::CTable::YourClassName) and you would like it included in |
|
7637
|
|
|
|
|
|
|
the main distribution, please send it along with a test script and |
|
7638
|
|
|
|
|
|
|
I'll review the code and add it (at my discretion). |
|
7639
|
|
|
|
|
|
|
|
|
7640
|
|
|
|
|
|
|
If you've got a module that uses, augments, or complements this one, |
|
7641
|
|
|
|
|
|
|
let me know that, too, and I'll make appropriate mention of it. |
|
7642
|
|
|
|
|
|
|
|
|
7643
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
7644
|
|
|
|
|
|
|
|
|
7645
|
|
|
|
|
|
|
The Data::CTable home page: |
|
7646
|
|
|
|
|
|
|
|
|
7647
|
|
|
|
|
|
|
http://christhorman.com/projects/perl/Data-CTable/ |
|
7648
|
|
|
|
|
|
|
|
|
7649
|
|
|
|
|
|
|
The implementation in CTable.pm. |
|
7650
|
|
|
|
|
|
|
|
|
7651
|
|
|
|
|
|
|
The test.pl script, other subclasses, and examples. |
|
7652
|
|
|
|
|
|
|
|
|
7653
|
|
|
|
|
|
|
The Data::ShowTable module. |
|
7654
|
|
|
|
|
|
|
|
|
7655
|
|
|
|
|
|
|
The Data::Table module by Yingyao Zhou & Guangzhou Zou. |
|
7656
|
|
|
|
|
|
|
|
|
7657
|
|
|
|
|
|
|
The perlref manual page. |
|
7658
|
|
|
|
|
|
|
|
|
7659
|
|
|
|
|
|
|
=head1 AUTHOR |
|
7660
|
|
|
|
|
|
|
|
|
7661
|
|
|
|
|
|
|
Chris Thorman |
|
7662
|
|
|
|
|
|
|
|
|
7663
|
|
|
|
|
|
|
Copyright (c) 1995-2002 Chris Thorman. All rights reserved. |
|
7664
|
|
|
|
|
|
|
|
|
7665
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify |
|
7666
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
|
7667
|
|
|
|
|
|
|
|
|
7668
|
|
|
|
|
|
|
=cut |
|
7669
|
|
|
|
|
|
|
|
|
7670
|
|
|
|
|
|
|
1; |
|
7671
|
|
|
|
|
|
|
|