line
stmt
bran
cond
sub
pod
time
code
1
package Data::Presenter;
2
#$Id: Presenter.pm 1218 2008-02-10 00:11:59Z jimk $
3
$VERSION = 1.03; # 02-10-2008
4
11
11
24037
use strict;
11
28
11
523
5
11
11
68
use warnings;
11
19
11
444
6
11
11
15262
use List::Compare::Functional qw( is_LsubsetR );
11
179126
11
10740
7
11
11
130
use Carp;
11
24
11
668
8
11
11
7639
use Data::Dumper;
11
62707
11
31719
9
10
############################## Package Variables ##############################
11
12
our %fp = ();
13
our %fieldlabels = ();
14
our %reserved = map {$_ => 1} qw( fields parameters index options );
15
16
our %gt_lt_ops = map {$_ => 1} (
17
q{<}, q{lt}, q{>}, q{gt}, q{<=}, q{le}, q{>=}, q{ge},
18
);
19
20
my %eq = map {$_, 'eq'} (
21
q{eq}, q{equals}, q{is}, q{is equal to}, q{is a member of}, q{is part of}, q{=}, q{==},
22
);
23
my %ne = map {$_, 'ne'} (
24
q{ne}, q{is not}, q{is not equal to}, q{is not a member of}, q{is not part of}, q{is less than or greater than}, q{is less than or more than}, q{is greater than or less than}, q{is more than or less than}, q{does not equal}, q{not}, q{not equal to}, q{not equals}, q{!=}, q{! =}, q{!==}, q{! ==}, q{<>},
25
);
26
my %lt = map {$_, 'lt'} (
27
q{<}, q{lt}, q{is less than}, q{is fewer than}, q{before},
28
);
29
my %gt = map {$_, 'gt'} (
30
q{>}, q{gt}, q{is more than}, q{is greater than}, q{after},
31
);
32
my %le = map {$_, 'le'} (
33
q{<=}, q{le}, q{is less than or equal to}, q{is fewer than or equal to}, q{on or before}, q{before or on},
34
);
35
my %ge = map {$_, 'ge'} (
36
q{>=}, q{ge}, q{is more than or equal to}, q{is greater than or equal to}, q{on or after}, q{after or on},
37
);
38
39
our %all_relations;
40
foreach my $rel ( \%eq, \%ne, \%lt, \%gt, \%le, \%ge) {
41
foreach my $key (keys %{$rel}) {
42
$all_relations{$key} = $rel->{$key};
43
}
44
}
45
46
our %sortclass = (
47
eq => { a => q{eq}, s => q{eq}, n => q{==} },
48
ne => { a => q{ne}, s => q{ne}, n => q{!=} },
49
lt => { a => q{lt}, s => q{lt}, n => q{<} },
50
gt => { a => q{gt}, s => q{gt}, n => q{>} },
51
le => { a => q{le}, s => q{le}, n => q{<=} },
52
ge => { a => q{ge}, s => q{ge}, n => q{>=} },
53
);
54
55
################################# Constructor #################################
56
57
sub new {
58
148
148
1
664887
my ($inputs, $class, $source, $fieldsref, $paramsref,
59
$index, $self, $dataref, $datapoints, @defective_records);
60
148
329
$inputs = scalar(@_);
61
62
148
100
591
if ($inputs == 5) {
100
63
# regular Data::Presenter object immediately validates data
64
138
611
($class, $source, $fieldsref, $paramsref, $index) = @_;
65
138
577
_validate_fields($fieldsref);
66
137
1957
_validate_parameters($fieldsref, $paramsref);
67
134
554
_validate_index($fieldsref, $index);
68
} elsif ($inputs == 2) {
69
# Data::Presenter::Combo object: data already validated
70
9
29
($class, $source) = @_;
71
} else {
72
1
4
my ($package) = caller;
73
1
349
croak 'Wrong number of inputs to ', $package, '::new', "$!";
74
75
}
76
77
# bless a ref to an empty hash into the invoking class
78
# which is somewhere below this one in the hierarchy
79
141
880
$self = bless( {}, $class );
80
81
# prepare the database by using &_init from package somewhere below
82
# this one
83
141
100
527
if ($inputs == 5) {
84
132
1766
$dataref = $self->_init($source, $fieldsref, $paramsref,
85
$index, \%reserved);
86
} else {
87
9
113
$dataref = $self->_init($source);
88
}
89
90
# carp if, other than reserved words, the object has 0 records
91
# croak if, other than reserved words, the object has records with
92
# undefined elements
93
137
111195
foreach my $rec (keys %$dataref) {
94
2229
100
5829
unless ($reserved{$rec}) {
95
1813
2076
$datapoints++;
96
1813
1964
my $undefcount = 0;
97
1813
2203
foreach my $el ( @{ $dataref->{$rec} } ) {
1813
3235
98
13144
100
51844
$undefcount++ if not defined $el;
99
}
100
1813
100
4709
push @defective_records, $rec if $undefcount;
101
}
102
}
103
137
100
805
carp "Object initialized, $class, contains 0 data elements: $!"
104
unless ($datapoints);
105
137
100
638
croak "Records @defective_records have undefined elements;\n consider revising initialization subroutine: $!"
106
if @defective_records;
107
108
# prepare 2 hashes which will be needed in selecting rows and
109
# sorting columns
110
136
5345
_make_labels_params(
111
136
200
\@{${$dataref}{'fields'}}, \@{${$dataref}{'parameters'}});
136
205
136
175
136
488
112
113
# initialize the object from the prepared values (Damian, p. 98)
114
136
1676
%$self = %$dataref;
115
136
1204
return $self;
116
}
117
118
################################################################################
119
##### Subroutines called from with &new (constructor)
120
################################################################################
121
122
sub _validate_fields {
123
138
138
234
my $fieldsref = shift;
124
138
395
my %seen = ();
125
138
352
foreach my $field (@$fieldsref) {
126
# Note: Assuming that _init() has been written correctly in the
127
# Data::Presenter::subclass in which the object is created, the
128
# 'croak' branch below will never be reached.
129
955
100
2983
$seen{$field} ? croak "$field is a duplicated field in \@fields: $!"
130
: $seen{$field}++;
131
} # Confirmed: there exist no duplicated fields in @fields.
132
}
133
134
sub _validate_parameters {
135
137
137
797
my ($fieldsref, $paramsref) = @_;
136
137
567
my @fields = @$fieldsref;
137
137
1043
my %parameters = %$paramsref;
138
137
322
my ($i, $badvalues);
139
137
541
for ($i = 0; $i < scalar(@fields); $i++) {
140
952
2733
my @temp = @{$parameters{$fields[$i]}};
952
5198
141
952
100
66
11519
$badvalues .= ' ' . $fields[$i] . "\n"
66
142
if ($temp[0] !~ /^\d+$/ # 1st element must be numeric
143
||
144
$temp[1] !~ /^[UD]$/i # 2nd element must be U or D (lc or uc)
145
||
146
$temp[2] !~ /^[ans]$/i # 3rd element must be a, n or s
147
);
148
}
149
137
100
1175
croak "Need corrected values for these keys:\n$badvalues:$!" if ($badvalues);
150
}
151
152
sub _validate_index {
153
134
134
242
my ($fieldsref, $index) = @_;
154
134
358
my @fields = @$fieldsref;
155
134
100
1021
croak "\$index must be a numeral: $!"
156
unless ($index =~ /^\d+$/);
157
133
100
828
croak "\$index must be < number of elements in \@fields: $!"
158
unless $index <= $#fields;
159
}
160
161
sub _make_labels_params {
162
136
136
723
my ($fieldsref, $paramsref) = @_;
163
136
440
my @fields = @$fieldsref;
164
136
385
my @aryparams = @$paramsref;
165
136
851
%fp = ();
166
136
314
my %temp = ();
167
136
453
for (my $i = 0; $i < scalar(@fields); $i++) {
168
966
980
$fp{$fields[$i]} = [@{$aryparams[$i]}];
966
3017
169
966
3498
$temp{$fields[$i]} = $i;
170
}
171
136
1482
%fieldlabels = %temp;
172
}
173
174
################################################################################
175
##### Subroutines to get information on the Data::Presenter object
176
################################################################################
177
178
sub get_data_count {
179
110
110
1
29068
my $self = shift;
180
110
285
_count_engine($self);
181
}
182
183
sub print_data_count {
184
12
12
1
99099
my $self = shift;
185
12
80
print 'Current data count: ', _count_engine($self), "\n";
186
}
187
188
sub _count_engine {
189
122
122
194
my $self = shift;
190
122
834
my %data = %$self;
191
122
222
my ($count);
192
122
385
foreach (keys %data) {
193
1092
100
3958
$count++ unless ($reserved{$_});
194
}
195
122
100
1093
$count ? $count : 0;
196
}
197
198
sub get_keys {
199
206
206
1
106816
my $self = shift;
200
206
1304
my %data = %$self;
201
206
421
my @keys = ();
202
206
627
foreach (keys %data) {
203
1566
100
4152
push(@keys, $_) unless ($reserved{$_});
204
}
205
206
2262
return [ sort @keys ];
206
}
207
208
sub get_keys_seen {
209
12
12
1
70441
my $self = shift;
210
12
207
my %data = %$self;
211
12
40
my (%seen);
212
12
69
foreach (keys %data) {
213
204
100
475
$seen{$_}++ unless ($reserved{$_});
214
}
215
12
206
return \%seen;
216
}
217
218
################################################################################
219
##### &sort_by_column: called from package main to select particular fields
220
##### to be displayed in output
221
##### Subroutines called from within &sort_by_column
222
################################################################################
223
224
sub sort_by_column {
225
39
39
1
71472
my $self = shift;
226
39
75
my $columns_selected_ref = shift;
227
39
73
my %data = %{$self};
39
618
228
39
352
_validate_args($columns_selected_ref, \%fp);
229
38
153
$columns_selected_ref =
230
_verify_presence_of_index(\%data, $columns_selected_ref);
231
38
71
my @records;
232
38
216
foreach my $k (keys %data) {
233
706
100
2356
push (@records, $data{$k}) unless ($reserved{$k});
234
}
235
187
514
my $sortref = _sort_maker(
236
38
225
map { _make_single_comparator( $_ ) }
237
38
113
@{$columns_selected_ref}
238
);
239
240
38
538
return _extract_columns_selected(
241
[ sort $sortref @records ],
242
$columns_selected_ref,
243
);
244
}
245
246
sub _verify_presence_of_index {
247
38
38
207
my $dataref = shift;
248
38
64
my $columns_selected_ref = shift;
249
38
101
my @fields = @{$dataref->{fields}};
38
8244
250
38
73
my $index = ${$dataref}{index};
38
97
251
38
59
my @columns_selected = @{$columns_selected_ref};
38
116
252
38
101
my %cols = map {$_, 1} @columns_selected;
186
449
253
38
100
175
unless ($cols{$fields[$index]}) { # line 205
254
1
4100
carp "Field '$fields[$index]' which serves as unique index for records must be one of the columns selected for output; adding it to end of list of columns selected: $!";
255
1
111
push @columns_selected, $fields[$index];
256
}
257
38
201
return [ @columns_selected ];
258
}
259
260
sub _sort_maker {
261
38
38
108
my @littlesubs = @_;
262
sub {
263
2307
2307
3301
foreach my $sub (@littlesubs) {
264
2813
4027
my $result = $sub->();
265
2813
100
11829
return $result if $result;
266
}
267
38
228
};
268
}
269
270
sub _make_single_comparator {
271
187
187
426
my $field = shift;
272
187
651
my $sort_order = $fp{$field}->[1];
273
187
350
my $sort_type = $fp{$field}->[2];
274
187
288
my $idx = $fieldlabels{$field};
275
276
11
11
139
no warnings qw(uninitialized numeric);
11
130
11
87618
277
my %subs = (
278
U => {
279
642
642
1539
a => sub { lc($a->[$idx]) cmp lc($b->[$idx]) },
280
1984
1984
3127
n => sub { $a->[$idx] <=> $b->[$idx] },
281
59
59
360
s => sub { $a->[$idx] cmp $b->[$idx] },
282
},
283
D => {
284
69
69
146
a => sub { lc($b->[$idx]) cmp lc($a->[$idx]) },
285
30
30
52
n => sub { $b->[$idx] <=> $a->[$idx] },
286
29
29
45
s => sub { $b->[$idx] cmp $a->[$idx] },
287
},
288
187
4110
);
289
187
3025
$subs{$sort_order}{$sort_type};
290
}
291
292
sub _extract_columns_selected {
293
38
38
78
my ($intermed_ref, $columns_selected_ref) = @_;
294
38
69
my @results;
295
38
62
foreach my $record (@{$intermed_ref}) {
38
492
296
588
606
my @temp;
297
588
644
foreach my $col (@{$columns_selected_ref}) {
588
1307
298
3200
7449
push @temp, $record->[$fieldlabels{$col}];
299
}
300
588
5256
push @results, [ @temp ];
301
}
302
38
1165
return [ @results ];
303
}
304
305
sub seen_one_column {
306
27
27
1
61121
my $self = shift;
307
27
352
my %data = %$self;
308
27
100
2769
croak "Invalid number of arguments to seen_one_column(): $!"
309
unless @_ == 1;
310
15
48
my $columnref = [ shift ];
311
15
65
_validate_args($columnref, \%fp);
312
11
21
my (%seen);
313
11
53
foreach (keys %data) {
314
195
100
365
unless ($reserved{$_}) {
315
161
186
$seen{ $data{$_}[ $fieldlabels{ ${$columnref}[0] } ] }++;
161
404
316
}
317
}
318
11
151
return \%seen;
319
}
320
321
sub _validate_args {
322
63
63
267
my ($columns_selected_ref, $fpref) = @_;
323
63
198
my @columns_selected = @{$columns_selected_ref};
63
184
324
63
118
my (%seen, %unseen, @unseen);
325
63
173
foreach my $col (@columns_selected) {
326
235
987
foreach my $field (keys %$fpref) {
327
1113
100
3231
if ($col eq $field) {
328
229
522
$seen{$col} = 1;
329
229
426
last;
330
}
331
}
332
235
100
4467
$unseen{$col}++ unless $seen{$col};
333
}
334
63
326
@unseen = sort { lc($a) cmp lc($b) } (keys %unseen);
1
8
335
63
100
1200
croak "Invalid column selection(s): @{unseen}: $!"
336
if (@unseen);
337
}
338
339
################################################################################
340
##### &select_rows: called from package main to select a particular range of
341
##### entries from data source
342
##### Subroutines called within &select_rows
343
################################################################################
344
345
sub select_rows {
346
107
107
1
25968
my ($self, $column, $relation, $choicesref) = @_;
347
107
210
my $dataref = q{};
348
107
698
$dataref = $self->_extract_rows(
349
$column, $relation, $choicesref, \%fp, \%fieldlabels,
350
\&_analyze_relation, \&_strip_non_matches);
351
104
1474
%$self = %$dataref;
352
104
530
return $self;
353
}
354
355
sub _analyze_relation { # Analysis of $relation: passed by ref to subclass
356
106
106
3544
my ($relation_raw, $sorttype) = @_;
357
106
148
my ($type, $relation_confirmed);
358
106
100
710
croak "Relation \'$relation_raw\' has not yet been added to\nData::Presenter's internal specifications. $!"
359
unless $all_relations{$relation_raw};
360
105
433
$type = $sortclass{$all_relations{$relation_raw}}{$sorttype};
361
105
50
257
croak "Problem with sort type $type: $!"
362
unless $type;
363
105
159
$relation_confirmed = $type;
364
105
1106
return ($relation_confirmed, \%gt_lt_ops);
365
}
366
367
sub _action {
368
1136
1136
3125
my ($relation, $seenref, $item, $dataref, $record, $correctedref) = @_;
369
my %delete_instructions = ( # dispatch table
370
81
1202
'eq' => sub { delete ${$dataref}{$record}
91
428
371
91
100
91
156
unless exists ${$seenref}{$item} },
372
72
1228
'==' => sub { delete ${$dataref}{$record}
88
404
373
88
100
88
76
unless exists ${$seenref}{$item} },
374
98
1604
'ne' => sub { delete ${$dataref}{$record}
209
2714
375
209
100
209
230
unless ! exists ${$seenref}{$item} },
376
126
1592
'!=' => sub { delete ${$dataref}{$record}
198
1330
377
198
100
198
193
unless ! exists ${$seenref}{$item} },
378
35
1155
'lt' => sub { delete ${$dataref}{$record}
55
8778
379
55
100
55
70
unless $item lt ${$correctedref}[0] },
380
40
455
'<' => sub { delete ${$dataref}{$record}
55
275
381
55
100
55
51
unless $item < ${$correctedref}[0] },
382
35
472
'gt' => sub { delete ${$dataref}{$record}
55
366
383
55
100
55
57
unless $item gt ${$correctedref}[0] },
384
40
529
'>' => sub { delete ${$dataref}{$record}
55
276
385
55
100
55
58
unless $item > ${$correctedref}[0] },
386
36
567
'le' => sub { delete ${$dataref}{$record}
66
757
387
66
100
66
89
unless $item le ${$correctedref}[0] },
388
42
477
'<=' => sub { delete ${$dataref}{$record}
66
379
389
66
100
66
62
unless $item <= ${$correctedref}[0] },
390
31
562
'ge' => sub { delete ${$dataref}{$record}
85
1026
391
85
100
85
105
unless $item ge ${$correctedref}[0] },
392
82
1169
'>=' => sub { delete ${$dataref}{$record}
113
727
393
113
100
113
111
unless $item >= ${$correctedref}[0] },
394
1136
33591
);
395
1136
2173
&{$delete_instructions{$relation}};
1136
2129
396
}
397
sub _strip_non_matches {
398
104
104
2317
my ($dataref, $flref, $column, $relation, $correctedref, $seenref) = @_;
399
104
134
foreach my $record (keys %{$dataref}) {
104
1078
400
1448
100
4070
unless ($reserved{$record}) {
401
1136
1677
my $item = ${$dataref}{$record}[${$flref}{$column}];
1136
2081
1136
1833
402
1136
2366
_action( $relation, $seenref, $item, $dataref,
403
$record, $correctedref);
404
}
405
}
406
104
465
return $dataref;
407
}
408
409
################################################################################
410
##### Methods for simple output
411
##### and subroutines called within those methods
412
################################################################################
413
414
sub print_to_screen {
415
7
7
1
53397
my $class = shift;
416
7
110
my %data = %$class;
417
7
49
_print_engine(\%data, \%reserved);
418
7
39
return 1;
419
}
420
421
sub print_to_file {
422
9
9
1
15401
my ($class, $outputfile) = @_;
423
9
952
my %data = %$class;
424
9
26
my $OUT;
425
9
133
my $oldfh = select $OUT;
426
9
50
1484
open($OUT, ">$outputfile")
427
|| croak "Cannot open $outputfile for writing: $!";
428
9
49
_print_engine(\%data, \%reserved);
429
9
50
635
close($OUT) || croak "Cannot close $outputfile: $!";
430
9
36
select($oldfh);
431
9
77
return 1;
432
}
433
434
sub _print_engine {
435
16
16
39
my ($dataref, $reservedref) = @_;
436
16
140
my %data = %$dataref;
437
16
88
my %reserved = %$reservedref;
438
16
38
local $_;
439
16
164
foreach my $i (sort keys %data) {
440
174
100
662
unless ($reserved{$i}) {
441
126
132
print "$_;" foreach (@{$data{$i}});
126
968
442
126
3403
print "\n";
443
}
444
}
445
}
446
447
sub print_with_delimiter {
448
5
5
1
3451
my ($class, $outputfile, $delimiter) = @_;
449
5
51
my %data = %$class;
450
5
50
407
open (my $OUT, ">$outputfile")
451
|| croak "Cannot open $outputfile for writing: $!";
452
5
66
foreach my $i (sort keys %data) {
453
62
100
155
unless ($reserved{$i}) {
454
47
48
my @fields = @{$data{$i}};
47
174
455
47
120
for (my $j=0; $j < scalar(@fields); $j++) {
456
448
100
681
if ($j < scalar(@fields) - 1) {
457
401
100
541
if ($fields[$j]) {
458
325
1104
print $OUT $fields[$j], $delimiter;
459
} else {
460
76
165
print $OUT $delimiter;
461
}
462
} else {
463
47
100
164
print $OUT $fields[$j] if ($fields[$j]);
464
}
465
}
466
47
138
print $OUT "\n";
467
}
468
}
469
5
50
245
close ($OUT) || croak "Cannot close $outputfile: $!";
470
5
45
return 1;
471
}
472
473
sub full_report {
474
5
5
1
2978
my ($class, $outputfile);
475
5
17
my %data = ();
476
5
15
my @fields = ();
477
5
13
($class, $outputfile) = @_;
478
5
48
%data = %$class;
479
5
14
@fields = @{$data{'fields'}};
5
32
480
5
50
419
open (my $OUT, ">$outputfile")
481
|| croak "Cannot open $outputfile for writing: $!";
482
5
66
foreach my $i (sort keys %data) {
483
62
100
147
unless ($reserved{$i}) {
484
47
106
print $OUT "$i\n";
485
47
124
for (my $j=0; $j <= $#fields; $j++) {
486
448
811
print $OUT " $fields[$j]", ' ' x (24 - length($fields[$j]));
487
448
1137
print $OUT "$data{$i}[$j]\n";
488
}
489
47
81
print $OUT "\n";
490
}
491
}
492
5
50
242
close ($OUT) || croak "Cannot close $outputfile: $!";
493
5
46
return 1;
494
}
495
496
################################################################################
497
##### Methods which output data like Perl formats
498
##### and subroutines called from within those methods
499
################################################################################
500
501
our %keys_needed_to_write = (
502
'Data::Presenter::writeformat'
503
=> [ qw| sorted columns file | ],
504
'Data::Presenter::writeformat_plus_header'
505
=> [ qw| sorted columns file title | ],
506
'Data::Presenter::writeformat_with_reprocessing'
507
=> [ qw| sorted columns file reprocess | ],
508
'Data::Presenter::writeformat_deluxe'
509
=> [ qw| sorted columns file title reprocess | ],
510
'Data::Presenter::writedelimited'
511
=> [ qw| sorted file delimiter | ],
512
'Data::Presenter::writedelimited_plus_header'
513
=> [ qw| sorted columns file delimiter | ],
514
'Data::Presenter::writedelimited_with_reprocessing'
515
=> [ qw| sorted columns file reprocess delimiter | ],
516
'Data::Presenter::writedelimited_deluxe'
517
=> [ qw| sorted columns file reprocess delimiter | ],
518
'Data::Presenter::writeHTML'
519
=> [ qw| sorted columns file title | ],
520
);
521
522
sub _validate_write_args {
523
68
68
726
my $callingsub = (caller(1))[3];
524
68
310
my @incoming = @_;
525
68
100
429
croak "Method $callingsub needs even number of arguments: $!"
526
if (@incoming % 2);
527
67
436
my %args = @incoming;
528
67
100
701
croak "Method $callingsub needs key-value pairs:\n @{ $keys_needed_to_write{$callingsub} } \n $!"
8
2935
529
unless (is_LsubsetR( [
530
$keys_needed_to_write{$callingsub},
531
[ keys %args ],
532
] ) );
533
59
15837
return %args;
534
}
535
536
sub writeformat {
537
12
12
1
23340
my $self = shift;
538
12
67
my %args = _validate_write_args(@_);
539
10
76
my $picline = _format_picture_line($args{columns});
540
10
50
1809
open (my $REPORT, ">$args{file}") || croak "cannot create $args{file}: $!";
541
10
31
foreach my $record (@{$args{sorted}}) {
10
57
542
149
384
local $^A = q{};
543
149
144
formline($picline, @{$record});
149
482
544
149
541
print $REPORT $^A, "\n";
545
}
546
10
50
1187
close ($REPORT) || croak "can't close $args{file}:$!";
547
10
107
return 1;
548
}
549
550
sub writeformat_plus_header {
551
11
11
1
42728
my $self = shift;
552
11
59
my %args = _validate_write_args(@_);
553
10
81
my $title_out = _format_title($args{title});
554
10
49
my $argument_line_top_ref =
555
_format_argument_line_top($args{columns});
556
10
48
my $hyphen_line = _format_hyphen_line($args{columns});
557
10
43
my $picline = _format_picture_line($args{columns});
558
10
50
1308
open (my $REPORT, ">$args{file}") || croak "cannot create $args{file}: $!";
559
10
145
print $REPORT $title_out, "\n\n";
560
10
24
print $REPORT "$_\n" foreach (@{$argument_line_top_ref});
10
64
561
10
28
print $REPORT $hyphen_line, "\n";
562
10
20
foreach my $record (@{$args{sorted}}) {
10
33
563
149
281
local $^A = q{};
564
149
135
formline($picline, @{$record});
149
753
565
149
460
print $REPORT $^A, "\n";
566
}
567
10
50
570
close ($REPORT) || croak "can't close $args{file}:$!";
568
10
217
return 1;
569
}
570
571
sub writeformat_with_reprocessing {
572
5
5
1
6527
my $self = shift;
573
5
27
my %args = _validate_write_args(@_);
574
4
222
my %data = %$self;
575
576
4
53
my ($substr_data_ref, $picline) = _prepare_to_reprocess(
577
$args{reprocess}, \%fp, \%data, $args{columns});
578
579
1
50
165
open (my $REPORT, ">$args{file}") || croak "cannot create $args{file}: $!";
580
1
3
foreach my $record (@{$args{sorted}}) {
1
10
581
83
237
local $^A = q{};
582
83
88
formline($picline, @{$record});
83
337
583
83
262
my $line = $self->_reprocessor(
584
$^A, # the formed line
585
$substr_data_ref, # the points at which I have to splice out
586
# text from the formed line and amount thereof
587
);
588
83
18082
print $REPORT $line, "\n";
589
}
590
1
50
80
close ($REPORT) || croak "can't close $args{file}:$!";
591
1
45
return 1;
592
}
593
594
sub writeformat_deluxe {
595
2
2
1
5168
my $self = shift;
596
2
13
my %args = _validate_write_args(@_);
597
1
61
my %data = %$self;
598
599
1
11
my ($substr_data_ref, $picline) = _prepare_to_reprocess(
600
$args{reprocess}, \%fp, \%data, $args{columns});
601
602
1
2
my (@header, @accumulator);
603
1
6
my $title_out = _format_title($args{title});
604
1
6
my $argument_line_top_ref =
605
_format_argument_line_top($args{columns}, $args{reprocess});
606
1
7
my $hyphen_line = _format_hyphen_line($args{columns}, $args{reprocess});
607
1
3
@header = ($title_out, q{}, @{$argument_line_top_ref}, $hyphen_line);
1
13
608
609
1
2
foreach my $record (@{$args{sorted}}) {
1
4
610
83
204
local $^A = q{};
611
83
92
formline($picline, @{$record});
83
338
612
83
246
my $line = $self->_reprocessor(
613
$^A, # the formed line
614
$substr_data_ref, # the points at which I have to splice out
615
# text from the formed line and amount thereof
616
);
617
83
18111
push @accumulator, $line;
618
}
619
1
50
172
open (my $REPORT, ">$args{file}") || croak "cannot create $args{file}: $!";
620
1
134
print $REPORT $_, "\n" foreach (@header, @accumulator);
621
1
50
64
close ($REPORT) || croak "can't close $args{file}:$!";
622
1
52
return 1;
623
}
624
625
sub _prepare_to_reprocess {
626
5
5
13
my ($reprocessref, $fpref, $dataref, $columns_selected_ref) = @_;
627
5
11
my %reprocessing_info = %{$reprocessref};
5
33
628
5
10
my %fp = %{$fpref};
5
43
629
5
15
my %data = %{$dataref};
5
146
630
5
33
my @columns_selected = @{$columns_selected_ref};
5
23
631
632
# We must validate the info passed in thru $reprocessref.
633
# This is a multi-stage process.
634
# 1: Verify that the fields requested for reprocessing exist as
635
# fields in the configuration file.
636
5
43
my @fields_for_reprocessing = sort keys %reprocessing_info;
637
5
28
_validate_args(\@fields_for_reprocessing, \%fp);
638
639
640
# 2: Verify that there exists a subroutine named &reprocess_[field]
641
# whose name has been stored as a key in defined in
642
# %{$data{'options'}{'subs'}}.
643
45
209
my @confirmed_subs =
644
5
10
grep {s/^reprocess_(.*)/$1/} keys %{$data{'options'}{'subs'}};
5
42
645
5
100
32
croak "You are trying to reprocess fields for which no reprocessing subroutines yet exist: $!"
646
unless (is_LsubsetR( [
647
\@fields_for_reprocessing,
648
\@confirmed_subs
649
] ) );
650
651
# 3: Verify that we can tap into the data sources referenced in
652
# %{$data{'options'}{'sources'}} for each field needing reprocessing
653
4
900
my @available_sources = sort keys %{$data{'options'}{'sources'}};
4
33
654
4
100
22
croak "You are trying to reprocess fields for which no original data sources are available: $!"
655
unless (is_LsubsetR( [
656
\@fields_for_reprocessing,
657
\@available_sources
658
] ) );
659
660
# 4: Verify that the file mentioned in the values-arrays of
661
# %reprocessing_info exists, and that appropriate digits are entered
662
# for the fixed-length of the replacement string.
663
3
620
foreach (sort keys %reprocessing_info) {
664
6
100
187
croak "Fixed length of replacement string is misspecified;\n Must be all digits: $!"
665
unless $reprocessing_info{$_} =~ /^\d+$/;
666
}
667
668
2
8
my %args_indices = ();
669
2
10
for (my $h=0; $h<=$#columns_selected; $h++) {
670
12
41
$args_indices{$columns_selected[$h]} = $h;
671
}
672
673
2
5
my %substr_data = ();
674
2
9
foreach (sort keys %reprocessing_info) {
675
# 1st: Determine the position in the formed string where the
676
# old field began, as well as its length
677
# Given that I used a single whitespace to separate fields in
678
# the formed line, the starting position is the sum of the number of
679
# fields preceding the target field in the formed line
680
# PLUS the combined length of those fields
681
4
7
my ($comb_length, $start);
682
683
4
100
15
if ($args_indices{$_} == 0) {
684
2
4
$start = $args_indices{$_};
685
} else {
686
2
9
for (my $j=0; $j<$args_indices{$_}; $j++) {
687
2
10
$comb_length += $fp{$columns_selected[$j]}[0];
688
}
689
2
5
$start = $args_indices{$_} + $comb_length;
690
}
691
4
25
$substr_data{$start} = [
692
$_,
693
$fp{$_}[0],
694
$reprocessing_info{$_},
695
4
10
\%{ $data{'options'}{'sources'}{$_} },
696
$dataref, # new in v0.51
697
];
698
}
699
2
12
my $picline = _format_picture_line(\@columns_selected);
700
2
40
return (\%substr_data, $picline);
701
}
702
703
################################################################################
704
##### Methods which output data with delimiters between fields
705
##### and subroutines called within those methods
706
################################################################################
707
708
sub writedelimited {
709
11
11
1
94691
my $self = shift;
710
11
63
my %args = _validate_write_args(@_);
711
10
50
1871
open (my $REPORT, ">$args{file}") || croak "cannot create $args{file}: $!";
712
10
31
foreach my $record (@{$args{sorted}}) {
10
44
713
149
175
print $REPORT join($args{delimiter}, @{$record}), "\n";
149
468
714
}
715
10
50
578
close ($REPORT) || croak "can't close $args{file}:$!";
716
10
100
return 1;
717
}
718
719
sub writedelimited_plus_header {
720
11
11
1
43342
my $self = shift;
721
11
54
my %args = _validate_write_args(@_);
722
10
68
my $header =
723
_format_argument_line_top_delimited($args{columns}, $args{delimiter});
724
10
50
1205
open (my $REPORT, ">$args{file}") || croak "cannot create $args{file}: $!";
725
10
108
print $REPORT "$header\n";
726
10
25
foreach my $record (@{$args{sorted}}) {
10
58
727
149
191
print $REPORT join($args{delimiter}, @{$record}), "\n";
149
6201
728
}
729
10
50
562
close ($REPORT) || croak "can't close $args{file}:$!";
730
10
111
return 1;
731
}
732
733
sub writedelimited_with_reprocessing {
734
4
4
1
10307
my $self = shift;
735
4
15
my %args = _validate_write_args(@_);
736
3
9
my $dataref = \%{$self};
3
9
737
738
3
18
_prepare_to_reprocess_delimit(
739
$args{reprocess}, \%fp, $dataref, $args{columns});
740
741
1
209
my %cols_select_labels = ();
742
1
3
for (my $i = 0; $i <= $#{$args{columns}}; $i++) {
7
19
743
6
7
$cols_select_labels{${$args{columns}}[$i]} = $i;
6
12
744
}
745
1
1
my @revised;
746
1
2
foreach my $record (@{$args{sorted}}) {
1
7
747
83
5266
push @revised, $self->_reprocessor_delimit(
748
$record, $args{reprocess}, \%cols_select_labels, $dataref);
749
}
750
1
50
212
open my $OUT, ">$args{file}"
751
or croak "Couldn't open $args{file} for writing: $!";
752
1
4
foreach my $rev (@revised) {
753
83
89
print $OUT (join $args{delimiter}, @{$rev}), "\n";
83
212
754
}
755
1
50
88
close $OUT or croak "Couldn't close $args{file} after writing: $!";
756
1
52
return 1;
757
}
758
759
sub writedelimited_deluxe {
760
2
2
1
4726
my $self = shift;
761
2
16
my %args = _validate_write_args(@_);
762
1
3
my $dataref = \%{$self};
1
3
763
764
1
7
_prepare_to_reprocess_delimit(
765
$args{reprocess}, \%fp, $dataref, $args{columns});
766
767
1
194
my %cols_select_labels = ();
768
1
2
for (my $i = 0; $i <= $#{$args{columns}}; $i++) {
7
17
769
6
93
$cols_select_labels{${$args{columns}}[$i]} = $i;
6
13
770
}
771
1
2
my @revised;
772
1
2
foreach my $record (@{$args{sorted}}) {
1
5
773
83
5393
push @revised, $self->_reprocessor_delimit(
774
$record, $args{reprocess}, \%cols_select_labels, $dataref);
775
}
776
1
57
my $header = _format_argument_line_top_delimited(
777
$args{columns}, $args{delimiter}
778
);
779
1
50
171
open my $OUT, ">$args{file}"
780
or croak "Couldn't open $args{file} for writing: $!";
781
1
23
print $OUT "$header\n";
782
1
3
foreach my $rev (@revised) {
783
83
114
print $OUT (join $args{delimiter}, @{$rev}), "\n";
83
184
784
}
785
1
50
79
close $OUT or croak "Couldn't close $args{file} after writing: $!";
786
1
47
return 1;
787
}
788
789
sub _prepare_to_reprocess_delimit {
790
4
4
12
my ($reprocessref, $fpref, $dataref, $columns_selected_ref) = @_;
791
4
7
my %fp = %{$fpref};
4
35
792
4
10
my %data = %{$dataref};
4
173
793
4
26
my @columns_selected = @{$columns_selected_ref};
4
18
794
795
# We must validate the info passed in thru $reprocessref.
796
# This is a multi-stage process.
797
# 1: Verify that the fields requested for reprocessing exist as
798
# fields in the configuration file.
799
4
17
_validate_args($reprocessref, \%fp);
800
801
# 2: Verify that there exists a subroutine named
802
# &reprocess_delimit_[field]
803
# whose name has been stored as a key in defined in
804
# %{$data{'options'}{'subs'}}.
805
36
122
my @confirmed_subs =
806
4
6
grep {s/^reprocess_delimit_(.*)/$1/} keys %{$data{'options'}{'subs'}};
4
30
807
4
100
23
croak "You are trying to reprocess fields for which no reprocessing subroutines yet exist: $!"
808
unless (is_LsubsetR( [
809
$reprocessref,
810
\@confirmed_subs,
811
] ) );
812
813
# 3: Verify that we can tap into the data sources referenced in
814
3
531
my @available_sources = sort keys %{$data{'options'}{'sources'}};
3
28
815
3
100
15
croak "You are trying to reprocess fields for which no original data sources are available: $!"
816
unless (is_LsubsetR( [
817
$reprocessref,
818
\@available_sources,
819
] ) );
820
}
821
822
sub _format_title {
823
11
11
28
my $title_raw = shift;
824
11
23
my $title = $title_raw;
825
11
28
return $title;
826
}
827
828
sub _format_argument_line_top {
829
20
20
49
my $columns_selected_ref = shift;
830
20
100
78
my $reprocessref = shift if $_[0];
831
20
182
my @args = @$columns_selected_ref;
832
20
42
my @lines = ();
833
20
68
my $j = q{}; # index of the arg requested for printout currently
834
# being processed
835
20
211
for ($j = 0; $j < scalar(@args); $j++) {
836
122
160
my $n = 0; # current line being assigned to, starting with 0
837
122
286
my $label = $fp{$args[$j]}[3]; # easier to read
838
122
391
my $max = defined ${$reprocessref}{$args[$j]}
2
4
839
122
100
245
? ${$reprocessref}{$args[$j]}
840
: $fp{$args[$j]}[0];
841
122
180
my $remain = $label; # at the outset, the entire label
842
# remains to be allocated to the proper line
843
122
175
my @overage = ();
844
# first see if any words in $remain need to be truncated
845
122
590
my @remainwords = split(/\s/, $remain);
846
122
226
foreach my $word (@remainwords) {
847
230
100
586
$word = substr($word, 0, $max) if (length($word) > $max);
848
}
849
122
248
$remain = join ' ', @remainwords;
850
122
325
while ($remain) {
851
145
100
351
if (length($remain) <= $max) {
852
# entire remainder of label will be placed on current line
853
122
568
$lines[$n][$j] = $remain . ' ' x ($max - length($remain));
854
122
565
$remain = q{};
855
} else {
856
# entire remainder of label cannot fit on current line
857
23
54
my $word = q{};
858
23
123
my @labelwords = split(/\s/, $remain);
859
23
259
until (length($remain) <= $max) {
860
23
43
$word = shift(@labelwords);
861
23
56
push (@overage, $word);
862
23
74
$remain = join ' ', @labelwords;
863
}
864
23
98
$lines[$n][$j] = $remain . ' ' x ($max - length($remain));
865
23
55
$remain = join ' ', @overage ;
866
23
42
@overage = ();
867
23
71
$n++;
868
}
869
}
870
}
871
20
37
my (@column_heads);
872
20
47
foreach my $p (reverse @lines) {
873
39
124
for ($j = 0; $j < scalar(@args); $j++) {
874
238
733
my $max = defined ${$reprocessref}{$args[$j]}
2
5
875
238
100
253
? ${$reprocessref}{$args[$j]}
876
: $fp{$args[$j]}[0];
877
238
100
257
if (! ${$p}[$j]) {
238
845
878
93
185
${$p}[$j] = ' ' x $max;
93
1954
879
}
880
}
881
39
352
my $part = join ' ', @$p;
882
39
108
push @column_heads, $part;
883
}
884
20
113
return \@column_heads;
885
}
886
887
sub _format_argument_line_top_delimited {
888
11
11
33
my ($columns_selected_ref, $delimiter) = @_;
889
11
24
my @temp;
890
11
24
foreach my $col (@{$columns_selected_ref}) {
11
55
891
67
220
push @temp, $fp{$col}[3];
892
}
893
11
50
my $header = join($delimiter, @temp);
894
11
38
return $header;
895
}
896
897
sub _format_hyphen_line {
898
20
20
40
my $columns_selected_ref = shift;
899
20
100
66
my $reprocessref = shift if $_[0];
900
20
37
my $hyphen_line_length = 0;
901
20
45
my $hyphen_line = q{};
902
20
53
foreach my $h (@$columns_selected_ref) {
903
122
437
$hyphen_line_length += defined ${$reprocessref}{$h}
2
5
904
122
100
135
? (${$reprocessref}{$h} + 1)
905
: ($fp{$h}->[0] + 1);
906
}
907
20
67
$hyphen_line = '-' x ($hyphen_line_length - 1);
908
20
50
return $hyphen_line;
909
}
910
911
sub _format_picture_line {
912
22
22
49
my $columns_selected_ref = shift;
913
22
54
my $line = q{};
914
22
41
my $g = 0; # counter
915
22
213
foreach my $h (@$columns_selected_ref) {
916
134
178
my $picture = q{};
917
134
100
636
if ($fp{$h}[2] =~ /^n$/i) {
918
48
132
$picture = '@' . '>' x ($fp{$h}[0] - 1);
919
} else {
920
86
196
$picture = '@' . '<' x ($fp{$h}[0] - 1);
921
}
922
134
100
237
if ($g < $#{$columns_selected_ref}) {
134
457
923
112
520
$line .= $picture . q{ };
924
112
194
$g++;
925
} else {
926
22
70
$line .= $picture;
927
}
928
}
929
22
59
return $line;
930
}
931
932
################################################################################
933
##### Subroutines involved in writing HTML
934
################################################################################
935
936
sub writeHTML {
937
10
10
1
34578
my $self = shift;
938
10
56
my %args = _validate_write_args(@_);
939
10
56
my %max = (); # keys will be indices of @{$args{columns}};
940
# values will be max space allocated from %parameters
941
10
36
for (my $j = 0; $j < scalar(@{$args{columns}}); $j++) {
70
199
942
60
83
$max{$j} = $fp{${$args{columns}}[$j]}[0];
60
246
943
}
944
10
100
251
croak "Name of output file must end in .html or .htm $!"
945
unless ($args{file} =~ /\.html?$/);
946
9
50
1081
open(my $HTML, ">$args{file}")
947
|| croak "cannot open $args{file} for writing: $!";
948
9
166
print $HTML <
949
950
951
$args{title}
952
953
954
962
END_OF_HTML1
963
9
51
my $argument_line_top_ref =
964
_format_argument_line_top($args{columns});
965
9
48
my $hyphen_line = _format_hyphen_line($args{columns});
966
9
34
print $HTML ' ', "\n";
967
9
18
print $HTML $_, '', "\n" foreach (@{$argument_line_top_ref});
9
50
968
9
33
print $HTML "$hyphen_line", '', "\n";
969
9
36
foreach my $row (@{$args{sorted}}) {
9
39
970
66
75
my @values = @{$row};
66
242
971
66
98
my @paddedvalues = ();
972
66
96
for (my $j = 0; $j < scalar(@{$args{columns}}); $j++) {
472
1110
973
406
454
my $newvalue = q{};
974
406
100
424
if ($fp{${$args{columns}}[$j]}[2] =~ /^n$/i) {
406
1352
975
132
362
$newvalue =
976
(' ' x ($max{$j} - length($values[$j]))) .
977
$values[$j] . ' ';
978
} else { #
979
274
677
$newvalue =
980
$values[$j] .
981
(' ' x ($max{$j} - length($values[$j]) + 1));
982
}
983
406
793
push(@paddedvalues, $newvalue);
984
}
985
66
78
chop $paddedvalues[(scalar(@{$args{columns}})) - 1];
66
128
986
66
316
print $HTML @paddedvalues, '', "\n";
987
}
988
9
25
print $HTML ' ', "\n";
989
9
24
print $HTML <
990
991
992
END_OF_HTML2
993
9
50
544
close($HTML) || croak "cannot close $args{file}: $!";
994
9
105
return 1;
995
}
996
997
1;
998
999
######################################################################
1000
##### DOCUMENTATION #####
1001
######################################################################
1002
1003
=head1 NAME
1004
1005
Data::Presenter - Reformat database reports
1006
1007
=head1 VERSION
1008
1009
This document refers to version 1.03 of Data::Presenter, which consists of
1010
Data::Presenter.pm and various packages subclassed thereunder, most notably
1011
Data::Presenter::Combo.pm and its subclasses
1012
Data::Presenter::Combo::Intersect.pm and Data::Presenter::Combo::Union.pm.
1013
This version was released February 10, 2008.
1014
1015
=head1 SYNOPSIS
1016
1017
use Data::Presenter;
1018
use Data::Presenter::[Package1]; # example: use Data::Presenter::Census
1019
1020
our (@fields, %parameters, $index);
1021
$configfile = 'fields.XXX.data';
1022
do $configfile;
1023
1024
$dp1 = Data::Presenter::[Package1]->new(
1025
$sourcefile, \@fields,\%parameters, $index
1026
);
1027
1028
$data_count = $dp1->get_data_count();
1029
1030
$dp1->print_data_count();
1031
1032
$keysref = $dp1->get_keys();
1033
1034
$seenref = $dp1->get_keys_seen();
1035
1036
$dp1->print_to_screen();
1037
1038
$dp1->print_to_file($outputfile);
1039
1040
$dp1->print_with_delimiter($outputfile, $delimiter);
1041
1042
$dp1->full_report($outputfile);
1043
1044
$dp1->select_rows($column, $relation, \@choices);
1045
1046
$sorted_data = $dp1->sort_by_column(\@columns_selected);
1047
1048
$seen_hash_ref = $dp1->seen_one_column($column);
1049
1050
$dp1->writeformat(
1051
sorted => $sorted_data,
1052
columns => \@columns_selected,
1053
file => $outputfile,
1054
);
1055
1056
$dp1->writeformat_plus_header(
1057
sorted => $sorted_data,
1058
columns => \@columns_selected,
1059
file => $outputfile,
1060
title => $title,
1061
);
1062
1063
%reprocessing_info = (
1064
lastname => 17,
1065
firstname => 15,
1066
);
1067
1068
$dp1->writeformat_with_reprocessing(
1069
sorted => $sorted_data,
1070
columns => \@columns_selected,
1071
file => $outputfile,
1072
reprocess => \%reprocessing_info,
1073
);
1074
1075
$dp1->writeformat_deluxe(
1076
sorted => $sorted_data,
1077
columns => \@columns_selected,
1078
file => $outputfile,
1079
title => $title,
1080
reprocess => \%reprocessing_info,
1081
);
1082
1083
$dp1->writedelimited(
1084
sorted => $sorted_data,
1085
file => $outputfile,
1086
delimiter => $delimiter,
1087
);
1088
1089
$dp1->writedelimited_plus_header(
1090
sorted => $sorted_data,
1091
columns => \@columns_selected,
1092
file => $outputfile,
1093
delimiter => $delimiter,
1094
);
1095
1096
@reprocessing_info = qw( instructor timeslot room );
1097
1098
$dp1->writedelimited_with_reprocessing(
1099
sorted => $sorted_data,
1100
columns => \@columns_selected,
1101
file => $outputfile,
1102
delimiter => $delimiter,
1103
reprocess => \@reprocessing_info,
1104
);
1105
1106
$dp1->writedelimited_deluxe(
1107
sorted => $sorted_data,
1108
columns => \@columns_selected,
1109
file => $outputfile,
1110
delimiter => $delimiter,
1111
reprocess => \@reprocessing_info,
1112
);
1113
1114
$dp1->writeHTML(
1115
sorted => $sorted_data,
1116
columns => \@columns_selected,
1117
file => 'somename.html',
1118
title => $title,
1119
);
1120
1121
Data::Presenter::Combo objects:
1122
1123
use Data::Presenter;
1124
use Data::Presenter::[Package1]; # example: use Data::Presenter::Census
1125
use Data::Presenter::[Package2]; # example: use Data::Presenter::Medinsure
1126
1127
our (@fields, %parameters, $index);
1128
$configfile = 'fields.XXX.data';
1129
do $configfile;
1130
1131
$dp1 = Data::Presenter::[Package1]->new(
1132
$sourcefile, \@fields,\%parameters, $index
1133
);
1134
1135
# different source file and configuration file
1136
1137
$configfile = 'fields.YYY.data';
1138
do $configfile;
1139
1140
$dp2 = Data::Presenter::[Package2]->new(
1141
$sourcefile, \@fields,\%parameters, $index);
1142
1143
@objects = ($dp1, $dp2);
1144
$dpC = Data::Presenter::Combo::Intersect->new(\@objects);
1145
$dpC = Data::Presenter::Combo::Union->new(\@objects);
1146
1147
=head2 Notice of Changes of Interface
1148
1149
If you have I used Data::Presenter prior to version 1.0, skip this
1150
section.
1151
1152
=head3 C-Family of Methods Now Takes List of Key-Value Pairs
1153
1154
Since the last publicly available version of Data::Presenter (0.68), the
1155
interface to nine of its public methods has been changed. Previously, methods
1156
in the C-family of methods took a list of arguments which had
1157
to be provided in a very specific order. For example, C
1158
took five arguments:
1159
1160
$dp1->writeformat_deluxe(
1161
$sorted_data,
1162
\@columns_selected,
1163
$outputfile,
1164
$title,
1165
\%reprocessing_info
1166
);
1167
1168
As the number of elements in the list of arguments increases, it becomes more
1169
difficult to remember the order in which they must be passed. At a certain
1170
point it becomes easier to pass the arguments in the form of key-value pairs.
1171
As long as each pair is correctly specified, the order of the pairs no longer
1172
matters. C, for example, now has this interface:
1173
1174
$dp1->writeformat_deluxe(
1175
sorted => $sorted_data,
1176
columns => \@columns_selected,
1177
file => $outputfile,
1178
title => $title,
1179
reprocess => \%reprocessing_info,
1180
);
1181
1182
Please study the L<"SYNOPSIS"> above to see how to revise your calls to
1183
methods with C, C or C in their names.
1184
1185
=head3 Change in Assignment of C<$index> in C
1186
1187
Data::Presenter is used by writing and using a subclass in which a new object
1188
is created. Each such subclass must hold an C<_init()> method and each such
1189
C<_init()> method must accomplish certain tasks. One of these tasks is to
1190
store the value of C<$index> (found in the configuration file) in the object
1191
being created. In versions 0.68 and earlier, the code which did this looked
1192
like this:
1193
1194
$data{'index'} = [$index];
1195
1196
In other words, C<$index> was not directly assigned to the hash holding the
1197
Data::Presenter::[Package1] object's data. Instead, a reference to a
1198
one-element array holding C<$index> was passed.
1199
1200
This has now been simplified:
1201
1202
$data{'index'} = $index;
1203
1204
In other words, simply assign C<$index>; no reference is needed. See the
1205
sample packages included under the F directory in this distribution for a
1206
live presentation of this change.
1207
1208
=head1 PREREQUISITES
1209
1210
Data::Presenter requires Perl 5.6 or later. The module and its test suite
1211
require the following modules from CPAN:
1212
1213
=over 4
1214
1215
=item List::Compare
1216
1217
By the same author as Data::Presenter:
1218
L.
1219
1220
=item IO::Capture
1221
1222
Used only in the test suite to capture output printed to screen by
1223
Data::Presenter methods. By Mark Reynolds and Jon Morgan.
1224
L.
1225
1226
=item IO::Capture::Extended
1227
1228
Used only in the test suite to capture output printed to screen by
1229
Data::Presenter methods. By the same author as Data::Presenter. Has
1230
IO::Capture (above) as prerequisite.
1231
L.
1232
1233
=item Tie::File
1234
1235
Used only in the test suite to validate text printed to files by
1236
Data::Presenter methods. By Mark-Jason Dominus. Distributed with Perl since
1237
5.7.3; otherwise, available from CPAN: L.
1238
1239
=back
1240
1241
Each of the prerequisites is pure Perl and should install with the
1242
F shell by typing 'y' at the prompts as needed.
1243
1244
=head1 DESCRIPTION
1245
1246
Data::Presenter is an object-oriented module useful for the
1247
reformatting of already formatted text files such as reports generated by
1248
database programs. If the data can be represented by a
1249
row-column matrix, where for each data entry (row):
1250
1251
=over 4
1252
1253
=item *
1254
1255
there are one or more fields containing data values (columns); and
1256
1257
=item *
1258
1259
at least one of those fields can be used as an index to uniquely identify
1260
each entry,
1261
1262
=back
1263
1264
then the data structure is suitable for manipulation by Data::Presenter.
1265
In Perl terms, if the data can be represented by a I, it is
1266
suitable for reformatting with Data::Presenter.
1267
1268
Data::Presenter can be used to output some fields (columns) from a database
1269
while excluding others (see L<"sort_by_column()"> below). It can also be used
1270
to select certain entries (rows) from the database for output while excluding
1271
other entries (see L<"select_rows()"> below).
1272
1273
In addition, if a user has two or more database reports, each of which has
1274
the same field serving as an index for the data, then it is possible to
1275
construct either a:
1276
1277
=over 4
1278
1279
=item *
1280
1281
L object
1282
which holds data for those entries found in common in all the source
1283
databases (the I of the entries in the source databases); or a
1284
1285
=item *
1286
1287
L object
1288
which holds data for those entries found in any of the source databases (the
1289
I of the entries in the source databases).
1290
1291
=back
1292
1293
Whichever flavor of Data::Presenter::Combo object the user creates, the
1294
module guarantees that each field (column) found in any of the source
1295
databases appears once and once only in the Combo object.
1296
1297
Data::Presenter is I a database module I, nor is it an interface
1298
to databases in the manner of DBI. It cannot used to enter data into a
1299
database, nor can it be used to modify or delete data. Data::Presenter
1300
operates on I generated from databases and is designed for the user
1301
who:
1302
1303
=over 4
1304
1305
=item *
1306
1307
may not have direct access to a given database;
1308
1309
=item *
1310
1311
receives reports from that database generated by another user; but
1312
1313
=item *
1314
1315
needs to manipulate and re-output that data in simple, useful ways such as
1316
text files, Perl formats and HTML tables.
1317
1318
=back
1319
1320
Data::Presenter is most appropriate in situations where the user either has
1321
no access to (or chooses not to use) commercial desktop database programs such
1322
as I(r) or open source database programs such as I(r).
1323
Data::Presenter's installation and preparation require moderate knowledge of
1324
Perl, but the actual running of Data::Presenter scripts can be delegated to
1325
someone with less knowledge of Perl.
1326
1327
=head1 DEFINITIONS AND EXAMPLES
1328
1329
=head2 Definitions
1330
1331
=head3 Administrator
1332
1333
The individual in a workplace responsible for the
1334
installation of Data::Presenter on the system or network, analysis of
1335
sources, preparation of Data::Presenter configuration files and preparation
1336
of Data::Presenter subclass packages other than Data::Presenter::Combo and
1337
its subclasses. (I L<"Operator">.)
1338
1339
=head3 Entry
1340
1341
A row in the L containing the values of the
1342
fields for one particular item.
1343
1344
=head3 Field
1345
1346
A column in the L containing a value for each
1347
entry.
1348
1349
=head3 Index
1350
1351
The column in the L whose values uniquely
1352
identify each entry in the source. Also referred to as ''unique ID.'' (In
1353
the current implementation of Data::Presenter, an index must be a strictly
1354
numerical value.)
1355
1356
=head3 Index Field
1357
1358
The column in the L containing a unique
1359
value (L<"index">) for each entry.
1360
1361
=head3 Metadata
1362
1363
Entries in the Data::Presenter object's data structure which
1364
hold information prepared by the administrator about the data structure and
1365
output parameters.
1366
1367
In the current version of Data::Presenter, metadata is extracted from the
1368
variables C<@fields>, C<%parameters> and C<$index> found in the configuration
1369
file F. The metadata is first stored in package variables in
1370
the invoking Data::Presenter subclass package and then entered into the
1371
Data::Presenter object as hash entries keyed by C<'fields'>, C<'parameters'>
1372
and C<$index>, respectively. (The word 'options' has also been reserved for
1373
future use as the key of a metadata entry in the object's data structure.)
1374
1375
=head3 Object's Current Data Structure
1376
1377
Non-L entries found
1378
in the Data::Presenter object at the point a particular selection, sorting or
1379
output method is called.
1380
1381
The object's current data structure may be thought of as the result of the
1382
following calculations:
1383
1384
construct a Data::Presenter::[Package1] object
1385
1386
less: entries excluded by application of selection criteria found
1387
in C
1388
1389
less: metadata entries in object keyed by 'fields', 'parameters' or
1390
'fields'
1391
1392
result: object's current data structure
1393
1394
=head3 Operator
1395
1396
The individual in a workplace responsible for running a
1397
Data::Presenter script, including:
1398
1399
=over 4
1400
1401
=item *
1402
1403
selection of sources;
1404
1405
=item *
1406
1407
selection of particular entries and fields from the source for presentation
1408
in the output; and
1409
1410
=item *
1411
1412
selection of output formats and names of output files. (I
1413
L<"Administrator">.)
1414
1415
=back
1416
1417
=head3 Source
1418
1419
A report, typically saved in the form of a text file, generated
1420
by a database program which presents data in a row-column format. The source
1421
may also contain other information such as page headers and footers and table
1422
headers and footers. Also referred to herein as ''source report,'' ''source
1423
file'' or ''database source report.''
1424
1425
=head2 Examples
1426
1427
Sample files are included in the archive file in which this documentation is
1428
found. Three source files, F, F and F,
1429
are included, as are the corresponding Data::Presenter subclass packages
1430
(F, F and F) and configuration files
1431
(F, F and F).
1432
1433
=head1 USAGE: Administrator
1434
1435
This section addresses those aspects of the usage of Data::Presenter which
1436
must be implemented by the L:
1437
1438
=over 4
1439
1440
=item *
1441
1442
L of Data::Presenter on the system;
1443
1444
=item *
1445
1446
analysis of L;
1447
1448
=item *
1449
1450
preparation of Data::Presenter L
1451
File (fields.XXX.data)"> files; and
1452
1453
=item *
1454
1455
preparation of Data::Presenter L
1456
Data::Presenter Subclasses"> other than Data::Presenter::Combo and its
1457
subclasses.
1458
1459
=back
1460
1461
If Data::Presenter has already been properly configured by your administrator
1462
and you are simply concerned with using Data::Presenter to generate reports,
1463
you may skip ahead to L<"USAGE: Operator">.
1464
1465
=head2 Installation
1466
1467
Data::Presenter installs in the same way as other Perl extensions available
1468
from CPAN: either automatically via the CPAN shell or manually with these
1469
commands:
1470
1471
% gunzip Data-Presenter-1.03.tar.gz
1472
% tar xf Data-Presenter-1.03.tar
1473
% cd Data-Presenter-1.03
1474
% perl Makefile.PL
1475
% make
1476
% make test
1477
% make install
1478
1479
This will install the following directory tree in your ''site perl''
1480
directory, I a directory such as
1481
F:
1482
1483
Data/
1484
Presenter.pm
1485
Presenter/
1486
Combo.pm
1487
Combo/
1488
Intersect.pm
1489
Union.pm
1490
1491
1492
Once the Administrator has installed Data::Presenter, she must then decide
1493
which location on the network will be used to hold Data::Presenter::[Package1]
1494
subclass packages, where [Package1] is a Data::Presenter subclass in which a
1495
new object will be created. That location could be the F
1496
directory listed above or it could be some other location which users can
1497
access in a Perl program via the C pragma.
1498
1499
The Administrator must also decide on a location on the network which will be
1500
used to hold the Data::Presenter configuration files -- one for each data
1501
source to be used by Data::Presenter. By convention, each configuration file
1502
is named by some variation on the theme of F.
1503
1504
Suppose, for instance, that F is the directory
1505
created to hold Data::Presenter-related files accessible to all users.
1506
Suppose, further, that in this business two database reports, I and
1507
I, will be processed via Data::Presenter. The Administrator would
1508
then create a directory tree like this:
1509
1510
/usr/share/datapresenter/
1511
Data/
1512
Presenter/
1513
Census.pm
1514
Medinsure.pm
1515
config/
1516
fields.census.data
1517
fields.medinsure.data
1518
1519
The Administrator could also create a directory called F to hold the
1520
source files to be processed with Data::Presenter, and she could also create a
1521
directory called F to hold files created via Data::Presenter -- but
1522
neither of these are strictly necessary.
1523
1524
=head2 Analysis of Source Files
1525
1526
Successful use of Data::Presenter assumes that the administrator is able to
1527
analyze a report generated from a database, distinguish key structural
1528
features of such a source report and write Perl code which will extract the
1529
most relevant information from the report. A complete discussion of these
1530
issues is beyond the scope of this documentation. What follows is a taste of
1531
the issues involved.
1532
1533
Structural features of a database report are likely to include the following:
1534
report headers, page headers, table headers, data entries reporting values
1535
of a variety of fields, page footers and report footers. Of these features,
1536
data entries and table headers are most important from the perspective of
1537
Data::Presenter. The data entries are the data which will actually be
1538
manipulated by Data::Presenter, while table headers will provide the
1539
administrator guidance when writing the configuration file F.
1540
Report and page headers and footers are generally irrelevant and will be
1541
stripped out.
1542
1543
For example, let us suppose that a portion of a client census looks like
1544
this:
1545
1546
CLIENTS - AUGUST 1, 2001 - C O N F I D E N T I A L PAGE 1
1547
SHRED WHEN NEW LIST IS RECEIVED!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1548
LAST NAME FIRST NAM C. NO BIRTH
1549
1550
HERNANDEZ HECTOR 456791 1963-07-16
1551
VASQUEZ ADALBERTO 456792 1973-10-02
1552
WASHINGTON ALBERT 906786 1953-03-31
1553
1554
The first two lines are probably report or page headers and should be
1555
stripped out. The third line consists of table column names and may give
1556
clues as to how F should be written. The fourth line is
1557
blank and should be stripped out. The next three lines constitute actual
1558
rows of data; these will be the focus of Data::Presenter.
1559
1560
A moderately experienced Perl programmer will look at this report and say,
1561
''Each row of data can be stored in a Perl array. If each client's 'c. no'
1562
is unique, then it can be used as the key of an entry in a Perl hash where
1563
the entry's value is a reference to the array just mentioned. A hash of
1564
arrays -- I can use Data::Presenter!''
1565
1566
Our Perl programmer would then say, ''I'll open a filehandle to the source
1567
file and read the file line-by-line into a C loop. I'll write lines
1568
beginning C to bypass the headers and the blank lines.'' For
1569
instance:
1570
1571
next if (/^CLIENTS/);
1572
next if (/^SHRED/);
1573
next if (/^\s?LAST\sNAME/);
1574
next if (/^$/);
1575
1576
Our Perl hacker will then say, ''I could try to write regular expressions to
1577
handle the rows of data. But since the data appears to be strictly columnar,
1578
I'll probably be better off using the Perl C function. I'll use the
1579
column headers to suggest names for my variables.'' For instance:
1580
1581
my ($lastname, $firstname, $cno, $datebirth) =
1582
unpack("x A14 x A10 x A6 x A10", $_);
1583
1584
Having provided a taste of what to do with the rows of the data structure, we
1585
now turn to an analysis of the columns of the structure.
1586
1587
=head2 Preparation of Configuration File (F)
1588
1589
For each data source, the administrator must prepare a configuration file,
1590
typically named as some variation on F.
1591
F consists of three Perl variables:
1592
C<@fields>, C<%parameters> and C<$index>.
1593
1594
=head3 C<@fields>
1595
1596
C<@fields> has one element for each column (field) that appears
1597
in the data source. The elements of C<@fields> I appear in exactly the
1598
same order as they appear in the data source. Each element should be a
1599
single Perl word, I, consist solely of letters, numerals or the
1600
underscore character (C<_>).
1601
1602
In the sample configuration file F included with this
1603
documentation, this variable reads:
1604
1605
@fields = qw(
1606
lastname firstname cno unit ward dateadmission datebirth
1607
);
1608
1609
In another sample configuration file, F, this variable
1610
reads:
1611
1612
@fields = qw(lastname firstname cno stateid medicare medicaid);
1613
1614
=head3 C<%parameters>
1615
1616
C<%parameters> is a bit trickier. There must be one entry
1617
in C<%parameters> for each element in C<@fields>. Hence, there is one entry
1618
in C<%parameters> for each column (field) in the data source. However, the
1619
keys of C<%parameters> are spelled C<$fields[0]>, C<$fields[1]>, and so on
1620
through the highest index number in C<@fields> (which is 1 less than the
1621
number of elements in C<@fields>). Using the example above, we can begin to
1622
construct C<%parameters> as follows:
1623
1624
%parameters = (
1625
$fields[0] =>
1626
$fields[1] =>
1627
$fields[2] =>
1628
$fields[3] =>
1629
$fields[4] =>
1630
$fields[5] =>
1631
$fields[6] =>
1632
);
1633
1634
The value for each entry in C<%parameters> consists of an array of 4 elements
1635
specified as follows:
1636
1637
=over 4
1638
1639
=item Element 0
1640
1641
A positive integer specifying the maximum number of characters which may be
1642
displayed in any output format for the given column (field). In the example
1643
above, we will specify that column 'lastname' (C<$fields[0]>) may have a
1644
maximum of 14 characters.
1645
1646
$fields[0] => [14,
1647
1648
=item Element 1
1649
1650
An upper-case letter 'U' or 'D' (for 'Up' or 'Down') enclosed in single
1651
quotation marks indicating whether the given column should be sorted in
1652
ascending or descending order. In the example above, 'lastname' sorts in
1653
ascending order.
1654
1655
$fields[0] => [14, 'U',
1656
1657
=item Element 2
1658
1659
A lower-case letter 'a', 'n' or 's' enclosed in single quotation marks
1660
indicating whether the given column should be sorted alphabetically
1661
(case-insensitive), numerically or ASCII-betically (case-sensitive). In the
1662
example above, 'lastname' sorts in alphabetical order. (Data::Presenter
1663
I does not yet have a facility for sorting in date or time order. If
1664
dates are entered as pure numerals in 'MMDD' order, they may be sorted
1665
numerically. If they are entered in the MySQL standard format '
1666
YY-MM-DD', they may be sorted alphabetically.)
1667
1668
$fields[0] => [14, 'U', 'a',
1669
1670
=item Element 3
1671
1672
A string enclosed in single quotation marks to be used as a column header
1673
when the data is outputted in some table-like format such as a Perl format
1674
with a header or an HTML table. The administrator may choose to use exactly
1675
the same words here that were used in C<@fields>, but a more natural language
1676
string is probably preferable. In the example above, the first column will
1677
carry the title 'Last Name' in any output.
1678
1679
$fields[0] => [14, 'U', 'a', 'Last Name'],
1680
1681
=back
1682
1683
Using the same example as previously, we can now complete C<%parameters> as:
1684
1685
%parameters = (
1686
$fields[0] => [14, 'U', 'a', 'Last Name'],
1687
$fields[1] => [10, 'U', 'a', 'First Name'],
1688
$fields[2] => [ 7, 'U', 'n', 'C No.'],
1689
$fields[3] => [ 6, 'U', 'a', 'Unit'],
1690
$fields[4] => [ 4, 'U', 'n', 'Ward'],
1691
$fields[5] => [10, 'U', 'a', 'Date of Admission'],
1692
$fields[6] => [10, 'U', 'a', 'Date of Birth'],
1693
);
1694
1695
=head3 C<$index>
1696
1697
C<$index> is the simplest element of I. It is the
1698
array index for the entry in C<@fields> which describes the field in the data
1699
source whose values uniquely identify each entry in the source. If, in the
1700
example above, C<'cno'> is the L for the data in
1701
I, then C<$index> is C<2>. (Remember that Perl starts counting
1702
array elements with C<0>.)
1703
1704
=head2 Preparation of Data::Presenter Subclasses
1705
1706
F, F,
1707
F and F
1708
are ready to use ''as is.'' They require no further modification by the
1709
administrator. However, each report from which the operator draws data needs
1710
to have a package subclassed beneath Data::Presenter and written specifically
1711
for that report by the administrator.
1712
1713
Indeed, B from Data::Presenter.
1714
All objects are constructed from subclasses of Data::Presenter.>
1715
1716
Hence:
1717
1718
$dp1 = Data::Presenter->new( # INCORRECT
1719
$source, \@fields, \%parameters, $index);
1720
1721
$dp1 = Data::Presenter::[Package1]->new( # CORRECT
1722
$source, \@fields, \%parameters, $index);
1723
1724
Data::Presenter::[Package1], however, does not contain a C method. It
1725
inherits Data::Presenter's C method -- which then turns around and
1726
delegates the task of populating the object with data to
1727
Data::Presenter::[Package1]'s C<_init()> method!
1728
1729
This C<_init()> method must be customized by the administrator to properly
1730
handle the specific features of each source file. This requires that the
1731
administrator be able to write a Perl script to 'clean up' the source file so
1732
that only lines containing meaningful data are written to the Data::Presenter
1733
object. (See L<"Analysis of Source Files"> above.) With that in mind, a
1734
Data::Presenter::[Package1] package must always include the following
1735
methods:
1736
1737
=over 4
1738
1739
=item * C<_init()>
1740
1741
This method is called from within the constructor and is used to populate the
1742
hash which is blessed into the new object. It opens a filehandle to the
1743
source file and typically reads that source file line-by-line via a Perl
1744
C loop. Perl techniques and functions such as regular expressions,
1745
C and C are used to populate a hash of arrays and to strip out
1746
lines in the data source not needed in the object. Should the administrator
1747
need to ''munge'' any of the incoming data so that it appears in a uniform
1748
format (I, '2001-07-02' rather than '7/2/2001' or '07/02/2001'), the
1749
administrator should write appropriate code within C<_init()> or in a separate
1750
module imported into the main package. Each element of each array used to
1751
store a data record must have a defined value. C is not permitted;
1752
assign an empty string to the element instead. A reference to this hash of
1753
arrays is returned to the constructor, which blesses it into the object.
1754
1755
=item * C<_extract_rows>
1756
1757
This method is called from within the Data::Presenter C method.
1758
In much the same manner as C<_init()>, it permits the administrator to
1759
''munge'' operator-typed data to achieve a uniform format.
1760
1761
=back
1762
1763
The packages F and F
1764
found in the F directory in this distribution provide examples of
1765
C<_init()> and C<_extract_rows>. Search for the lines of code which read:
1766
1767
# DATA MUNGING STARTS HERE
1768
# DATA MUNGING ENDS HERE
1769
1770
Here is a simple example of data munging. In the sample configuration file
1771
F, all elements of C<@fields> are entered entirely in
1772
lower-case. Hence, it would be advisable to transform the operator-specified
1773
content of C<$column> to all lower-case so that the program does not fail
1774
simply because an operator types an upper-case letter. See C<_extract_rows()>
1775
in the Data::Presenter::Census package included with this documentation for
1776
an example.
1777
1778
Sample file F contains an example of a subroutine
1779
written to clean up repetitive coding within the data munging section.
1780
Search for C.
1781
1782
=head1 USAGE: Operator
1783
1784
Once the administrator has installed Data::Presenter and completed the
1785
preparation of configuration files and Data::Presenter subclass packages, the
1786
administrator may turn over to the operator the job of selecting particular
1787
source files, output formats and particular entries and fields from within
1788
the source files.
1789
1790
=head2 Construction of a Data::Presenter Object
1791
1792
=head3 Declarations
1793
1794
Using the hospital census example included with this documentation, the
1795
operator would construct a Data::Presenter::Census object with the following
1796
code:
1797
1798
use Data::Presenter;
1799
use lib ("/usr/share/datapresenter");
1800
use Data::Presenter::Census;
1801
1802
our @fields = ();
1803
our %parameters = ();
1804
our $index = q{};
1805
1806
my $sourcefile = 'census.txt';
1807
my $configdir = "/usr/share/datapresenter";
1808
my $configfile = "$configdir/fields.census.data";
1809
1810
do $configfile;
1811
1812
=head3 C
1813
1814
my $dp1 = Data::Presenter::Census->new(
1815
$sourcefile, \@fields, \%parameters, $index);
1816
1817
=head2 Methods to Report on the Data::Presenter Object Itself
1818
1819
=head3 C
1820
1821
Returns the current number of data entries in the
1822
specified Data::Presenter object. This number does I include those
1823
elements in the object whose keys are reserved words. This method takes no
1824
arguments and returns one numerical scalar.
1825
1826
my $data_count = $dp1->get_data_count();
1827
print 'Data count is now: ', $data_count, "\n";
1828
1829
=head3 C
1830
1831
Prints the current data count preceded by ''Current
1832
data count: ''. This number does I include those elements in the
1833
object whose keys are reserved words. This method takes no arguments and
1834
returns no values.
1835
1836
$dp1->print_data_count();
1837
1838
=head3 C
1839
1840
Returns a reference to an array whose elements are an
1841
ASCII-betically sorted list of keys to the hash blessed into the
1842
Data::Presenter::[Package1] object. This list does not include those
1843
elements whose keys are reserved words. This method takes no arguments and
1844
returns only the array reference described.
1845
1846
my $keysref = $dp1->get_keys();
1847
print "Current data points are: @$keysref\n";
1848
1849
=head3 C
1850
1851
Returns a reference to a hash whose elements are
1852
key-value pairs where the key is the key of an element blessed into the
1853
Data::Presenter::[Package1] object and the value is 1, indicating that the
1854
key has been seen (a 'seen-hash'). This list does not include those elements
1855
whose keys are reserved words. This method takes no arguments and returns
1856
only the hash reference described.
1857
1858
my $seenref = $dp1->get_keys_seen();
1859
print "Current data points are: ";
1860
print "$_ " foreach (sort keys %{$seenref});
1861
print "\n";
1862
1863
=head3 C
1864
1865
Takes as argument a single string which is the name of one of the fields
1866
listed in C<@fields> in the configuration file. Returns a reference to a hash
1867
whose elements are keyed by the entries for that field in the data source and
1868
whose values are the number of times each entry was seen in the data.
1869
1870
For example, if the data consisted of this:
1871
1872
HERNANDEZ HECTOR 1963-08-01 456791
1873
VASQUEZ ADALBERTO 1973-08-17 786792
1874
VASQUEZ ALBERTO 1953-02-28 906786
1875
1876
where the left-most column was described in C<@fields> as C, then:
1877
1878
$seenref = $dp1->seen_one_column('lastname');
1879
1880
and C<$seenref> would hold:
1881
1882
{
1883
HERNANDEZ => 1,
1884
VASQUEZ => 2,
1885
}
1886
1887
=head2 Data::Presenter Selection, Sorting and Output Methods
1888
1889
=head3 C
1890
1891
C enables the operator to establish criteria
1892
by which specific entries from the data can be selected for output. It does
1893
so I by creating a new object but by striking out entries in the
1894
L which do
1895
not meet the selection criteria.
1896
1897
If the operator were using Perl as an interface to a true database program,
1898
selection of entries would most likely be handled by a module such as DBI and
1899
an SQL-like query. In that case, it would be possible to write complex
1900
selection queries which operate on more than one field at a time such as:
1901
1902
select rows where 'datebirth' is before 01/01/1960
1903
AND 'lastname' equals 'Vasquez'
1904
# (NOTE: This is generic code,
1905
# not true Perl or Perl DBI code.)
1906
1907
Complex selection queries are not yet possible in Data::Presenter. However,
1908
you could accomplish much the same objective with a series of simple
1909
selection queries that operate on only one field at a time,
1910
1911
select rows where 'datebirth" is before 01/01/1960
1912
1913
then
1914
1915
select rows where 'lastname' equals 'Vasquez'
1916
1917
each of which narrows the selection criteria.
1918
1919
How do we accomplish this within Data::Presenter? For each selection query,
1920
the operator must define 3 variables: C<$column>, C<$relation> and
1921
C<@choices>. These variables are passed to C, which in turn
1922
passes them to certain internal subroutines where their values are
1923
manipulated as follows.
1924
1925
=over 4
1926
1927
=item * C<$column>
1928
1929
C<$column> must be an element of L|"@fields"> found in the
1930
L.
1931
1932
=item * C<$relation>
1933
1934
C<$relation> expresses the verb part of the selection query, I
1935
relations such as C, C, C=>, C and so
1936
forth. In an attempt to add natural language flexibility to the selection
1937
query, Data::Presenter permits the operator to enter a wide variety of
1938
mathematical and English expressions here:
1939
1940
=over 4
1941
1942
=item * equality
1943
1944
'eq', 'equals', 'is', 'is equal to', 'is a member of',
1945
'is part of', '=', '=='
1946
1947
=item * non-equality
1948
1949
'is', 'is not', 'is not equal to', 'is not a member of',
1950
'is not part of', 'is less than or greater than',
1951
'is less than or more than', 'is greater than or less than',
1952
'is more than or less than', 'does not equal', 'not',
1953
'not equal to ', 'not equals', '!=', '! =', '!==', '! =='
1954
1955
=item * less than
1956
1957
'<', 'lt', 'is less than', 'is fewer than', 'before'
1958
1959
=item * greater than
1960
1961
'>', 'gt', 'is more than', 'is greater than', 'after'
1962
1963
=item * less than or equal to
1964
1965
'<=', 'le', 'is less than or equal to',
1966
'is fewer than or equal to', 'on or before', 'before or on'
1967
1968
=item * greater than or equal to
1969
1970
'>=', 'ge', 'is more than or equal to', 'is greater than or equal to',
1971
'on or after', 'after or on'
1972
1973
=back
1974
1975
As long as the operator selects a string from the category desired,
1976
Data::Presenter will convert it internally in an appropriate manner.
1977
1978
=item * C<@choices>
1979
1980
If the relationship being tested is one of equality or non-equality, then the
1981
operator may enter more than one value here, any one of which may satisfy the
1982
selection criterion.
1983
1984
my ($column, $relation, @choices);
1985
1986
$column = 'lastname';
1987
$relation = 'is';
1988
@choices = ('Smith', 'Jones');
1989
$dp1->select_rows($column, $relation, \@choices);
1990
1991
If, however, the relationship being tested is one of 'less than', 'greater
1992
than', etc., then the operator should enter only one value, as the value is
1993
establishing a limit above or below which the selection criterion will not be
1994
met.
1995
1996
$column = 'datebirth';
1997
$relation = 'before';
1998
@choices = ('01/01/1970');
1999
$dp1->select_rows($column, $relation, \@choices);
2000
2001
=back
2002
2003
=head3 C
2004
2005
C takes only 1 argument: a reference
2006
to an array consisting of the fields the operator wishes to present in the
2007
final output, listed in the order in which those fields should be sorted.
2008
All elements of this array must be elements in C<@fields>. B
2009
must always be included as one of the columns selected,> though it may be
2010
placed last if it is not intrinsically important in the final output.
2011
C returns a reference to a hash of appropriately sorted data
2012
which will be used as input to Data::Presenter methods such as
2013
C, C and C.
2014
2015
To illustrate:
2016
2017
my @columns_selected = ('lastname', 'firstname', 'datebirth', 'cno');
2018
$sorted_data = $dp1->sort_by_column(\@columns_selected);
2019
2020
Suppose that the operator fails to include the index column in
2021
C<@columns_selected>. This risks having two or more identical data entries,
2022
only the last of which would appear in the final output. As a safety
2023
precaution, C throws a warning and places duplicate entries
2024
in a text file called F.
2025
2026
Note: If you want your output to report only selected entries from the
2027
source, and if you want to apply one of the complex Data::Presenter output
2028
methods which require application of C, call C
2029
I calling C. Otherwise your report may contain
2030
blank lines.
2031
2032
=head3 C
2033
2034
C prints to standard output (generally, the computer
2035
monitor) a semicolon-delimited display of all entries in the object's
2036
current data structure. It takes no arguments and returns no values.
2037
2038
$dp1->print_to_screen();
2039
2040
A typical line of output will look something like:
2041
2042
VASQUEZ;JORGE;456787;LAVER;0105;1986-01-17;1956-01-13;
2043
2044
=head3 C
2045
2046
C prints to an operator-specified file a
2047
semicolon-delimited display of all entries in the object's current data
2048
structure. It takes 1 argument -- the user-specified output file -- and
2049
returns no values.
2050
2051
$outputfile = 'census01.txt';
2052
$dp1->print_to_file($outputfile);
2053
2054
A typical line of output will look exactly like that produced by
2055
L|"print_to_screen()">.
2056
2057
=head3 C
2058
2059
C, like C,
2060
prints to an operator-specified file. C allows the
2061
operator to specify the character pattern which will be used to delimit
2062
display of all entries in the object's current data structure. It does not
2063
print the delimiter after the final field in a particular data record. It
2064
takes 2 arguments -- the user-specified output file and the character pattern
2065
to be used as delimiter -- and returns no values.
2066
2067
$outputfile = 'delimited01.txt';
2068
$delimiter = '|||';
2069
$dp1->print_with_delimiter($outputfile, $delimiter);
2070
2071
The file created C is designed to be used as an input
2072
to functions such as 'Convert text to tabs' or 'Convert text to table' found
2073
in commercial word processing programs. Such functions require delimiter
2074
characters in the input. A typical line of output will look something like:
2075
2076
VASQUEZ|||JORGE|||456787|||LAVER|||0105|||1986-01-17|||1956-01-13
2077
2078
=head3 C
2079
2080
C prints to an operator-specified file each
2081
entry in the object's current data structure, sorted by the index and
2082
explicitly naming each field name/field value pair. It takes 1 argument --
2083
the user-specified output file -- and returns no values.
2084
2085
$outputfile = 'report01.txt';
2086
$dp1->full_report($outputfile);
2087
2088
The output for a given entry will look something like:
2089
2090
456787
2091
lastname VASQUEZ
2092
firstname JORGE
2093
cno 456787
2094
unit LAVER
2095
ward 0105
2096
dateadmission 1986-01-17
2097
datebirth 1956-01-13
2098
2099
=head3 C
2100
2101
C writes data via Perl's C function
2102
-- the function which internally powers Perl formats -- to an
2103
operator-specified file. C takes a list of 3 key-value pairs:
2104
2105
$dp1->writeformat(
2106
sorted => $sorted_data,
2107
columns => \@columns_selected,
2108
file => $outputfile,
2109
);
2110
2111
=over 4
2112
2113
=item * C
2114
2115
The value of C is a hash reference which is the return value of
2116
C. Hence, C can only be called once
2117
C has been called.
2118
2119
=item * C
2120
2121
The value of C is a reference to the array of fields in the data
2122
source selected for presentation in the output file. It is the same variable
2123
which is used as the argument to C.
2124
2125
=item * C
2126
2127
The value of C is the name of a file arbitrarily selected by the
2128
operator to hold the output of C.
2129
2130
=back
2131
2132
Using the ''census'' example from above, the overall sequence of code needed
2133
to use C would be:
2134
2135
@columns_selected = ('lastname', 'firstname', 'datebirth', 'cno');
2136
$sorted_data = $dp1->sort_by_column(\@columns_selected);
2137
2138
$dp1->writeformat(
2139
sorted => $sorted_data,
2140
columns => \@columns_selected,
2141
file => $outputfile,
2142
);
2143
2144
The result of the above call would be a file named F containing:
2145
2146
HERNANDEZ HECTOR 1963-08-01 456791
2147
VASQUEZ ADALBERTO 1973-08-17 786792
2148
VASQUEZ ALBERTO 1953-02-28 906786
2149
2150
The columnar appearance of the data is governed by choices made by the
2151
administrator within the configuration file (here, within
2152
F). The choice of columns themselves is controlled by
2153
the operator via C<\@columns_selected>.
2154
2155
=head3 C
2156
2157
C writes data via
2158
Perl formats to an operator-specified file and writes a Perl format header to
2159
that file as well. C takes a list of 4 key-value
2160
pairs. Three of these pairs are the same as in C; the fourth
2161
is:
2162
2163
=over 4
2164
2165
=item * C
2166
2167
title => $title,
2168
2169
C holds text chosen by the operator.
2170
2171
=back
2172
2173
The complete call to C looks like this:
2174
2175
@columns_selected = ('lastname', 'firstname', 'datebirth', 'cno');
2176
$sorted_data = $dp1->sort_by_column(\@columns_selected);
2177
2178
$dp1->writeformat_plus_header(
2179
sorted => $sorted_data,
2180
columns => \@columns_selected,
2181
file => $outputfile,
2182
title => $title,
2183
);
2184
2185
and will produce a header and formatted data like this:
2186
2187
Hospital Census Report
2188
2189
Date Date of
2190
Unit Ward Last Name First Name of Birth Admission C No.
2191
------------------------------------------------------------------
2192
LAVER 0105 VASQUEZ JORGE 1956-01-13 1986-01-17 456787
2193
LAVER 0107 VASQUEZ LEONARDO 1970-15-23 1990-08-23 456788
2194
SAMSON 0209 VASQUEZ JOAQUIN 1970-03-25 1990-11-14 456789
2195
2196
The wording of the column headers is governed by choices made by the
2197
administrator within the configuration file (here, within
2198
F). If a particular word in a column header is too long
2199
to fit in the space allocated, it will be truncated.
2200
2201
=head3 C
2202
2203
C is an
2204
advanced application of Data::Presenter and the reader may wish to skip this
2205
section until other parts of the module have been mastered.
2206
2207
C permits a sophisticated administrator to
2208
activate ''last minute'' substitutions in the strings printed out from the
2209
format accumulator variable C<$^A>. Suppose, for example, that a school
2210
administrator faced the problem of scheduling classes in different classrooms
2211
and in various time slots. Suppose further that, for ease of programming or
2212
data entry, the time slots were identified by chronologically sequential
2213
numbers and that instructors were identified by a unique ID built up from
2214
their first and last names. Applying an ordinary C to such
2215
data might show output like this
2216
2217
11 Arithmetic Jones 4044 4044_11
2218
11 Language Studies WilsonT 4054 4054_11
2219
12 Bible Study Eliade 4068 4068_12
2220
12 Introduction to Computers Knuth 4086 4086_12
2221
13 Psychology Adler 4077 4077_13
2222
13 Social Science JonesT 4044 4044_13
2223
51 World History Wells 4052 4052_51
2224
51 Music Appreciation WilsonW 4044 4044_51
2225
2226
where C<11> mapped to 'Monday, 9:00 am', C<12> to 'Monday, 10:00 am', C<51>
2227
to 'Friday, 9:00 am' and so forth and where the fields underlying this output
2228
were 'timeslot', 'classname', 'instructor', 'room' and 'sessionID'. While
2229
this presentation is useful, a client might wish to have the time slots and
2230
instructor IDs decoded for more readable output:
2231
2232
Monday, 9:00 Arithmetic E Jones 4044 4044_11
2233
Monday, 9:00 Language Studies T Wilson 4054 4054_11
2234
Monday, 10:00 Bible Study M Eliade 4068 4068_12
2235
Monday, 10:00 Introduction to Computers D Knuth 4086 4086_12
2236
Monday, 11:00 Psychology A Adler 4077 4077_13
2237
Monday, 11:00 Social Science T Jones 4044 4044_13
2238
Friday, 9:00 World History H Wells 4052 4052_51
2239
Friday, 9:00 Music Appreciation W Wilson 4044 4044_51
2240
2241
Time slots coded with chronologically sequential numbers can be ordered to
2242
sort numerically in the C<%parameters> established in the
2243
F file corresponding to a particular
2244
Data::Presenter::[package1]. Their human-language equivalents, however, will
2245
I sort properly, as, for example, 'Friday' comes before 'Monday' in an
2246
alphabetical or ASCII-betical sort. Clearly, it would be desirable to
2247
establish the sorting order by relying on the chronologically sequential time
2248
slots and yet have the printed output reflect more human-readable days of the
2249
week and times. Analogously, for the instructor we might wish to display the
2250
first initial and last name in our printed output rather than his/her ID
2251
code.
2252
2253
The order in which data records appear in output is determined by
2254
C I C is called. How can we preserve
2255
this order in the final output?
2256
2257
Answer: After we have stored a given formed line in C<$^A>, we I
2258
that line by calling an internal subroutine defined in the invoking class,
2259
C, which tells Perl to splice out
2260
certain portions of the formed line and substitute more human-readable copy.
2261
The information needed to make C<_reprocessor()> work comes from two places.
2262
2263
First, from a hash passed by reference as an argument to
2264
C. C takes
2265
a list of four key-value pairs, the first three of which are the same as those
2266
passed to C. The fourth key-value pair to
2267
C is a reference to a hash whose keys are
2268
the names of the fields in the data records where we wish to make
2269
substitutions and whose corresponding values are the number of characters
2270
the field will be allocated I substitution. The call to
2271
C would therefore look like this:
2272
2273
%reprocessing_info = (
2274
timeslot => 17,
2275
instructor => 15,
2276
);
2277
2278
$dp1->writeformat_with_reprocessing(
2279
sorted => $sorted_data,
2280
columns => \@columns_selected,
2281
file => $outputfile,
2282
reprocess => \%reprocessing_info,
2283
);
2284
2285
Second, C takes advantage of the fact that
2286
Data::Presenter's package global hash C<%reserved> contains four keys --
2287
C, C, C and C -- only the first
2288
three of which are used in Data::Presenter's constructor or sorting methods.
2289
Early in the development of Data::Presenter the keyword C was
2290
deliberately left unused so as to be available for future use.
2291
2292
The sophisticated administrator can make use of the C key to store
2293
metadata in a variety of ways. In writing
2294
C, the administrator prepares the way
2295
for last-minute reprocessing by creating an C key in the hash to
2296
be blessed into the C object. The value
2297
corresponding to the key C is itself a hash with two elements
2298
keyed by C and C. If C<$dp1> is the object and C<%data>
2299
is the hash blessed into the object, then we are looking at these two
2300
elements:
2301
2302
$data{options}{subs}
2303
$data{options}{sources}
2304
2305
The values corresponding to these two keys are references to yet more hashes.
2306
The hash which is the value for C<$data{options}{subs}> hash keys whose
2307
elements are the name of subroutines, each of which is built up from the
2308
string C concatenated with the name of the field to be
2309
reprocessed, I
2310
2311
$data{options}{subs} = {
2312
reprocess_timeslot => 1,
2313
reprocess_instructor => 1,
2314
};
2315
2316
These field-specific internal reprocessing subroutines may be defined by the
2317
administrator in C or they may be imported from
2318
some other module. C verifies that these
2319
subroutines are actually present in C
2320
regardless of where they were originally found.
2321
2322
What about C<$data{options}{sources}>? This location stores all the
2323
original data from which substitutions are made. Example:
2324
2325
$data{options}{sources} = {
2326
timeslot => {
2327
11 => ['Monday', '9:00 am' ],
2328
12 => ['Monday', '10:00 am' ],
2329
13 => ['Monday', '11:00 am' ],
2330
51 => ['Friday', '9:00 am' ],
2331
},
2332
instructor => {
2333
'Jones' => ['Jones', 'E' ],
2334
'WilsonT' => ['Wilson', 'T' ],
2335
'Eliade' => ['Eliade', 'M' ],
2336
'Knuth' => ['Knuth', 'D' ],
2337
'Adler' => ['Adler', 'A' ],
2338
'JonesT' => ['Jones', 'T' ],
2339
'Wells' => ['Wells', 'H' ],
2340
'WilsonW' => ['Wilson', 'W' ],
2341
}
2342
};
2343
2344
The point at which this data gets into the object is, of course,
2345
C. What the administrator does at that
2346
point is limited only by his/her imagination. Data::Presenter seeks to bless
2347
a hash into its object. That hash must meet the following requirements:
2348
2349
=over 4
2350
2351
=item *
2352
2353
With the exception of elements holding metadata, each element holds an array,
2354
each of whose elements must be a number or a string.
2355
2356
=item *
2357
2358
Three metadata elements keyed as follows must be present:
2359
2360
=over 4
2361
2362
=item * C
2363
2364
=item * C
2365
2366
=item * C
2367
2368
=back
2369
2370
The fourth metadata element keyed by C is required only if some
2371
Data::Presenter method has been written which requires the information stored
2372
therein. C is the only such method currently
2373
present, but additional methods using the C key may be added in
2374
the future.
2375
2376
=back
2377
2378
The author has used two different approaches to the problem of initializing
2379
Data::Presenter::[package1] objects.
2380
2381
=over 4
2382
2383
=item *
2384
2385
In the first, more standard approach, the name of a source file can be passed
2386
to the constructor, which passes it on to the initializer, which then opens a
2387
filehandle to the file and processes with regular expressions, C,
2388
etc. to build an array for each data record. Keyed by a unique ID, a
2389
reference to this array then becomes the value of an element of the hash
2390
which, once metadata is added, is blessed into the
2391
Data::Presenter::[package1] object. The source for the metadata is the
2392
F file and the C<@fields>, C<%parameters> and
2393
C<$index> found therein.
2394
2395
=item *
2396
2397
A second approach asks: ''Instead of having C<_init()> do data munging on a
2398
file, why not directly pass it a hash of arrays? Better still, why not pass
2399
it a hash of arrays which already has an C<'options'> key defined? And
2400
better still yet, why not pass it an object produced by some other Perl
2401
module and containing a blessed hash of arrays with an already defined
2402
C key?'' In this approach, C
2403
does no data munging. It is mainly concerned with defining the three
2404
required metadata elements.
2405
2406
=back
2407
2408
=head3 C
2409
2410
C is an advanced application of
2411
Data::Presenter and the reader may wish to skip this section until other
2412
parts of the module have been mastered.
2413
2414
C enables the user to have I column headers (as in
2415
C) and dynamic, 'just-in-time' reprocessing of data
2416
in selected fields (as in C). Call it just
2417
as you would C, but add a key-value pair
2418
keyed by C.
2419
2420
%reprocessing_info = (
2421
timeslot => 17,
2422
instructor => 15,
2423
);
2424
2425
$dp1->writeformat_deluxe(
2426
sorted => $sorted_data,
2427
columns => \@columns_selected,
2428
file => $outputfile,
2429
reprocess => \%reprocessing_info,
2430
title => $title,
2431
);
2432
2433
=head3 C
2434
2435
The C family of
2436
methods discussed above write data to plain-text files in columns aligned
2437
with whitespace via Perl's C function -- the function which
2438
internally powers Perl formats. This is suitable if the ultimate consumer of
2439
the data is satisfied to read a plain-text file. However, in many business
2440
contexts data consumers are more accustomed to word processing files than to
2441
plain-text files. In particular, data consumers are accustomed to data
2442
presented in tables created by commercial word processing programs. Such
2443
programs generally have the capacity to take text in which individual lines
2444
consist of data separated by delimiter characters such as tabs or commas and
2445
transform that text into rows in a table where the delimiters signal the
2446
borders between table cells.
2447
2448
To that end, the author has created the
2449
C family of subroutines to print output
2450
to plain-text files intended for further processing within word processing
2451
programs. The simplest method in this family, C, takes a
2452
list of three key-value pairs:
2453
2454
=over 4
2455
2456
=item * C
2457
2458
The value keyed by C is a hash reference which is the return value of
2459
C. Hence, C can only be called once
2460
C has been called.
2461
2462
=item * C
2463
2464
The value keyed by C is the name of a file arbitrarily selected by
2465
the operator to hold the output of C.
2466
2467
=item * C
2468
2469
The value keyed by C is the user-selected delimiter character or
2470
characters which will delineate fields within an individual record in the
2471
output file. Typically, this character will be a tab (C<\t>), comma (C<,>)
2472
or similar character that a word processing program's 'convert text to table'
2473
feature can use to establish columns.
2474
2475
=back
2476
2477
Using the ''census'' example from above, the overall sequence of code needed
2478
to use C would be:
2479
2480
@columns_selected = ('lastname', 'firstname', 'datebirth', 'cno');
2481
$sorted_data = $dp1->sort_by_column(\@columns_selected);
2482
2483
$dp1->writedelimited(
2484
sorted => $sorted_data,
2485
file => $outputfile,
2486
delimiter => $delimiter,
2487
);
2488
2489
Note that, unlike C, C does not require a
2490
reference to C<@columns_selected> to be passed as an argument.
2491
2492
Depending on the number of characters in a text editor's tab-stop setting,
2493
the result of the above call might look like:
2494
2495
HERNANDEZ HECTOR 1963-08-01 456791
2496
VASQUEZ ADALBERTO 1973-08-17 786792
2497
VASQUEZ ALBERTO 1953-02-28 906786
2498
2499
This is obviously less readable than the output of C -- but
2500
since the output of C is intended for further processing by
2501
a word processing program rather than for final use, this is not a major
2502
concern.
2503
2504
=head3 C
2505
2506
Just as C extended
2507
C to include column headers, C
2508
extends C to include column headers, separated by the same
2509
delimiter character as the data, in a plain-text file intended for further
2510
processing by a word processing program.
2511
2512
C takes a list of four key-value pairs:
2513
C, C, C, and C. The complete call
2514
to C looks like this:
2515
2516
@columns_selected = (
2517
'unit', 'ward', 'lastname', 'firstname',
2518
'datebirth', 'dateadmission', 'cno');
2519
$sorted_data = $dp1->sort_by_column(\@columns_selected);
2520
2521
$dp1->writedelimited_plus_header(
2522
sorted => $sorted_data,
2523
columns => \@columns_selected,
2524
file => $outputfile,
2525
delimiter => $delimiter,
2526
);
2527
2528
Note that, unlike C, C
2529
does not take C<$title> as an argument. It is felt that any title would be
2530
more likely to be supplied in the word-processing file which ultimately holds
2531
the data prepared by C and that its inclusion
2532
at this point might interfere with the workings of the word processing
2533
program's 'convert text to table' feature.
2534
2535
Depending on the number of characters in a text editor's tab-stop setting,
2536
the result of the above call might look like:
2537
2538
Date Date of
2539
Unit Ward Last Name First Name of Birth Admission C No.
2540
LAVER 0105 VASQUEZ JORGE 1956-01-13 1986-01-17 456787
2541
LAVER 0107 VASQUEZ LEONARDO 1970-15-23 1990-08-23 456788
2542
SAMSON 0209 VASQUEZ JOAQUIN 1970-03-25 1990-11-14 456789
2543
2544
Again, the readability of the delimited copy in the plain-text file here is
2545
not as important as how correctly the delimiter has been chosen in order to
2546
produce good results once the file is further processed by a word processing
2547
program.
2548
2549
Note that, unlike C, C
2550
does not produce a hyphen line. The author feels that the separation of
2551
header and body within the table is here better handled within the word
2552
processing file which ultimately holds the data prepared by
2553
C.
2554
2555
Note further that, unlike C,
2556
C does not truncate the words in column headers.
2557
This is because the C family of methods does not impose
2558
a maximum width on output fields as does the C family of
2559
methods. Hence, there is no need to truncate headers to fit within specified
2560
column widths. Column widths in the C family are
2561
ultimately determined by the word processing program which produces the final
2562
output.
2563
2564
=head3 C
2565
2566
C
2567
is an advanced application of Data::Presenter and the reader may wish to skip
2568
this section until other parts of the module have been mastered.
2569
2570
C, like C,
2571
permits a sophisticated administrator to activate ''last minute''
2572
substitutions in strings to be printed such that substitutions do not affect
2573
the pre-established sorting order. For a full discussion of the rationale
2574
for this feature, see the discussion of L<"writeformat_with_reprocessing()">
2575
above.
2576
2577
C takes a list of five key-value pairs,
2578
four of which are the same arguments passed to
2579
C. The fifth key-value pair is a reference
2580
to an array holding a list of those columns selected for output upon which
2581
the user chooses to perform reprocessing.
2582
2583
@reprocessing_info = qw( instructor timeslot room );
2584
2585
$dp1->writedelimited_with_reprocessing(
2586
sorted => $sorted_data,
2587
columns => \@columns_selected,
2588
file => $outputfile,
2589
delimiter => $delimiter,
2590
reprocess => \@reprocessing_info,
2591
);
2592
2593
Taking the classroom scheduling problem presented above,
2594
C would produce output looking something
2595
like this:
2596
2597
Monday, 9:00 Arithmetic E Jones 4044 4044_11
2598
Monday, 9:00 Language Studies T Wilson 4054 4054_11
2599
Monday, 10:00 Bible Study M Eliade 4068 4068_12
2600
Monday, 10:00 Introduction to Computers D Knuth 4086 4086_12
2601
Monday, 11:00 Psychology A Adler 4077 4077_13
2602
Monday, 11:00 Social Science T Jones 4044 4044_13
2603
Friday, 9:00 World History H Wells 4052 4052_51
2604
Friday, 9:00 Music Appreciation W Wilson 4044 4044_51
2605
2606
Usage of C requires that the administrator
2607
appropriately define C and
2608
C subroutines in the invoking package,
2609
along with appropriate subroutines specific to each argument capable of being
2610
reprocessed. Again, see the discussion in L<"writeformat_with_reprocessing()">.
2611
2612
=head3 C
2613
2614
C is an advanced
2615
application of Data::Presenter and the reader may wish to skip this section
2616
until other parts of the module have been mastered.
2617
2618
C completes the parallel structure between the
2619
C and C families of Data::Presenter
2620
methods by enabling the user to have I column headers (as in
2621
C) and dynamic, 'just-in-time' reprocessing of
2622
data in selected fields (as in C). Except
2623
for the name of the method called, the call to C is
2624
the same as for C:
2625
2626
@reprocessing_info = qw( instructor timeslot );
2627
2628
$dp1->writedelimited_deluxe(
2629
sorted => $sorted_data,
2630
columns => \@columns_selected,
2631
file => $outputfile,
2632
delimiter => $delimiter,
2633
reprocess => \@reprocessing_info,
2634
);
2635
2636
Using the classroom scheduling example from above,the output from
2637
C might look like this:
2638
2639
Timeslot Group Instructor Room GroupID
2640
Monday, 9:00 Arithmetic E Jones 4044 4044_11
2641
Monday, 9:00 Language Studies T Wilson 4054 4054_11
2642
Monday, 10:00 Bible Study M Eliade 4068 4068_12
2643
Monday, 10:00 Introduction to Computers D Knuth 4086 4086_12
2644
Monday, 11:00 Psychology A Adler 4077 4077_13
2645
Monday, 11:00 Social Science T Jones 4044 4044_13
2646
Friday, 9:00 World History H Wells 4052 4052_51
2647
Friday, 9:00 Music Appreciation W Wilson 4044 4044_51
2648
2649
As with C, C
2650
requires careful preparation on the part of the administrator. See the
2651
discussion under L<"writeformat_with_reprocessing()"> above.
2652
2653
=head3 C
2654
2655
In its current formulation, C works very much
2656
like C. It writes data to an operator-specified
2657
HTML file and writes an appropriate header to that file as well.
2658
C takes the same 4 arguments as C:
2659
C<$sorted_data>, C<\@columns_selected>, C<$outputfile> and C<$title>. The
2660
body of the resulting HTML file is more similar to a Perl format than to an
2661
HTML table. (This may be upgraded to a true HTML table in a future release.)
2662
2663
$dp1->writeHTML(
2664
sorted => $sorted_data,
2665
columns => \@columns_selected,
2666
file => $HTMLoutputfile, # must have .html extension
2667
title => $title,
2668
);
2669
2670
=head2 Data::Presenter::Combo Objects
2671
2672
It is quite possible that we may have two or more different database reports
2673
which present data on the same underlying universe or population. If these
2674
reports share a common index field which can be used to uniquely identify
2675
each entry in the underlying population, then we would like to be able to
2676
combine these sources, manipulate the data and re-output them via the simple
2677
and complex Data::Presenter output methods described in the L<"Synopsis">
2678
above.
2679
2680
In other words, if we have already created
2681
2682
my $dp1 = Data::Presenter::[Package1]->new(
2683
$sourcefile, \@fields,\%parameters, $index);
2684
my $dp2 = Data::Presenter::[Package2]->new(
2685
$sourcefile, \@fields,\%parameters, $index);
2686
...
2687
my $dpx = Data::Presenter::[Package2]->new(
2688
$sourcefile, \@fields,\%parameters, $index);
2689
2690
we would like to be able to define an array of the objects we have created
2691
and construct a new object combining the first two in an orderly manner:
2692
2693
my @objects = ($dp1, $dp2, ... $dpx);
2694
my $dpC = Data::Presenter::[some subclass]->new(\@objects);
2695
2696
We would then like to be able to call all the Data::Presenter sorting,
2697
selecting and output methods discussed above on C<$dpC> B
2698
re-specify C<$sourcefile>, C<\@fields>, C<\%parameters> or C<$index>>.
2699
2700
Can we do this? Yes, we can. More precisely, we can create I new types
2701
of objects: one in which the data entries comprise those entries found in
2702
I of the original sources, and one in which the data entries comprise
2703
those found in I of the sources. In mathematical terms, we can create
2704
either a new object which represents the I of the sources or
2705
one which represents the I of the sources. We call these as follows:
2706
2707
my $dpI = Data::Presenter::Combo::Intersect->new(\@objects);
2708
2709
and
2710
2711
my $dpU = Data::Presenter::Combo::Union->new(\@objects);
2712
2713
Note the following:
2714
2715
=over 4
2716
2717
=item *
2718
2719
For Combo objects, unlike all other Data::Presenter::[Package1] objects, we
2720
pass only one variable -- a reference to an array of Data::Presenter objects
2721
-- to the constructor instead of three.
2722
2723
=item *
2724
2725
Combo objects are always called from a subclass of Data::Presenter::Combo
2726
such as Data::Presenter::Combo::Intersect or Data::Presenter::Combo::Union.
2727
They are not called from Data::Presenter::Combo itself.
2728
2729
=item *
2730
2731
The regular Data::Presenter objects which are selected to make up a
2732
Data::Presenter::Combo object must share a field which serves as the L
2733
field|"Index Field"> for each object. This field must carry the same name in
2734
C<@fields> in the I configuration files corresponding to each of
2735
the objects, though that field does not have to appear in the same element
2736
position in C<@fields> in each such file. Similarly, the parameters on the
2737
value side of C<%parameters> for the index field must be specified
2738
identically in each configuration file. If these conditions are not met, a
2739
Data::Presenter::Combo object cannot be constructed and the program will die
2740
with an error message.
2741
2742
Let us illlustrate this point. Suppose that we have two configuration files,
2743
I and I, corresponding to two different
2744
Data::Presenter objects, C<$obj1> and C<$obj2>. For I, we
2745
have:
2746
2747
@fields = qw(lastname, firstname, cno);
2748
2749
%parameters = (
2750
$fields[0] => [14, 'U', 'a', 'Last Name'],
2751
$fields[1] => [10, 'U', 'a', 'First Name'],
2752
$fields[2] => [ 7, 'U', 'n', 'C No.'],
2753
);
2754
2755
$index = 2;
2756
2757
For I, we have:
2758
2759
@fields = qw(cno, dateadmission, datebirth);
2760
2761
%parameters = (
2762
$fields[0] => [ 7, 'U', 'n', 'C No.'],
2763
$fields[1] => [10, 'U', 'a', 'Date of Admission'],
2764
$fields[2] => [10, 'U', 'a', 'Date of Birth'],
2765
);
2766
2767
$index = 0;
2768
2769
Can C<$obj1> and C<$obj2> be combined into a Data::Presenter::Combo object?
2770
Yes, they can. C is named as the index field in each configuration
2771
file, and the values assigned to C<$fields[$index]> in each are identical:
2772
C<[ 7, 'U', 'n', 'C No.']>.
2773
2774
Suppose, however, that we had a third configuration file, I,
2775
corresponding to yet another Data::Presenter object, C<$obj3>. If the
2776
contents of I were:
2777
2778
@fields = qw(cno, dateadmission, datebirth);
2779
2780
%parameters = (
2781
$fields[0] => [ 7, 'U', 'n', 'Serial No.'],
2782
$fields[1] => [10, 'U', 'a', 'Date of Admission'],
2783
$fields[2] => [10, 'U', 'a', 'Date of Birth'],
2784
);
2785
2786
$index = 0;
2787
2788
then C<$obj3> could not be combined with either C<$obj1> or C<$obj2> because
2789
the elements of C<$parameters{$fields[$index]}> in C<$obj3> are not identical
2790
to those in the first two objects.
2791
2792
=back
2793
2794
Here are some things to consider in using Data::Presenter::Combo objects:
2795
2796
=over 4
2797
2798
=item *
2799
2800
Q: What happens if C<$dp1> has entries not found in C<$dp2> (or vice versa)?
2801
2802
A: It depends on whether you are interested in only those entries found in
2803
each of the data sources (the mathematical intersection of the sources) or
2804
those found in any of the sources (the mathematical union). Only those
2805
entries found in I C<$dp1> and C<$dp2> are included in a
2806
Data::Presenter::Combo::Intersect object. But if you are constructing a
2807
Data::Presenter::Combo::Union object, any entry found in either source file
2808
will be represented in the Union object. These properties would hold no
2809
matter how many sources you used as arguments.
2810
2811
=item *
2812
2813
Q: What happens if both C<$dp1> and C<$dp2> have fields named, for instance,
2814
C<'lastname'>?
2815
2816
A: Left-to-right precedence determines which object's C<'lastname'> field is
2817
entered into C<$dpC>. Assuming that C<$dp1> is listed first in C<@objects>,
2818
I the fields in C<$dp1> will appear in C<$dpC>. Only those fields in
2819
C<$dp2> I found in C<$dp1> will be added to C<$dpC>. If, however,
2820
C<@objects> were defined as C<($dp2, $dp1)>, then C<$dp2>'s fields would have
2821
precedence over those of C<$dp1>. If a C<$dp3> object were constructed based
2822
on yet another data source, only those fields entries I found in C<$dp1>
2823
or C<$dp2> would be included in the Combo object -- and so forth. This
2824
left-to-right precedence rule governs both the data entries in C<$dpC> as
2825
well as the selection, sorting and output characteristics.
2826
2827
=back
2828
2829
=head1 BUGS
2830
2831
It was discovered that in versions 0.68 and earlier, C
2832
failed to sort data properly in descending order. This has been fixed.
2833
See F.
2834
2835
=head1 REFERENCES
2836
2837
The fundamental reference for this program is, of course, the Camel book:
2838
Larry Wall, Tom Christiansen, Jon Orwant. , 3rd ed.
2839
O'Reilly & Associates, 2000, L .
2840
2841
A careful reading of the code will tell any competent Perl hacker that many
2842
tricks were taken from the Ram book: Tom Christiansen & Nathan Torkington.
2843
I. O'Reilly & Associates, 1998,
2844
L .
2845
2846
The object-oriented programming skills needed to develop this program were
2847
learned via extensive re-reading of Chapters 3, 6 and 7 of Damian Conway's
2848
I. Manning Publications, 2000,
2849
L.
2850
2851
This program goes to great length to follow the principle of 'Repeated Code
2852
is a Mistake' L -- a specific
2853
application of the general Perl principle of Laziness. The author grasped
2854
this principle best following a 2001 talk by Mark-Jason Dominus
2855
L to the New York Perlmongers L .
2856
2857
Most of the code in the C<_init()> subroutines was written before the author
2858
read I L by
2859
Dave Cross. Nonetheless, that is an excellent discussion of the problems
2860
involved in understanding the structure of data sources.
2861
2862
The discussion of bugs in this program benefitted from discussions on the
2863
Perl Seminar New York mailing list
2864
L, particularly with Martin
2865
Heinsdorf.
2866
2867
Correcting the bug involving sorting in descending order entailed a complete
2868
rewrite of much code. This rewrite was greatly assisted by C and
2869
Tanktalus in the Perlmonks thread ''Building a sorting subroutine on the
2870
fly'' (L).
2871
2872
=head1 AUTHOR
2873
2874
James E. Keenan (jkeenan@cpan.org).
2875
2876
Creation date: October 25, 2001.
2877
Last modification date: February 10, 2008.
2878
Copyright (c) 2001-5 James E. Keenan. United States.
2879
All rights reserved.
2880
2881
All data presented in this documentation or in the sample files in the
2882
archive accompanying this documentation are dummy copy. The data was
2883
entirely fabricated by the author for heuristic purposes. Any resemblance to
2884
any person, living or dead, is coincidental.
2885
2886
This is free software which you may distribute under the same terms as Perl
2887
itself.
2888
2889
=cut
2890
2891