line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# This file contains package Data::UNLreport, along with a retinue of |
2
|
|
|
|
|
|
|
# utility functions |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
#use 5.010001; |
5
|
1
|
|
|
1
|
|
23422
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
34
|
|
6
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
143
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
require Exporter; |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# Items to export into callers namespace by default. Note: do not export |
13
|
|
|
|
|
|
|
# names by default without a very good reason. Use EXPORT_OK instead. |
14
|
|
|
|
|
|
|
# Do not simply export all your public functions/methods/constants. |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# This allows declaration: use Data::UNLreport ':all'; |
17
|
|
|
|
|
|
|
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK |
18
|
|
|
|
|
|
|
# will save memory. |
19
|
|
|
|
|
|
|
#our %EXPORT_TAGS = ( 'all' => [ qw( ) ] ); |
20
|
|
|
|
|
|
|
# |
21
|
|
|
|
|
|
|
#our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
22
|
|
|
|
|
|
|
# |
23
|
|
|
|
|
|
|
#our @EXPORT = qw( ); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
our $VERSION = '1.07'; |
26
|
|
|
|
|
|
|
our $ABSTRACT = 'Formats delimited column data into uniform column sizes'; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Patterns I will use to determine the data type of column data: |
29
|
|
|
|
|
|
|
# |
30
|
|
|
|
|
|
|
my $white = '\s+'; # White-space pattern (routine) |
31
|
|
|
|
|
|
|
my $int_pattern = '^[-+]?\d+$'; # Integer pattern, optionally signed |
32
|
|
|
|
|
|
|
my $dec_pattern = '^[-+]?\d+\.\d*$'; # Decimal Number pattern, signed (opt) |
33
|
|
|
|
|
|
|
my $hex_pattern = '^[A-Fa-f0-9]+$'; # Hex number w/o the 0x prefix |
34
|
|
|
|
|
|
|
my $zhx_pattern = '^0[xX][A-Fa-f0-9]+$'; # Hex number with 0x prefix |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
my $util; # Will be a reference to _util object, to be |
37
|
|
|
|
|
|
|
# used by both the UNLreport and UNLreport::Line |
38
|
|
|
|
|
|
|
# |
39
|
|
|
|
|
|
|
package Data::UNLreport; |
40
|
|
|
|
|
|
|
use overload |
41
|
1
|
|
|
|
|
6
|
'+' => "UNL_add_line", |
42
|
1
|
|
|
1
|
|
1671
|
'<<' => "UNL_add_parsed_line"; |
|
1
|
|
|
|
|
1164
|
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# Note: The methods for setting/retrieving the input and output |
45
|
|
|
|
|
|
|
# delimiters are so identical, I can mimic code from Sam Tregar to |
46
|
|
|
|
|
|
|
# create these methods by poking the symbol table. |
47
|
|
|
|
|
|
|
# |
48
|
|
|
|
|
|
|
BEGIN { |
49
|
|
|
|
|
|
|
# Temporarily turn off the 'strict' stricture for refs in this block |
50
|
|
|
|
|
|
|
# so that I can get away with Sam Tregar's little trick. |
51
|
|
|
|
|
|
|
# |
52
|
1
|
|
|
1
|
|
76
|
no strict 'refs'; # As advised by Sam Tregar himself. |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
112
|
|
53
|
1
|
|
|
1
|
|
3
|
my @attrs = qw(in_delim out_delim); # Create accessor-mutator |
54
|
|
|
|
|
|
|
# methods named like attributes |
55
|
1
|
|
|
|
|
2
|
for my $attr (@attrs) |
56
|
|
|
|
|
|
|
{ |
57
|
|
|
|
|
|
|
*$attr = sub { |
58
|
0
|
|
|
0
|
|
0
|
my $self = shift(@_); |
59
|
|
|
|
|
|
|
# Use only first character of string |
60
|
0
|
0
|
|
|
|
0
|
$self->{$attr} = substr((shift(@_)), 0, 1) if (@_); |
61
|
|
|
|
|
|
|
# If specified b, it means blank |
62
|
0
|
0
|
|
|
|
0
|
$self->{$attr} = ' ' if ($self->{$attr} eq 'b'); |
63
|
0
|
|
|
|
|
0
|
return $self->{$attr}; |
64
|
|
|
|
|
|
|
} |
65
|
2
|
|
|
|
|
1992
|
} |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
$util = Data::UNLreport::_util->new(); # Create utility pseudo-object |
68
|
|
|
|
|
|
|
# before any UNLreport objects |
69
|
|
|
|
|
|
|
# are created. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
sub new |
72
|
|
|
|
|
|
|
{ # Create the object and parse the input/out delimiters as well |
73
|
|
|
|
|
|
|
# |
74
|
0
|
|
|
0
|
0
|
0
|
my $class = shift(@_); |
75
|
0
|
|
|
|
|
0
|
my $self = {}; # (Just a reference to an anonynmous hash) |
76
|
0
|
|
|
|
|
0
|
bless ($self, $class); |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# Some object initialization, with default values |
79
|
|
|
|
|
|
|
# |
80
|
0
|
|
|
|
|
0
|
$self->{in_delim} = '|'; # Default delimiter for unl files |
81
|
0
|
|
|
|
|
0
|
$self->{out_delim} = '|'; # Reasonable for out to mimic in |
82
|
0
|
|
|
|
|
0
|
$self->{out_file} = "(STDOUT)"; # Default output file. |
83
|
0
|
|
|
|
|
0
|
$self->{fdesc} = \*STDOUT; # Default output file descriptor. |
84
|
|
|
|
|
|
|
#$self->{in_split} = '\|'; # Escape it, since | is a metacharacter |
85
|
0
|
|
|
|
|
0
|
$self->{n_lines} = 0; # No lines parsed yet |
86
|
0
|
|
|
|
|
0
|
$self->{max_width}[0] = 0; # Member arrays for column width |
87
|
0
|
|
|
|
|
0
|
$self->{max_decimals}[0] = 0; # comparisons. THis is decimal places |
88
|
0
|
|
|
|
|
0
|
$self->{max_wholes}[0] = 0; # Whole parts of decimal numbers |
89
|
|
|
|
|
|
|
|
90
|
0
|
|
|
|
|
0
|
$self->{has_end_delim} = 0; # Assume no delimiter at end of line |
91
|
|
|
|
|
|
|
# Will likely revise this flag |
92
|
|
|
|
|
|
|
# |
93
|
|
|
|
|
|
|
# Now that the defaults have been set up, look at the parameters, if |
94
|
|
|
|
|
|
|
# any. |
95
|
|
|
|
|
|
|
# |
96
|
0
|
0
|
|
|
|
0
|
die "That is no hash!" if ( (@_ % 2) != 0); # Odd count is BAD! |
97
|
|
|
|
|
|
|
|
98
|
0
|
|
|
|
|
0
|
my %params = @_; # Copy paramater arry into private hash |
99
|
|
|
|
|
|
|
# and start applying them. |
100
|
0
|
0
|
|
|
|
0
|
$self->in_delim($params{in_delim}) |
101
|
|
|
|
|
|
|
if (defined ($params{in_delim})); |
102
|
0
|
0
|
|
|
|
0
|
$self->out_delim($params{out_delim}) |
103
|
|
|
|
|
|
|
if (defined ($params{out_delim})); |
104
|
0
|
0
|
|
|
|
0
|
my $o_mode = defined ($params{mode}) ? $params{mode} : ">" ; |
105
|
0
|
0
|
|
|
|
0
|
$self->out_file($params{out_file}, $o_mode) |
106
|
|
|
|
|
|
|
if (defined ($params{out_file})); |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
|
109
|
0
|
0
|
|
|
|
0
|
$self->{out_delim} = $params{out_delim} |
110
|
|
|
|
|
|
|
if (defined ($params{out_delim})); |
111
|
|
|
|
|
|
|
|
112
|
0
|
|
|
|
|
0
|
return $self; # Setup all done. |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
# |
115
|
|
|
|
|
|
|
# Methods for setting and retrieving some basic attrributes |
116
|
|
|
|
|
|
|
# Methods in_delim() and out_delim() were already set up before new() |
117
|
|
|
|
|
|
|
# |
118
|
|
|
|
|
|
|
sub out_file |
119
|
|
|
|
|
|
|
{ |
120
|
0
|
|
|
0
|
0
|
0
|
my $self = shift(@_); |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# If output file name was given, use that; else, set a null string. |
123
|
|
|
|
|
|
|
# Note that a null string may have been sent. The code handles that |
124
|
|
|
|
|
|
|
# as well. |
125
|
|
|
|
|
|
|
# |
126
|
0
|
0
|
|
|
|
0
|
my $fpath = (@_) ? shift(@_) : ""; # File name: Given or wanted |
127
|
0
|
0
|
|
|
|
0
|
my $fmode = (@_) ? shift(@_) : ">"; # File mode: Given or default |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Default output file name & descriptor are already set. Override? |
130
|
|
|
|
|
|
|
# |
131
|
0
|
0
|
|
|
|
0
|
if ($fpath) # If supplied the output file name |
132
|
|
|
|
|
|
|
{ |
133
|
0
|
|
|
|
|
0
|
$self->{out_file} = $fpath; # Subject to change shortly |
134
|
|
|
|
|
|
|
|
135
|
0
|
0
|
|
|
|
0
|
open($self->{fdesc}, $fmode, $fpath) |
136
|
|
|
|
|
|
|
or die "Error <$!> trying to open <-$fpath-> in mode ($fmode)\n"; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
# If no name supplied - caller just wants the file name |
139
|
|
|
|
|
|
|
|
140
|
0
|
|
|
|
|
0
|
return $self->{out_file}; |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub fdesc |
144
|
|
|
|
|
|
|
{ # No setting file descriptor! |
145
|
0
|
|
|
0
|
0
|
0
|
my $self = shift(@_); |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# If called prematurely, return 0 instread of the file descripter |
148
|
|
|
|
|
|
|
# |
149
|
0
|
0
|
|
|
|
0
|
return (defined($self->{fdesc}) ? $self->{fdesc} : 0); |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
# |
152
|
|
|
|
|
|
|
# Method: UNLreport::has_end_delim() |
153
|
|
|
|
|
|
|
# Sets or clears a flag to indicate if I want a terminating delimiter |
154
|
|
|
|
|
|
|
# on each line, as befits a proper .unl file. Call with no parameter to |
155
|
|
|
|
|
|
|
# just get the value of this flag. |
156
|
|
|
|
|
|
|
# |
157
|
|
|
|
|
|
|
# Parameters: |
158
|
|
|
|
|
|
|
# - (Implicit) Ref to a UNLreort object (ie parsed file) |
159
|
|
|
|
|
|
|
# - 1 for yes, 0 for no. Omit to just get the value |
160
|
|
|
|
|
|
|
# |
161
|
|
|
|
|
|
|
sub has_end_delim |
162
|
|
|
|
|
|
|
{ |
163
|
0
|
|
|
0
|
0
|
0
|
my $self = shift(@_); |
164
|
0
|
0
|
|
|
|
0
|
$self->{has_end_delim} = shift(@_) if @_; # No param: Don't set |
165
|
0
|
|
|
|
|
0
|
return $self->{has_end_delim}; |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
#----------------------------------------------------------------------- |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# Method: chomp_delim() - Removes the delimiter character from the end |
170
|
|
|
|
|
|
|
# of the input line, as many as appear there. |
171
|
|
|
|
|
|
|
# |
172
|
|
|
|
|
|
|
# Parameters: |
173
|
|
|
|
|
|
|
# - (Implicit) Ref to a UNLreort object (ie parsed file) |
174
|
|
|
|
|
|
|
# - The line itself, (Probably already chomped by the caller) |
175
|
|
|
|
|
|
|
# Returns: |
176
|
|
|
|
|
|
|
# - The same line, minus the delibiter(s) at the end of the line |
177
|
|
|
|
|
|
|
# |
178
|
|
|
|
|
|
|
sub chomp_delim |
179
|
|
|
|
|
|
|
{ |
180
|
0
|
|
|
0
|
0
|
0
|
my $self = shift(@_); |
181
|
0
|
|
|
|
|
0
|
my $rline = shift(@_); # Get the line string |
182
|
0
|
|
|
|
|
0
|
chomp($rline); #(Probably not necessary; just being thorough) |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
# Plan: As long as we keep finding an input delimiter at the end of the |
185
|
|
|
|
|
|
|
# line (note the fairly ugly "while" condition), keep chopping it off |
186
|
|
|
|
|
|
|
# |
187
|
0
|
|
|
|
|
0
|
while (substr($rline, (length($rline) - 1), 1) eq $self->{in_delim}) |
188
|
0
|
|
|
|
|
0
|
{ chop($rline); } #(Hey, there's still life in the chop function!) |
189
|
|
|
|
|
|
|
|
190
|
0
|
|
|
|
|
0
|
return($rline); |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
#----------------------------------------------------------------------- |
193
|
|
|
|
|
|
|
# |
194
|
|
|
|
|
|
|
# Method UNLreport::+ to add a raw line into the parsed-file list |
195
|
|
|
|
|
|
|
# |
196
|
|
|
|
|
|
|
sub UNL_add_line |
197
|
|
|
|
|
|
|
{ |
198
|
0
|
|
|
0
|
0
|
0
|
my $self = shift(@_); # Ref to the UNLreport object |
199
|
0
|
|
|
|
|
0
|
my $one_line = shift(@_); # Get the actual line string to be appended |
200
|
0
|
|
|
|
|
0
|
$one_line = $self->chomp_delim($one_line); # Lose trailing delimiters |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# Parse the line and calculate basic info about it. |
203
|
|
|
|
|
|
|
# |
204
|
0
|
|
|
|
|
0
|
my $p_line = Data::UNLreport::Line->new($one_line, $self); |
205
|
|
|
|
|
|
|
|
206
|
0
|
|
|
|
|
0
|
my $ok = $self << $p_line; # Integrate parsed line to line list |
207
|
|
|
|
|
|
|
|
208
|
0
|
|
|
|
|
0
|
$self->{n_lines}++; # Tally up line count |
209
|
0
|
|
|
|
|
0
|
return $self->{n_lines}; |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
# |
212
|
|
|
|
|
|
|
#----------------------------------------------------------------------- |
213
|
|
|
|
|
|
|
# Method UNLreport::<< to add a parsed line into the parsed file list |
214
|
|
|
|
|
|
|
# Parameters: |
215
|
|
|
|
|
|
|
# o (Implicit) reference to the UNLreport file object |
216
|
|
|
|
|
|
|
# o Reference to a parsed line object |
217
|
|
|
|
|
|
|
# |
218
|
|
|
|
|
|
|
sub UNL_add_parsed_line |
219
|
|
|
|
|
|
|
{ |
220
|
0
|
|
|
0
|
0
|
0
|
my ($self, $pline) = @_; |
221
|
0
|
|
|
|
|
0
|
my $cur_line = $self->{n_lines}; # Slot number to get the line |
222
|
0
|
|
|
|
|
0
|
my $n_cols = $pline->ncolumns(); # Column count for looping |
223
|
0
|
|
|
|
|
0
|
$self->{parsed_line}[$cur_line] = $pline; # Store line reference |
224
|
|
|
|
|
|
|
# Line is now integrated |
225
|
|
|
|
|
|
|
|
226
|
0
|
0
|
|
|
|
0
|
if ($pline->{has_delims}) |
227
|
0
|
|
|
|
|
0
|
{ $self->check_col_widths($cur_line); } |
228
|
|
|
|
|
|
|
# If no delimiter, I don't give a hoot about column width. |
229
|
|
|
|
|
|
|
|
230
|
0
|
|
|
|
|
0
|
return 1; # Return success code |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
# |
233
|
|
|
|
|
|
|
# Method: check_col_widths() |
234
|
|
|
|
|
|
|
# For an already parsed line, run down trhe columns to set the width of |
235
|
|
|
|
|
|
|
# the widest value in that column for whole file. |
236
|
|
|
|
|
|
|
# |
237
|
|
|
|
|
|
|
# Parameters: |
238
|
|
|
|
|
|
|
# o (Implicit) Reference to the parsed file object |
239
|
|
|
|
|
|
|
# o Row (or line) number |
240
|
|
|
|
|
|
|
# |
241
|
|
|
|
|
|
|
sub check_col_widths |
242
|
|
|
|
|
|
|
{ |
243
|
0
|
|
|
0
|
0
|
0
|
my ($pfile, $row) = @_; # (Using $pfile instead of $self. Why?) |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# For each column, compare its width against the widest so far. |
246
|
|
|
|
|
|
|
# Similar check for decimal places if it has decimal places |
247
|
|
|
|
|
|
|
# |
248
|
0
|
|
|
|
|
0
|
my $col_wid = 0; # Column width |
249
|
0
|
|
|
|
|
0
|
my $col_whole = 0; # Width of integer or integer part of a decimal |
250
|
0
|
|
|
|
|
0
|
my $col_dec = 0; # Width of decimal part of a float |
251
|
0
|
|
|
|
|
0
|
my $row_ref = $pfile->{parsed_line}[$row]; # Neater access to cols |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
# For cleaner access to columns of the line, use this reference: |
254
|
|
|
|
|
|
|
# |
255
|
0
|
|
|
|
|
0
|
my $split_ref = $row_ref->{split_line}; |
256
|
|
|
|
|
|
|
|
257
|
0
|
|
|
|
|
0
|
for (my $lc = 0; $lc < $row_ref->{columns}; $lc++) |
258
|
|
|
|
|
|
|
{ # First make sure there is a column width to compare; |
259
|
|
|
|
|
|
|
# If not, start it with a zero width. |
260
|
|
|
|
|
|
|
# |
261
|
0
|
0
|
|
|
|
0
|
if (! defined($pfile->{max_width}[$lc])) |
262
|
0
|
|
|
|
|
0
|
{ $pfile->{max_width}[$lc] = 0; } |
263
|
0
|
0
|
|
|
|
0
|
if (! defined($pfile->{max_wholes}[$lc])) |
264
|
0
|
|
|
|
|
0
|
{ $pfile->{max_wholes}[$lc] = 0; } |
265
|
0
|
0
|
|
|
|
0
|
if (! defined($pfile->{max_decimals}[$lc])) |
266
|
0
|
|
|
|
|
0
|
{ $pfile->{max_decimals}[$lc] = 0; } |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
# Check for widest column. This is counted different ways for |
269
|
|
|
|
|
|
|
# string, integer and decimal. Start by checking integer pattern |
270
|
|
|
|
|
|
|
# |
271
|
0
|
0
|
|
|
|
0
|
if ($split_ref->[$lc] =~ $int_pattern) |
|
|
0
|
|
|
|
|
|
272
|
|
|
|
|
|
|
{ |
273
|
0
|
0
|
|
|
|
0
|
if ( ($col_wid = length($split_ref->[$lc])) |
274
|
|
|
|
|
|
|
> $pfile->{max_width}[$lc]) |
275
|
|
|
|
|
|
|
{ # We have a new largest width for this column |
276
|
|
|
|
|
|
|
# as wll as a widest whole-number part for this column |
277
|
|
|
|
|
|
|
# |
278
|
0
|
|
|
|
|
0
|
$pfile->{max_width}[$lc] = $col_wid; # New widest width |
279
|
0
|
|
|
|
|
0
|
$pfile->{max_wholes}[$lc] = $col_wid; # Widest whole number |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
# |
283
|
|
|
|
|
|
|
# Check for decimal/float pattern |
284
|
|
|
|
|
|
|
# |
285
|
|
|
|
|
|
|
elsif ($split_ref->[$lc] =~ $dec_pattern) |
286
|
|
|
|
|
|
|
{ # If decimal, check for most decimal places and whole numbers |
287
|
|
|
|
|
|
|
# |
288
|
0
|
|
|
|
|
0
|
my ($whole_part, $decimal_part) = (0, 0); |
289
|
0
|
|
|
|
|
0
|
($whole_part, $decimal_part) = split('\.', $split_ref->[$lc]); |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
# If there is a + sign in there, it will not print with the |
292
|
|
|
|
|
|
|
# printf call but its presence my skew the column width, if it |
293
|
|
|
|
|
|
|
# happens to be the widest column alredy. we want to lose it so |
294
|
|
|
|
|
|
|
# that the + does not get counted into the length. |
295
|
|
|
|
|
|
|
# |
296
|
0
|
0
|
|
|
|
0
|
if (substr($whole_part, 0, 1) eq '+') { $whole_part =~ s/^\+// ;} |
|
0
|
|
|
|
|
0
|
|
297
|
0
|
0
|
|
|
|
0
|
if ( ($col_whole = length($whole_part)) |
298
|
|
|
|
|
|
|
> $pfile->{max_wholes}[$lc]) |
299
|
0
|
|
|
|
|
0
|
{ $pfile->{max_wholes}[$lc] = $col_whole; } # New widest whole |
300
|
|
|
|
|
|
|
|
301
|
0
|
0
|
|
|
|
0
|
if ( ( $col_dec = length($decimal_part)) |
302
|
|
|
|
|
|
|
> $pfile->{max_decimals}[$lc]) |
303
|
0
|
|
|
|
|
0
|
{ $pfile->{max_decimals}[$lc] = $col_dec;} # New widest decimal |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
# Width of widest decimal, so far, is: |
306
|
|
|
|
|
|
|
# width of widest whole part |
307
|
|
|
|
|
|
|
# + width of widest decimal part |
308
|
|
|
|
|
|
|
# + 1 for the decimal point. |
309
|
|
|
|
|
|
|
# (Note: I am calculating and using $col_wid differently from the way |
310
|
|
|
|
|
|
|
# I use it for string and integer data.) |
311
|
|
|
|
|
|
|
# |
312
|
0
|
|
|
|
|
0
|
$col_wid = $pfile->{max_wholes}[$lc] |
313
|
|
|
|
|
|
|
+ $pfile->{max_decimals}[$lc] |
314
|
|
|
|
|
|
|
+ 1; # What is total width of these maxima? |
315
|
0
|
0
|
|
|
|
0
|
if ($col_wid > $pfile->{max_width}[$lc]) |
316
|
0
|
|
|
|
|
0
|
{ $pfile->{max_width}[$lc] = $col_wid; } |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
else |
319
|
|
|
|
|
|
|
{ # Neither decimal nor integer be: Must be a string |
320
|
|
|
|
|
|
|
# Much simpler width calculation - Just one simple comparison |
321
|
|
|
|
|
|
|
# |
322
|
0
|
0
|
|
|
|
0
|
if ( ($col_wid = length($split_ref->[$lc])) |
323
|
|
|
|
|
|
|
> $pfile->{max_width}[$lc]) |
324
|
0
|
|
|
|
|
0
|
{ $pfile->{max_width}[$lc] = $col_wid; } # New widest this column |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
# |
330
|
|
|
|
|
|
|
# Method print() - Print the beautified output |
331
|
|
|
|
|
|
|
# Implicit parameter: [Reference to] the completely parsed file |
332
|
|
|
|
|
|
|
sub print |
333
|
|
|
|
|
|
|
{ |
334
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
335
|
0
|
|
|
|
|
0
|
my $lc; # My usual loop counter |
336
|
|
|
|
|
|
|
|
337
|
0
|
|
|
|
|
0
|
for ($lc = 0; $lc < $self->{n_lines}; $lc++) |
338
|
|
|
|
|
|
|
{ |
339
|
0
|
|
|
|
|
0
|
my $out_buf = ""; # Buffer for output line |
340
|
0
|
|
|
|
|
0
|
my $col_buf = ""; # Buffer to format 1 column |
341
|
0
|
|
|
|
|
0
|
my $cur_col; # Current column number within line |
342
|
0
|
|
|
|
|
0
|
my $cur_p_line = $self->{parsed_line}[$lc]; # ->Line object |
343
|
0
|
|
|
|
|
0
|
my $split_ref = $cur_p_line->{split_line}; # -> Array of cols |
344
|
0
|
0
|
|
|
|
0
|
if (! $cur_p_line->{has_delims}) # If line has no delimiters |
345
|
|
|
|
|
|
|
{ |
346
|
|
|
|
|
|
|
#printf($self->{fdesc} "%s\n", $split_ref->[0]); |
347
|
0
|
|
|
|
|
0
|
$split_ref->[0] =~ s/\s+$//; # Trim trailing white-spaces |
348
|
0
|
|
|
|
|
0
|
printf {$self->{fdesc}} ("%s\n", $split_ref->[0]); |
|
0
|
|
|
|
|
0
|
|
349
|
|
|
|
|
|
|
# Just print the line as is |
350
|
0
|
|
|
|
|
0
|
next; # and go the next parsed line |
351
|
|
|
|
|
|
|
} |
352
|
|
|
|
|
|
|
# Still here: then line has delimiters (majority of cases) |
353
|
|
|
|
|
|
|
# |
354
|
0
|
|
|
|
|
0
|
for ($cur_col = 0; $cur_col < $cur_p_line->{columns}; $cur_col++) |
355
|
|
|
|
|
|
|
{ # One column per round in this loop |
356
|
0
|
0
|
|
|
|
0
|
if ($cur_p_line->{type}[$cur_col] eq "s") |
357
|
|
|
|
|
|
|
{ |
358
|
0
|
|
|
|
|
0
|
$col_buf = sprintf ("%-*s%s", |
359
|
|
|
|
|
|
|
$self->{max_width}[$cur_col], |
360
|
|
|
|
|
|
|
$split_ref->[$cur_col], |
361
|
|
|
|
|
|
|
$self->{out_delim}); |
362
|
0
|
|
|
|
|
0
|
$out_buf .= $col_buf; # Concatenate column to line |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
else |
365
|
|
|
|
|
|
|
{ # Else, it is a numeric type - either d or f. I won't even look |
366
|
|
|
|
|
|
|
# at that but at the widest column and most decimal places |
367
|
|
|
|
|
|
|
# |
368
|
0
|
0
|
|
|
|
0
|
if ($self->{max_decimals}[$cur_col] == 0) |
369
|
|
|
|
|
|
|
{ # No row had any decimal places in this column. Format |
370
|
|
|
|
|
|
|
# intger at widest width with [default] right justification |
371
|
|
|
|
|
|
|
# |
372
|
|
|
|
|
|
|
#printf($self->{fdesc} "%*d%s", |
373
|
0
|
|
|
|
|
0
|
$col_buf = sprintf ("%*d%s", |
374
|
|
|
|
|
|
|
$self->{max_width}[$cur_col], |
375
|
|
|
|
|
|
|
$split_ref->[$cur_col], |
376
|
|
|
|
|
|
|
$self->{out_delim}); |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
else |
379
|
|
|
|
|
|
|
{ # If even 1 row had decimal places in this column, format |
380
|
|
|
|
|
|
|
# this column accordingly for all rows. |
381
|
|
|
|
|
|
|
# |
382
|
|
|
|
|
|
|
#printf("%*.*f%s", |
383
|
0
|
|
|
|
|
0
|
$col_buf = sprintf ("%*.*f%s", |
384
|
|
|
|
|
|
|
$self->{max_width}[$cur_col], |
385
|
|
|
|
|
|
|
$self->{max_decimals}[$cur_col], |
386
|
|
|
|
|
|
|
$split_ref->[$cur_col], |
387
|
|
|
|
|
|
|
$self->{out_delim}); |
388
|
|
|
|
|
|
|
} |
389
|
0
|
|
|
|
|
0
|
$out_buf .= $col_buf; # Concatenate column to line |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
} # End loop for one row |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
# Above loop filled an output line. Now trim it off (just in case) |
394
|
|
|
|
|
|
|
# and print it. |
395
|
|
|
|
|
|
|
# |
396
|
0
|
|
|
|
|
0
|
$out_buf =~ s/\s+$//; # Trim trailing white-spaces |
397
|
0
|
|
|
|
|
0
|
printf {$self->{fdesc}} ("%s\n", $out_buf); |
|
0
|
|
|
|
|
0
|
|
398
|
|
|
|
|
|
|
} # End loop for whole set of parsed lines |
399
|
|
|
|
|
|
|
} # End method print() |
400
|
|
|
|
|
|
|
# |
401
|
|
|
|
|
|
|
# package UNLreport::Line: |
402
|
|
|
|
|
|
|
# "Private" class used by class UNLreport. That class operates on |
403
|
|
|
|
|
|
|
# a whole report. UNLreport::Line operates on a single line structure. |
404
|
|
|
|
|
|
|
# |
405
|
|
|
|
|
|
|
package Data::UNLreport::Line; |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
# Constructor for 1 line-object. Parameters: |
408
|
|
|
|
|
|
|
# - The class (implicit) |
409
|
|
|
|
|
|
|
# - The line (scalar) OR a reference to an array of scalars. |
410
|
|
|
|
|
|
|
# The scalar is more likely to be passed if the client is working |
411
|
|
|
|
|
|
|
# with ..unl data; the array reference is more likely if client is |
412
|
|
|
|
|
|
|
# fetching database data an passing it to this method. |
413
|
|
|
|
|
|
|
# - A reference to the UNLreport object to which this line belongs |
414
|
|
|
|
|
|
|
# |
415
|
|
|
|
|
|
|
sub new |
416
|
|
|
|
|
|
|
{ |
417
|
0
|
|
|
0
|
|
0
|
my $class = shift(@_); # (Implicitly passed class name) |
418
|
0
|
|
|
|
|
0
|
my $one_line = shift(@_); |
419
|
0
|
|
|
|
|
0
|
my $p_file = shift(@_); # The UNLreport object reference |
420
|
0
|
|
|
|
|
0
|
my $self = {}; # Create new object |
421
|
0
|
|
|
|
|
0
|
bless ($self, $class); # of this class |
422
|
0
|
|
|
|
|
0
|
$self->{split_line}[0] = ""; # Just to establish this field as array |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
#my ($in_delim, $in_split) # Just get local copies of delimiters |
425
|
|
|
|
|
|
|
# = ($p_file->{in_delim}, $p_file->{in_split}); |
426
|
0
|
|
|
|
|
0
|
my $in_delim = $p_file->{in_delim}; # Just get local copy of in-delimiter |
427
|
0
|
0
|
|
|
|
0
|
$in_delim = qr/\|/ if ($in_delim eq "|"); # Avoid confusion cause by |
428
|
|
|
|
|
|
|
# this special character |
429
|
|
|
|
|
|
|
|
430
|
0
|
0
|
0
|
|
|
0
|
if (($in_delim eq 'b') || ($in_delim eq ' ')) # If input delimiter is |
431
|
|
|
|
|
|
|
{ # white space, use this |
432
|
0
|
|
|
|
|
0
|
$in_delim = qr/\s+/; # white-space pattern |
433
|
|
|
|
|
|
|
} |
434
|
0
|
0
|
|
|
|
0
|
if (ref($one_line) eq "ARRAY") # If I received an array reference |
435
|
|
|
|
|
|
|
{ # copy the array into line object |
436
|
0
|
|
|
|
|
0
|
@{$self->{split_line}} = @{$one_line}; # and set the |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
437
|
0
|
|
|
|
|
0
|
$self->{columns} = @{$self->{split_line}}; # column count |
|
0
|
|
|
|
|
0
|
|
438
|
0
|
|
|
|
|
0
|
$self->{has_delims} = 1; # Already separated - as good as |
439
|
|
|
|
|
|
|
# delimited. |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
else # Assume I got a scalar - a line |
442
|
|
|
|
|
|
|
{ # More work: Split, check, repair, etc.. |
443
|
0
|
|
|
|
|
0
|
chomp($one_line); |
444
|
0
|
|
|
|
|
0
|
$one_line =~ s/^\s+//; # Trim leading spaces |
445
|
0
|
|
|
|
|
0
|
$one_line =~ s/\s+$//; # Trim trailing spaces |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
# If line has no delimiters, it is a blob-dump line, not to be |
448
|
|
|
|
|
|
|
# counted like a reguler UNL line. |
449
|
|
|
|
|
|
|
# |
450
|
0
|
|
|
|
|
0
|
$self->{has_delims} = 0; # Initially assume line had no |
451
|
0
|
0
|
|
|
|
0
|
if ($one_line =~ $in_delim) # delims, but if I find one, |
|
0
|
|
|
|
|
0
|
|
452
|
|
|
|
|
|
|
{$self->{has_delims} = 1;} # correct the assumption ASAP |
453
|
|
|
|
|
|
|
|
454
|
0
|
0
|
|
|
|
0
|
if ($self->{has_delims}) |
455
|
|
|
|
|
|
|
{ |
456
|
|
|
|
|
|
|
# Split the line but keep trailing null fields. |
457
|
|
|
|
|
|
|
# |
458
|
0
|
|
|
|
|
0
|
@{$self->{split_line}} = split($in_delim, $one_line, -1); |
|
0
|
|
|
|
|
0
|
|
459
|
0
|
|
|
|
|
0
|
$util->repair_esc_delims(\@{$self->{split_line}}, $in_delim); |
|
0
|
|
|
|
|
0
|
|
460
|
|
|
|
|
|
|
# That is, undo overzealous splits |
461
|
|
|
|
|
|
|
# |
462
|
|
|
|
|
|
|
# Now, is there a trailing delimiter in the original line? In a |
463
|
|
|
|
|
|
|
# .unl file, that is the last character of the line; there is no |
464
|
|
|
|
|
|
|
# field past that. However, the split() function does not know |
465
|
|
|
|
|
|
|
# that and creates a bogus, null last field. I need to drop that |
466
|
|
|
|
|
|
|
# myself. |
467
|
|
|
|
|
|
|
# Also, if even one line has a final delimiter, flag whole file to |
468
|
|
|
|
|
|
|
# making sure there is one on every output line. |
469
|
|
|
|
|
|
|
# |
470
|
0
|
0
|
|
|
|
0
|
if (substr($one_line, (length($one_line) -1)) eq $in_delim) |
471
|
|
|
|
|
|
|
{ |
472
|
0
|
|
|
|
|
0
|
$p_file->{has_end_delim} = 1; # OK if this is set repeatedly |
473
|
0
|
|
|
|
|
0
|
pop @{$self->{split_line}}; # Lose the bogus last element |
|
0
|
|
|
|
|
0
|
|
474
|
|
|
|
|
|
|
} |
475
|
0
|
|
|
|
|
0
|
$self->{columns} = @{$self->{split_line}}; # Column count |
|
0
|
|
|
|
|
0
|
|
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
else # If line has no delimiters |
478
|
|
|
|
|
|
|
{ |
479
|
0
|
|
|
|
|
0
|
$self->{split_line}[0] = $one_line; # Copy the line unparsed |
480
|
0
|
|
|
|
|
0
|
$self->{columns} = 1; # Exactly 1 column |
481
|
|
|
|
|
|
|
} |
482
|
|
|
|
|
|
|
} # End of line-splitting code |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
# Regardless of whether I got the split record or had to split it |
485
|
|
|
|
|
|
|
# myself, tidy up fields by trimming leading & trailing spaces |
486
|
|
|
|
|
|
|
# Then track the size & formats of each field |
487
|
|
|
|
|
|
|
# |
488
|
0
|
|
|
|
|
0
|
for (my $nfield = 0; |
|
0
|
|
|
|
|
0
|
|
489
|
|
|
|
|
|
|
$nfield <= $#{$self->{split_line}}; |
490
|
|
|
|
|
|
|
$nfield++) |
491
|
|
|
|
|
|
|
{ |
492
|
|
|
|
|
|
|
#$self->{split_line}[$nfield] =~ s/^\s+//; # Trim leading #(No, dont) |
493
|
0
|
|
|
|
|
0
|
$self->{split_line}[$nfield] =~ s/\s+$//; # Trim trailing |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
# Now for the data types: |
496
|
|
|
|
|
|
|
# %d for integer |
497
|
|
|
|
|
|
|
# %f for decimal (float) |
498
|
|
|
|
|
|
|
# %s for anything else |
499
|
|
|
|
|
|
|
# |
500
|
0
|
0
|
|
|
|
0
|
if ($self->{split_line}[$nfield] =~ $int_pattern) |
|
|
0
|
|
|
|
|
|
501
|
0
|
|
|
|
|
0
|
{ $self->{type}[$nfield] = "d";} |
502
|
|
|
|
|
|
|
elsif ($self->{split_line}[$nfield] =~ $dec_pattern) |
503
|
0
|
|
|
|
|
0
|
{ $self->{type}[$nfield] = "f"; } |
|
0
|
|
|
|
|
0
|
|
504
|
|
|
|
|
|
|
else |
505
|
|
|
|
|
|
|
{$self->{type}[$nfield] = "s";} |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
} |
508
|
0
|
|
|
|
|
0
|
return $self; |
509
|
|
|
|
|
|
|
} |
510
|
|
|
|
|
|
|
# |
511
|
0
|
|
|
0
|
|
0
|
sub ncolumns { my $self = shift(@_); return $self->{columns}; } |
|
0
|
|
|
|
|
0
|
|
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
# |
514
|
|
|
|
|
|
|
package Data::UNLreport::_util; |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
# Token constructor so that functions can be called like methods |
517
|
|
|
|
|
|
|
# |
518
|
1
|
|
|
1
|
|
2
|
sub new {my $self = {}; bless($self, $_[0]) ; return $self; } |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
2
|
|
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
# matches_meta(): Function to test if the given delimiter character is |
521
|
|
|
|
|
|
|
# a known metacharacter. |
522
|
|
|
|
|
|
|
# Returns 1 if it does match, 0 if it does not. |
523
|
|
|
|
|
|
|
# |
524
|
|
|
|
|
|
|
sub matches_meta |
525
|
|
|
|
|
|
|
{ |
526
|
0
|
|
|
0
|
|
|
shift(@_); # Don't need object reference; lose it |
527
|
0
|
|
|
|
|
|
my $delim_char = shift(@_); # Get the parameter into a private var |
528
|
|
|
|
|
|
|
|
529
|
0
|
|
|
|
|
|
my $rval = 0; # Return value - Assume not a meta |
530
|
|
|
|
|
|
|
|
531
|
0
|
|
|
|
|
|
my $metachars = '|()[]{}^$*+?.'; # This is the list of metacharacters |
532
|
|
|
|
|
|
|
|
533
|
0
|
|
|
|
|
|
my $meta_length = length($metachars); # Loop limit |
534
|
|
|
|
|
|
|
|
535
|
0
|
|
|
|
|
|
for (my $lc = 0; $lc < $meta_length; $lc++) |
536
|
|
|
|
|
|
|
{ |
537
|
0
|
0
|
|
|
|
|
if ($delim_char eq substr($metachars, $lc, 1)) {$rval = 1; last;} |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
} |
539
|
|
|
|
|
|
|
|
540
|
0
|
|
|
|
|
|
return $rval; |
541
|
|
|
|
|
|
|
} |
542
|
|
|
|
|
|
|
# |
543
|
|
|
|
|
|
|
# repair_esc_delims() - Scan up the array to look for columns that end |
544
|
|
|
|
|
|
|
# with an escape cahracter (\); this indicates that a delimiter was |
545
|
|
|
|
|
|
|
# intended to be part fo the scring and we hsould not have split it |
546
|
|
|
|
|
|
|
# up there. We need to put back the delimiter and recombine the |
547
|
|
|
|
|
|
|
# split column with the following column. The last column, the first |
548
|
|
|
|
|
|
|
# one I will check, cannot be recombined, of course. |
549
|
|
|
|
|
|
|
# |
550
|
|
|
|
|
|
|
# Parameters: (for now) |
551
|
|
|
|
|
|
|
# - An array reference. |
552
|
|
|
|
|
|
|
# - The delimiter to put back. |
553
|
|
|
|
|
|
|
# |
554
|
|
|
|
|
|
|
sub repair_esc_delims |
555
|
|
|
|
|
|
|
{ |
556
|
0
|
|
|
0
|
|
|
shift(@_); # Don't need object reference; lose it |
557
|
0
|
|
|
|
|
|
my ($listref, $delim_p) = @_; |
558
|
|
|
|
|
|
|
|
559
|
0
|
|
|
|
|
|
for (my $lc = $#{$listref}; $lc >= 0; $lc--) |
|
0
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
{ |
561
|
0
|
|
|
|
|
|
my $col_copy = $listref->[$lc]; # Copy to make code more readable |
562
|
0
|
|
|
|
|
|
my $col_length = length($col_copy) -1; # O, length off by 1.. |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
# If column does not end in escape character(s), fuggeddaboudit! |
565
|
|
|
|
|
|
|
# |
566
|
0
|
0
|
|
|
|
|
next if ($col_copy !~ m/\\+$/); |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
# AHA! Column does end in an escape. It may have been escaping a |
569
|
|
|
|
|
|
|
# delimiter in the original line. Or it may itself be an escaped |
570
|
|
|
|
|
|
|
# escape. How can I tell? An odd number of \ clusters at end of |
571
|
|
|
|
|
|
|
# colum indicate an escape delimiter, requiring repair. An even |
572
|
|
|
|
|
|
|
# number indicates escaped escape character. Not my jurisdicion. |
573
|
|
|
|
|
|
|
# |
574
|
0
|
|
|
|
|
|
my $esc_count = $util->count_escapes($col_copy); |
575
|
0
|
0
|
|
|
|
|
next if (($esc_count % 2) == 0); # Even number of esc; no problem |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
# Odd number of escapes - need to effect repair of improper split. |
578
|
|
|
|
|
|
|
# o Put back the wrongly removed delimiter |
579
|
|
|
|
|
|
|
# o If this is not the last column in the array, append the succee- |
580
|
|
|
|
|
|
|
# ding column to this one while splicing that succeeding column |
581
|
|
|
|
|
|
|
# from the array. |
582
|
|
|
|
|
|
|
# |
583
|
0
|
|
|
|
|
|
$listref->[$lc] .= $delim_p; # Putting back the delimiter |
584
|
0
|
0
|
|
|
|
|
if ($lc < $#{$listref}) # Cant splice after last element |
|
0
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
{ |
586
|
0
|
|
|
|
|
|
$listref->[$lc] .= splice(@{$listref}, $lc+1, 1); |
|
0
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
} |
588
|
|
|
|
|
|
|
} # End FOR (my $lc = $#{$listref}; $lc >= 0; $lc--) |
589
|
|
|
|
|
|
|
} |
590
|
|
|
|
|
|
|
# |
591
|
|
|
|
|
|
|
# Function count_escapes: Counts contiguous escape characters at the |
592
|
|
|
|
|
|
|
# end of the given string. |
593
|
|
|
|
|
|
|
# |
594
|
|
|
|
|
|
|
# Parameter: |
595
|
|
|
|
|
|
|
# o The string |
596
|
|
|
|
|
|
|
# |
597
|
|
|
|
|
|
|
# Returns: |
598
|
|
|
|
|
|
|
# o The number of consecutive escapes at end. |
599
|
|
|
|
|
|
|
# |
600
|
|
|
|
|
|
|
sub count_escapes |
601
|
|
|
|
|
|
|
{ |
602
|
0
|
|
|
0
|
|
|
shift(@_); # Don't need object reference; lose it |
603
|
0
|
|
|
|
|
|
my $instr = shift @_; |
604
|
|
|
|
|
|
|
|
605
|
0
|
|
|
|
|
|
my $len = length($instr) -1; |
606
|
0
|
|
|
|
|
|
my $lc = $len; # Loop counter to start high, work down |
607
|
0
|
|
|
|
|
|
my $count = 0; # Good place for a counter to start |
608
|
|
|
|
|
|
|
|
609
|
0
|
|
|
|
|
|
while (substr($instr, $lc--, 1) eq "\\") {($count++);} |
|
0
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
|
611
|
0
|
|
|
|
|
|
return $count; |
612
|
|
|
|
|
|
|
} |
613
|
|
|
|
|
|
|
# |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
1; |
617
|
|
|
|
|
|
|
__END__ |