File Coverage

blib/lib/AnyData.pm
Criterion Covered Total %
statement 276 487 56.6
branch 101 246 41.0
condition 54 145 37.2
subroutine 34 49 69.3
pod 8 43 18.6
total 473 970 48.7


line stmt bran cond sub pod time code
1             ##################################################################
2             package AnyData;
3             ###################################################################
4             #
5             # This module is copyright (c), 2000 by Jeff Zucker
6             # All rights reserved.
7             #
8             ###################################################################
9 6     6   191747 use strict;
  6         18  
  6         237  
10 6     6   33 use warnings;
  6         10  
  6         209  
11             require Exporter;
12 6     6   3381 use AnyData::Storage::TiedHash;
  6         20  
  6         218  
13 6     6   44 use vars qw( @ISA @EXPORT $VERSION );
  6         15  
  6         11022  
14             @ISA = qw(Exporter);
15             @EXPORT = qw( adConvert adTie adRows adColumn adExport adDump adNames adFormats);
16             #@EXPORT = qw( ad_fields adTable adErr adArray);
17            
18             $VERSION = '0.11';
19            
20             sub new {
21 12     12 0 24 my $class = shift;
22 12         19 my $format = shift;
23 12   50     38 my $flags = shift || {};
24 12         21 my $del_marker = "\0";
25 12 100       85 $format = 'CSV' if $format eq 'ARRAY';
26 12         34 my $parser_name = 'AnyData/Format/' . $format . '.pm';
27 12         18 eval { require $parser_name; };
  12         6909  
28 12 50       55 die "Error Opening File-Parser: $@" if $@;
29 12         71 $parser_name =~ s#/#::#g;
30 12         59 $parser_name =~ s#\.pm$##g;
31 12   100     64 my $col_names = $flags->{col_names} || undef;
32 12 100       41 if ($col_names) {
33 7         10 my @cols;
34 7 50       43 @cols = ref $col_names eq 'ARRAY'
35             ? @$col_names
36             : split ',',$col_names;
37 7         19 $flags->{col_names} = \@cols;
38             }
39 12         33 $flags->{del_marker} = $del_marker;
40 12   33     86 $flags->{records} ||= $flags->{data};
41 12   33     468 $flags->{field_sep} ||= $flags->{sep_char} ||= $flags->{ad_sep_char};
      33        
42 12   33     109 $flags->{quote} ||= $flags->{quote_char} ||= $flags->{ad_quote_char};
      33        
43 12   33     114 $flags->{escape} ||= $flags->{escape_char}||= $flags->{ad_escape_char};
      33        
44 12   33     143 $flags->{record_sep}||= $flags->{eol} ||= $flags->{ad_eol};
      33        
45             # $flags->{skip_first_row}
46 12         98 my $parser = $parser_name->new ($flags);
47 12 100 100     136 if ($parser->{col_names} && !$col_names) {
48 1         3 my @cols;
49 0         0 @cols = ref $parser->{col_names} eq 'ARRAY'
50 1 50       11 ? @{$parser->{col_names}}
51             : split ',',$parser->{col_names};
52 1         4 $flags->{col_names} = \@cols;
53 1         3 $parser->{col_names} = \@cols;
54             }
55 12   100     81 my $storage_name = $flags->{storage}
56             || $parser->storage_type()
57             || 'File';
58 12         36 $storage_name = "AnyData/Storage/$storage_name.pm";
59 12         24 eval { require $storage_name; };
  12         3657  
60 12 50       59 die "Error Opening Storage Module: $@" if $@;
61 12         70 $storage_name =~ s#/#::#g;
62 12         60 $storage_name =~ s#\.pm$##g;
63 12         217 my $storage = new $storage_name({del_marker=>$del_marker,%$flags});
64 12 50       67 if ($storage_name =~ 'PassThru') {
65 0         0 $storage->{parser} = $parser;
66 0         0 $parser->{del_marker} = "\0";
67 0 0 0     0 $parser->{url} = $flags->{file}
68             if $flags->{file} and $flags->{file} =~ /http:|ftp:/;
69             }
70 12         50 my $self = {
71             storage => $storage,
72             parser => $parser,
73             };
74 12         63 return( bless($self,$class) );
75             }
76            
77             sub adFormats {
78 0     0 1 0 my @formats;
79 0         0 for my $dir(@INC) {
80 0         0 my $format_dir = "$dir/AnyData/Format";
81 0 0       0 if ( -d $format_dir ) {
82 0         0 local *D;
83 0         0 opendir(D,$format_dir);
84 0         0 @formats = grep {/\.pm$/} readdir(D);
  0         0  
85 0         0 last;
86             }
87             }
88 0         0 unshift @formats,'ARRAY';
89 0         0 @formats = map {s/^(.*)\.pm$/$1/;$_} @formats;
  0         0  
  0         0  
90 0         0 return @formats;
91             }
92            
93             sub export {
94 1     1 0 3 my $self=shift;
95 1         3 my $fh = $self->{storage}->{fh};
96 1   50     7 my $mode = $self->{storage}->{open_mode} || 'r';
97             # if ( $self->{parser}->{export_on_close}
98             # && $self->{storage}->{fh}
99             # && $mode ne 'r'
100             # ){
101 1         5 return $self->{parser}->export( $self->{storage}, @_ );
102             # }
103             }
104             sub DESTROY {
105 12     12   7772 my $self=shift;
106             # $self->export;
107 12         43 $self->zpack;
108             #print "AD DESTROYED ";
109             }
110             ##########################################
111             # DBD STUFF
112             ##########################################
113             # required only for DBD-AnyData
114             ##########################################
115             sub prep_dbd_table {
116 0     0 0 0 my $self = shift;
117 0         0 my $tname = shift;
118 0         0 my $createMode = shift;
119 0         0 my $col_names;
120             my $col_nums;
121 0         0 my $first_row_pos;
122 0 0       0 if (!$createMode) {
123 0         0 $col_names = $self->{storage}->get_col_names($self->{parser});
124 0         0 $col_nums = $self->{storage}->set_col_nums();
125 0         0 $first_row_pos = $self->{storage}->{first_row_pos};
126             }
127 0 0 0     0 die "ERROR: No Column Names!:", $self->{storage}->{open_mode}
      0        
      0        
128             if (!$col_names || !scalar @$col_names)
129             && 'ru' =~ $self->{storage}->{open_mode}
130             && !$createMode eq 'o';
131 0         0 my $table = {
132             NAME => $tname,
133             DATA => [],
134             CURRENT_ROW => 0,
135             col_names => $col_names,
136             col_nums => $col_nums,
137             first_row_pos => $first_row_pos,
138             fh => $self->{storage}->get_file_handle,
139             file => $self->{storage}->get_file_name,
140             ad => $self,
141             };
142             #use Data::Dumper; print Dumper $table;
143 0         0 return $table;
144             }
145             sub fetch_row {
146 0     0 0 0 my $self = shift;
147 0   0     0 my $requested_cols = shift || [];
148 0         0 my $rec;
149 0 0       0 if ( $self->{parser}->{skip_pattern} ) {
150 0         0 my $found;
151 0         0 while (!$found) {
152 0         0 $rec = $self->{storage}->file2str($self->{parser},$requested_cols);
153 0 0       0 last if !defined $rec;
154 0 0       0 next if $rec =~ $self->{parser}->{skip_pattern};
155 0         0 last;
156             }
157             }
158             else {
159 0         0 $rec = $self->{storage}->file2str($self->{parser},$requested_cols);
160             }
161 0 0       0 return $rec if ref $rec eq 'ARRAY';
162 0 0       0 return unless $rec;
163 0         0 my @fields = $self->{parser}->read_fields($rec);
164 0 0 0     0 return undef if scalar @fields == 1 and !defined $fields[0];
165 0         0 return \@fields;
166             }
167             sub fetch_rowNEW {
168 0     0 0 0 my $self = shift;
169 0   0     0 my $requested_cols = shift || [];
170 0         0 my $rec = $self->{storage}->file2str($self->{parser},$requested_cols);
171 0         0 my @fields;
172 0 0       0 if (ref $rec eq 'ARRAY') {
173 0         0 @fields = @$rec;
174             }
175             else {
176 0 0       0 return unless defined $rec;
177 0         0 my @fields = $self->{parser}->read_fields($rec);
178 0 0 0     0 return undef if scalar @fields == 1 and !defined $fields[0];
179             }
180 0 0       0 if ( my $subs = $self->{parser}->{read_sub} ) {
181 0         0 for (@$subs) {
182 0         0 my($col,$sub) = @$_;
183 0 0       0 next unless defined $col;
184 0         0 my $col_num = $self->{storage}->{col_nums}->{$col};
185 0 0       0 next unless defined $col_num;
186 0         0 $fields[$col_num] = &$sub($fields[$col_num]);
187             }
188             }
189 0         0 return \@fields;
190             }
191             sub push_names {
192 0     0 0 0 my $self = shift;
193 0   0     0 my $col_names = shift || undef;
194             #print "Can't find column names!" unless scalar @$col_names;
195 0 0 0     0 $self->{storage}->print_col_names( $self->{parser}, $col_names )
196             unless $self->{parser}->{col_names} && $self->parser_type ne 'XML';
197             # $self->set_col_nums;
198 0   0     0 $self->{parser}->{key} ||= $col_names->[0];
199             #use Data::Dumper; print Dumper $self; exit;
200             }
201 0     0 0 0 sub drop { shift->{storage}->drop(@_); }
202 0     0 0 0 sub truncate { shift->{storage}->truncate(@_) }
203            
204             ##################################################################
205             # END OF DBD STUFF
206             ##################################################################
207            
208             ##################################################################
209             # REQUIRED BY BOTH DBD AND TIEDHASH
210             ##################################################################
211             sub push_row {
212 42     42 0 49 my $self = shift;
213 42 50       47 die "ERROR: No Column Names!" unless scalar @{$self->col_names};
  42         72  
214 42         78 my $requested_cols = [];
215 42         92 my @row = @_;
216 42 100       85 if (ref($row[0]) eq 'ARRAY') {
217 6         7 $requested_cols = shift @row;
218             }
219 42 50       147 my $rec = $self->{parser}->write_fields(@row) or return undef;
220 42         140 return $self->{storage}->push_row( $rec, $self->{parser}, $requested_cols);
221             }
222             sub push_rowNEW {
223 0     0 0 0 my $self = shift;
224             #print "PUSHING... ";
225 0 0       0 die "ERROR: No Column Names!" unless scalar @{$self->col_names};
  0         0  
226 0         0 my $requested_cols = [];
227 0         0 my @row = @_;
228 6     6   7425 use Data::Dumper;
  6         70229  
  6         34577  
229             #print "PUSHING ", Dumper \@row;
230 0 0       0 if (ref($row[0]) eq 'ARRAY') {
231 0         0 $requested_cols = shift @row;
232             }
233 0 0       0 my $rec = $self->{parser}->write_fields(@row) or return undef;
234 0         0 return $self->{storage}->push_row( $rec, $self->{parser}, $requested_cols);
235             }
236 30     30 0 88 sub seek { shift->{storage}->seek(@_); }
237             sub seek_first_record {
238 54     54 0 72 my $self=shift;
239 54         186 $self->{storage}->seek_first_record($self->{parser});
240             }
241             sub col_names {
242 290     290 0 349 my $self = shift;
243 290         440 my $c = $self->{storage}->{col_names};
244 290 50 50     1031 $c = $self->{parser}->{col_names} unless (ref $c eq 'ARRAY') and scalar @$c;
245 290   50     925 $c ||= [];
246             }
247             sub is_url {
248 11     11 0 24 my $file = shift;
249 11 50 33     126 return $file if $file and $file =~ m"^http://|ftp://";
250             }
251            
252             sub adTable {
253             ###########################################################
254             # Patch from Wes Hardaker
255             ###########################################################
256             # my($formatref,$file,$read_mode,$lockMode,$othflags)=@_;
257 11     11 0 30 my($formatref,$file,$read_mode,$lockMode,$othflags,$tname)=@_;
258             ###########################################################
259             #use Data::Dumper; print Dumper \@_;
260 11         20 my($format,$flags);
261 11   50     35 $file ||= '';
262 11         42 my $url = is_url($file);
263 11         27 $flags = {};
264 11   100     44 $othflags ||= {};
265 11 50 33     77 if ( ref $formatref eq 'HASH' or $othflags->{data}) {
266 0         0 $format = 'Base';
267 0         0 $flags = $othflags;
268 0 0       0 if (ref $formatref eq 'HASH') {
269 0         0 %$flags = (%$formatref,%$othflags);
270             }
271             }
272             else {
273 11         42 ($format,$flags) = split_params($formatref);
274 11   50     35 $othflags ||= {};
275 11         54 %$flags = (%$flags,%$othflags);
276             }
277 11 100       42 if ( $flags->{cols} ) {
278 6         13 $flags->{col_names} = $flags->{cols};
279 6         14 delete $flags->{cols};
280             }
281 11 100       49 if (ref($file) eq 'ARRAY') {
282 8 50 33     51 if ($format eq 'Mp3' or $format eq 'FileSys') {
283 0         0 $flags->{dirs} = $file;
284             }
285             else {
286 8         24 $flags->{recs} = join '',@$file;
287 8 100       31 $flags->{recs} = $file if $format =~ /ARRAY/i;
288 8 50       26 $flags->{storage} = 'RAM' unless $format eq 'XML';
289 8         18 $read_mode = 'u';
290             }
291             }
292             else {
293 3         10 $flags->{file} = $file;
294             }
295 11 50 33     118 if ($format ne 'XML' and ($format eq 'Base' or $url) ) {
      33        
296 0         0 my $x;
297 0         0 $flags->{storage} = 'RAM';
298 0         0 delete $flags->{recs};
299 0         0 my $ad = AnyData->new( $format, $flags);
300 0 0       0 $format eq 'Base'
301             ? $ad->open_table( $file )
302             : $ad->open_table( $file, 'r',
303             $ad->{storage}->get_remote_data($file)
304             );
305 0         0 return $ad;
306             }
307 11         121 my $ad = AnyData->new( $format, $flags);
308 11         26 my $createMode = 0;
309 11 50       42 $createMode = $read_mode if defined $lockMode;
310 11 50 33     44 $read_mode = 'c' if $createMode and $lockMode;
311 11 50 33     114 $read_mode = 'u' if !$createMode and $lockMode;
312 11   50     61 $read_mode ||= 'r';
313 11 100 100     135 $ad->{parser}->{keep_first_line} = 1
314             if $flags->{col_names} and 'ru' =~ /$read_mode/;
315             #####################################################
316             # Patch from Wes Hardaker
317             #####################################################
318             # $ad->open_table( $file, $read_mode );
319             ## $ad->open_table( $file, $read_mode, $tname );
320 11         75 $ad->open_table( $file, $read_mode, $tname );
321             # use Data::Dumper; my $x = $ad; delete $x->{parser}->{twig}; delete $x->{parser}->{record_tag}; delete $x->{parser}->{current_element}; print Dumper $x;
322             #####################################################
323 11         101 return $ad;
324             }
325            
326             sub open_table {
327 11     11 0 99 my $self = shift;
328 11         79 $self->{storage}->open_table( $self->{parser}, @_ );
329 11         49 my $col_names = $self->col_names();
330 11   100     76 $self->{parser}->{key} ||= '';
331 11 50 66     98 $self->{parser}->{key} ||= $col_names->[0] if $col_names->[0];
332             }
333             ##################################################################
334            
335            
336             ##################################################################
337             # TIEDHASH STUFF
338             ##################################################################
339 98     98 0 598 sub key_col { shift->{parser}->{key} }
340            
341             sub fetchrow_hashref {
342 221     221 0 273 my $self = shift;
343 221 100       380 my $rec = $self->get_undeleted_record or return undef;
344 186 100       691 my @fields = ref $rec eq 'ARRAY'
345             ? @$rec
346             : $self->{parser}->read_fields($rec);
347 186         378 my $col_names = $self->col_names();
348 186 50       389 return undef unless scalar @fields;
349 186 50 66     455 return undef if scalar @fields == 1 and !defined $fields[0];
350 186         189 my $rowhash;
351 186         232 @{$rowhash}{@$col_names} = @fields;
  186         821  
352 186         997 return ( $rowhash );
353             }
354             sub get_undeleted_record {
355 235     235 0 243 my $self = shift;
356 235         223 my $rec;
357 235         257 my $found=0;
358 235 50       383 return $self->fetch_row if $self->parser_type eq 'XML';
359 235         725 while (!$found) {
360 289         865 my $test = $rec = $self->{storage}->file2str($self->{parser});
361 289 100       921 return if !defined $rec;
362 252 100       710 next if $self->{storage}->is_deleted($self->{parser});
363 198 50 33     572 next if $self->{parser}->{skip_pattern}
364             and $rec =~ $self->{parser}->{skip_pattern};
365 198         302 last;
366             }
367 198         581 return $rec;
368             # return $rec if ref $rec eq 'ARRAY';
369             # return unless $rec;
370             # my @fields = $self->{parser}->read_fields($rec);
371             # return undef if scalar @fields == 1 and !defined $fields[0];
372             # return \@fields;
373             }
374             sub update_single_row {
375 6     6 0 7 my $self = shift;
376 6         6 my $oldrow = shift;
377 6         9 my $newvals = shift;
378 6         6 my @colnames = @{ $self->col_names };
  6         11  
379 6         9 my @newrow;
380 6         9 my $requested_cols = [];
381 6         13 for my $i(0..$#colnames) {
382 18 100       39 push @$requested_cols, $colnames[$i] if defined $newvals->{$colnames[$i]};
383 18         30 $newrow[$i] = $newvals->{$colnames[$i]};
384 18 100       53 $newrow[$i] = $oldrow->{$colnames[$i]} unless defined $newrow[$i];
385             }
386 6         14 unshift @newrow, $requested_cols;
387 6         25 $self->{storage}->seek(0,2);
388 6         13 $self->push_row( @newrow );
389 6         27 return \@newrow;
390             }
391             sub update_multiple_rows {
392 6     6 0 10 my $self = shift;
393 6         8 my $key = shift;
394 6         7 my $values = shift;
395 6         16 $self->seek_first_record;
396 6         5 my @rows_to_update;
397 6         17 while (my $row = $self->fetchrow_hashref) {
398 30 100       103 next unless $self->match($row,$key);
399 6 50       24 $self->{parser}->{has_update_function}
400             ? $self->update_single_row($row,$values)
401             : $self->delete_single_row();
402 6 50       24 $self->{parser}->{has_update_function}
403             ? push @rows_to_update,1
404             : push @rows_to_update,$row;
405             }
406 6 50       18 if (!$self->{parser}->{has_update_function}) {
407 6         11 for (@rows_to_update) {
408 6         14 $self->update_single_row($_,$values);
409             }
410             }
411 6         31 return scalar @rows_to_update;
412             }
413             sub match {
414 107     107 0 156 my($self,$row,$key) = @_;
415 107 100       255 if ( ref $key ne 'HASH') {
416 34 100 66     77 return 0 if !$row->{$self->key_col}
417             or $row->{$self->key_col} ne $key;
418 16         68 return 1;
419             }
420 73         79 my $found = 0;
421 73         228 while (my($col,$re)=each %$key) {
422 30 100 66     94 next unless defined $row->{$col} and is_matched($row->{$col},$re);
423 6         21 $found++;
424             }
425 73 100       515 return 1 if $found == scalar keys %$key;
426             }
427             sub is_matched {
428 30     30 0 35 my($str,$re)=@_;
429 30 50       56 if (ref $re eq 'Regexp') {
430 0 0       0 return $str =~ /$re/ ? 1 : 0;
431             }
432 30         29 my($op,$val);
433            
434 30 50 33     175 if ( $re and $re =~/^(\S*)\s+(.*)/ ) {
    50          
435 0         0 $op = $1;
436 0         0 $val = $2;
437             }
438             elsif ($re) {
439 30 100       217 return $str =~ /$re/ ? 1 : 0;
440             }
441             else {
442 0 0       0 return $str eq '' ? 1 : 0;
443             }
444 0         0 my $numop = '< > == != <= >=';
445 0         0 my $chrop = 'lt gt eq ne le ge';
446 0 0 0     0 if (!($numop =~ /$op/) and !($chrop =~ /$op/)) {
447 0 0       0 return $str =~ /$re/ ? 1 : 0;
448             }
449 0 0       0 if ($op eq '<' ) { return $str < $val; }
  0         0  
450 0 0       0 if ($op eq '>' ) { return $str > $val; }
  0         0  
451 0 0       0 if ($op eq '==') { return $str == $val; }
  0         0  
452 0 0       0 if ($op eq '!=') { return $str != $val; }
  0         0  
453 0 0       0 if ($op eq '<=') { return $str <= $val; }
  0         0  
454 0 0       0 if ($op eq '>=') { return $str >= $val; }
  0         0  
455 0 0       0 if ($op eq 'lt') { return $str lt $val; }
  0         0  
456 0 0       0 if ($op eq 'gt') { return $str gt $val; }
  0         0  
457 0 0       0 if ($op eq 'eq') { return $str eq $val; }
  0         0  
458 0 0       0 if ($op eq 'ne') { return $str ne $val; }
  0         0  
459 0 0       0 if ($op eq 'le') { return $str le $val; }
  0         0  
460 0 0       0 if ($op eq 'ge') { return $str ge $val; }
  0         0  
461             }
462             sub delete_single_row {
463 12     12 0 13 my $self = shift;
464             # my $curpos = $self->{storage}->get_pos;
465 12         37 $self->{storage}->delete_record($self->{parser});
466             # $self->{storage}->go_pos($curpos);
467 12         34 $self->{needs_packing}++;
468             }
469             sub delete_multiple_rows {
470 0     0 0 0 my $self = shift;
471 0         0 my $key = shift;
472 0         0 $self->seek_first_record;
473 0         0 my $rows_deleted =0;
474 0         0 while (my $row = $self->fetchrow_hashref) {
475 0 0       0 next unless $self->match($row,$key);
476 0         0 $self->delete_single_row;
477 0         0 $rows_deleted++;
478             }
479 0         0 return $rows_deleted;
480             }
481            
482 6     6 1 2428 sub adNames { @{ shift->{__colnames}} }
  6         30  
483            
484             sub adDump {
485 1     1 1 44711 my $table = shift;
486 1         3 my $pat = shift;
487 1 50       8 die "No table defined" unless $table;
488 1         11 my $ad = tied(%$table)->{ad};
489 1         3 my @cols = @{ $ad->col_names };
  1         7  
490 1         66 print "<",join(":", @cols), ">\n";
491 1         11 while (my $row = each %$table) {
492 6 100       13 my @row = map {defined $row->{$_} ? $row->{$_} : ''} @cols;
  12         48  
493 6         15 for (@row) { print "[$_]"; }
  12         169  
494 6         91 print "\n";
495             }
496             }
497            
498             sub adRows {
499 10     10 1 1072 my $thash = shift;
500 10         22 my %keys = @_;
501 10         19 my $obj = tied(%$thash);
502 10         45 return $obj->adRows(\%keys);
503             }
504             sub adColumn {
505 12     12 1 25 my $thash = shift;
506 12         18 my $column = shift;
507 12         15 my $flags = shift;
508 12         17 my $obj = tied(%$thash);
509 12         42 return $obj->adColumn($column, $flags);
510             }
511             sub adArray {
512 0     0 0 0 my($format,$data)=@_;
513 0         0 my $t = adTie( $format, $data );
514 0         0 my $t1 = tied(%$t);
515 0         0 my $ad = $t1->{ad};
516 0         0 my $arrayref = $ad->{storage}->{records};
517 0         0 unshift @$arrayref, $ad->{storage}->{col_names};
518 0         0 return $arrayref;
519             }
520             ##################################################################
521             # END OF TIEDHASH STUFF
522             ##################################################################
523             sub parser_type {
524 235     235 0 396 my $type = ref shift->{parser};
525 235         961 $type =~ s/AnyData::Format::(.*)/$1/;
526 235         773 return $type;
527             }
528             sub zpack {
529 12     12 0 24 my $self = shift;
530 12 50       49 return if $self->{storage}->{no_pack};
531 12 100       524 return if (ref $self->{storage} ) !~ /File$/;
532            
533             # return unless $self->{needs_packing};
534             # $self->{needs_packing} = 0;
535 3 50       5 return unless scalar(keys %{ $self->{storage}->{deleted} } );
  3         38  
536 0         0 $self->{needs_packing} = 0;
537             # my @callA = caller 2;
538             # my @callB = caller 3;
539             # return if $callA[3] =~ /DBD/;
540             # return if $callB[3] and $callB[3] =~ /SQL::Statement/;
541             # return if $self->{parser}->{export_on_close};
542             #print "PACKING";
543 0         0 my $bak_file = $self->{storage}->get_file_name . '.bak';
544 0         0 my $bak = adTable( 'Text', $bak_file, 'o' );
545 0         0 my $bak_fh = $bak->{storage}->get_file_handle;
546 0         0 my $fh = $self->{storage}->get_file_handle;
547 0 0 0     0 die "Can't pack to backup $!" unless $fh and $bak_fh;
548             # $self->seek_first_record;
549 0 0       0 $fh->seek(0,0) || die $!;
550             #$bak_fh->seek(0,0) || die $!;
551             # while (my $line = $self->get_record) {
552             # next if $self->is_deleted($line);
553 0         0 while (my $line = $self->get_undeleted_record) {
554 0         0 my $tmpstr = $bak->{parser}->write_fields($line)
555             . $self->{parser}->{record_sep};
556 0         0 $bak_fh->write($tmpstr,length $tmpstr);
557             }
558 0         0 $fh->seek(0,0);
559 0 0       0 $fh->truncate(0) || die $!;
560 0         0 $bak->seek_first_record;
561 0         0 while (<$bak_fh>) {
562 0         0 $fh->write($_,length $_);
563             }
564 0         0 $fh->close;
565 0         0 $bak_fh->close;
566 0         0 $self->{doing_pack} = 0;
567 0         0 undef $self->{storage}->{deleted};
568             }
569            
570             ##########################################################
571             # FUNCTION CALL INTERFACE
572             ##########################################################
573             sub adTie {
574 10     10 1 1981 my($format,$file,$read_mode,$flags)=@_;
575 10         16 my $data;
576 10 100 100     88 if (ref $file eq 'ARRAY' && !$read_mode ) { $read_mode = 'u'; }
  2         6  
577             # ARRAY only {data=>[]};
578 10 50       50 if (scalar @_ == 1){
579 0         0 $read_mode = 'o';
580 0         0 tie %$data,
581             'AnyData::Storage::TiedHash',
582             adTable($format),
583             $read_mode;
584 0         0 return $data;
585             }
586 10         53 tie %$data,
587             'AnyData::Storage::TiedHash',
588             adTable($format,$file,$read_mode,undef,$flags),
589             $read_mode;
590 10         44 return $data;
591             }
592             sub adErr {
593 0     0 0 0 my $hash = shift;
594 0         0 my $t = tied(%$hash);
595 0   0     0 my $errstr = $t->{ad}->{parser}->{errstr}
596             || $t->{ad}->{storage}->{errstr};
597 0 0       0 print $errstr if $errstr;
598 0         0 return $errstr;
599             }
600             sub adExport {
601 2     2 1 4 my $tiedhash = shift;
602 2         6 my($tformat,$tfile,$tflags)=@_;
603 2         7 my $ad = tied(%$tiedhash)->{ad};
604 2         6 my $sformat = ref $ad->{parser};
605 2         9 $sformat =~ s/AnyData::Format:://;
606 2   33     18 $tformat ||= $sformat;
607 2 50 66     27 if ($tformat eq $sformat and $tformat eq 'XML') {
608 0         0 return $ad->{parser}->export($ad->{storage},$tfile,$tflags);
609             }
610 2         10 return adConvert('adHash',$ad,$tformat,$tfile,undef,$tflags);
611             }
612             sub adConvert {
613 2     2 1 7 my( $source_format, $source_data,
614             $target_format,$target_file_name,
615             $source_flags,$target_flags )=@_;
616            
617 2         4 my $target_type = 'STRING';
618 2 100       17 $target_type = 'FILE' if defined $target_file_name;
619 2 50       8 $target_type = 'ARRAY' if $target_format eq 'ARRAY';
620            
621 2         5 my $data_type = 'AD-OBJECT';
622 2 50 33     10 $data_type = 'ARRAY' if ref $source_data eq 'ARRAY'
623             and ref $source_data->[0] eq 'ARRAY';
624            
625             # INIT SOURCE OBJECT
626 2         4 my $source_ad;
627 2 50       8 if ($source_format eq 'adHash') {
628 2         4 $source_ad = $source_data;
629 2         6 undef $source_data;
630             }
631             else {
632 0 0       0 $source_format = 'CSV' if $source_format =~ /ARRAY/i;
633 0         0 $source_ad = adTable(
634             $source_format,$source_data,'r',undef,$source_flags
635             );
636             }
637            
638             # GET COLUMN NAMES
639 2         3 my @cols;
640 2 50       20 if ( $data_type eq 'ARRAY') {
641 0         0 @cols = @{ shift @{ $source_data } };
  0         0  
  0         0  
642             }
643             else {
644 2         2 @cols = @{ $source_ad->col_names };
  2         9  
645             }
646            
647            
648             # insert storable here
649 2 100       42 if ('XML HTMLtable' =~ /$target_format/) {
650 1         4 $target_flags->{col_names} = join ',',@cols;
651 1         4 my $target_ad = adTable(
652             $target_format,$target_file_name,'o',undef,$target_flags
653             );
654 1 50       4 if ($data_type eq 'ARRAY' ) {
655 0         0 for my $row(@$source_data) {
656 0         0 my @fields=$source_ad->str2ary($row);
657 0         0 $target_ad->push_row( $source_ad->str2ary(\@fields) );
658             }
659 0         0 unshift @$source_data, \@cols;
660 0         0 return $target_ad->export($target_file_name);
661             }
662 1         6 $source_ad->seek_first_record;
663 1         5 while (my $row = $source_ad->get_undeleted_record) {
664 6         17 $target_ad->push_row( $source_ad->str2ary($row) );
665             }
666 1         5 return $target_ad->export($target_file_name);
667             }
668            
669 1         2 my($target_ad,$fh);
670             ### INIT TARGET OBJECT
671 1 50       7 if ($target_type eq 'FILE') {
    50          
672 0         0 $target_ad = adTable(
673             $target_format,$target_file_name,'c',undef,$target_flags
674             );
675 0         0 $fh = $target_ad->{storage}->get_file_handle;
676             }
677             elsif ($target_type eq 'STRING') {
678 1         9 $target_ad = AnyData->new( $target_format,$target_flags);
679             }
680            
681 1         3 my($str,$aryref);
682             ### GET COLUMN NAMES
683 1 50       4 if ( !$target_ad->{parser}->{no_col_print} ) {
684 1 50       6 if ($target_type eq 'ARRAY') {
685 0         0 push @$aryref, \@cols;
686             }
687             else {
688 1         5 $str = $target_ad->{parser}->write_fields(@cols);
689 1 50       8 $str =~ s/ /,/g if $target_format eq 'Fixed';
690 1 50       3 if ($target_type eq 'FILE') {
691 0         0 $fh->write($str,length $str);
692             }
693 1 50       3 if ($target_type eq 'STRING') {
694 1         4 $str = $target_ad->{parser}->write_fields(@cols);
695             }
696             }
697             }
698            
699             # GET DATA
700 1 50       4 if ($data_type eq 'ARRAY') {
701 0         0 for my $row(@$source_data) {
702 0         0 my @fields = $source_ad->str2ary($row);
703 0         0 my $tmpstr = $target_ad->{parser}->write_fields(@fields);
704             # print $tmpstr if $check;
705 0 0       0 $fh->write($tmpstr,length $tmpstr) if $target_type eq 'FILE';
706 0 0       0 $str .= $tmpstr if $target_type eq 'STRING';
707             }
708 0         0 unshift @$source_data, \@cols;
709 0 0       0 return $str if $target_format ne 'ARRAY';
710 0         0 return $aryref;
711             }
712 1         3 $source_ad->seek_first_record; # unless $source_format eq 'XML';
713 1         13 while (my $row = $source_ad->get_undeleted_record) {
714 6 50       17 if ($target_format eq 'ARRAY') {
715 0 0       0 push @$aryref,$row if $target_format eq 'ARRAY';
716 0         0 next;
717             }
718 6         14 my @fields = $source_ad->str2ary($row);
719 6         22 my $tmpstr = $target_ad->{parser}->write_fields(@fields);
720 6 50       31 $str .= $target_type eq 'FILE'
721             ? $fh->write($tmpstr,length $tmpstr)
722             : $tmpstr;
723             }
724 1 50       13 return $str if $target_format ne 'ARRAY';
725 0         0 return $aryref;
726             }
727            
728             # if ('Storable' =~ /$target_format/) {
729             # $target_flags->{col_names} = join ',',@cols;
730             # $target_ad = adTable(
731             # $target_format,$target_file_name,'c',undef,$target_flags
732             # );
733             # if (ref $source_data && !$data) {
734             # for my $row(@$source_data) {
735             # push @$data,$row;
736             # }
737             # }
738             # elsif (!$data) {
739             # $source_ad->seek_first_record;
740             # while (my $row = $source_ad->fetch_row) {
741             # push @$data, $row;
742             # }
743             # }
744             # unshift @$data, \@cols;
745             # return $target_ad->{parser}->export($data,$target_file_name);
746             # }
747            
748             sub str2ary {
749 12     12 0 20 my($ad,$row) = @_;
750 12 100       50 return @$row if ref $row eq 'ARRAY';
751 6         24 return $ad->{parser}->read_fields($row);
752             }
753             sub ad_string {
754 0     0 0 0 my($formatref,@fields) = @_;
755 0         0 my($format,$flags) = split_params($formatref);
756             # &dump($formatref); print "<$format>"; &dump($flags) if $flags;
757             #$formatref =~ s/(.*)/$1/;
758 0         0 my $ad = AnyData->new( $format, $flags );
759 0         0 return $ad->{parser}->write_fields(@fields);
760             # return $ad->write_fields(@fields);
761             }
762            
763             sub ad_fields {
764 0     0 0 0 my($formatref,$str,$flags) = @_;
765             # my($format,$flags) = split_params($formatref);
766             # my $ad = AnyData::new( $format, $flags );
767 0         0 my $ad = AnyData->new( $formatref, $flags );
768 0         0 return $ad->{parser}->read_fields($str);
769             }
770            
771             sub ad_convert_str {
772 0     0 0 0 my($source_formatref,$target_formatref,$str) = @_;
773 0         0 my($source_format,$source_flags) = split_params($source_formatref);
774 0         0 my($target_format,$target_flags) = split_params($target_formatref);
775 0         0 my $source_ad = AnyData->new( $source_format,$source_flags);
776 0         0 my $target_ad = AnyData->new( $target_format,$target_flags);
777 0         0 my @fields = $source_ad->read_fields($str);
778 0         0 return $target_ad->write_fields( @fields );
779             }
780            
781             #########################################################
782             # UTILITY METHODS
783             #########################################################
784             #
785             # For all methods that have $format as a parameter,
786             # $format can be either a string name of a format e.g. 'CSV'
787             # or a hashref of the format and flags for that format e.g.
788             # { format => 'FixedWidth', pattern=>'A1 A3 A2' }
789             #
790             # given this parameter, this method returns $format and $flags
791             # setting $flags to {} if none are given
792             #
793             sub split_params {
794 11     11 0 20 my $source_formatref = shift;
795 11         19 my $source_flags = {};
796 11         19 my $source_format = $source_formatref;
797 11 50       36 if (ref $source_formatref eq 'HASH') {
798 0         0 while (my($k,$v)=each %$source_formatref) {
799 0         0 ($source_format,$source_flags) = ($k,$v);
800             }
801             }
802             #use Data::Dumper;
803 11         37 return( $source_format, $source_flags);
804             }
805             sub dump {
806 0     0 0   my $var = shift;
807 0           my $name = ref($var);
808             #use Data::Dumper;
809 0           $Data::Dumper::Indent = 1;
810 0           $Data::Dumper::Useqq = 0;
811 0           print Data::Dumper->new([$var],[$name])->Dump();
812             }
813            
814             ###########################################################################
815             # START OF DOCUMENTATION
816             ###########################################################################
817            
818             =pod
819            
820             =head1 NAME
821            
822             AnyData - easy access to data in many formats
823            
824             =head1 SYNOPSIS
825            
826             use AnyData;
827             my $table = adTie( 'CSV','my_db.csv','o', # create a table
828             {col_names=>'name,country,sex'}
829             );
830             $table->{Sue} = {country=>'de',sex=>'f'}; # insert a row
831             delete $table->{Tom}; # delete a single row
832             $str = $table->{Sue}->{country}; # select a single value
833             while ( my $row = each %$table ) { # loop through table
834             print $row->{name} if $row->{sex} eq 'f';
835             }
836             $rows = $table->{{age=>'> 25'}}; # select multiple rows
837             delete $table->{{country=>qr/us|mx|ca/}}; # delete multiple rows
838             $table->{{country=>'Nz'}}={country=>'nz'}; # update multiple rows
839             my $num = adRows( $table, age=>'< 25' ); # count matching rows
840             my @names = adNames( $table ); # get column names
841             my @cars = adColumn( $table, 'cars' ); # group a column
842             my @formats = adFormats(); # list available parsers
843             adExport( $table, $format, $file, $flags ); # save in specified format
844             print adExport( $table, $format, $flags ); # print to screen in format
845             print adDump($table); # dump table to screen
846             undef $table; # close the table
847            
848             #adConvert( $format1, $file1, $format2, $file2 ); # convert btwn formats
849             #print adConvert( $format1, $file1, $format2 ); # convert to screen
850            
851             =head1 DESCRIPTION
852            
853             The rather wacky idea behind this module and its sister module
854             DBD::AnyData is that any data, regardless of source or format should
855             be accessible and modifiable with the same simple set of methods.
856             This module provides a multidimensional tied hash interface to data
857             in a dozen different formats. The DBD::AnyData module adds a DBI/SQL
858             interface for those same formats.
859            
860             Both modules provide built-in protections including appropriate
861             flocking() for all I/O and (in most cases) record-at-a-time access to
862             files rather than slurping of entire files.
863            
864             Currently supported formats include general format flat files (CSV,
865             Fixed Length, etc.), specific formats (passwd files, httpd logs,
866             etc.), and a variety of other kinds of formats (XML, Mp3, HTML
867             tables). The number of supported formats will continue to grow
868             rapidly since there is an open API making it easy for any author to
869             create additional format parsers which can be plugged in to AnyData
870             itself and thereby be accessible by either the tiedhash or DBI/SQL
871             interface.
872            
873             =head1 PREREQUISITES
874            
875             The AnyData.pm module itself is pure Perl and does not depend on
876             anything other than modules that come standard with Perl. Some
877             formats and some advanced features require additional modules: to use
878             the remote ftp/http features, you must have the LWP bundle installed;
879             to use the XML format, you must have XML::Parser and XML::Twig installed;
880             to use the HTMLtable format for reading, you must have HTML::Parser and
881             HTML::TableExtract installed but you can use the HTMLtable for writing
882             with just the standard CGI module. To use DBI/SQL commands, you must have
883             DBI, DBD::AnyData, SQL::Statement and DBD::File installed.
884            
885             =head1 USAGE
886            
887             The AnyData module imports eight methods (functions):
888            
889             =for test ignore
890            
891             adTie() -- create a new table or open an existing table
892             adExport() -- save an existing table in a specified format
893             adConvert() -- convert data in one format into another format
894             adFormats() -- list available formats
895             adNames() -- get the column names of a table
896             adRows() -- get the number of rows in a table or query
897             adDump() -- display the data formatted as an array of rows
898             adColumn() -- group values in a single column
899            
900             The adTie() command returns a special tied hash. The tied hash can
901             then be used to access and/or modify data. See below for details
902            
903             With the exception of the XML, HTMLtable, and ARRAY formats, the
904             adTie() command saves all modifications of the data directly to file
905             as they are made. With XML and HTMLtable, you must make your
906             modifications in memory and then explicitly save them to file with
907             adExport().
908            
909             =head2 adTie()
910            
911             my $table = adTie( $format, $data, $open_mode, $flags );
912            
913             The adTie() command creates a reference to a multidimensional tied hash. In its simplest form, it simply reads a file in a specified format into the tied hash:
914            
915             my $table = adTie( $format, $file );
916            
917             $format is the name of any supported format 'CSV','Fixed','Passwd', etc.
918             $file is the name of a relative or absolute path to a local file
919            
920             e.g.
921             my $table = adTie( 'CSV', '/usr/me/myfile.csv' );
922            
923             this creates a tied hash called $table by reading data in the
924             CSV (comma separated values) format from the file 'myfile.csv'.
925            
926             The hash reference resulting from adTie() can be accessed and modified as follows:
927            
928             use AnyData;
929             my $table = adTie( $format, $file );
930             $table->{$key}->{$column}; # select a value
931             $table->{$key} = {$col1=>$val1,$col2=>$val2...}; # update a row
932             delete $table->{$key}; # delete a row
933             while(my $row = each %$table) { # loop through rows
934             print $row->{$col1} if $row->{$col2} ne 'baz';
935             }
936            
937             The thing returned by adTie ($table in the example) is not an object,
938             it is a reference to a tied hash. This means that hash operations
939             such as exists, values, keys, may be used, keeping in mind that this
940             is a *reference* to a tied hash so the syntax would be
941            
942             for( keys %$table ) {...}
943             for( values %$table ) {...}
944            
945             Also keep in mind that if the table is really large, you probably do
946             not want to use keys and values because they create arrays in memory
947             containing data from every row in the table. Instead use 'each' as
948             shown above since that cycles through the file one record at a time
949             and never puts the entire table into memory.
950            
951             It is also possible to use more advanced searching on the hash, see "Multiple Row Operations" below.
952            
953             In addition to the simple adTie($format,$file), there are other ways to specify additional information in the adTie() command. The full syntax is:
954            
955             my $table = adTie( $format, $data, $open_mode, $flags );
956            
957             The $data parameter allows you to read data from remote files accessible by
958             http or ftp, see "Using Remote Files" below. It also allows you to treat
959             strings and arrays as data sources without needing a file at all, see
960             "Working with Strings and Arrays" below.
961            
962             The optional $mode parameter defaults to 'r' if none is supplied or must be
963             one of
964            
965             'r' read # read only access
966             'u' update # read/write access
967             'c' create # create a new file unless it already exists
968             'o' overwrite # create a new file, overwriting any that already exist
969            
970             The $flags parameter allows you to specify additional information such as column names. See the sections in "Further Details" below.
971            
972             With the exception of the XML, HTMLtable, and ARRAY formats, the
973             adTie() command saves all modifications of the data directly to file
974             as they are made. With XML and HTMLtable, you must make your
975             modifications in memory and then explicitly save them to file with
976             adExport().
977            
978             =head2 adConvert()
979            
980             adConvert( $format1, $data1, $format2, $file2, $flags1, $flags2 );
981            
982             or
983            
984             print adConvert( $format1, $data1, $format2, undef, $flags1, $flags2 );
985            
986             or
987            
988             my $aryref = adConvert( $format1, $data1, 'ARRAY', undef, $flags1 );
989            
990             This method converts data in any supported format into any other supported
991             format. The resulting data may either be saved to a file (if $file2 is
992             supplied as a parameter) or sent back as a string to e.g. print the data
993             to the screen in the new format (if no $file2 is supplied), or sent back
994             as an array reference if $format2 is 'ARRAY'.
995            
996             Some examples:
997            
998             # convert a CSV file into an XML file
999             #
1000             adConvert('CSV','foo.csv','XML','foo.xml');
1001            
1002             # convert a CSV file into an HTML table and print it to the screen
1003             #
1004             print adConvert('CSV','foo.csv','HTMLtable');
1005            
1006             # convert an XML string into a CSV file
1007             #
1008             adConvert('XML', ["TIMTOWTDI"],
1009             'CSV','foo.csv'
1010             );
1011            
1012             # convert an array reference into an XML file
1013             #
1014             adConvert('ARRAY', [['id','motto'],['perl','TIMTOWTDI']],
1015             'XML','foo.xml'
1016             );
1017            
1018             # convert an XML file into an array reference
1019             #
1020             my $aryref = adConvert('XML','foo.xml','ARRAY');
1021            
1022             See section below "Using strings and arrays" for details.
1023            
1024             =head2 adExport()
1025            
1026             adExport( $table, $format, $file, $flags );
1027            
1028             or
1029            
1030             print adExport( $table, $format );
1031            
1032             or
1033            
1034             my $aryref = adExport( $table, 'ARRAY' );
1035            
1036             This method converts an existing tied hash into another format and/or
1037             saves the tied hash as a file in the specified format.
1038            
1039             Some examples:
1040            
1041             all assume a previous call to my $table= adTie(...);
1042            
1043             # export table to an XML file
1044             #
1045             adExport($table','XML','foo.xml');
1046            
1047             # export table to an HTML string and print it to the screen
1048             #
1049             print adExport($table,'HTMLtable');
1050            
1051             # export the table to an array reference
1052             #
1053             my $aryref = adExport($table,'ARRAY');
1054            
1055             See section below "Using strings and arrays" for details.
1056            
1057             =head2 adNames()
1058            
1059             my $table = adTie(...);
1060             my @column_names = adNames($table);
1061            
1062             This method returns an array of the column names for the specified table.
1063            
1064             =head2 adRows()
1065            
1066             my $table = adTie(...);
1067             adRows( $table, %search_hash );
1068            
1069             This method takes an AnyData tied hash created with adTie() and
1070             counts the rows in the table that match the search hash.
1071            
1072             For example, this snippet returns a count of the rows in the
1073             file that contain the specified page in the request column
1074            
1075             my $hits = adTie( 'Weblog', 'access.log');
1076             print adRows( $hits , request => 'mypage.html' );
1077            
1078             The search hash may contain multiple search criteria, see the
1079             section on multiple row operations below.
1080            
1081             If the search_hash is omitted, it returns a count of all rows.
1082            
1083             =head2 adColumn()
1084            
1085             my @col_vals = adColumn( $table, $column_name, $distinct_flag );
1086            
1087             This method returns an array of values taken from the specified column.
1088             If there is a distinct_flag parameter, duplicates will be eliminated
1089             from the list.
1090            
1091             For example, this snippet returns a unique list of the values in
1092             the 'player' column of the table.
1093            
1094             my $game = adTie( 'Pipe','games.db' );
1095             my @players = adColumn( $game, 'player', 1 );
1096            
1097             =head2 adDump()
1098            
1099             my $table = adTie(...);
1100             print adDump($table);
1101            
1102             This method prints the raw data in the table. Column names are printed inside angle brackets and separated by colons on the first line, then each row is printed as a list of values inside square brackets.
1103            
1104             =head2 adFormats()
1105            
1106             print "$_\n for adFormats();
1107            
1108             This method shows the available format parsers, e.g. 'CSV', 'XML', etc. It looks in your @INC for the .../AnyData/Format directory and prints the names of format parsing files there. If the parser requires further modules (e.g. XML requires XML::Parser) and you do not have the additional modules installed, the format will not work even if listed by this command. Otherwise, all formats should work as described in this documentation.
1109            
1110             =head1 FURTHER DETAILS
1111            
1112             =head2 Column Names
1113            
1114             Column names may be assigned in three ways:
1115            
1116             * pre -- The format parser preassigns column
1117             names (e.g. Passwd files automatically have
1118             columns named 'username', 'homedir', 'GID', etc.).
1119            
1120             * user -- The user specifies the column names as a comma
1121             separated string associated with the key 'cols':
1122            
1123             my $table = adTie( $format,
1124             $file,
1125             $mode,
1126             {cols=>'name,age,gender'}
1127             );
1128            
1129             * auto -- If there is no preassigned list of column names
1130             and none defined by the user, the first line of
1131             the file is treated as a list of column names;
1132             the line is parsed according to the specific
1133             format (e.g. CSV column names are a comma-separated
1134             list, Tab column names are a tab separated list);
1135            
1136             When creating a new file in a format that does not preassign
1137             column names, the user *must* manually assign them as shown above.
1138            
1139             Some formats have special rules for assigning column names (XML,Fixed,HTMLtable), see the sections below on those formats.
1140            
1141             =head2 Key Columns
1142            
1143             The AnyData modules support tables that have a single key column that
1144             uniquely identifies each row as well as tables that do not have such
1145             keys. For tables where there is a unique key, that key may be assigned
1146             in three ways:
1147            
1148             * pre -- The format parser automatically preassigns the
1149             key column name e.g. Passwd files automatically
1150             have 'username' as the key column.
1151            
1152             * user -- The user specifies the key column name:
1153            
1154             my $table = adTie( $format,
1155             $file,
1156             $mode,
1157             {key=>'country'}
1158             );
1159            
1160             * auto If there is no preassigned key column and the user
1161             does not define one, the first column becomes the
1162             default key column
1163            
1164             =head2 Format Specific Details
1165            
1166             For full details, see the documentation for AnyData::Format::Foo
1167             where Foo is any of the formats listed in the adFormats() command
1168             e.g. 'CSV', 'XML', etc.
1169            
1170             Included below are only some of the more important details of the
1171             specific parsers.
1172            
1173             =over
1174            
1175             =item Fixed Format
1176            
1177             When using the Fixed format for fixed length records you
1178             must always specify a pattern indicating the lengths of the fields.
1179             This should be a string as would be passed to the unpack() function
1180             to unpack the records in your Fixed length definition:
1181            
1182             my $t = adTie( 'Fixed', $file, 'r', {pattern=>'A3 A7 A9'} );
1183            
1184             If you want the column names to appear on the first line of a Fixed
1185             file, they should be in comma-separated format, not in Fixed format.
1186             This is different from other formats which use their own format to
1187             display the column names on the first line. This is necessary because
1188             the name of the column might be longer than the length of the column.
1189            
1190             =item XML Format
1191            
1192             The XML format does not allow you to specify column names as a flag,
1193             rather you specify a "record_tag" and the column names are determined
1194             from the contents of the tag. If no record_tag is specified, the
1195             record tag will be assumed to be the first child of the root of the
1196             XML tree. That child and its structure will be determined from the
1197             DTD if there is one, or from the first occurring record if there is
1198             no DTD.
1199            
1200             For simple XML, no flags are necessary:
1201            
1202            
1203             JoeSeattle
1204             SuePortland
1205            
1206            
1207             The record_tag will default to the first child, namely "row". The column
1208             names will be generated from the attributes of the record tag and all of
1209             the tags included under the record tag, so the column names in this
1210             example will be "row_id","name","location".
1211            
1212             If the record_tag is not the first child, you will need to specify it. For example:
1213            
1214            
1215            
1216             JoeSeattle
1217             SuePortland
1218            
1219            
1220             BobBoise
1221             BevBillings
1222            
1223            
1224            
1225             In this case you will need to specify "row" as the record_tag since it is not the first child of the tree. The column names will be generated from the attributes of row's parent (if the parent is not the root), from row's attributes
1226             and sub tags, i.e. "table_id","row_id","name","location".
1227            
1228             When exporting XML, you can specify a DTD to control the output. For example, if you import a table from CSV or from an Array, you can output as XML and specify which of the columns become tags and which become attributes and also specify the nesting of the tags in your DTD.
1229            
1230             The XML format parser is built on top of Michel Rodriguez's excellent XML::Twig which is itself based on XML::Parser. Parameters to either of those modules may be passed in the flags for adTie() and the other commands including the "prettyPrint" flag to specify how the output XML is displayed and things like ProtocolEncoding. ProtocolEncoding defaults to 'ISO-8859-1', all other flags keep the defaults of XML::Twig and XML::Parser. See the documentation of those modules for details;
1231            
1232             CAUTION: Unlike other formats, the XML format does not save changes to
1233             the file as they are entered, but only saves the changes when you explicitly
1234             request them to be saved with the adExport() command.
1235            
1236             =item HTMLtable Format
1237            
1238             This format is based on Matt Sisk's excelletn HTML::TableExtract.
1239            
1240             It can be used to read an existing table from an html page, or to
1241             create a new HTML table from any data source.
1242            
1243             You may control which table in an HTML page is used with the column_names,
1244             depth and count flags.
1245            
1246             If a column_names flag is passed, the first table that contains those names
1247             as the cells in a row will be selected.
1248            
1249             If depth and or count parameters are passed, it will look for tables as
1250             specified in the HTML::TableExtract documentation.
1251            
1252             If none of column_names, depth, or count flags are passed, the first table
1253             encountered in the file will be the table selected and its first row will
1254             be used to determine the column names for the table.
1255            
1256             When exporting to an HTMLtable, you may pass flags to specify properties
1257             of the whole table (table_flags), the top row containing the column names
1258             (top_row_flags), and the data rows (data_row_flags). These flags follow
1259             the syntax of CGI.pm table constructors, e.g.:
1260            
1261             print adExport( $table, 'HTMLtable', {
1262             table_flags => {Border=>3,bgColor=>'blue'};
1263             top_row_flags => {bgColor=>'red'};
1264             data_row_flags => {valign='top'};
1265             });
1266            
1267             The table_flags will default to {Border=>1,bgColor=>'white'} if none
1268             are specified.
1269            
1270             The top_row_flags will default to {bgColor=>'#c0c0c0'} if none are
1271             specified;
1272            
1273             The data_row_flags will be empty if none are specified.
1274            
1275             In other words, if no flags are specified the table will print out with
1276             a border of 1, the column headings in gray, and the data rows in white.
1277            
1278             CAUTION: This module will *not* preserve anything in the html file except
1279             the selected table so if your file contains more than the selected table,
1280             you will want to use adTie() to read the table and then adExport() to write
1281             the table to a different file. When using the HTMLtable format, this is the
1282             only way to preserve changes to the data, the adTie() command will *not*
1283             write to a file.
1284            
1285             =back
1286            
1287             =head2 Multiple Row Operations
1288            
1289             The AnyData hash returned by adTie() may use either single values as keys, or a reference to a hash of comparisons as a key. If the key to the hash is a single value, the hash operates on a single row but if the key to the hash is itself a hash reference, the hash operates on a group of rows.
1290            
1291             my $num_deleted = delete $table->{Sue};
1292            
1293             This example deletes a single row where the key column has the value 'Sue'. If multiple rows have the value 'Sue' in that column, only the first is deleted. It uses a simple string as a key, therefore it operates on only a single row.
1294            
1295             my $num_deleted = delete $table->{ {name=>'Sue'} };
1296            
1297             This example deletes all rows where the column 'name' is equal to 'Sue'. It uses a hashref as a key and therefore operates on multiple rows.
1298            
1299             The hashref used in this example is a single column comparison but the hashref could also include multiple column comparisons. This deletes all rows where the the values listed for the country, gender, and age columns are equal to those specified:
1300            
1301             my $num_deleted = delete $table->{{ country => 'us',
1302             gender => 'm',
1303             age => '25'
1304             }}
1305            
1306            
1307             In addition to simple strings, the values may be specified as regular expressions or as numeric or alphabetic comparisons. This will delete all North American males under the age of 25:
1308            
1309             my $num_deleted = delete $table->{{ country => qr/mx|us|ca/,
1310             gender => 'm',
1311             age => '< 25'
1312             }}
1313            
1314             If numeric or alphabetic comparisons are used, they should be a string with the comparison operator separated from the value by a space, e.g. '> 4' or 'lt b'.
1315            
1316             This kind of search hashref can be used not only to delete multiple rows, but also to update rows. In fact you *must* use a hashref key in order to update your table. Updating is the only operation that can not be done with a single string key.
1317            
1318             The search hashref can be used with a select statement, in which case it returns a reference to an array of rows matching the criteria:
1319            
1320             my $male_players = $table->{{gender=>'m'}};
1321             for my $player( @$male_players ) { print $player->{name},"\n" }
1322            
1323             This should be used with caution with a large table since it gathers all of the selected rows into an array in memory. Again, 'each' is a much better way for large tables. This accomplishes the same thing as the example above, but without ever pulling more than a row into memory at a time:
1324            
1325             while( my $row= each %$table ) {
1326             print $row->{name}, "\n" if $row->{gender}=>'m';
1327             }
1328            
1329             Search criteria for multiple rows can also be used with the adRows() function:
1330            
1331             my $num_of_women = adRows( $table, gender => 'w' );
1332            
1333             That does *not* pull the entire table into memory, it counts the rows a record at a time.
1334            
1335             =head2 Using Remote Files
1336            
1337             If the first file parameter of adTie() or adConvert() begins with "http://" or "ftp://", the file is treated as a remote URL and the LWP module is called behind the scenes to fetch the file. If the files are in an area that requires authentication, that may be supplied in the $flags parameter.
1338            
1339             For example:
1340            
1341             # read a remote file and access it via a tied hash
1342             #
1343             my $table = adTie( 'XML', 'http://www.foo.edu/bar.xml' );
1344            
1345             # same with username/password
1346             #
1347             my $table = ( 'XML', 'ftp://www.foo.edu/pub/bar.xml', 'r'
1348             { user => 'me', pass => 'x7dy4'
1349             );
1350            
1351             # read a remote file, convert it to an HTML table, and print it
1352             #
1353             print adConvert( 'XML', 'ftp://www.foo.edu/pub/bar.xml', 'HTMLtable' );
1354            
1355             =head2 Using Strings and Arrays
1356            
1357             Strings and arrays may be used as either the source of data input or as the target of data output. Strings should be passed as the only element of an array reference (in other words, inside square brackets). Arrays should be a reference to an array whose first element is a reference to an array of column names and whose succeeding elements are references to arrays of row values.
1358            
1359             For example:
1360            
1361             my $table = adTie( 'XML', ["TIMTOWTDI"] );
1362            
1363             This uses the XML format to parse the supplied string and returns a tied
1364             hash to the resulting table.
1365            
1366            
1367             my $table = adTie( 'ARRAY', [['id','motto'],['perl','TIMTOWTDI']] );
1368            
1369             This uses the column names "id" and "motto" and the supplied row values
1370             and returns a tied hash to the resulting table.
1371            
1372             It is also possible to use an empty array to create a new empty tied hash in any format, for example:
1373            
1374             my $table = adTie('XML',[],'c');
1375            
1376             creates a new empty tied hash;
1377            
1378             See adConvert() and adExport() for further examples of using strings and arrays.
1379            
1380             =head2 Ties, Flocks, I/O, and Atomicity
1381            
1382             AnyData provides flocking which works under the limitations of flock -- that it only works if other processes accessing the files are also using flock and only on platforms that support flock. See the flock() man page for details.
1383            
1384             Here is what the user supplied open modes actually do:
1385            
1386             r = read only (LOCK_SH) O_RDONLY
1387             u = update (LOCK_EX) O_RDWR
1388             c = create (LOCK_EX) O_CREAT | O_RDWR | O_EXCL
1389             o = overwrite (LOCK_EX) O_CREAT | O_RDWR | O_TRUNC
1390            
1391             When you use something like "my $table = adTie(...)", it opens
1392             the file with a lock and leaves the file and lock open until
1393             1) the hash variable ($table) goes out of scope or 2) the
1394             hash is undefined (e.g. "undef $table") or 3) the hash is
1395             re-assigned to another tie. In all cases the file is closed
1396             and the lock released.
1397            
1398             If adTie is called without creating a tied hash variable, the file
1399             is closed and the lock released immediately after the call to adTie.
1400            
1401             For example: print adTie('XML','foo.xml')->{main_office}->{phone}.
1402            
1403             That obtains a shared lock, opens the file, retrieves the one value
1404             requested, closes the file and releases the lock.
1405            
1406             These two examples accomplish the same thing but the first example
1407             opens the file once, does all of the deletions, keeping the exclusive
1408             lock in place until they are all done, then closes the
1409             file. The second example opens and closes the file three times,
1410             once for each deletion and releases the exclusive lock between each
1411             deletion:
1412            
1413             1. my $t = adTie('Pipe','games.db','u');
1414             delete $t->{"user$_"} for (0..3);
1415             undef $t; # closes file and releases lock
1416            
1417             2. delete adTie('Pipe','games.db','u')->{"user$_"} for (0..3);
1418             # no undef needed since no hash variable created
1419            
1420             =head2 Deletions and Packing
1421            
1422             In order to save time and to prevent having to do writes anywhere except at the end of the file, deletions and updates are *not* done at the time of issuing a delete command. Rather when the user does a delete, the position of the deleted record is stored in a hash and when the file is saved to disk, the deletions are only then physically removed by packing the entire database. Updates are done by inserting the new record at the end of the file and marking the old record for deletion. In the normal course of events, all of this should be transparent and you'll never need to worry about it. However, if your server goes down after you've made updates or deletions but before you've saved the file, then the deleted rows will remain in the database and for updates there will be duplicate rows -- the old non updated row and the new updated row. If you are worried about this kind of event, then use atomic deletes and updates as shown in the section above. There's still a very small possibility of a crash in between the deletion and the save, but in this case it should impact at most a single row. (BIG thanks to Matthew Wickline for suggestions on handling deletes)
1423            
1424             =head1 MORE HELP
1425            
1426             See the README file and the test.pl included with the module
1427             for further examples.
1428            
1429             See the AnyData/Format/*.pm PODs for further details of specific
1430             formats.
1431            
1432             For further support, please use comp.lang.perl.modules
1433            
1434             =head1 ACKNOWLEDGEMENTS
1435            
1436             Special thanks to Andy Duncan, Tom Lowery, Randal Schwartz, Michel Rodriguez, Jochen Wiedmann, Tim Bunce, Alligator Descartes, Mathew Persico, Chris Nandor, Malcom Cook and to many others on the DBI mailing lists and the clp* newsgroups.
1437            
1438             =head1 AUTHOR & COPYRIGHT
1439            
1440             Jeff Zucker
1441            
1442             This module is copyright (c), 2000 by Jeff Zucker.
1443             Some changes (c) 2012 Sven Dowideit L
1444             It may be freely distributed under the same terms as Perl itself.
1445            
1446             =cut
1447            
1448             ################################
1449             # END OF AnyData
1450             ################################
1451             1;