File Coverage

blib/lib/AnyData.pm

Criterion	Covered	Total	%
statement	276	487	56.6
branch	101	246	41.0
condition	54	145	37.2
subroutine	34	49	69.3
pod	8	43	18.6
total	473	970	48.7

line

stmt

bran

cond

sub

pod

time

code

##################################################################

package AnyData;

###################################################################

# This module is copyright (c), 2000 by Jeff Zucker

###################################################################

191747

use strict;

237

use warnings;

209

require Exporter;

3381

use AnyData::Storage::TiedHash;

218

use vars qw( @ISA @EXPORT $VERSION );

11022

@ISA = qw(Exporter);

@EXPORT = qw( adConvert adTie adRows adColumn adExport adDump adNames adFormats);

#@EXPORT = qw( ad_fields adTable adErr adArray);

$VERSION = '0.11';

sub new {

my $class = shift;

my $format = shift;

my $flags = shift || {};

my $del_marker = "\0";

100

$format = 'CSV' if $format eq 'ARRAY';

my $parser_name = 'AnyData/Format/' . $format . '.pm';

eval { require $parser_name; };

6909

die "Error Opening File-Parser: $@" if $@;

$parser_name =~ s#/#::#g;

$parser_name =~ s#\.pm$##g;

100

my $col_names = $flags->{col_names} || undef;

100

if ($col_names) {

my @cols;

@cols = ref $col_names eq 'ARRAY'

? @$col_names

: split ',',$col_names;

$flags->{col_names} = \@cols;

}

$flags->{del_marker} = $del_marker;

$flags->{records} ||= $flags->{data};

468

$flags->{field_sep} ||= $flags->{sep_char} ||= $flags->{ad_sep_char};

109

$flags->{quote} ||= $flags->{quote_char} ||= $flags->{ad_quote_char};

114

$flags->{escape} ||= $flags->{escape_char}||= $flags->{ad_escape_char};

143

$flags->{record_sep}||= $flags->{eol} ||= $flags->{ad_eol};

# $flags->{skip_first_row}

my $parser = $parser_name->new ($flags);

100

136

if ($parser->{col_names} && !$col_names) {

my @cols;

@cols = ref $parser->{col_names} eq 'ARRAY'

? @{$parser->{col_names}}

: split ',',$parser->{col_names};

$flags->{col_names} = \@cols;

$parser->{col_names} = \@cols;

}

100

my $storage_name = $flags->{storage}

|| $parser->storage_type()

|| 'File';

$storage_name = "AnyData/Storage/$storage_name.pm";

eval { require $storage_name; };

3657

die "Error Opening Storage Module: $@" if $@;

$storage_name =~ s#/#::#g;

$storage_name =~ s#\.pm$##g;

217

my $storage = new $storage_name({del_marker=>$del_marker,%$flags});

if ($storage_name =~ 'PassThru') {

$storage->{parser} = $parser;

$parser->{del_marker} = "\0";

$parser->{url} = $flags->{file}

if $flags->{file} and $flags->{file} =~ /http:|ftp:/;

}

my $self = {

storage => $storage,

parser => $parser,

};

return( bless($self,$class) );

}

sub adFormats {

my @formats;

for my $dir(@INC) {

my $format_dir = "$dir/AnyData/Format";

if ( -d $format_dir ) {

local *D;

opendir(D,$format_dir);

@formats = grep {/\.pm$/} readdir(D);

last;

}

unshift @formats,'ARRAY';

@formats = map {s/^(.*)\.pm$/$1/;$_} @formats;

return @formats;

}

sub export {

my $self=shift;

my $fh = $self->{storage}->{fh};

my $mode = $self->{storage}->{open_mode} || 'r';

# if ( $self->{parser}->{export_on_close}

# && $self->{storage}->{fh}

# && $mode ne 'r'

100

# ){

101

return $self->{parser}->export( $self->{storage}, @_ );

102

# }

103

}

104

sub DESTROY {

105

7772

my $self=shift;

106

# $self->export;

107

$self->zpack;

108

#print "AD DESTROYED ";

109

}

110

##########################################

111

# DBD STUFF

112

##########################################

113

# required only for DBD-AnyData

114

##########################################

115

sub prep_dbd_table {

116

my $self = shift;

117

my $tname = shift;

118

my $createMode = shift;

119

my $col_names;

120

my $col_nums;

121

my $first_row_pos;

122

if (!$createMode) {

123

$col_names = $self->{storage}->get_col_names($self->{parser});

124

$col_nums = $self->{storage}->set_col_nums();

125

$first_row_pos = $self->{storage}->{first_row_pos};

126

}

127

die "ERROR: No Column Names!:", $self->{storage}->{open_mode}

128

if (!$col_names || !scalar @$col_names)

129

&& 'ru' =~ $self->{storage}->{open_mode}

130

&& !$createMode eq 'o';

131

my $table = {

132

NAME => $tname,

133

DATA => [],

134

CURRENT_ROW => 0,

135

col_names => $col_names,

136

col_nums => $col_nums,

137

first_row_pos => $first_row_pos,

138

fh => $self->{storage}->get_file_handle,

139

file => $self->{storage}->get_file_name,

140

ad => $self,

141

};

142

#use Data::Dumper; print Dumper $table;

143

return $table;

144

}

145

sub fetch_row {

146

my $self = shift;

147

my $requested_cols = shift || [];

148

my $rec;

149

if ( $self->{parser}->{skip_pattern} ) {

150

my $found;

151

while (!$found) {

152

$rec = $self->{storage}->file2str($self->{parser},$requested_cols);

153

last if !defined $rec;

154

next if $rec =~ $self->{parser}->{skip_pattern};

155

last;

156

}

157

}

158

else {

159

$rec = $self->{storage}->file2str($self->{parser},$requested_cols);

160

}

161

return $rec if ref $rec eq 'ARRAY';

162

return unless $rec;

163

my @fields = $self->{parser}->read_fields($rec);

164

return undef if scalar @fields == 1 and !defined $fields[0];

165

return \@fields;

166

}

167

sub fetch_rowNEW {

168

my $self = shift;

169

my $requested_cols = shift || [];

170

my $rec = $self->{storage}->file2str($self->{parser},$requested_cols);

171

my @fields;

172

if (ref $rec eq 'ARRAY') {

173

@fields = @$rec;

174

}

175

else {

176

return unless defined $rec;

177

my @fields = $self->{parser}->read_fields($rec);

178

return undef if scalar @fields == 1 and !defined $fields[0];

179

}

180

if ( my $subs = $self->{parser}->{read_sub} ) {

181

for (@$subs) {

182

my($col,$sub) = @$_;

183

next unless defined $col;

184

my $col_num = $self->{storage}->{col_nums}->{$col};

185

next unless defined $col_num;

186

$fields[$col_num] = &$sub($fields[$col_num]);

187

}

188

}

189

return \@fields;

190

}

191

sub push_names {

192

my $self = shift;

193

my $col_names = shift || undef;

194

#print "Can't find column names!" unless scalar @$col_names;

195

$self->{storage}->print_col_names( $self->{parser}, $col_names )

196

unless $self->{parser}->{col_names} && $self->parser_type ne 'XML';

197

# $self->set_col_nums;

198

$self->{parser}->{key} ||= $col_names->[0];

199

#use Data::Dumper; print Dumper $self; exit;

200

}

201

sub drop { shift->{storage}->drop(@_); }

202

sub truncate { shift->{storage}->truncate(@_) }

203

204

##################################################################

205

# END OF DBD STUFF

206

##################################################################

207

208

##################################################################

209

# REQUIRED BY BOTH DBD AND TIEDHASH

210

##################################################################

211

sub push_row {

212

my $self = shift;

213

die "ERROR: No Column Names!" unless scalar @{$self->col_names};

214

my $requested_cols = [];

215

my @row = @_;

216

100

if (ref($row[0]) eq 'ARRAY') {

217

$requested_cols = shift @row;

218

}

219

147

my $rec = $self->{parser}->write_fields(@row) or return undef;

220

140

return $self->{storage}->push_row( $rec, $self->{parser}, $requested_cols);

221

}

222

sub push_rowNEW {

223

my $self = shift;

224

#print "PUSHING... ";

225

die "ERROR: No Column Names!" unless scalar @{$self->col_names};

226

my $requested_cols = [];

227

my @row = @_;

228

7425

use Data::Dumper;

70229

34577

229

#print "PUSHING ", Dumper \@row;

230

if (ref($row[0]) eq 'ARRAY') {

231

$requested_cols = shift @row;

232

}

233

my $rec = $self->{parser}->write_fields(@row) or return undef;

234

return $self->{storage}->push_row( $rec, $self->{parser}, $requested_cols);

235

}

236

sub seek { shift->{storage}->seek(@_); }

237

sub seek_first_record {

238

my $self=shift;

239

186

$self->{storage}->seek_first_record($self->{parser});

240

}

241

sub col_names {

242

290

349

my $self = shift;

243

290

440

my $c = $self->{storage}->{col_names};

244

290

1031

$c = $self->{parser}->{col_names} unless (ref $c eq 'ARRAY') and scalar @$c;

245

290

925

$c ||= [];

246

}

247

sub is_url {

248

my $file = shift;

249

126

return $file if $file and $file =~ m"^http://|ftp://";

250

}

251

252

sub adTable {

253

###########################################################

254

# Patch from Wes Hardaker

255

###########################################################

256

# my($formatref,$file,$read_mode,$lockMode,$othflags)=@_;

257

my($formatref,$file,$read_mode,$lockMode,$othflags,$tname)=@_;

258

###########################################################

259

#use Data::Dumper; print Dumper \@_;

260

my($format,$flags);

261

$file ||= '';

262

my $url = is_url($file);

263

$flags = {};

264

100

$othflags ||= {};

265

if ( ref $formatref eq 'HASH' or $othflags->{data}) {

266

$format = 'Base';

267

$flags = $othflags;

268

if (ref $formatref eq 'HASH') {

269

%$flags = (%$formatref,%$othflags);

270

}

271

}

272

else {

273

($format,$flags) = split_params($formatref);

274

$othflags ||= {};

275

%$flags = (%$flags,%$othflags);

276

}

277

100

if ( $flags->{cols} ) {

278

$flags->{col_names} = $flags->{cols};

279

delete $flags->{cols};

280

}

281

100

if (ref($file) eq 'ARRAY') {

282

if ($format eq 'Mp3' or $format eq 'FileSys') {

283

$flags->{dirs} = $file;

284

}

285

else {

286

$flags->{recs} = join '',@$file;

287

100

$flags->{recs} = $file if $format =~ /ARRAY/i;

288

$flags->{storage} = 'RAM' unless $format eq 'XML';

289

$read_mode = 'u';

290

}

291

}

292

else {

293

$flags->{file} = $file;

294

}

295

118

if ($format ne 'XML' and ($format eq 'Base' or $url) ) {

296

my $x;

297

$flags->{storage} = 'RAM';

298

delete $flags->{recs};

299

my $ad = AnyData->new( $format, $flags);

300

$format eq 'Base'

301

? $ad->open_table( $file )

302

: $ad->open_table( $file, 'r',

303

$ad->{storage}->get_remote_data($file)

304

);

305

return $ad;

306

}

307

121

my $ad = AnyData->new( $format, $flags);

308

my $createMode = 0;

309

$createMode = $read_mode if defined $lockMode;

310

$read_mode = 'c' if $createMode and $lockMode;

311

114

$read_mode = 'u' if !$createMode and $lockMode;

312

$read_mode ||= 'r';

313

100

135

$ad->{parser}->{keep_first_line} = 1

314

if $flags->{col_names} and 'ru' =~ /$read_mode/;

315

#####################################################

316

# Patch from Wes Hardaker

317

#####################################################

318

# $ad->open_table( $file, $read_mode );

319

## $ad->open_table( $file, $read_mode, $tname );

320

$ad->open_table( $file, $read_mode, $tname );

321

# use Data::Dumper; my $x = $ad; delete $x->{parser}->{twig}; delete $x->{parser}->{record_tag}; delete $x->{parser}->{current_element}; print Dumper $x;

322

#####################################################

323

101

return $ad;

324

}

325

326

sub open_table {

327

my $self = shift;

328

$self->{storage}->open_table( $self->{parser}, @_ );

329

my $col_names = $self->col_names();

330

100

$self->{parser}->{key} ||= '';

331

$self->{parser}->{key} ||= $col_names->[0] if $col_names->[0];

332

}

333

##################################################################

334

335

336

##################################################################

337

# TIEDHASH STUFF

338

##################################################################

339

598

sub key_col { shift->{parser}->{key} }

340

341

sub fetchrow_hashref {

342

221

273

my $self = shift;

343

221

100

380

my $rec = $self->get_undeleted_record or return undef;

344

186

100

691

my @fields = ref $rec eq 'ARRAY'

345

? @$rec

346

: $self->{parser}->read_fields($rec);

347

186

378

my $col_names = $self->col_names();

348

186

389

return undef unless scalar @fields;

349

186

455

return undef if scalar @fields == 1 and !defined $fields[0];

350

186

189

my $rowhash;

351

186

232

@{$rowhash}{@$col_names} = @fields;

186

821

352

186

997

return ( $rowhash );

353

}

354

sub get_undeleted_record {

355

235

243

my $self = shift;

356

235

223

my $rec;

357

235

257

my $found=0;

358

235

383

return $self->fetch_row if $self->parser_type eq 'XML';

359

235

725

while (!$found) {

360

289

865

my $test = $rec = $self->{storage}->file2str($self->{parser});

361

289

100

921

return if !defined $rec;

362

252

100

710

next if $self->{storage}->is_deleted($self->{parser});

363

198

572

next if $self->{parser}->{skip_pattern}

364

and $rec =~ $self->{parser}->{skip_pattern};

365

198

302

last;

366

}

367

198

581

return $rec;

368

# return $rec if ref $rec eq 'ARRAY';

369

# return unless $rec;

370

# my @fields = $self->{parser}->read_fields($rec);

371

# return undef if scalar @fields == 1 and !defined $fields[0];

372

# return \@fields;

373

}

374

sub update_single_row {

375

my $self = shift;

376

my $oldrow = shift;

377

my $newvals = shift;

378

my @colnames = @{ $self->col_names };

379

my @newrow;

380

my $requested_cols = [];

381

for my $i(0..$#colnames) {

382

100

push @$requested_cols, $colnames[$i] if defined $newvals->{$colnames[$i]};

383

$newrow[$i] = $newvals->{$colnames[$i]};

384

100

$newrow[$i] = $oldrow->{$colnames[$i]} unless defined $newrow[$i];

385

}

386

unshift @newrow, $requested_cols;

387

$self->{storage}->seek(0,2);

388

$self->push_row( @newrow );

389

return \@newrow;

390

}

391

sub update_multiple_rows {

392

my $self = shift;

393

my $key = shift;

394

my $values = shift;

395

$self->seek_first_record;

396

my @rows_to_update;

397

while (my $row = $self->fetchrow_hashref) {

398

100

103

next unless $self->match($row,$key);

399

$self->{parser}->{has_update_function}

400

? $self->update_single_row($row,$values)

401

: $self->delete_single_row();

402

$self->{parser}->{has_update_function}

403

? push @rows_to_update,1

404

: push @rows_to_update,$row;

405

}

406

if (!$self->{parser}->{has_update_function}) {

407

for (@rows_to_update) {

408

$self->update_single_row($_,$values);

409

}

410

}

411

return scalar @rows_to_update;

412

}

413

sub match {

414

107

156

my($self,$row,$key) = @_;

415

107

100

255

if ( ref $key ne 'HASH') {

416

100

return 0 if !$row->{$self->key_col}

417

or $row->{$self->key_col} ne $key;

418

return 1;

419

}

420

my $found = 0;

421

228

while (my($col,$re)=each %$key) {

422

100

next unless defined $row->{$col} and is_matched($row->{$col},$re);

423

$found++;

424

}

425

100

515

return 1 if $found == scalar keys %$key;

426

}

427

sub is_matched {

428

my($str,$re)=@_;

429

if (ref $re eq 'Regexp') {

430

return $str =~ /$re/ ? 1 : 0;

431

}

432

my($op,$val);

433

434

175

if ( $re and $re =~/^(\S*)\s+(.*)/ ) {

435

$op = $1;

436

$val = $2;

437

}

438

elsif ($re) {

439

100

217

return $str =~ /$re/ ? 1 : 0;

440

}

441

else {

442

return $str eq '' ? 1 : 0;

443

}

444

my $numop = '< > == != <= >=';

445

my $chrop = 'lt gt eq ne le ge';

446

if (!($numop =~ /$op/) and !($chrop =~ /$op/)) {

447

return $str =~ /$re/ ? 1 : 0;

448

}

449

if ($op eq '<' ) { return $str < $val; }

450

if ($op eq '>' ) { return $str > $val; }

451

if ($op eq '==') { return $str == $val; }

452

if ($op eq '!=') { return $str != $val; }

453

if ($op eq '<=') { return $str <= $val; }

454

if ($op eq '>=') { return $str >= $val; }

455

if ($op eq 'lt') { return $str lt $val; }

456

if ($op eq 'gt') { return $str gt $val; }

457

if ($op eq 'eq') { return $str eq $val; }

458

if ($op eq 'ne') { return $str ne $val; }

459

if ($op eq 'le') { return $str le $val; }

460

if ($op eq 'ge') { return $str ge $val; }

461

}

462

sub delete_single_row {

463

my $self = shift;

464

# my $curpos = $self->{storage}->get_pos;

465

$self->{storage}->delete_record($self->{parser});

466

# $self->{storage}->go_pos($curpos);

467

$self->{needs_packing}++;

468

}

469

sub delete_multiple_rows {

470

my $self = shift;

471

my $key = shift;

472

$self->seek_first_record;

473

my $rows_deleted =0;

474

while (my $row = $self->fetchrow_hashref) {

475

next unless $self->match($row,$key);

476

$self->delete_single_row;

477

$rows_deleted++;

478

}

479

return $rows_deleted;

480

}

481

482

2428

sub adNames { @{ shift->{__colnames}} }

483

484

sub adDump {

485

44711

my $table = shift;

486

my $pat = shift;

487

die "No table defined" unless $table;

488

my $ad = tied(%$table)->{ad};

489

my @cols = @{ $ad->col_names };

490

print "<",join(":", @cols), ">\n";

491

while (my $row = each %$table) {

492

100

my @row = map {defined $row->{$_} ? $row->{$_} : ''} @cols;

493

for (@row) { print "[$_]"; }

169

494

print "\n";

495

}

496

}

497

498

sub adRows {

499

1072

my $thash = shift;

500

my %keys = @_;

501

my $obj = tied(%$thash);

502

return $obj->adRows(\%keys);

503

}

504

sub adColumn {

505

my $thash = shift;

506

my $column = shift;

507

my $flags = shift;

508

my $obj = tied(%$thash);

509

return $obj->adColumn($column, $flags);

510

}

511

sub adArray {

512

my($format,$data)=@_;

513

my $t = adTie( $format, $data );

514

my $t1 = tied(%$t);

515

my $ad = $t1->{ad};

516

my $arrayref = $ad->{storage}->{records};

517

unshift @$arrayref, $ad->{storage}->{col_names};

518

return $arrayref;

519

}

520

##################################################################

521

# END OF TIEDHASH STUFF

522

##################################################################

523

sub parser_type {

524

235

396

my $type = ref shift->{parser};

525

235

961

$type =~ s/AnyData::Format::(.*)/$1/;

526

235

773

return $type;

527

}

528

sub zpack {

529

my $self = shift;

530

return if $self->{storage}->{no_pack};

531

100

524

return if (ref $self->{storage} ) !~ /File$/;

532

533

# return unless $self->{needs_packing};

534

# $self->{needs_packing} = 0;

535

return unless scalar(keys %{ $self->{storage}->{deleted} } );

536

$self->{needs_packing} = 0;

537

# my @callA = caller 2;

538

# my @callB = caller 3;

539

# return if $callA[3] =~ /DBD/;

540

# return if $callB[3] and $callB[3] =~ /SQL::Statement/;

541

# return if $self->{parser}->{export_on_close};

542

#print "PACKING";

543

my $bak_file = $self->{storage}->get_file_name . '.bak';

544

my $bak = adTable( 'Text', $bak_file, 'o' );

545

my $bak_fh = $bak->{storage}->get_file_handle;

546

my $fh = $self->{storage}->get_file_handle;

547

die "Can't pack to backup $!" unless $fh and $bak_fh;

548

# $self->seek_first_record;

549

$fh->seek(0,0) || die $!;

550

#$bak_fh->seek(0,0) || die $!;

551

# while (my $line = $self->get_record) {

552

# next if $self->is_deleted($line);

553

while (my $line = $self->get_undeleted_record) {

554

my $tmpstr = $bak->{parser}->write_fields($line)

555

. $self->{parser}->{record_sep};

556

$bak_fh->write($tmpstr,length $tmpstr);

557

}

558

$fh->seek(0,0);

559

$fh->truncate(0) || die $!;

560

$bak->seek_first_record;

561

while (<$bak_fh>) {

562

$fh->write($_,length $_);

563

}

564

$fh->close;

565

$bak_fh->close;

566

$self->{doing_pack} = 0;

567

undef $self->{storage}->{deleted};

568

}

569

570

##########################################################

571

# FUNCTION CALL INTERFACE

572

##########################################################

573

sub adTie {

574

1981

my($format,$file,$read_mode,$flags)=@_;

575

my $data;

576

100

if (ref $file eq 'ARRAY' && !$read_mode ) { $read_mode = 'u'; }

577

# ARRAY only {data=>[]};

578

if (scalar @_ == 1){

579

$read_mode = 'o';

580

tie %$data,

581

'AnyData::Storage::TiedHash',

582

adTable($format),

583

$read_mode;

584

return $data;

585

}

586

tie %$data,

587

'AnyData::Storage::TiedHash',

588

adTable($format,$file,$read_mode,undef,$flags),

589

$read_mode;

590

return $data;

591

}

592

sub adErr {

593

my $hash = shift;

594

my $t = tied(%$hash);

595

my $errstr = $t->{ad}->{parser}->{errstr}

596

|| $t->{ad}->{storage}->{errstr};

597

print $errstr if $errstr;

598

return $errstr;

599

}

600

sub adExport {

601

my $tiedhash = shift;

602

my($tformat,$tfile,$tflags)=@_;

603

my $ad = tied(%$tiedhash)->{ad};

604

my $sformat = ref $ad->{parser};

605

$sformat =~ s/AnyData::Format:://;

606

$tformat ||= $sformat;

607

if ($tformat eq $sformat and $tformat eq 'XML') {

608

return $ad->{parser}->export($ad->{storage},$tfile,$tflags);

609

}

610

return adConvert('adHash',$ad,$tformat,$tfile,undef,$tflags);

611

}

612

sub adConvert {

613

my( $source_format, $source_data,

614

$target_format,$target_file_name,

615

$source_flags,$target_flags )=@_;

616

617

my $target_type = 'STRING';

618

100

$target_type = 'FILE' if defined $target_file_name;

619

$target_type = 'ARRAY' if $target_format eq 'ARRAY';

620

621

my $data_type = 'AD-OBJECT';

622

$data_type = 'ARRAY' if ref $source_data eq 'ARRAY'

623

and ref $source_data->[0] eq 'ARRAY';

624

625

# INIT SOURCE OBJECT

626

my $source_ad;

627

if ($source_format eq 'adHash') {

628

$source_ad = $source_data;

629

undef $source_data;

630

}

631

else {

632

$source_format = 'CSV' if $source_format =~ /ARRAY/i;

633

$source_ad = adTable(

634

$source_format,$source_data,'r',undef,$source_flags

635

);

636

}

637

638

# GET COLUMN NAMES

639

my @cols;

640

if ( $data_type eq 'ARRAY') {

641

@cols = @{ shift @{ $source_data } };

642

}

643

else {

644

@cols = @{ $source_ad->col_names };

645

}

646

647

648

# insert storable here

649

100

if ('XML HTMLtable' =~ /$target_format/) {

650

$target_flags->{col_names} = join ',',@cols;

651

my $target_ad = adTable(

652

$target_format,$target_file_name,'o',undef,$target_flags

653

);

654

if ($data_type eq 'ARRAY' ) {

655

for my $row(@$source_data) {

656

my @fields=$source_ad->str2ary($row);

657

$target_ad->push_row( $source_ad->str2ary(\@fields) );

658

}

659

unshift @$source_data, \@cols;

660

return $target_ad->export($target_file_name);

661

}

662

$source_ad->seek_first_record;

663

while (my $row = $source_ad->get_undeleted_record) {

664

$target_ad->push_row( $source_ad->str2ary($row) );

665

}

666

return $target_ad->export($target_file_name);

667

}

668

669

my($target_ad,$fh);

670

### INIT TARGET OBJECT

671

if ($target_type eq 'FILE') {

672

$target_ad = adTable(

673

$target_format,$target_file_name,'c',undef,$target_flags

674

);

675

$fh = $target_ad->{storage}->get_file_handle;

676

}

677

elsif ($target_type eq 'STRING') {

678

$target_ad = AnyData->new( $target_format,$target_flags);

679

}

680

681

my($str,$aryref);

682

### GET COLUMN NAMES

683

if ( !$target_ad->{parser}->{no_col_print} ) {

684

if ($target_type eq 'ARRAY') {

685

push @$aryref, \@cols;

686

}

687

else {

688

$str = $target_ad->{parser}->write_fields(@cols);

689

$str =~ s/ /,/g if $target_format eq 'Fixed';

690

if ($target_type eq 'FILE') {

691

$fh->write($str,length $str);

692

}

693

if ($target_type eq 'STRING') {

694

$str = $target_ad->{parser}->write_fields(@cols);

695

}

696

}

697

}

698

699

# GET DATA

700

if ($data_type eq 'ARRAY') {

701

for my $row(@$source_data) {

702

my @fields = $source_ad->str2ary($row);

703

my $tmpstr = $target_ad->{parser}->write_fields(@fields);

704

# print $tmpstr if $check;

705

$fh->write($tmpstr,length $tmpstr) if $target_type eq 'FILE';

706

$str .= $tmpstr if $target_type eq 'STRING';

707

}

708

unshift @$source_data, \@cols;

709

return $str if $target_format ne 'ARRAY';

710

return $aryref;

711

}

712

$source_ad->seek_first_record; # unless $source_format eq 'XML';

713

while (my $row = $source_ad->get_undeleted_record) {

714

if ($target_format eq 'ARRAY') {

715

push @$aryref,$row if $target_format eq 'ARRAY';

716

next;

717

}

718

my @fields = $source_ad->str2ary($row);

719

my $tmpstr = $target_ad->{parser}->write_fields(@fields);

720

$str .= $target_type eq 'FILE'

721

? $fh->write($tmpstr,length $tmpstr)

722

: $tmpstr;

723

}

724

return $str if $target_format ne 'ARRAY';

725

return $aryref;

726

}

727

728

# if ('Storable' =~ /$target_format/) {

729

# $target_flags->{col_names} = join ',',@cols;

730

# $target_ad = adTable(

731

# $target_format,$target_file_name,'c',undef,$target_flags

732

# );

733

# if (ref $source_data && !$data) {

734

# for my $row(@$source_data) {

735

# push @$data,$row;

736

# }

737

# }

738

# elsif (!$data) {

739

# $source_ad->seek_first_record;

740

# while (my $row = $source_ad->fetch_row) {

741

# push @$data, $row;

742

# }

743

# }

744

# unshift @$data, \@cols;

745

# return $target_ad->{parser}->export($data,$target_file_name);

746

# }

747

748

sub str2ary {

749

my($ad,$row) = @_;

750

100

return @$row if ref $row eq 'ARRAY';

751

return $ad->{parser}->read_fields($row);

752

}

753

sub ad_string {

754

my($formatref,@fields) = @_;

755

my($format,$flags) = split_params($formatref);

756

# &dump($formatref); print "<$format>"; &dump($flags) if $flags;

757

#$formatref =~ s/(.*)/$1/;

758

my $ad = AnyData->new( $format, $flags );

759

return $ad->{parser}->write_fields(@fields);

760

# return $ad->write_fields(@fields);

761

}

762

763

sub ad_fields {

764

my($formatref,$str,$flags) = @_;

765

# my($format,$flags) = split_params($formatref);

766

# my $ad = AnyData::new( $format, $flags );

767

my $ad = AnyData->new( $formatref, $flags );

768

return $ad->{parser}->read_fields($str);

769

}

770

771

sub ad_convert_str {

772

my($source_formatref,$target_formatref,$str) = @_;

773

my($source_format,$source_flags) = split_params($source_formatref);

774

my($target_format,$target_flags) = split_params($target_formatref);

775

my $source_ad = AnyData->new( $source_format,$source_flags);

776

my $target_ad = AnyData->new( $target_format,$target_flags);

777

my @fields = $source_ad->read_fields($str);

778

return $target_ad->write_fields( @fields );

779

}

780

781

#########################################################

782

# UTILITY METHODS

783

#########################################################

784

785

# For all methods that have $format as a parameter,

786

# $format can be either a string name of a format e.g. 'CSV'

787

# or a hashref of the format and flags for that format e.g.

788

# { format => 'FixedWidth', pattern=>'A1 A3 A2' }

789

790

# given this parameter, this method returns $format and $flags

791

# setting $flags to {} if none are given

792

793

sub split_params {

794

my $source_formatref = shift;

795

my $source_flags = {};

796

my $source_format = $source_formatref;

797

if (ref $source_formatref eq 'HASH') {

798

while (my($k,$v)=each %$source_formatref) {

799

($source_format,$source_flags) = ($k,$v);

800

}

801

}

802

#use Data::Dumper;

803

return( $source_format, $source_flags);

804

}

805

sub dump {

806

my $var = shift;

807

my $name = ref($var);

808

#use Data::Dumper;

809

$Data::Dumper::Indent = 1;

810

$Data::Dumper::Useqq = 0;

811

print Data::Dumper->new([$var],[$name])->Dump();

812

}

813

814

###########################################################################

815

# START OF DOCUMENTATION

816

###########################################################################

817

818

=pod

819

820

=head1 NAME

821

822

AnyData - easy access to data in many formats

823

824

=head1 SYNOPSIS

825

826

use AnyData;

827

my $table = adTie( 'CSV','my_db.csv','o', # create a table

828

{col_names=>'name,country,sex'}

829

);

830

$table->{Sue} = {country=>'de',sex=>'f'}; # insert a row

831

delete $table->{Tom}; # delete a single row

832

$str = $table->{Sue}->{country}; # select a single value

833

while ( my $row = each %$table ) { # loop through table

834

print $row->{name} if $row->{sex} eq 'f';

835

}

836

$rows = $table->{{age=>'> 25'}}; # select multiple rows

837

delete $table->{{country=>qr/us|mx|ca/}}; # delete multiple rows

838

$table->{{country=>'Nz'}}={country=>'nz'}; # update multiple rows

839

my $num = adRows( $table, age=>'< 25' ); # count matching rows

840

my @names = adNames( $table ); # get column names

841

my @cars = adColumn( $table, 'cars' ); # group a column

842

my @formats = adFormats(); # list available parsers

843

adExport( $table, $format, $file, $flags ); # save in specified format

844

print adExport( $table, $format, $flags ); # print to screen in format

845

print adDump($table); # dump table to screen

846

undef $table; # close the table

847

848

#adConvert( $format1, $file1, $format2, $file2 ); # convert btwn formats

849

#print adConvert( $format1, $file1, $format2 ); # convert to screen

850

851

=head1 DESCRIPTION

852

853

The rather wacky idea behind this module and its sister module

854

DBD::AnyData is that any data, regardless of source or format should

855

be accessible and modifiable with the same simple set of methods.

856

This module provides a multidimensional tied hash interface to data

857

in a dozen different formats. The DBD::AnyData module adds a DBI/SQL

858

interface for those same formats.

859

860

Both modules provide built-in protections including appropriate

861

flocking() for all I/O and (in most cases) record-at-a-time access to

862

files rather than slurping of entire files.

863

864

Currently supported formats include general format flat files (CSV,

865

Fixed Length, etc.), specific formats (passwd files, httpd logs,

866

etc.), and a variety of other kinds of formats (XML, Mp3, HTML

867

tables). The number of supported formats will continue to grow

868

rapidly since there is an open API making it easy for any author to

869

create additional format parsers which can be plugged in to AnyData

870

itself and thereby be accessible by either the tiedhash or DBI/SQL

871

interface.

872

873

=head1 PREREQUISITES

874

875

The AnyData.pm module itself is pure Perl and does not depend on

876

anything other than modules that come standard with Perl. Some

877

formats and some advanced features require additional modules: to use

878

the remote ftp/http features, you must have the LWP bundle installed;

879

to use the XML format, you must have XML::Parser and XML::Twig installed;

880

to use the HTMLtable format for reading, you must have HTML::Parser and

881

HTML::TableExtract installed but you can use the HTMLtable for writing

882

with just the standard CGI module. To use DBI/SQL commands, you must have

883

DBI, DBD::AnyData, SQL::Statement and DBD::File installed.

884

885

=head1 USAGE

886

887

The AnyData module imports eight methods (functions):

888

889

=for test ignore

890

891

adTie() -- create a new table or open an existing table

892

adExport() -- save an existing table in a specified format

893

adConvert() -- convert data in one format into another format

894

adFormats() -- list available formats

895

adNames() -- get the column names of a table

896

adRows() -- get the number of rows in a table or query

897

adDump() -- display the data formatted as an array of rows

898

adColumn() -- group values in a single column

899

900

The adTie() command returns a special tied hash. The tied hash can

901

then be used to access and/or modify data. See below for details

902

903

With the exception of the XML, HTMLtable, and ARRAY formats, the

904

adTie() command saves all modifications of the data directly to file

905

as they are made. With XML and HTMLtable, you must make your

906

modifications in memory and then explicitly save them to file with

907

adExport().

908

909

=head2 adTie()

910

911

my $table = adTie( $format, $data, $open_mode, $flags );

912

913

The adTie() command creates a reference to a multidimensional tied hash. In its simplest form, it simply reads a file in a specified format into the tied hash:

914

915

my $table = adTie( $format, $file );

916

917

$format is the name of any supported format 'CSV','Fixed','Passwd', etc.

918

$file is the name of a relative or absolute path to a local file

919

920

e.g.

921

my $table = adTie( 'CSV', '/usr/me/myfile.csv' );

922

923

this creates a tied hash called $table by reading data in the

924

CSV (comma separated values) format from the file 'myfile.csv'.

925

926

The hash reference resulting from adTie() can be accessed and modified as follows:

927

928

use AnyData;

929

my $table = adTie( $format, $file );

930

$table->{$key}->{$column}; # select a value

931

$table->{$key} = {$col1=>$val1,$col2=>$val2...}; # update a row

932

delete $table->{$key}; # delete a row

933

while(my $row = each %$table) { # loop through rows

934

print $row->{$col1} if $row->{$col2} ne 'baz';

935

}

936

937

The thing returned by adTie ($table in the example) is not an object,

938

it is a reference to a tied hash. This means that hash operations

939

such as exists, values, keys, may be used, keeping in mind that this

940

is a *reference* to a tied hash so the syntax would be

941

942

for( keys %$table ) {...}

943

for( values %$table ) {...}

944

945

Also keep in mind that if the table is really large, you probably do

946

not want to use keys and values because they create arrays in memory

947

containing data from every row in the table. Instead use 'each' as

948

shown above since that cycles through the file one record at a time

949

and never puts the entire table into memory.

950

951

It is also possible to use more advanced searching on the hash, see "Multiple Row Operations" below.

952

953

In addition to the simple adTie($format,$file), there are other ways to specify additional information in the adTie() command. The full syntax is:

954

955

my $table = adTie( $format, $data, $open_mode, $flags );

956

957

The $data parameter allows you to read data from remote files accessible by

958

http or ftp, see "Using Remote Files" below. It also allows you to treat

959

strings and arrays as data sources without needing a file at all, see

960

"Working with Strings and Arrays" below.

961

962

The optional $mode parameter defaults to 'r' if none is supplied or must be

963

one of

964

965

'r' read # read only access

966

'u' update # read/write access

967

'c' create # create a new file unless it already exists

968

'o' overwrite # create a new file, overwriting any that already exist

969

970

The $flags parameter allows you to specify additional information such as column names. See the sections in "Further Details" below.

971

972

With the exception of the XML, HTMLtable, and ARRAY formats, the

973

adTie() command saves all modifications of the data directly to file

974

as they are made. With XML and HTMLtable, you must make your

975

modifications in memory and then explicitly save them to file with

976

adExport().

977

978

=head2 adConvert()

979

980

adConvert( $format1, $data1, $format2, $file2, $flags1, $flags2 );

981

982

983

984

print adConvert( $format1, $data1, $format2, undef, $flags1, $flags2 );

985

986

987

988

my $aryref = adConvert( $format1, $data1, 'ARRAY', undef, $flags1 );

989

990

This method converts data in any supported format into any other supported

991

format. The resulting data may either be saved to a file (if $file2 is

992

supplied as a parameter) or sent back as a string to e.g. print the data

993

to the screen in the new format (if no $file2 is supplied), or sent back

994

as an array reference if $format2 is 'ARRAY'.

995

996

Some examples:

997

998

# convert a CSV file into an XML file

999

1000

adConvert('CSV','foo.csv','XML','foo.xml');

1001

1002

# convert a CSV file into an HTML table and print it to the screen

1003

1004

print adConvert('CSV','foo.csv','HTMLtable');

1005

1006

# convert an XML string into a CSV file

1007

1008

adConvert('XML', ["TIMTOWTDI"],

1009

'CSV','foo.csv'

1010

);

1011

1012

# convert an array reference into an XML file

1013

1014

adConvert('ARRAY', [['id','motto'],['perl','TIMTOWTDI']],

1015

'XML','foo.xml'

1016

);

1017

1018

# convert an XML file into an array reference

1019

1020

my $aryref = adConvert('XML','foo.xml','ARRAY');

1021

1022

See section below "Using strings and arrays" for details.

1023

1024

=head2 adExport()

1025

1026

adExport( $table, $format, $file, $flags );

1027

1028

1029

1030

print adExport( $table, $format );

1031

1032

1033

1034

my $aryref = adExport( $table, 'ARRAY' );

1035

1036

This method converts an existing tied hash into another format and/or

1037

saves the tied hash as a file in the specified format.

1038

1039

Some examples:

1040

1041

all assume a previous call to my $table= adTie(...);

1042

1043

# export table to an XML file

1044

1045

adExport($table','XML','foo.xml');

1046

1047

# export table to an HTML string and print it to the screen

1048

1049

print adExport($table,'HTMLtable');

1050

1051

# export the table to an array reference

1052

1053

my $aryref = adExport($table,'ARRAY');

1054

1055

See section below "Using strings and arrays" for details.

1056

1057

=head2 adNames()

1058

1059

my $table = adTie(...);

1060

my @column_names = adNames($table);

1061

1062

This method returns an array of the column names for the specified table.

1063

1064

=head2 adRows()

1065

1066

my $table = adTie(...);

1067

adRows( $table, %search_hash );

1068

1069

This method takes an AnyData tied hash created with adTie() and

1070

counts the rows in the table that match the search hash.

1071

1072

For example, this snippet returns a count of the rows in the

1073

file that contain the specified page in the request column

1074

1075

my $hits = adTie( 'Weblog', 'access.log');

1076

print adRows( $hits , request => 'mypage.html' );

1077

1078

The search hash may contain multiple search criteria, see the

1079

section on multiple row operations below.

1080

1081

If the search_hash is omitted, it returns a count of all rows.

1082

1083

=head2 adColumn()

1084

1085

my @col_vals = adColumn( $table, $column_name, $distinct_flag );

1086

1087

This method returns an array of values taken from the specified column.

1088

If there is a distinct_flag parameter, duplicates will be eliminated

1089

from the list.

1090

1091

For example, this snippet returns a unique list of the values in

1092

the 'player' column of the table.

1093

1094

my $game = adTie( 'Pipe','games.db' );

1095

my @players = adColumn( $game, 'player', 1 );

1096

1097

=head2 adDump()

1098

1099

my $table = adTie(...);

1100

print adDump($table);

1101

1102

This method prints the raw data in the table. Column names are printed inside angle brackets and separated by colons on the first line, then each row is printed as a list of values inside square brackets.

1103

1104

=head2 adFormats()

1105

1106

print "$_\n for adFormats();

1107

1108

This method shows the available format parsers, e.g. 'CSV', 'XML', etc. It looks in your @INC for the .../AnyData/Format directory and prints the names of format parsing files there. If the parser requires further modules (e.g. XML requires XML::Parser) and you do not have the additional modules installed, the format will not work even if listed by this command. Otherwise, all formats should work as described in this documentation.

1109

1110

=head1 FURTHER DETAILS

1111

1112

=head2 Column Names

1113

1114

Column names may be assigned in three ways:

1115

1116

* pre -- The format parser preassigns column

1117

names (e.g. Passwd files automatically have

1118

columns named 'username', 'homedir', 'GID', etc.).

1119

1120

* user -- The user specifies the column names as a comma

1121

separated string associated with the key 'cols':

1122

1123

my $table = adTie( $format,

1124

$file,

1125

$mode,

1126

{cols=>'name,age,gender'}

1127

);

1128

1129

* auto -- If there is no preassigned list of column names

1130

and none defined by the user, the first line of

1131

the file is treated as a list of column names;

1132

the line is parsed according to the specific

1133

format (e.g. CSV column names are a comma-separated

1134

list, Tab column names are a tab separated list);

1135

1136

When creating a new file in a format that does not preassign

1137

column names, the user *must* manually assign them as shown above.

1138

1139

Some formats have special rules for assigning column names (XML,Fixed,HTMLtable), see the sections below on those formats.

1140

1141

=head2 Key Columns

1142

1143

The AnyData modules support tables that have a single key column that

1144

uniquely identifies each row as well as tables that do not have such

1145

keys. For tables where there is a unique key, that key may be assigned

1146

in three ways:

1147

1148

* pre -- The format parser automatically preassigns the

1149

key column name e.g. Passwd files automatically

1150

have 'username' as the key column.

1151

1152

* user -- The user specifies the key column name:

1153

1154

my $table = adTie( $format,

1155

$file,

1156

$mode,

1157

{key=>'country'}

1158

);

1159

1160

* auto If there is no preassigned key column and the user

1161

does not define one, the first column becomes the

1162

default key column

1163

1164

=head2 Format Specific Details

1165

1166

For full details, see the documentation for AnyData::Format::Foo

1167

where Foo is any of the formats listed in the adFormats() command

1168

e.g. 'CSV', 'XML', etc.

1169

1170

Included below are only some of the more important details of the

1171

specific parsers.

1172

1173

=over

1174

1175

=item Fixed Format

1176

1177

When using the Fixed format for fixed length records you

1178

must always specify a pattern indicating the lengths of the fields.

1179

This should be a string as would be passed to the unpack() function

1180

to unpack the records in your Fixed length definition:

1181

1182

my $t = adTie( 'Fixed', $file, 'r', {pattern=>'A3 A7 A9'} );

1183

1184

If you want the column names to appear on the first line of a Fixed

1185

file, they should be in comma-separated format, not in Fixed format.

1186

This is different from other formats which use their own format to

1187

display the column names on the first line. This is necessary because

1188

the name of the column might be longer than the length of the column.

1189

1190

=item XML Format

1191

1192

The XML format does not allow you to specify column names as a flag,

1193

rather you specify a "record_tag" and the column names are determined

1194

from the contents of the tag. If no record_tag is specified, the

1195

record tag will be assumed to be the first child of the root of the

1196

XML tree. That child and its structure will be determined from the

1197

DTD if there is one, or from the first occurring record if there is

1198

no DTD.

1199

1200

For simple XML, no flags are necessary:

1201

1202

1203							JoeSeattle
1204							SuePortland
1205

1206

1207

The record_tag will default to the first child, namely "row". The column

1208

names will be generated from the attributes of the record tag and all of

1209

the tags included under the record tag, so the column names in this

1210

example will be "row_id","name","location".

1211

1212

If the record_tag is not the first child, you will need to specify it. For example:

1213

1214

1215

1216							JoeSeattle
1217							SuePortland
1218

1219

1220							BobBoise
1221							BevBillings
1222

1223

1224

1225

In this case you will need to specify "row" as the record_tag since it is not the first child of the tree. The column names will be generated from the attributes of row's parent (if the parent is not the root), from row's attributes

1226

and sub tags, i.e. "table_id","row_id","name","location".

1227

1228

When exporting XML, you can specify a DTD to control the output. For example, if you import a table from CSV or from an Array, you can output as XML and specify which of the columns become tags and which become attributes and also specify the nesting of the tags in your DTD.

1229

1230

The XML format parser is built on top of Michel Rodriguez's excellent XML::Twig which is itself based on XML::Parser. Parameters to either of those modules may be passed in the flags for adTie() and the other commands including the "prettyPrint" flag to specify how the output XML is displayed and things like ProtocolEncoding. ProtocolEncoding defaults to 'ISO-8859-1', all other flags keep the defaults of XML::Twig and XML::Parser. See the documentation of those modules for details;

1231

1232

CAUTION: Unlike other formats, the XML format does not save changes to

1233

the file as they are entered, but only saves the changes when you explicitly

1234

request them to be saved with the adExport() command.

1235

1236

=item HTMLtable Format

1237

1238

This format is based on Matt Sisk's excelletn HTML::TableExtract.

1239

1240

It can be used to read an existing table from an html page, or to

1241

create a new HTML table from any data source.

1242

1243

You may control which table in an HTML page is used with the column_names,

1244

depth and count flags.

1245

1246

If a column_names flag is passed, the first table that contains those names

1247

as the cells in a row will be selected.

1248

1249

If depth and or count parameters are passed, it will look for tables as

1250

specified in the HTML::TableExtract documentation.

1251

1252

If none of column_names, depth, or count flags are passed, the first table

1253

encountered in the file will be the table selected and its first row will

1254

be used to determine the column names for the table.

1255

1256

When exporting to an HTMLtable, you may pass flags to specify properties

1257

of the whole table (table_flags), the top row containing the column names

1258

(top_row_flags), and the data rows (data_row_flags). These flags follow

1259

the syntax of CGI.pm table constructors, e.g.:

1260

1261

print adExport( $table, 'HTMLtable', {

1262

table_flags => {Border=>3,bgColor=>'blue'};

1263

top_row_flags => {bgColor=>'red'};

1264

data_row_flags => {valign='top'};

1265

});

1266

1267

The table_flags will default to {Border=>1,bgColor=>'white'} if none

1268

are specified.

1269

1270

The top_row_flags will default to {bgColor=>'#c0c0c0'} if none are

1271

specified;

1272

1273

The data_row_flags will be empty if none are specified.

1274

1275

In other words, if no flags are specified the table will print out with

1276

a border of 1, the column headings in gray, and the data rows in white.

1277

1278

CAUTION: This module will *not* preserve anything in the html file except

1279

the selected table so if your file contains more than the selected table,

1280

you will want to use adTie() to read the table and then adExport() to write

1281

the table to a different file. When using the HTMLtable format, this is the

1282

only way to preserve changes to the data, the adTie() command will *not*

1283

write to a file.

1284

1285

=back

1286

1287

=head2 Multiple Row Operations

1288

1289

The AnyData hash returned by adTie() may use either single values as keys, or a reference to a hash of comparisons as a key. If the key to the hash is a single value, the hash operates on a single row but if the key to the hash is itself a hash reference, the hash operates on a group of rows.

1290

1291

my $num_deleted = delete $table->{Sue};

1292

1293

This example deletes a single row where the key column has the value 'Sue'. If multiple rows have the value 'Sue' in that column, only the first is deleted. It uses a simple string as a key, therefore it operates on only a single row.

1294

1295

my $num_deleted = delete $table->{ {name=>'Sue'} };

1296

1297

This example deletes all rows where the column 'name' is equal to 'Sue'. It uses a hashref as a key and therefore operates on multiple rows.

1298

1299

The hashref used in this example is a single column comparison but the hashref could also include multiple column comparisons. This deletes all rows where the the values listed for the country, gender, and age columns are equal to those specified:

1300

1301

my $num_deleted = delete $table->{{ country => 'us',

1302

gender => 'm',

1303

age => '25'

1304

}}

1305

1306

1307

In addition to simple strings, the values may be specified as regular expressions or as numeric or alphabetic comparisons. This will delete all North American males under the age of 25:

1308

1309

my $num_deleted = delete $table->{{ country => qr/mx|us|ca/,

1310

gender => 'm',

1311

age => '< 25'

1312

}}

1313

1314

If numeric or alphabetic comparisons are used, they should be a string with the comparison operator separated from the value by a space, e.g. '> 4' or 'lt b'.

1315

1316

This kind of search hashref can be used not only to delete multiple rows, but also to update rows. In fact you *must* use a hashref key in order to update your table. Updating is the only operation that can not be done with a single string key.

1317

1318

The search hashref can be used with a select statement, in which case it returns a reference to an array of rows matching the criteria:

1319

1320

my $male_players = $table->{{gender=>'m'}};

1321

for my $player( @$male_players ) { print $player->{name},"\n" }

1322

1323

This should be used with caution with a large table since it gathers all of the selected rows into an array in memory. Again, 'each' is a much better way for large tables. This accomplishes the same thing as the example above, but without ever pulling more than a row into memory at a time:

1324

1325

while( my $row= each %$table ) {

1326

print $row->{name}, "\n" if $row->{gender}=>'m';

1327

}

1328

1329

Search criteria for multiple rows can also be used with the adRows() function:

1330

1331

my $num_of_women = adRows( $table, gender => 'w' );

1332

1333

That does *not* pull the entire table into memory, it counts the rows a record at a time.

1334

1335

=head2 Using Remote Files

1336

1337

If the first file parameter of adTie() or adConvert() begins with "http://" or "ftp://", the file is treated as a remote URL and the LWP module is called behind the scenes to fetch the file. If the files are in an area that requires authentication, that may be supplied in the $flags parameter.

1338

1339

For example:

1340

1341

# read a remote file and access it via a tied hash

1342

1343

my $table = adTie( 'XML', 'http://www.foo.edu/bar.xml' );

1344

1345

# same with username/password

1346

1347

my $table = ( 'XML', 'ftp://www.foo.edu/pub/bar.xml', 'r'

1348

{ user => 'me', pass => 'x7dy4'

1349

);

1350

1351

# read a remote file, convert it to an HTML table, and print it

1352

1353

print adConvert( 'XML', 'ftp://www.foo.edu/pub/bar.xml', 'HTMLtable' );

1354

1355

=head2 Using Strings and Arrays

1356

1357

Strings and arrays may be used as either the source of data input or as the target of data output. Strings should be passed as the only element of an array reference (in other words, inside square brackets). Arrays should be a reference to an array whose first element is a reference to an array of column names and whose succeeding elements are references to arrays of row values.

1358

1359

For example:

1360

1361

my $table = adTie( 'XML', ["TIMTOWTDI"] );

1362

1363

This uses the XML format to parse the supplied string and returns a tied

1364

hash to the resulting table.

1365

1366

1367

my $table = adTie( 'ARRAY', [['id','motto'],['perl','TIMTOWTDI']] );

1368

1369

This uses the column names "id" and "motto" and the supplied row values

1370

and returns a tied hash to the resulting table.

1371

1372

It is also possible to use an empty array to create a new empty tied hash in any format, for example:

1373

1374

my $table = adTie('XML',[],'c');

1375

1376

creates a new empty tied hash;

1377

1378

See adConvert() and adExport() for further examples of using strings and arrays.

1379

1380

=head2 Ties, Flocks, I/O, and Atomicity

1381

1382

AnyData provides flocking which works under the limitations of flock -- that it only works if other processes accessing the files are also using flock and only on platforms that support flock. See the flock() man page for details.

1383

1384

Here is what the user supplied open modes actually do:

1385

1386

r = read only (LOCK_SH) O_RDONLY

1387

u = update (LOCK_EX) O_RDWR

1388

c = create (LOCK_EX) O_CREAT | O_RDWR | O_EXCL

1389

o = overwrite (LOCK_EX) O_CREAT | O_RDWR | O_TRUNC

1390

1391

When you use something like "my $table = adTie(...)", it opens

1392

the file with a lock and leaves the file and lock open until

1393

1) the hash variable ($table) goes out of scope or 2) the

1394

hash is undefined (e.g. "undef $table") or 3) the hash is

1395

re-assigned to another tie. In all cases the file is closed

1396

and the lock released.

1397

1398

If adTie is called without creating a tied hash variable, the file

1399

is closed and the lock released immediately after the call to adTie.

1400

1401

For example: print adTie('XML','foo.xml')->{main_office}->{phone}.

1402

1403

That obtains a shared lock, opens the file, retrieves the one value

1404

requested, closes the file and releases the lock.

1405

1406

These two examples accomplish the same thing but the first example

1407

opens the file once, does all of the deletions, keeping the exclusive

1408

lock in place until they are all done, then closes the

1409

file. The second example opens and closes the file three times,

1410

once for each deletion and releases the exclusive lock between each

1411

deletion:

1412

1413

1. my $t = adTie('Pipe','games.db','u');

1414

delete $t->{"user$_"} for (0..3);

1415

undef $t; # closes file and releases lock

1416

1417

2. delete adTie('Pipe','games.db','u')->{"user$_"} for (0..3);

1418

# no undef needed since no hash variable created

1419

1420

=head2 Deletions and Packing

1421

1422

In order to save time and to prevent having to do writes anywhere except at the end of the file, deletions and updates are *not* done at the time of issuing a delete command. Rather when the user does a delete, the position of the deleted record is stored in a hash and when the file is saved to disk, the deletions are only then physically removed by packing the entire database. Updates are done by inserting the new record at the end of the file and marking the old record for deletion. In the normal course of events, all of this should be transparent and you'll never need to worry about it. However, if your server goes down after you've made updates or deletions but before you've saved the file, then the deleted rows will remain in the database and for updates there will be duplicate rows -- the old non updated row and the new updated row. If you are worried about this kind of event, then use atomic deletes and updates as shown in the section above. There's still a very small possibility of a crash in between the deletion and the save, but in this case it should impact at most a single row. (BIG thanks to Matthew Wickline for suggestions on handling deletes)

1423

1424

=head1 MORE HELP

1425

1426

See the README file and the test.pl included with the module

1427

for further examples.

1428

1429

See the AnyData/Format/*.pm PODs for further details of specific

1430

formats.

1431

1432

For further support, please use comp.lang.perl.modules

1433

1434

=head1 ACKNOWLEDGEMENTS

1435

1436

Special thanks to Andy Duncan, Tom Lowery, Randal Schwartz, Michel Rodriguez, Jochen Wiedmann, Tim Bunce, Alligator Descartes, Mathew Persico, Chris Nandor, Malcom Cook and to many others on the DBI mailing lists and the clp* newsgroups.

1437

1438

=head1 AUTHOR & COPYRIGHT

1439

1440

Jeff Zucker

1441

1442

1443

1444

It may be freely distributed under the same terms as Perl itself.

1445