File Coverage

lib/DBD/TreeData.pm

Criterion	Covered	Total	%
statement	6	6	100.0
branch			n/a
condition			n/a
subroutine	2	2	100.0
pod			n/a
total	8	8	100.0

line	stmt	sub	time	code
1				##############################################################################
2				# DBD::TreeData Module #
3				# E-mail: Brendan Byrd <Perl@resonatorsoft.org> #
4				##############################################################################
5
6				##############################################################################
7				# DBD::TreeData
8
9				package DBD::TreeData;
10
11	1	1	49951	use sanity;
	1		560486
	1		15
12
13	1	1	582448	use parent qw(DBD::AnyData);
	1		3
	1		8
14
15				# ABSTRACT: DBI driver for any abstract hash/array tree
16
17				our $VERSION = '0.90'; # VERSION
18				our $drh = undef; # holds driver handle once initialized
19				our $err = 0; # DBI::err
20				our $errstr = ""; # DBI::errstr
21				our $sqlstate = ""; # DBI::state
22
23				our $methods_already_installed = 0;
24
25				sub driver {
26				return $drh if $drh; # already created - return same one
27				my ($class, $attr) = @_;
28
29				$drh = $class->DBI::DBD::SqlEngine::driver({ # DBD::AnyData doesn't pass over our $attr stuff, so let DBI::DBD::SqlEngine handle it
30				'Name' => 'TreeData',
31				'Version' => $VERSION,
32				'Err' => \$DBD::TreeData::err,
33				'Errstr' => \$DBD::TreeData::errstr,
34				'State' => \$DBD::TreeData::state,
35				'Attribution' => 'DBD::TreeData by Brendan Byrd',
36				}) \|\| return undef;
37
38				unless ( $methods_already_installed++ ) {
39				DBD::TreeData::dr->install_method('tree_process_hash_tree');
40
41				# because of the DBD::AnyData driver by-pass, we have to do its dirty work
42				DBD::TreeData::db->install_method('ad_import');
43				DBD::TreeData::db->install_method('ad_catalog');
44				DBD::TreeData::db->install_method('ad_convert');
45				DBD::TreeData::db->install_method('ad_export');
46				DBD::TreeData::db->install_method('ad_clear');
47				DBD::TreeData::db->install_method('ad_dump');
48				}
49
50				return $drh;
51				}
52
53				sub CLONE {
54				undef $drh;
55				}
56
57				1;
58
59
60				##############################################################################
61				# DBD::TreeData::dr
62
63				package # hide from PAUSE
64				DBD::TreeData::dr; # ====== DRIVER ======
65
66				use sanity 0.94;
67				use DBI 1.619; # first version with tree_ prefix
68				use DBD::AnyData 0.110;
69				use parent qw(-norequire DBD::AnyData::dr); # no such file as ::dr.pm
70
71				use List::AllUtils qw(none any uniq firstidx indexes);
72				use Scalar::Util qw(reftype looks_like_number);
73				use Lingua::EN::Inflect::Phrase qw(to_PL to_S);
74				use Data::Dumper;
75
76				use subs qw(foundin notin col2word print_debug);
77
78				our @dbh;
79				our $debug = 0;
80				our $VERSION = $DBD::TreeData::VERSION;
81				our $imp_data_size = 0;
82				our ($tables, $columns, $ids, $types, $can_null);
83
84				sub connect {
85				my ($drh, $dr_dsn, $user, $auth, $attr) = @_;
86
87				if ($dr_dsn =~ /\;\|\=/) { # is DSN notation
88				foreach my $var (split /\;/, $dr_dsn) {
89				my ($attr_name, $attr_value) = split(/\=/, $var, 2);
90				return $drh->set_err($DBI::stderr, "Can't parse DSN part '$var'", '08001') unless (defined $attr_value);
91
92				$attr_name = lc($attr_name);
93				$attr_name = 'tree_'.$attr_name unless ($attr_name =~ /^tree_/o);
94				$attr->{$attr_name} = $attr_value;
95				}
96				}
97				else {
98				$attr->{tree_table_name} \|\|= $dr_dsn;
99				}
100				$attr->{tree_table_name} \|\|= 'tree_data';
101				$debug = $attr->{tree_debug} \|\| $attr->{TraceLevel} \|\| $drh->{TraceLevel};
102
103				# Run through the tree conversion
104				$attr->{tree_data} or return $drh->set_err($DBI::stderr, "Data! Data! Data! I cannot make bricks without clay!", '08004');
105				$drh->tree_process_hash_tree($attr->{tree_table_name}, $attr->{tree_data}, 0);
106
107				# remove global data and keep the local tree in $tref
108				my $tref = $tables;
109				$attr->{tree_columns} = {
110				names => { map { $_ => $tref->{$_}{columns} } keys %$tref },
111				types => $types,
112				nulls => $can_null,
113				};
114				$attr->{tree_cardinality} = $ids->{table};
115
116				### TODO: Clean this up ###
117				undef $tables;
118				undef $columns;
119				undef $ids;
120				undef $types;
121				undef $can_null;
122
123				# Add into our $dbh object, using AnyData's methods
124				my ($outer_dbh, $dbh) = DBI::_new_dbh($drh, {
125				Name => $attr->{tree_table_name},
126				}, $attr);
127				$dbh->func( 0, "init_default_attributes" ); # make sure we get all of the right sql_* vars in place
128				$dbh->func("init_done");
129				$dbh->STORE('Active', 1);
130
131				### TODO: Need error checking for tree_rename_tables ###
132
133				foreach my $table (keys %$tref) {
134				my $table_name = exists $attr->{'tree_rename_tables'} ?
135				($attr->{'tree_rename_tables'}{$table} \|\| $table) : $table;
136
137				$dbh->func($table_name, 'ARRAY', [@{$tref->{$table}{data}}], {
138				col_names => join(',', @{$tref->{$table}{columns}}),
139				}, 'ad_import');
140				}
141
142				# Using the DBD::AnyData $dbh for the rest of the work
143				push @dbh, $dbh;
144				return $outer_dbh;
145				}
146
147				sub data_sources {
148				# Typically no need for parameters, as the defaults work just fine...
149				return ('dbi:TreeData:');
150				}
151
152				sub disconnect_all {
153				while (my $dbh = shift @dbh) {
154				ref $dbh && $dbh->disconnect;
155				}
156				return 1;
157				}
158
159				sub tree_process_hash_tree ($$$;$) {
160				my ($drh, $col, $tree, $depth) = @_;
161				my ($col_id, $serialized_tree);
162
163				if ($depth > 100) {
164				$drh->set_err(0, "Too deep down the rabbit hole; crawling back...");
165				return $col => undef;
166				}
167
168				print_debug($depth, "$depth - $col => ".(reftype($tree) \|\| substr($tree, 0, 30)));
169
170				state $id_names = ['group', 'matrix', 'cube', 'hypercube']; # if you go past here, you've gone too far...
171
172				given (reftype $tree) {
173				# Common code for both HASHs and ARRAYs
174				when (/HASH\|ARRAY/) {
175				$col = to_S(col2word($col));
176				$col =~ s/ /_/g;
177				$col_id = $col.'_id';
178
179				# compare serialized trees for the same IDs
180				if ($depth) { # no point if this is the first node
181				$serialized_tree = Data::Dumper->new([$tree], ['*XXXX'])->
182				# (options for consistency, for exact matches)
183				Reset->Sortkeys(1)->
184				# (options designed to use the smallest possible footprint, as these can get rather large)
185				Indent(0)->Quotekeys(0)->Pair('=')->Dump;
186
187				# cycle through possible ID names
188				my @list = ('', (reftype $tree eq 'ARRAY') ? @$id_names : ());
189				foreach my $suffix (@list) {
190				my $id_name = $col.($suffix ? '_'.$suffix : '').'_id';
191				my $tree = $serialized_tree;
192				$tree =~ s/^(\W{1,2})XXXX/$1$id_name/;
193
194				# already exists, makes this easier
195				my $id = $ids->{trees}{$tree};
196				if ($id) {
197				print_debug($depth+1, "TREE <=== Dumper match ID ".join(' => ', split(/\\|/, $id)));
198				return split(/\\|/, $id);
199				}
200				}
201				}
202				continue;
203				}
204				# HASHs control the main tables, providing column names and data for the rows
205				# Table = $col (plural)
206				# ID = $col.'_id'
207				when ('HASH') {
208				# parse out a table name (with plural form)
209				my $table_name = $depth ? to_PL($col) : $col;
210				$table_name =~ s/ /_/g;
211
212				# now run through the columns and data (with recursive loop goodness)
213				my %data = map {
214				my ($dat, $id) = ($$tree{$_}, $_);
215				# clean up the column names
216				$id = col2word($id);
217				$id =~ s/ /_/g;
218				$drh->tree_process_hash_tree($id => $dat, $depth + 1);
219				} keys %$tree;
220				### FIXME: don't forget about undef in keys ###
221
222				# check the new column names to see if we've seen this table before
223				my @cols = ($col_id, sort keys %data); # new ID column first
224				my $col_key = join('\|', @cols);
225
226				if ($columns->{$col_key}) { # known table
227				$table_name = $columns->{$col_key};
228
229				print_debug($depth+1, "HASH ===> Found known table '$table_name'");
230				}
231				elsif ($tables->{$table_name}) { # errr, known table, but not with this column structure
232				my $t;
233				foreach my $j ('', 2 .. 200) { # loop through a bunch of potential table names
234				my $tname = $table_name.$j;
235
236				if ($t = $tables->{$tname}) {
237				my @tcols = @{$t->{columns}};
238				my @ucols = uniq(@cols, @tcols);
239				# have to be the same ID columns && need to have at least one field in common
240				# (remove keys while we're at it)
241				if (shift(@tcols) eq shift(@cols) && uniq(@cols, @tcols) < (@tcols + @cols)) {
242				my @extra_cols = notin(\@tcols, \@cols);
243
244				# new table has extra columns to add
245				if (@extra_cols) {
246				# add new column names and resort
247				my @old_cols = @{$t->{columns}};
248				my @new_cols = ($col_id, sort(@tcols, @extra_cols));
249				my @diff_idx = grep { $old_cols[$_] ne $new_cols[$_] } (0 .. (@new_cols - 1));
250				$t->{columns} = \@new_cols;
251
252				unless ($diff_idx[0] > @{$t->{columns}}-1) {
253				# well, the new columns aren't on the end, so old data needs to be shuffled
254				for (my $l = 0; $l < @{$t->{data}}; $l++) {
255				my @data = @{$t->{data}[$l]};
256				my %data = map { $old_cols[$_] => $data[$_] } (0 .. (@data - 1)); # change to hash
257				@data = map { $data{$_} } @new_cols; # change to array
258				$t->{data}[$l] = \@data;
259				}
260				}
261
262				# remove the old column key and replace with a new one
263				delete $columns->{ join('\|', @old_cols) };
264				}
265
266				# if the new table is missing certain columns, they will insert undefs as needed naturally below
267				# however, nullability checks might be in order
268				my @missing_cols = notin(\@cols, \@tcols);
269				$can_null->{$_} = 1 for (@missing_cols, @extra_cols);
270
271				print_debug($depth+1, "HASH ===> Found known table with different columns '$table_name'");
272				last;
273				}
274
275				# wrong table to use; try next name
276				next;
277				}
278				else { # just treat this as as new table, then
279				$drh->set_err(0, "Found a table with a dupe name, but totally different columns; calling it '$tname'...") if ($j);
280				$table_name = $tname;
281				$tables->{$table_name} = $t = {
282				columns => [@cols],
283				data => [],
284				};
285
286				print_debug($depth+1, "HASH ===> Creating new table '$table_name' because of conflicting columns");
287				last;
288				}
289				}
290
291				$col_key = join('\|', @{$t->{columns}});
292				$columns->{$col_key} = $table_name;
293				}
294				else { # new table
295				$tables->{$table_name} = {
296				columns => [@cols],
297				data => [],
298				};
299				$columns->{$col_key} = $table_name;
300
301				print_debug($depth+1, "HASH ===> Creating new table '$table_name'");
302				}
303
304				# Add new row
305				my $t = $tables->{$table_name};
306				my $id = ++($ids->{table}{$table_name});
307				$serialized_tree =~ s/^(\W{1,2})XXXX/$1$col_id/;
308				$ids->{trees}{$serialized_tree} = $col_id.'\|'.$id;
309				push(@{$t->{data}}, [ $id, map { $data{$_} } grep { $_ ne $col_id } @{$t->{columns}} ]);
310
311				# Since we're done with this table, send back the col_id and id#
312				print_debug($depth+1, "HASH <=== $col_id => $id");
313				$types->{$col_id} = 'ID';
314				return $col_id => $id;
315				}
316				# ARRAYs provide ID grouping tables, capturing the individual rows in a group
317				# These are going to be two-column tables with two different IDs
318				# Table = $col.'_groups' (plural)
319				# ID = $col.(group\|matrix\|cube\|etc.).'_id'
320				when ('ARRAY') {
321				# Pass the data on down first (ARRAY of ARRAYS to prevent de-duplication of keys)
322				my @data = map {
323				my $dat = $_;
324				[ $drh->tree_process_hash_tree($col => $dat, $depth + 1) ]
325				} @$tree;
326
327				# Okay, we could end up with several different scenarios:
328
329				# A. All items have the same column name (as a ID)
330				# B. All items appear to be some form of data
331				# C. A mixture of IDs and data (scary!)
332
333				# Process both groups individually (and hope for the best)
334				my @id_cols = grep { $data[$_]->[0] =~ /_id$/; } (0 .. (@data - 1));
335				my @data_cols = grep { $data[$_]->[0] !~ /_id$/; } (0 .. (@data - 1));
336				@id_cols = () unless ($depth); # skip any group ID tables if this is the very first node
337
338				$drh->set_err(0, "Inconsistant sets of data within an array near '$col'; going to process it as best as possible...") if (@id_cols && @data_cols);
339
340				# Items of IDs
341				### TODO: Clean this up; the logic is a bit of a mess... ###
342				my (@max_id, @group_id);
343				foreach my $ii (@id_cols, @data_cols) {
344				# In all cases, there will be two tables to populate: a group/id table, and a id/data (or id/id) table
345				my ($icol, $item) = @{$data[$ii]};
346				my $is_id = ($icol =~ /_id$/i);
347
348				# IDs are singular; table names are plural
349				my $strip = to_S(col2word($icol));
350				$icol = $strip;
351				$icol =~ s/ /_/g;
352				$icol .= '_id' if ($is_id);
353
354				# Process group ID names
355				# ncol = N+1, icol = N (as in _group_id => _id, or _matrix_id => _group_id)
356				my $ncol = $icol;
357				$ncol =~ s/_id$//i;
358				my $i = firstidx { $ncol =~ s/(?<=_)$_$//; } @$id_names; # that's underscore + $_ + EOL
359				# $i = -1 if not found, which then ++$i = 0 and id_names = _group
360
361				if (++$i > 3) { # start whining here
362				$ncol .= '_hypercube_'.$id_names->[$i -= 4];
363
364				$drh->set_err(0, "Seriously?! We're using ridiculous names like '$ncol"."_id' at this point...");
365				}
366				else { $ncol .= '_'.$id_names->[$i]; }
367				$i++; # prevent -1 on @_id arrays
368
369				# Parse out a group table name (with plural form)
370				my $grp_table_name = to_S(col2word($ncol));
371				$grp_table_name = to_PL($grp_table_name); # like blah_groups
372				$grp_table_name =~ s/ /_/g;
373
374				$icol .= '_id' unless ($icol =~ /_id$/);
375				$ncol .= '_id' unless ($ncol =~ /_id$/);
376				$max_id[$i] = $ncol;
377				print_debug($depth+1, "ARRAY ===> max_id = $i/$ncol");
378
379				# Create new group table (if it doesn't already exist)
380				my $t;
381				if ($depth) { # skip any group ID tables if this is the very first node
382				unless ($tables->{$grp_table_name}) {
383				### FIXME: Assuming that table doesn't exist with the same columns ###
384				print_debug($depth+1, "ARRAY ===> Creating new group table '$grp_table_name'");
385
386				$tables->{$grp_table_name} = {
387				columns => [ $ncol, $icol ],
388				data => [],
389				};
390				}
391				$t = $tables->{$grp_table_name};
392				my $col_key = join('\|', @{$t->{columns}});
393				$columns->{$col_key} = $grp_table_name;
394				}
395
396				# Add new row
397				$group_id[$i] = ++($ids->{table}{$grp_table_name}) unless ($group_id[$i]); # only increment once (per group type)
398				if ($is_id) { # ID column: $item = ID, and this goes in a group table (id/data table already processed)
399				print_debug($depth+1, "ARRAY ===> $grp_table_name => [ $group_id[$i], $item ] (new ID row for an group table)");
400				push(@{$t->{data}}, [ $group_id[$i], $item ]);
401				}
402				else { # data column: $item = data, and we process both tables
403				my $itbl_name = to_PL($strip); # like blahs
404				$itbl_name =~ s/ /_/g;
405				my $data_col = $strip;
406				$data_col =~ s/ /_/g;
407
408				# Create new id table (if it doesn't already exist)
409				unless ($tables->{$itbl_name}) {
410				print_debug($depth+1, "ARRAY ===> Creating new ID table '$itbl_name'");
411
412				$tables->{$itbl_name} = {
413				columns => [ $icol, $data_col ],
414				data => [],
415				};
416				}
417				my $n = $tables->{$itbl_name};
418				my $col_key = join('\|', @{$n->{columns}});
419				$columns->{$col_key} = $itbl_name;
420				$types->{$icol} = 'ID';
421
422				$max_id[$i-1] = $icol;
423				print_debug($depth+1, "ARRAY ===> max_id = ".int($i-1)."/$icol");
424				### FIXME: Assuming that table doesn't exist with the same columns ###
425
426				# First, check serial tree with single value
427				my $stree = Data::Dumper->new([$item], ['*'.$icol])->Reset->Indent(0)->Dump;
428				if ($ids->{trees}{$stree} && $depth) {
429				# Add new group row (with proper col_id)
430				my $id = (split(/\\|/, $ids->{trees}{$stree}))[1];
431				print_debug($depth+1, "ARRAY ===> $grp_table_name => [ $group_id[$i], $id ] (serial tree found)");
432				push(@{$t->{data}}, [ $group_id[$i], $id ] );
433
434				# (no need to add into main table; already exists)
435				}
436				else {
437				# Add new group row (with proper col_id)
438				my $id = ++($ids->{table}{$itbl_name});
439				if ($depth) {
440				print_debug($depth+1, "ARRAY ===> $grp_table_name => [ $group_id[$i], $id ] (new group row)");
441				push(@{$t->{data}}, [ $group_id[$i], $id ]);
442				}
443
444				# Add new id row
445				$ids->{trees}{$stree} = $icol.'_id\|'.$id;
446				print_debug($depth+2, "ARRAY ===> $itbl_name => [ $id, $item ] (new ID/data row)");
447				push(@{$n->{data}}, [ $id, $item ]);
448				}
449				}
450				}
451
452				# Pass back an ID
453				my ($gid_col, $gid) = (pop(@max_id) \|\| $col, pop(@group_id)); # undef @max_id might happen with an empty array
454
455				print_debug($depth+1, "ARRAY <=== $gid_col => $gid");
456				$serialized_tree =~ s/^(\W{1,2})XXXX/$1$gid_col/;
457				$ids->{trees}{$serialized_tree} = $gid_col.'\|'.$gid;
458				$types->{$gid_col} = 'ID';
459				return $gid_col => $gid;
460				}
461				# An actual scalar; return back the proper column name and data
462				when ('' \|\| undef) {
463				return type_detect($col, $tree);
464				}
465				# De-reference
466				when (/SCALAR\|VSTRING/) {
467				return type_detect($col, $$tree);
468				}
469				# Warn and de-reference
470				when (/Regexp\|LVALUE/i) {
471				$drh->set_err(0, "Found a ".(reftype $tree)."; just going to treat this like a SCALAR...");
472				return type_detect($col, $$tree);
473				}
474				# Warn and de-reference (for further examination)
475				when ('REF') {
476				$drh->set_err(0, "Found a REF; going to dive in the rabbit hole...");
477				return $drh->tree_process_hash_tree($col => $$tree, $depth + 1);
478				}
479				# Warn and de-reference (for further examination)
480				when ('GLOB') {
481				foreach my $t (qw(Regexp VSTRING IO FORMAT LVALUE GLOB REF CODE HASH ARRAY SCALAR)) { # scalar last, since a ref is still a scalar
482				if (defined *$$tree{$t}) {
483				$drh->set_err(0, "Found a GLOB (which turn out to be a $t); going to dive in the rabbit hole...");
484				return $drh->tree_process_hash_tree($col => *$$tree{$t}, $depth + 1);
485				}
486				}
487				$drh->set_err(0, "Found a GLOB, but it didn't point to anything...");
488				return $col => undef;
489				}
490				# Warn and throw away
491				when ('CODE') {
492				### TODO: Warn immediately, eval block with timer to use as output, then continue ###
493				### Definitely need a switch, though ###
494				$drh->set_err(0, "Found a CODE block; not going to even touch this one...");
495				return $col => undef;
496				}
497				default {
498				$drh->set_err(0, "Found a ".(reftype $tree)."; WTF is this? Can't use this at all...");
499				return $col => undef;
500				}
501				}
502
503				die "WTF?! Perl broke my given/when! Alert the Pumpking!!!";
504				}
505
506				# Find items in @B that are in @A
507				sub foundin (\@\@) {
508				my ($A, $B) = @_;
509				return grep { my $i = $_; any { $i eq $_ } @$A; } @$B;
510				}
511
512				# Find items in @B that are not in @A
513				sub notin (\@\@) {
514				my ($A, $B) = @_;
515				return grep { my $i = $_; none { $i eq $_ } @$A; } @$B;
516				}
517
518				sub col2word ($) {
519				my $word = $_[0];
520				$word = lc($word);
521				$word =~ s/[\W_]+/ /g;
522				$word =~ s/^\s+\|\s+(?:id)?$//g;
523				return $word;
524				}
525
526				sub type_detect ($;$) {
527				my ($col, $val) = @_;
528				my $is_num = looks_like_number($val);
529
530				$col = to_S(col2word($col)); # if we're at this point, it's a single item
531				$col =~ s/ /_/g;
532				unless (defined $val) {
533				$can_null->{$_} = 1;
534				return $col => undef;
535				}
536
537				$types->{$col} = 'STRING' if (!$is_num && $types->{$col}); # any non-number data invalidates the NUMBER type
538				$types->{$col} \|\|= $is_num ? 'NUMBER' : 'STRING';
539				return $col => $val;
540				}
541
542				sub print_debug ($$) {
543				my ($depth, $msg) = @_;
544				return unless ($debug);
545
546				print (" " x $depth);
547				say $msg;
548				}
549
550				1;
551
552				##############################################################################
553				# DBD::TreeData::db
554
555				package # hide from PAUSE
556				DBD::TreeData::db; # ====== DATABASE ======
557
558				our $imp_data_size = 0;
559				use DBD::AnyData;
560				use parent qw(-norequire DBD::AnyData::db); # no such file as ::db.pm
561
562				use Config;
563				use List::AllUtils qw(first);
564
565				# Overriding the package here to add some *_info methods
566
567				### TODO: get_info ###
568
569				sub table_info {
570				my ($dbh, $catalog, $schema, $table) = @_;
571				my $names = [qw( TABLE_QUALIFIER TABLE_OWNER TABLE_NAME TABLE_TYPE REMARKS )];
572
573				$table = '^'.$table.'$' if length $table;
574
575				return sponge_sth_loader($dbh, 'TABLE_INFO', $names, [
576				grep { !$table \|\| $_->[2] =~ /$table/i } $dbh->func("get_avail_tables")
577				] );
578				}
579
580				sub column_info {
581				my ($dbh, $catalog, $schema, $table, $column) = @_;
582				my $type = 'COLUMN_INFO';
583				my $names = [qw(
584				TABLE_CAT TABLE_SCHEM TABLE_NAME COLUMN_NAME DATA_TYPE TYPE_NAME COLUMN_SIZE BUFFER_LENGTH DECIMAL_DIGITS
585				NUM_PREC_RADIX NULLABLE REMARKS COLUMN_DEF SQL_DATA_TYPE SQL_DATETIME_SUB CHAR_OCTET_LENGTH ORDINAL_POSITION IS_NULLABLE
586				CHAR_SET_CAT CHAR_SET_SCHEM CHAR_SET_NAME COLLATION_CAT COLLATION_SCHEM COLLATION_NAME UDT_CAT UDT_SCHEM UDT_NAME
587				DOMAIN_CAT DOMAIN_SCHEM DOMAIN_NAME SCOPE_CAT SCOPE_SCHEM SCOPE_NAME MAX_CARDINALITY DTD_IDENTIFIER IS_SELF_REF
588				)];
589
590				$table = '^'.$table .'$' if length $table;
591				$column = '^'.$column.'$' if length $column;
592
593				my @tables = $dbh->func("get_avail_tables");
594				my @col_rows = ();
595				my $tc = $dbh->{tree_columns};
596
597				# De-mangle types
598				my $types = $dbh->type_info_all;
599				shift(@$types); # helper "column key" row
600				my %types = map { $_->[0] => $_ } @$types;
601
602				foreach my $tbl (sort { $a->[2] cmp $b->[2] } @tables) { # ->[2] = table name
603				next unless ($tbl);
604				next unless (!$table \|\| $tbl->[2] =~ /$table/i);
605
606				my $id = 0;
607				foreach my $col ( @{$tc->{names}{$tbl->[2]}} ) {
608				next unless (!$column \|\| $col =~ /$column/i);
609				my $ti = $types{ $id ? uc($tc->{types}{$col}) : 'PID' };
610				my $can_null = $id && $tc->{nulls}{$col} \|\| 0;
611
612				my $col_row = [
613				# 0=TABLE_CAT TABLE_SCHEM TABLE_NAME COLUMN_NAME DATA_TYPE TYPE_NAME COLUMN_SIZE BUFFER_LENGTH DECIMAL_DIGITS
614				undef, undef, $tbl->[2], $col, $ti->[0], $ti->[1], $ti->[2], undef, $ti->[17] ? int($ti->[14] * log($ti->[17])/log(10)) : undef, # log(r^l) = l * log(r)
615				# 9=NUM_PREC_RADIX NULLABLE REMARKS COLUMN_DEF SQL_DATA_TYPE SQL_DATETIME_SUB CHAR_OCTET_LENGTH ORDINAL_POSITION IS_NULLABLE
616				$ti->[17], $can_null, undef, undef, $ti->[15], $ti->[16], $ti->[17] ? undef : $ti->[2], $id, $can_null ? 'YES' : 'NO',
617				# 18=CHAR_SET_CAT CHAR_SET_SCHEM CHAR_SET_NAME COLLATION_CAT COLLATION_SCHEM COLLATION_NAME UDT_CAT UDT_SCHEM UDT_NAME
618				undef, undef, undef, undef, undef, undef, undef, undef, undef,
619				# DOMAIN_CAT DOMAIN_SCHEM DOMAIN_NAME SCOPE_CAT SCOPE_SCHEM SCOPE_NAME MAX_CARDINALITY DTD_IDENTIFIER IS_SELF_REF
620				undef, undef, undef, undef, undef, undef, undef, undef, undef,
621				];
622
623				push @col_rows, $col_row;
624				$id++;
625				}
626				}
627
628				return sponge_sth_loader($dbh, $type, $names, \@col_rows);
629				}
630
631				sub primary_key_info {
632				my ($dbh, $catalog, $schema, $table) = @_;
633				my $type = 'PRIMARY_KEY_INFO';
634
635				my $cols = $dbh->{tree_columns}{names}{$table} \|\| return $dbh->set_err($DBI::stderr, "No such table name: $table", '42704');
636				my $pkey = $cols->[0];
637
638				return sponge_sth_loader($dbh, $type,
639				[qw(TABLE_CAT TABLE_SCHEM TABLE_NAME COLUMN_NAME KEY_SEQ PK_NAME)],
640				[ [ undef, undef, $table, $pkey, 1, $pkey.'_pkey' ] ]
641				);
642				}
643
644				sub foreign_key_info {
645				my ($dbh, $pk_catalog, $pk_schema, $pk_table, $fk_catalog, $fk_schema, $fk_table) = @_;
646				my $type = 'FOREIGN_KEY_INFO';
647				my $names = [qw(
648				PKTABLE_CAT PKTABLE_SCHEM PKTABLE_NAME PKCOLUMN_NAME FKTABLE_CAT FKTABLE_SCHEM FKTABLE_NAME FKCOLUMN_NAME
649				KEY_SEQ UPDATE_RULE DELETE_RULE FK_NAME PK_NAME DEFERRABILITY UNIQUE_OR_PRIMARY
650				)];
651
652				my $colnames = $dbh->{tree_columns}{names};
653				my $pkey = $pk_table && $colnames->{$pk_table} ? $colnames->{$pk_table}[0] : undef;
654				my $fkey = $fk_table && $colnames->{$fk_table} ? $colnames->{$fk_table}[0] : undef;
655				my ($pk_list, $fk_list) = ([$pk_table], [$fk_table]);
656				my @dbi_data;
657
658				# If both PKT and FKT are given, the function returns the foreign key, if any,
659				# in table FKT that refers to the primary (unique) key of table PKT.
660				if ($pkey && $fkey) {
661				$fkey = first { $_ eq $pkey } $colnames->{$fk_table}; # pkey or bust
662				}
663
664				# If only PKT is given, then the result set contains the primary key of that table
665				# and all foreign keys that refer to it.
666				elsif ($pkey) { $fk_list = [ grep { $colnames->{$_} ~~ /^$pkey$/ } keys %$colnames ]; }
667
668				# If only FKT is given, then the result set contains all foreign keys in that table
669				# and the primary keys to which they refer.
670				elsif ($fkey) {
671				my @cols = @{$colnames->{$fk_table}};
672				shift @cols; # remove primary key
673
674				$pk_list = [];
675				foreach my $col (@cols) {
676				my $tbl = (first { $colnames->{$_}[0] eq $col } keys %$colnames) \|\| next;
677				push @$pk_list, $tbl;
678				}
679				}
680				else { return sponge_sth_loader($dbh, $type, $names, []); }
681
682				# main loop
683				foreach my $pt (@$pk_list) {
684				foreach my $ft (@$fk_list) {
685				my $key = $colnames->{$pt}[0]; # key links are named the same
686				push @dbi_data, [
687				# 0=PKTABLE_CAT PKTABLE_SCHEM PKTABLE_NAME PKCOLUMN_NAME FKTABLE_CAT FKTABLE_SCHEM FKTABLE_NAME FKCOLUMN_NAME
688				undef, undef, $pt, $key, undef, undef, $ft, $key,
689				# 8=KEY_SEQ UPDATE_RULE DELETE_RULE FK_NAME PK_NAME DEFERRABILITY UNIQUE_OR_PRIMARY
690				1, 3, 3, join('_', $ft, $key, 'fkey'), $key.'_pkey', 7, 'PRIMARY',
691				];
692				}
693				}
694
695				return sponge_sth_loader($dbh, $type, $names, \@dbi_data);
696				}
697
698				sub statistics_info {
699				my ($dbh, $catalog, $schema, $table, $unique_only, $quick) = @_;
700				my $type = 'STATISTICS_INFO';
701
702				my $cols = $dbh->{tree_columns}{names}{$table} \|\| return $dbh->set_err($DBI::stderr, "No such table name: $table", '42704');
703				my $pkey = $cols->[0];
704				my $rows = $dbh->{tree_cardinality}{$table};
705
706				return sponge_sth_loader($dbh, $type,
707				[qw(
708				TABLE_CAT TABLE_SCHEM TABLE_NAME NON_UNIQUE INDEX_QUALIFIER INDEX_NAME TYPE ORDINAL_POSITION
709				COLUMN_NAME ASC_OR_DESC CARDINALITY PAGES FILTER_CONDITION
710				)],
711				[
712				[
713				undef, undef, $table, 0, undef, undef, 'table', undef,
714				undef, undef, $rows, undef, undef
715				],
716				[
717				undef, undef, $table, 0, undef, $pkey.'_pkey', 'content', 1,
718				$pkey, 'A', $rows, undef, undef
719				],
720				],
721				);
722				}
723
724				sub sponge_sth_loader {
725				my ($dbh, $tbl_name, $names, $rows) = @_;
726
727				# (mostly a straight copy from DBI::DBD::SqlEngine)
728				my $dbh2 = $dbh->func("sql_sponge_driver");
729				my $sth = $dbh2->prepare(
730				$tbl_name,
731				{
732				rows => $rows \|\| [],
733				NAME => $names,
734				}
735				);
736				$sth or $dbh->set_err( $DBI::stderr, $dbh2->errstr, $dbh2->state );
737				return $sth;
738				}
739
740				sub type_info_all {
741				# We are basically just translating Perl variable types to SQL,
742				# though once everything has been flattened, it's basically just
743				# string and number.
744
745				# Perl's number size varies between 32/64-bit versions
746				my $nbits = $Config{ptrsize} * 16 - 11;
747
748				return [
749				{
750				TYPE_NAME => 0,
751				DATA_TYPE => 1,
752				COLUMN_SIZE => 2, # was PRECISION originally
753				LITERAL_PREFIX => 3,
754				LITERAL_SUFFIX => 4,
755				CREATE_PARAMS => 5,
756				NULLABLE => 6,
757				CASE_SENSITIVE => 7,
758				SEARCHABLE => 8,
759				UNSIGNED_ATTRIBUTE => 9,
760				FIXED_PREC_SCALE => 10, # was MONEY originally
761				AUTO_UNIQUE_VALUE => 11, # was AUTO_INCREMENT originally
762				LOCAL_TYPE_NAME => 12,
763				MINIMUM_SCALE => 13,
764				MAXIMUM_SCALE => 14,
765				SQL_DATA_TYPE => 15,
766				SQL_DATETIME_SUB => 16,
767				NUM_PREC_RADIX => 17,
768				INTERVAL_PRECISION => 18,
769				},
770				# Name DataType Max Literals Params Null Case Search Unsign Fixed Auto LocalTypeName M/M Scale SQLDataType DateTime_Sub Radix ItvPrec
771				[ "PID", DBI::SQL_INTEGER(), 32, undef, undef, undef, 0, 0, 3, 1, 1, 0, "PID", 0, 0, DBI::SQL_INTEGER(), undef, 2, undef],
772				[ "ID", DBI::SQL_INTEGER(), 32, undef, undef, undef, 1, 0, 3, 1, 1, 0, "ID", 0, 0, DBI::SQL_INTEGER(), undef, 2, undef],
773				[ "NUMBER", DBI::SQL_NUMERIC(), $nbits, undef, undef, undef, 1, 0, 3, 0, 0, 0, "Number", 0,$nbits, DBI::SQL_NUMERIC(), undef, 2, undef],
774				[ "STRING", DBI::SQL_VARCHAR(), 2**31, "'", "'", undef, 1, 1, 3, undef, undef, undef, "String", undef, undef, DBI::SQL_VARCHAR(), undef, undef, undef],
775				];
776				}
777
778				1;
779
780				##############################################################################
781				# DBD::TreeData::st
782
783				package # hide from PAUSE
784				DBD::TreeData::st; # ====== STATEMENT ======
785
786				our $imp_data_size = 0;
787				use DBD::AnyData;
788				use parent qw(-norequire DBD::AnyData::st); # no such file as ::st.pm
789
790				1;
791
792				##############################################################################
793				# DBD::TreeData::Statement
794
795				package # hide from PAUSE
796				DBD::TreeData::Statement; # ====== SqlEngine::Statement ======
797
798				our $imp_data_size = 0;
799				use DBD::AnyData;
800				use parent qw(-norequire DBD::AnyData::Statement); # no such file as ::Statement.pm
801
802				1;
803
804				##############################################################################
805				# DBD::TreeData::Table
806
807				package # hide from PAUSE
808				DBD::TreeData::Table; # ====== SqlEngine::Table ======
809
810				our $imp_data_size = 0;
811				use DBD::AnyData;
812				use parent qw(-norequire DBD::AnyData::Table); # no such file as ::Table.pm
813
814				1;
815
816				__END__
817				=pod
818
819				=head1 NAME
820
821				DBD::TreeData - DBI driver for any abstract hash/array tree
822
823				=head1 SYNOPSIS
824
825				use DBI;
826				use JSON::Any;
827				use LWP::Simple;
828
829				# Example JSON object
830				my $json = get 'http://maps.googleapis.com/maps/api/geocode/json?address=1600+Pennsylvania+Ave+NW,+20500&region=us&language=en&sensor=false';
831				my $obj = JSON::Any->jsonToObj($json);
832
833				my $dbh = DBI->connect('dbi:TreeData:', '', '', {
834				tree_table_name => 'geocode',
835				tree_data => $obj,
836				});
837
838				# Informational dump
839				use Data::Dump;
840				dd ($dbh->table_info->fetchall_arrayref);
841				dd (map { [ @{$_}[2 .. 6] ] } @{
842				$dbh->column_info('','','','')->fetchall_arrayref
843				});
844
845				# DBIC dump
846				use DBIx::Class::Schema::Loader 'make_schema_at';
847				make_schema_at(
848				'My::Schema', {
849				debug => 1,
850				dump_directory => './lib',
851				},
852				[ 'dbi:TreeData:geocode', '', '', { tree_data => $obj } ],
853				);
854
855				=head1 DESCRIPTION
856
857				DBD::TreeData provides a DBI driver to translate any sort of tree-based data set (encapsulated in a Perl object) into a flat set of tables,
858				complete with real SQL functionality. This module utilizes L<DBD::AnyData> to create the new tables, which uses L<SQL::Statement> to support
859				the SQL parsing. (Any caveats with those modules likely applies here.)
860
861				This module can be handy to translate JSON, XML, YAML, and many other tree formats to be used in class sets like L<DBIx::Class>. Unlike
862				L<DBD::AnyData>, the format of the data doesn't have to be pre-flattened, and will be spread out into multiple tables.
863
864				Also, this driver fully supports all of the C<<< *_info >>> methods, making it ideal to shove into modules like L<DBIx::Class::Schema::Loader>.
865				(The C<<< table_info >>> and C<<< column_info >>> filters use REs with beginE<sol>end bounds pre-set.)
866
867				=encoding utf8
868
869				=head1 CONNECT ATTRIBUTES
870
871				=head2 tree_data
872
873				The actual tree object. Of course, this attribute is required.
874
875				=head2 tree_table_name
876
877				The name of the starting table. Not required, but recommended. If not specified, defaults to 'tree_data', or the value of the driver
878				DSN string (after the C<<< dbi:TreeData: >>> part).
879
880				=head2 tree_debug
881
882				Boolean. Print debug information while translating the tree.
883
884				=head2 tree_rename_tables
885
886				Hashref of table names. If you don't like the name of an auto-created table, you can rename them while the database is being built. Within
887				the hashref, the keysE<sol>values are the oldE<sol>new names, respectively.
888
889				=head1 TRANSLATION BEHAVIOR
890
891				The tree translation into flat tables is done using a recursive descent algorithm. It starts with a check of the current node's reference
892				type, which dictates how it interprets the children. The goal is to create a fully L<4NF\|http://en.wikipedia.org/wiki/Fourth_normal_form>
893				database from the tree.
894
895				Arrays are interpreted as a list of rows, and typically get rolled up into "group" tables. Hashes are interpreted as a list of column names
896				and values. Non-references are considered values. Scalar refs and VStrings are de-referenced first. Other types of refs are processed as
897				best as possible, but the driver will complain. (Code ref blocks are currently NOT executed and discarded.)
898
899				Nested arrays will create nested group tables with different suffixes, like C<<< matrix >>>, C<<< cube >>>, and C<<< hypercube >>>. If it has to go beyond
900				that (and you really shouldn't have structures like that), it'll start complaining (sarcastically).
901
902				In almost all cases, the table name is derived from a previous key. Table names also use L<Lingua::EN::Inflect::Phrase> to create
903				pluralized names. Primary IDs will have singular names with a C<<< _id >>> suffix.
904
905				For example, this tree:
906
907				address_components => [
908				{
909				long_name => 1600,
910				short_name => 1600,
911				types => [ "street_number" ]
912				},
913				{
914				long_name => "President's Park",
915				short_name => "President's Park",
916				types => [ "establishment" ]
917				},
918				{
919				long_name => "Pennsylvania Avenue Northwest",
920				short_name => "Pennsylvania Ave NW",
921				types => [ "route" ]
922				},
923				{
924				long_name => "Washington",
925				short_name => "Washington",
926				types => [ "locality", "political" ]
927				},
928				... etc ...,
929				],
930
931				Would create the following tables:
932
933				<main_table>
934				address_component_groups
935				address_components
936				type_groups
937				types
938
939				In this case, C<<< address_components >>> has most of the columns and data, but it also has a tie to an ID of C<<< address_component_groups >>>.
940
941				Since C<<< types >>> points to an array, it will have its own dedicated table. That table would have data like:
942
943				type_id â”‚ type
944				â•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•
945				1 â”‚ street_number
946				2 â”‚ establishment
947				3 â”‚ route
948				4 â”‚ locality
949				5 â”‚ political
950				... â”‚ ...
951
952				Most of the C<<< type_groups >>> table would be a 1:1 match. However, the last component entry has more than one value in the C<<< types >>> array, so the
953				C<<< type_group_id >>> associated to that component would have multiple entries (4 & 5). Duplicate values are also tracked, so that IDs are reused.
954
955				=head1 CAVEATS
956
957				=head2 DBI E<sol> DBD::AnyData Conflict
958
959				As of the time of this writing, the latest version of L<DBI> (1.623) and the latest version of L<DBD::AnyData> (0.110) do not work together.
960				Since TreeData relies on L<DBD::AnyData> for table creation, you will need to downgrade to L<DBI> 1.622 to use this driver, until a new
961				version of L<DBD::AnyData> comes out.
962
963				=head1 AVAILABILITY
964
965				The project homepage is L<https://github.com/SineSwiper/DBD-TreeData/wiki>.
966
967				The latest version of this module is available from the Comprehensive Perl
968				Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
969				site near you, or see L<https://metacpan.org/module/DBD::TreeData/>.
970
971				=for :stopwords cpan testmatrix url annocpan anno bugtracker rt cpants kwalitee diff irc mailto metadata placeholders metacpan
972
973				=head1 SUPPORT
974
975				=head2 Internet Relay Chat
976
977				You can get live help by using IRC ( Internet Relay Chat ). If you don't know what IRC is,
978				please read this excellent guide: L<http://en.wikipedia.org/wiki/Internet_Relay_Chat>. Please
979				be courteous and patient when talking to us, as we might be busy or sleeping! You can join
980				those networks/channels and get help:
981
982				=over 4
983
984				=item *
985
986				irc.perl.org
987
988				You can connect to the server at 'irc.perl.org' and join this channel: #dbi then talk to this person for help: SineSwiper.
989
990				=back
991
992				=head2 Bugs / Feature Requests
993
994				Please report any bugs or feature requests via L<L<https://github.com/SineSwiper/DBD-TreeData/issues>\|GitHub>.
995
996				=head1 AUTHOR
997
998				Brendan Byrd <BBYRD@CPAN.org>
999
1000				=head1 COPYRIGHT AND LICENSE
1001
1002				This software is Copyright (c) 2013 by Brendan Byrd.
1003
1004				This is free software, licensed under:
1005
1006				The Artistic License 2.0 (GPL Compatible)
1007
1008				=cut
1009