File Coverage

lib/DBD/TreeData.pm
Criterion Covered Total %
statement 6 6 100.0
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 8 8 100.0


line stmt bran cond sub pod time code
##############################################################################
# DBD::TreeData Module                                                       #
# E-mail: Brendan Byrd <Perl@resonatorsoft.org>                              #
##############################################################################

##############################################################################
# DBD::TreeData

package DBD::TreeData;

use sanity;

use parent qw(DBD::AnyData);

# ABSTRACT: DBI driver for any abstract hash/array tree

our $VERSION = '0.90'; # VERSION

# Standard DBI driver package globals (per the DBI::DBD driver-writing docs)
our $drh = undef; # holds driver handle once initialized
our $err = 0; # DBI::err
our $errstr = ""; # DBI::errstr
our $sqlstate = ""; # DBI::state

# Guard so the install_method() calls in driver() only ever run once
our $methods_already_installed = 0;
24              
# Create (or return the cached) driver handle for DBD::TreeData.
#
# DBD::AnyData's driver() doesn't pass over our $attr stuff, so the handle is
# built directly via DBI::DBD::SqlEngine::driver.  On first creation, the
# driver- and database-level private methods are registered with DBI.
sub driver {
  return $drh if $drh; # already created - return same one
  my ($class, $attr) = @_;

  $drh = $class->DBI::DBD::SqlEngine::driver({
    'Name' => 'TreeData',
    'Version' => $VERSION,
    'Err' => \$DBD::TreeData::err,
    'Errstr' => \$DBD::TreeData::errstr,
    # FIX: previously pointed at $DBD::TreeData::state, which is never
    # declared anywhere in this package; the declared DBI::state holder is
    # $sqlstate (see the "our $sqlstate" declaration above).
    'State' => \$DBD::TreeData::sqlstate,
    'Attribution' => 'DBD::TreeData by Brendan Byrd',
  }) || return undef;

  unless ( $methods_already_installed++ ) {
    DBD::TreeData::dr->install_method('tree_process_hash_tree');

    # because of the DBD::AnyData driver by-pass, we have to do its dirty work
    DBD::TreeData::db->install_method('ad_import');
    DBD::TreeData::db->install_method('ad_catalog');
    DBD::TreeData::db->install_method('ad_convert');
    DBD::TreeData::db->install_method('ad_export');
    DBD::TreeData::db->install_method('ad_clear');
    DBD::TreeData::db->install_method('ad_dump');
  }

  return $drh;
}
52              
# Thread-clone hook: a cloned interpreter thread must not share the parent's
# driver handle, so drop the cached one and let driver() rebuild it on demand.
sub CLONE {
  undef $drh;
}

1;
58              
59              
##############################################################################
# DBD::TreeData::dr

package # hide from PAUSE
   DBD::TreeData::dr; # ====== DRIVER ======

use sanity 0.94;
use DBI 1.619; # first version with tree_ prefix
use DBD::AnyData 0.110;
use parent qw(-norequire DBD::AnyData::dr); # no such file as ::dr.pm

use List::AllUtils qw(none any uniq firstidx indexes);
use Scalar::Util qw(reftype looks_like_number);
use Lingua::EN::Inflect::Phrase qw(to_PL to_S);
use Data::Dumper;

use subs qw(foundin notin col2word print_debug);

# Package-level state shared across connect() and tree_process_hash_tree():
our @dbh;       # every $dbh handed out (drained by disconnect_all)
our $debug = 0; # debug verbosity (set from tree_debug / TraceLevel in connect)
our $VERSION = $DBD::TreeData::VERSION;
our $imp_data_size = 0;
# Scratch space used while a tree is being flattened; cleared by connect()
our ($tables, $columns, $ids, $types, $can_null);
83              
# DBI connect implementation.
#
# Accepts either a traditional "attr=value;..." DSN or a bare table name.
# DSN attributes are lowercased, normalized to a "tree_" prefix, and merged
# into $attr.  The supplied $attr->{tree_data} structure is then flattened
# into relational tables via tree_process_hash_tree, the resulting metadata
# is stashed in $attr->{tree_columns} / $attr->{tree_cardinality}, and each
# generated table is imported into a DBD::AnyData handle (via 'ad_import'),
# which services all subsequent queries.
sub connect {
  my ($drh, $dr_dsn, $user, $auth, $attr) = @_;

  if ($dr_dsn =~ /\;|\=/) { # is DSN notation
    foreach my $var (split /\;/, $dr_dsn) {
      my ($attr_name, $attr_value) = split(/\=/, $var, 2);
      return $drh->set_err($DBI::stderr, "Can't parse DSN part '$var'", '08001') unless (defined $attr_value);

      $attr_name = lc($attr_name);
      $attr_name = 'tree_'.$attr_name unless ($attr_name =~ /^tree_/o);
      $attr->{$attr_name} = $attr_value;
    }
  }
  else {
    # a bare DSN is just a table name
    $attr->{tree_table_name} ||= $dr_dsn;
  }
  $attr->{tree_table_name} ||= 'tree_data';
  $debug = $attr->{tree_debug} || $attr->{TraceLevel} || $drh->{TraceLevel};

  # Run through the tree conversion (populates the package-level scratch vars)
  $attr->{tree_data} or return $drh->set_err($DBI::stderr, "Data! Data! Data! I cannot make bricks without clay!", '08004');
  $drh->tree_process_hash_tree($attr->{tree_table_name}, $attr->{tree_data}, 0);

  # remove global data and keep the local tree in $tref
  my $tref = $tables;
  $attr->{tree_columns} = {
    names => { map { $_ => $tref->{$_}{columns} } keys %$tref },
    types => $types,
    nulls => $can_null,
  };
  $attr->{tree_cardinality} = $ids->{table};

  ### TODO: Clean this up ###
  undef $tables;
  undef $columns;
  undef $ids;
  undef $types;
  undef $can_null;

  # Add into our $dbh object, using AnyData's methods
  my ($outer_dbh, $dbh) = DBI::_new_dbh($drh, {
    Name => $attr->{tree_table_name},
  }, $attr);
  $dbh->func( 0, "init_default_attributes" ); # make sure we get all of the right sql_* vars in place
  $dbh->func("init_done");
  $dbh->STORE('Active', 1);

  ### TODO: Need error checking for tree_rename_tables ###

  foreach my $table (keys %$tref) {
    # honor any user-requested table renames
    my $table_name = exists $attr->{'tree_rename_tables'} ?
      ($attr->{'tree_rename_tables'}{$table} || $table) : $table;

    $dbh->func($table_name, 'ARRAY', [@{$tref->{$table}{data}}], {
      col_names => join(',', @{$tref->{$table}{columns}}),
    }, 'ad_import');
  }

  # Using the DBD::AnyData $dbh for the rest of the work
  push @dbh, $dbh;
  return $outer_dbh;
}
146              
# Report the data sources available from this driver.  A single,
# parameter-free DSN covers everything, so there is only one entry.
sub data_sources {
  my $dsn = 'dbi:TreeData:';
  return $dsn;
}
151              
# Disconnect every database handle this driver has created, draining the
# package-level @dbh cache in the process.  Always reports success.
sub disconnect_all {
  for my $handle (splice @dbh) {
    $handle->disconnect if ref $handle;
  }
  return 1;
}
158              
# Recursively flatten an arbitrary hash/array tree into relational tables.
#
# Arguments: ($drh, $column_name, $subtree, $depth).
# Returns a two-element list ($column_name => $id_or_value) that the caller
# (usually this sub itself, one level up) stores as one column of its row.
#
# Side effects: populates the package globals $tables, $columns, $ids,
# $types and $can_null as tables/rows are discovered.  Recursion depth is
# capped at 100.  Identical sub-trees are de-duplicated by comparing
# Data::Dumper serializations stored in $ids->{trees}.
#
# NOTE(review): built on given/when, which is experimental/deprecated on
# modern perls — presumably enabled via 'use sanity' above; confirm before
# upgrading the interpreter.
sub tree_process_hash_tree ($$$;$) {
  my ($drh, $col, $tree, $depth) = @_;
  my ($col_id, $serialized_tree);

  if ($depth > 100) {
    $drh->set_err(0, "Too deep down the rabbit hole; crawling back...");
    return $col => undef;
  }

  print_debug($depth, "$depth - $col => ".(reftype($tree) || substr($tree, 0, 30)));

  state $id_names = ['group', 'matrix', 'cube', 'hypercube']; # if you go past here, you've gone too far...

  given (reftype $tree) {
    # Common code for both HASHs and ARRAYs
    when (/HASH|ARRAY/) {
      $col = to_S(col2word($col));
      $col =~ s/ /_/g;
      $col_id = $col.'_id';

      # compare serialized trees for the same IDs
      if ($depth) { # no point if this is the first node
        $serialized_tree = Data::Dumper->new([$tree], ['*XXXX'])->
          # (options for consistency, for exact matches)
          Reset->Sortkeys(1)->
          # (options designed to use the smallest possible footprint, as these can get rather large)
          Indent(0)->Quotekeys(0)->Pair('=')->Dump;

        # cycle through possible ID names
        my @list = ('', (reftype $tree eq 'ARRAY') ? @$id_names : ());
        foreach my $suffix (@list) {
          my $id_name = $col.($suffix ? '_'.$suffix : '').'_id';
          my $tree = $serialized_tree;
          $tree =~ s/^(\W{1,2})XXXX/$1$id_name/;

          # already exists, makes this easier
          my $id = $ids->{trees}{$tree};
          if ($id) {
            print_debug($depth+1, "TREE <=== Dumper match ID ".join(' => ', split(/\|/, $id)));
            return split(/\|/, $id);
          }
        }
      }
      continue;
    }
    # HASHs control the main tables, providing column names and data for the rows
    # Table = $col (plural)
    # ID = $col.'_id'
    when ('HASH') {
      # parse out a table name (with plural form)
      my $table_name = $depth ? to_PL($col) : $col;
      $table_name =~ s/ /_/g;

      # now run through the columns and data (with recursive loop goodness)
      my %data = map {
        my ($dat, $id) = ($$tree{$_}, $_);
        # clean up the column names
        $id = col2word($id);
        $id =~ s/ /_/g;
        $drh->tree_process_hash_tree($id => $dat, $depth + 1);
      } keys %$tree;
      ### FIXME: don't forget about undef in keys ###

      # check the new column names to see if we've seen this table before
      my @cols = ($col_id, sort keys %data); # new ID column first
      my $col_key = join('|', @cols);

      if ($columns->{$col_key}) { # known table
        $table_name = $columns->{$col_key};

        print_debug($depth+1, "HASH ===> Found known table '$table_name'");
      }
      elsif ($tables->{$table_name}) { # errr, known table, but not with this column structure
        my $t;
        foreach my $j ('', 2 .. 200) { # loop through a bunch of potential table names
          my $tname = $table_name.$j;

          if ($t = $tables->{$tname}) {
            my @tcols = @{$t->{columns}};
            my @ucols = uniq(@cols, @tcols);
            # have to be the same ID columns && need to have at least one field in common
            # (remove keys while we're at it)
            if (shift(@tcols) eq shift(@cols) && uniq(@cols, @tcols) < (@tcols + @cols)) {
              my @extra_cols = notin(\@tcols, \@cols);

              # new table has extra columns to add
              if (@extra_cols) {
                # add new column names and resort
                my @old_cols = @{$t->{columns}};
                my @new_cols = ($col_id, sort(@tcols, @extra_cols));
                my @diff_idx = grep { $old_cols[$_] ne $new_cols[$_] } (0 .. (@new_cols - 1));
                $t->{columns} = \@new_cols;

                unless ($diff_idx[0] > @{$t->{columns}}-1) {
                  # well, the new columns aren't on the end, so old data needs to be shuffled
                  for (my $l = 0; $l < @{$t->{data}}; $l++) {
                    my @data = @{$t->{data}[$l]};
                    my %data = map { $old_cols[$_] => $data[$_] } (0 .. (@data - 1)); # change to hash
                    @data = map { $data{$_} } @new_cols; # change to array
                    $t->{data}[$l] = \@data;
                  }
                }

                # remove the old column key and replace with a new one
                delete $columns->{ join('|', @old_cols) };
              }

              # if the new table is missing certain columns, they will insert undefs as needed naturally below
              # however, nullability checks might be in order
              my @missing_cols = notin(\@cols, \@tcols);
              $can_null->{$_} = 1 for (@missing_cols, @extra_cols);

              print_debug($depth+1, "HASH ===> Found known table with different columns '$table_name'");
              last;
            }

            # wrong table to use; try next name
            next;
          }
          else { # just treat this as a new table, then
            $drh->set_err(0, "Found a table with a dupe name, but totally different columns; calling it '$tname'...") if ($j);
            $table_name = $tname;
            $tables->{$table_name} = $t = {
              columns => [@cols],
              data => [],
            };

            print_debug($depth+1, "HASH ===> Creating new table '$table_name' because of conflicting columns");
            last;
          }
        }

        $col_key = join('|', @{$t->{columns}});
        $columns->{$col_key} = $table_name;
      }
      else { # new table
        $tables->{$table_name} = {
          columns => [@cols],
          data => [],
        };
        $columns->{$col_key} = $table_name;

        print_debug($depth+1, "HASH ===> Creating new table '$table_name'");
      }

      # Add new row
      my $t = $tables->{$table_name};
      my $id = ++($ids->{table}{$table_name});
      $serialized_tree =~ s/^(\W{1,2})XXXX/$1$col_id/;
      $ids->{trees}{$serialized_tree} = $col_id.'|'.$id;
      push(@{$t->{data}}, [ $id, map { $data{$_} } grep { $_ ne $col_id } @{$t->{columns}} ]);

      # Since we're done with this table, send back the col_id and id#
      print_debug($depth+1, "HASH <=== $col_id => $id");
      $types->{$col_id} = 'ID';
      return $col_id => $id;
    }
    # ARRAYs provide ID grouping tables, capturing the individual rows in a group
    # These are going to be two-column tables with two different IDs
    # Table = $col.'_groups' (plural)
    # ID = $col.(group|matrix|cube|etc.).'_id'
    when ('ARRAY') {
      # Pass the data on down first (ARRAY of ARRAYS to prevent de-duplication of keys)
      my @data = map {
        my $dat = $_;
        [ $drh->tree_process_hash_tree($col => $dat, $depth + 1) ]
      } @$tree;

      # Okay, we could end up with several different scenarios:

      # A. All items have the same column name (as a ID)
      # B. All items appear to be some form of data
      # C. A mixture of IDs and data (scary!)

      # Process both groups individually (and hope for the best)
      my @id_cols = grep { $data[$_]->[0] =~ /_id$/; } (0 .. (@data - 1));
      my @data_cols = grep { $data[$_]->[0] !~ /_id$/; } (0 .. (@data - 1));
      @id_cols = () unless ($depth); # skip any group ID tables if this is the very first node

      $drh->set_err(0, "Inconsistant sets of data within an array near '$col'; going to process it as best as possible...") if (@id_cols && @data_cols);

      # Items of IDs
      ### TODO: Clean this up; the logic is a bit of a mess... ###
      my (@max_id, @group_id);
      foreach my $ii (@id_cols, @data_cols) {
        # In all cases, there will be two tables to populate: a group/id table, and a id/data (or id/id) table
        my ($icol, $item) = @{$data[$ii]};
        my $is_id = ($icol =~ /_id$/i);

        # IDs are singular; table names are plural
        my $strip = to_S(col2word($icol));
        $icol = $strip;
        $icol =~ s/ /_/g;
        $icol .= '_id' if ($is_id);

        # Process group ID names
        # ncol = N+1, icol = N (as in _group_id => _id, or _matrix_id => _group_id)
        my $ncol = $icol;
        $ncol =~ s/_id$//i;
        my $i = firstidx { $ncol =~ s/(?<=_)$_$//; } @$id_names; # that's underscore + $_ + EOL
        # $i = -1 if not found, which then ++$i = 0 and id_names = _group

        if (++$i > 3) { # start whining here
          $ncol .= '_hypercube_'.$id_names->[$i -= 4];

          $drh->set_err(0, "Seriously?! We're using ridiculous names like '$ncol"."_id' at this point...");
        }
        else { $ncol .= '_'.$id_names->[$i]; }
        $i++; # prevent -1 on @_id arrays

        # Parse out a group table name (with plural form)
        my $grp_table_name = to_S(col2word($ncol));
        $grp_table_name = to_PL($grp_table_name); # like blah_groups
        $grp_table_name =~ s/ /_/g;

        $icol .= '_id' unless ($icol =~ /_id$/);
        $ncol .= '_id' unless ($ncol =~ /_id$/);
        $max_id[$i] = $ncol;
        print_debug($depth+1, "ARRAY ===> max_id = $i/$ncol");

        # Create new group table (if it doesn't already exist)
        my $t;
        if ($depth) { # skip any group ID tables if this is the very first node
          unless ($tables->{$grp_table_name}) {
            ### FIXME: Assuming that table doesn't exist with the same columns ###
            print_debug($depth+1, "ARRAY ===> Creating new group table '$grp_table_name'");

            $tables->{$grp_table_name} = {
              columns => [ $ncol, $icol ],
              data => [],
            };
          }
          $t = $tables->{$grp_table_name};
          my $col_key = join('|', @{$t->{columns}});
          $columns->{$col_key} = $grp_table_name;
        }

        # Add new row
        $group_id[$i] = ++($ids->{table}{$grp_table_name}) unless ($group_id[$i]); # only increment once (per group type)
        if ($is_id) { # ID column: $item = ID, and this goes in a group table (id/data table already processed)
          print_debug($depth+1, "ARRAY ===> $grp_table_name => [ $group_id[$i], $item ] (new ID row for an group table)");
          push(@{$t->{data}}, [ $group_id[$i], $item ]);
        }
        else { # data column: $item = data, and we process both tables
          my $itbl_name = to_PL($strip); # like blahs
          $itbl_name =~ s/ /_/g;
          my $data_col = $strip;
          $data_col =~ s/ /_/g;

          # Create new id table (if it doesn't already exist)
          unless ($tables->{$itbl_name}) {
            print_debug($depth+1, "ARRAY ===> Creating new ID table '$itbl_name'");

            $tables->{$itbl_name} = {
              columns => [ $icol, $data_col ],
              data => [],
            };
          }
          my $n = $tables->{$itbl_name};
          my $col_key = join('|', @{$n->{columns}});
          $columns->{$col_key} = $itbl_name;
          $types->{$icol} = 'ID';

          $max_id[$i-1] = $icol;
          print_debug($depth+1, "ARRAY ===> max_id = ".int($i-1)."/$icol");
          ### FIXME: Assuming that table doesn't exist with the same columns ###

          # First, check serial tree with single value
          my $stree = Data::Dumper->new([$item], ['*'.$icol])->Reset->Indent(0)->Dump;
          if ($ids->{trees}{$stree} && $depth) {
            # Add new group row (with proper col_id)
            my $id = (split(/\|/, $ids->{trees}{$stree}))[1];
            print_debug($depth+1, "ARRAY ===> $grp_table_name => [ $group_id[$i], $id ] (serial tree found)");
            push(@{$t->{data}}, [ $group_id[$i], $id ] );

            # (no need to add into main table; already exists)
          }
          else {
            # Add new group row (with proper col_id)
            my $id = ++($ids->{table}{$itbl_name});
            if ($depth) {
              print_debug($depth+1, "ARRAY ===> $grp_table_name => [ $group_id[$i], $id ] (new group row)");
              push(@{$t->{data}}, [ $group_id[$i], $id ]);
            }

            # Add new id row
            # NOTE(review): $icol is guaranteed to end in '_id' by this point,
            # so this stores a "..._id_id" column name — looks suspicious;
            # confirm whether plain $icol was intended.
            $ids->{trees}{$stree} = $icol.'_id|'.$id;
            print_debug($depth+2, "ARRAY ===> $itbl_name => [ $id, $item ] (new ID/data row)");
            push(@{$n->{data}}, [ $id, $item ]);
          }
        }
      }

      # Pass back an ID
      my ($gid_col, $gid) = (pop(@max_id) || $col, pop(@group_id)); # undef @max_id might happen with an empty array

      print_debug($depth+1, "ARRAY <=== $gid_col => $gid");
      $serialized_tree =~ s/^(\W{1,2})XXXX/$1$gid_col/;
      $ids->{trees}{$serialized_tree} = $gid_col.'|'.$gid;
      $types->{$gid_col} = 'ID';
      return $gid_col => $gid;
    }
    # An actual scalar; return back the proper column name and data
    # NOTE(review): '' || undef evaluates to undef, so this arm matches only
    # undef — which is what reftype() returns for plain scalars anyway.
    when ('' || undef) {
      return type_detect($col, $tree);
    }
    # De-reference
    when (/SCALAR|VSTRING/) {
      return type_detect($col, $$tree);
    }
    # Warn and de-reference
    when (/Regexp|LVALUE/i) {
      $drh->set_err(0, "Found a ".(reftype $tree)."; just going to treat this like a SCALAR...");
      return type_detect($col, $$tree);
    }
    # Warn and de-reference (for further examination)
    when ('REF') {
      $drh->set_err(0, "Found a REF; going to dive in the rabbit hole...");
      return $drh->tree_process_hash_tree($col => $$tree, $depth + 1);
    }
    # Warn and de-reference (for further examination)
    when ('GLOB') {
      foreach my $t (qw(Regexp VSTRING IO FORMAT LVALUE GLOB REF CODE HASH ARRAY SCALAR)) { # scalar last, since a ref is still a scalar
        if (defined *$$tree{$t}) {
          $drh->set_err(0, "Found a GLOB (which turn out to be a $t); going to dive in the rabbit hole...");
          return $drh->tree_process_hash_tree($col => *$$tree{$t}, $depth + 1);
        }
      }
      $drh->set_err(0, "Found a GLOB, but it didn't point to anything...");
      return $col => undef;
    }
    # Warn and throw away
    when ('CODE') {
      ### TODO: Warn immediately, eval block with timer to use as output, then continue ###
      ### Definitely need a switch, though ###
      $drh->set_err(0, "Found a CODE block; not going to even touch this one...");
      return $col => undef;
    }
    default {
      $drh->set_err(0, "Found a ".(reftype $tree)."; WTF is this? Can't use this at all...");
      return $col => undef;
    }
  }

  die "WTF?! Perl broke my given/when! Alert the Pumpking!!!";
}
505              
# Return the elements of @B that also appear (by string equality) in @A.
sub foundin (\@\@) {
  my ($A, $B) = @_;
  my %in_a = map { $_ => 1 } @$A;
  return grep { $in_a{$_} } @$B;
}
511              
# Return the elements of @B that do not appear (by string equality) in @A.
sub notin (\@\@) {
  my ($A, $B) = @_;
  my %in_a = map { $_ => 1 } @$A;
  return grep { !$in_a{$_} } @$B;
}
517              
# Normalize a column name into a lowercase, space-separated word phrase:
# runs of non-word characters (and underscores) collapse to single spaces,
# leading whitespace is trimmed, and a trailing " id" suffix is dropped.
sub col2word ($) {
  my $name = lc $_[0];
  $name =~ s{[\W_]+}{ }g;
  $name =~ s{^\s+|\s+(?:id)?$}{}g;
  return $name;
}
525              
# Derive the normalized column name for a scalar value and record its type.
#
# Returns the two-element list ($column_name => $value).  Side effects on
# the package globals:
#   - marks the column nullable in %$can_null when $val is undef
#   - records 'NUMBER' or 'STRING' in %$types; any non-numeric value seen
#     for a column permanently demotes it to STRING
sub type_detect ($;$) {
  my ($col, $val) = @_;
  my $is_num = looks_like_number($val);

  $col = to_S(col2word($col)); # if we're at this point, it's a single item
  $col =~ s/ /_/g;
  unless (defined $val) {
    # FIX: was $can_null->{$_}, which keyed the nullability map off the
    # caller's topic variable ($_ from the given/when in
    # tree_process_hash_tree) instead of the column being processed.
    $can_null->{$col} = 1;
    return $col => undef;
  }

  $types->{$col} = 'STRING' if (!$is_num && $types->{$col}); # any non-number data invalidates the NUMBER type
  $types->{$col} ||= $is_num ? 'NUMBER' : 'STRING';
  return $col => $val;
}
541              
# Emit an indentation-prefixed debug line; a silent no-op unless the
# package-level $debug flag is set.
sub print_debug ($$) {
  my ($indent, $message) = @_;
  return unless $debug;

  print ' ' x $indent;
  say $message;
}
549              
550             1;
551              
##############################################################################
# DBD::TreeData::db

package # hide from PAUSE
   DBD::TreeData::db; # ====== DATABASE ======

our $imp_data_size = 0;
use DBD::AnyData;
use parent qw(-norequire DBD::AnyData::db); # no such file as ::db.pm

use Config;
use List::AllUtils qw(first);

# Overriding the package here to add some *_info methods

### TODO: get_info ###
568              
# DBI table_info implementation: list the available tables, optionally
# restricted by an (anchored, case-insensitive) table-name pattern.
sub table_info {
  my ($dbh, $catalog, $schema, $table) = @_;
  my $names = [qw( TABLE_QUALIFIER TABLE_OWNER TABLE_NAME TABLE_TYPE REMARKS )];

  # anchor the filter so it only matches whole table names
  $table = '^'.$table.'$' if length $table;

  my @rows = grep { !$table || $_->[2] =~ /$table/i } $dbh->func("get_avail_tables");
  return sponge_sth_loader($dbh, 'TABLE_INFO', $names, \@rows);
}
579              
# DBI column_info implementation: emit one 36-column metadata row per
# matching column, with type and nullability details pulled from the
# tree_columns metadata captured at connect time.
sub column_info {
  my ($dbh, $catalog, $schema, $table, $column) = @_;
  my $type = 'COLUMN_INFO';
  my $names = [qw(
    TABLE_CAT TABLE_SCHEM TABLE_NAME COLUMN_NAME DATA_TYPE TYPE_NAME COLUMN_SIZE BUFFER_LENGTH DECIMAL_DIGITS
    NUM_PREC_RADIX NULLABLE REMARKS COLUMN_DEF SQL_DATA_TYPE SQL_DATETIME_SUB CHAR_OCTET_LENGTH ORDINAL_POSITION IS_NULLABLE
    CHAR_SET_CAT CHAR_SET_SCHEM CHAR_SET_NAME COLLATION_CAT COLLATION_SCHEM COLLATION_NAME UDT_CAT UDT_SCHEM UDT_NAME
    DOMAIN_CAT DOMAIN_SCHEM DOMAIN_NAME SCOPE_CAT SCOPE_SCHEM SCOPE_NAME MAX_CARDINALITY DTD_IDENTIFIER IS_SELF_REF
  )];

  # anchor the optional filters so they match whole names only
  $table = '^'.$table .'$' if length $table;
  $column = '^'.$column.'$' if length $column;

  my @tables = $dbh->func("get_avail_tables");
  my @col_rows = ();
  my $tc = $dbh->{tree_columns};

  # De-mangle types
  my $types = $dbh->type_info_all;
  shift(@$types); # helper "column key" row
  my %types = map { $_->[0] => $_ } @$types;

  foreach my $tbl (sort { $a->[2] cmp $b->[2] } @tables) { # ->[2] = table name
    next unless ($tbl);
    next unless (!$table || $tbl->[2] =~ /$table/i);

    my $id = 0;
    foreach my $col ( @{$tc->{names}{$tbl->[2]}} ) {
      next unless (!$column || $col =~ /$column/i);
      # Ordinal 0 is always the primary key, reported with the 'PID' type;
      # all other columns use the type detected during tree processing
      my $ti = $types{ $id ? uc($tc->{types}{$col}) : 'PID' };
      my $can_null = $id && $tc->{nulls}{$col} || 0;

      my $col_row = [
        # 0=TABLE_CAT TABLE_SCHEM TABLE_NAME COLUMN_NAME DATA_TYPE TYPE_NAME COLUMN_SIZE BUFFER_LENGTH DECIMAL_DIGITS
        undef, undef, $tbl->[2], $col, $ti->[0], $ti->[1], $ti->[2], undef, $ti->[17] ? int($ti->[14] * log($ti->[17])/log(10)) : undef, # log(r^l) = l * log(r)
        # 9=NUM_PREC_RADIX NULLABLE REMARKS COLUMN_DEF SQL_DATA_TYPE SQL_DATETIME_SUB CHAR_OCTET_LENGTH ORDINAL_POSITION IS_NULLABLE
        $ti->[17], $can_null, undef, undef, $ti->[15], $ti->[16], $ti->[17] ? undef : $ti->[2], $id, $can_null ? 'YES' : 'NO',
        # 18=CHAR_SET_CAT CHAR_SET_SCHEM CHAR_SET_NAME COLLATION_CAT COLLATION_SCHEM COLLATION_NAME UDT_CAT UDT_SCHEM UDT_NAME
        undef, undef, undef, undef, undef, undef, undef, undef, undef,
        # DOMAIN_CAT DOMAIN_SCHEM DOMAIN_NAME SCOPE_CAT SCOPE_SCHEM SCOPE_NAME MAX_CARDINALITY DTD_IDENTIFIER IS_SELF_REF
        undef, undef, undef, undef, undef, undef, undef, undef, undef,
      ];

      push @col_rows, $col_row;
      $id++;
    }
  }

  return sponge_sth_loader($dbh, $type, $names, \@col_rows);
}
630              
# DBI primary_key_info implementation: by construction, the first column of
# every generated table is its primary key.
sub primary_key_info {
  my ($dbh, $catalog, $schema, $table) = @_;
  my $type = 'PRIMARY_KEY_INFO';

  my $cols = $dbh->{tree_columns}{names}{$table}
    or return $dbh->set_err($DBI::stderr, "No such table name: $table", '42704');
  my $pkey = $cols->[0];

  my $names = [qw(TABLE_CAT TABLE_SCHEM TABLE_NAME COLUMN_NAME KEY_SEQ PK_NAME)];
  my $rows  = [ [ undef, undef, $table, $pkey, 1, $pkey.'_pkey' ] ];
  return sponge_sth_loader($dbh, $type, $names, $rows);
}
643              
# DBI foreign_key_info implementation.
#
# Foreign keys are implied by column naming: a column that is the primary
# (first) column of another table links the two.  Depending on which of
# PKT/FKT are supplied:
#   * both - the foreign key in FKT referring to PKT's primary key, if any
#   * PKT  - PKT's primary key and all tables whose columns refer to it
#   * FKT  - all foreign keys in FKT and the primary keys they refer to
#   * none - an empty result set
sub foreign_key_info {
  my ($dbh, $pk_catalog, $pk_schema, $pk_table, $fk_catalog, $fk_schema, $fk_table) = @_;
  my $type = 'FOREIGN_KEY_INFO';
  my $names = [qw(
    PKTABLE_CAT PKTABLE_SCHEM PKTABLE_NAME PKCOLUMN_NAME FKTABLE_CAT FKTABLE_SCHEM FKTABLE_NAME FKCOLUMN_NAME
    KEY_SEQ UPDATE_RULE DELETE_RULE FK_NAME PK_NAME DEFERRABILITY UNIQUE_OR_PRIMARY
  )];

  my $colnames = $dbh->{tree_columns}{names};
  my $pkey = $pk_table && $colnames->{$pk_table} ? $colnames->{$pk_table}[0] : undef;
  my $fkey = $fk_table && $colnames->{$fk_table} ? $colnames->{$fk_table}[0] : undef;
  my ($pk_list, $fk_list) = ([$pk_table], [$fk_table]);
  my @dbi_data;

  # If both PKT and FKT are given, the function returns the foreign key, if any,
  # in table FKT that refers to the primary (unique) key of table PKT.
  if ($pkey && $fkey) {
    # FIX: was «first { $_ eq $pkey } $colnames->{$fk_table}», which iterated
    # over the single array *reference* rather than the column names inside it
    # (so it could never match).
    $fkey = first { $_ eq $pkey } @{ $colnames->{$fk_table} }; # pkey or bust
  }

  # If only PKT is given, then the result set contains the primary key of that table
  # and all foreign keys that refer to it.
  elsif ($pkey) {
    # FIX: replaced the deprecated smartmatch («$colnames->{$_} ~~ /^$pkey$/»)
    # with an explicit scan of each table's column list.
    $fk_list = [ grep {
      my @cols = @{ $colnames->{$_} };
      grep { $_ eq $pkey } @cols;
    } keys %$colnames ];
  }

  # If only FKT is given, then the result set contains all foreign keys in that table
  # and the primary keys to which they refer.
  elsif ($fkey) {
    my @cols = @{$colnames->{$fk_table}};
    shift @cols; # remove primary key

    $pk_list = [];
    foreach my $col (@cols) {
      my $tbl = (first { $colnames->{$_}[0] eq $col } keys %$colnames) || next;
      push @$pk_list, $tbl;
    }
  }
  else { return sponge_sth_loader($dbh, $type, $names, []); }

  # main loop
  foreach my $pt (@$pk_list) {
    foreach my $ft (@$fk_list) {
      my $key = $colnames->{$pt}[0]; # key links are named the same
      push @dbi_data, [
        # 0=PKTABLE_CAT PKTABLE_SCHEM PKTABLE_NAME PKCOLUMN_NAME FKTABLE_CAT FKTABLE_SCHEM FKTABLE_NAME FKCOLUMN_NAME
        undef, undef, $pt, $key, undef, undef, $ft, $key,
        # 8=KEY_SEQ UPDATE_RULE DELETE_RULE FK_NAME PK_NAME DEFERRABILITY UNIQUE_OR_PRIMARY
        1, 3, 3, join('_', $ft, $key, 'fkey'), $key.'_pkey', 7, 'PRIMARY',
      ];
    }
  }

  return sponge_sth_loader($dbh, $type, $names, \@dbi_data);
}
697              
sub statistics_info {
   my ($dbh, $catalog, $schema, $table, $unique_only, $quick) = @_;

   # Column-list lookup doubles as the table-existence check
   my $col_names = $dbh->{tree_columns}{names}{$table}
      or return $dbh->set_err($DBI::stderr, "No such table name: $table", '42704');

   my $pk_col    = $col_names->[0];
   my $row_count = $dbh->{tree_cardinality}{$table};

   my @stat_names = qw(
      TABLE_CAT TABLE_SCHEM TABLE_NAME NON_UNIQUE INDEX_QUALIFIER INDEX_NAME TYPE ORDINAL_POSITION
      COLUMN_NAME ASC_OR_DESC CARDINALITY PAGES FILTER_CONDITION
   );

   # Two rows: one describing the table itself, one describing the
   # implied primary-key index over the first column.
   my $table_row = [
      undef, undef, $table, 0, undef, undef, 'table', undef,
      undef, undef, $row_count, undef, undef,
   ];
   my $pkey_row = [
      undef, undef, $table, 0, undef, $pk_col.'_pkey', 'content', 1,
      $pk_col, 'A', $row_count, undef, undef,
   ];

   return sponge_sth_loader($dbh, 'STATISTICS_INFO', \@stat_names, [ $table_row, $pkey_row ]);
}
723              
sub sponge_sth_loader {
   my ($dbh, $tbl_name, $names, $rows) = @_;

   # Serve a pre-built in-memory result set through DBI's "sponge" driver
   # (mostly a straight copy from DBI::DBD::SqlEngine)
   my $sponge_dbh = $dbh->func("sql_sponge_driver");
   my $sth = $sponge_dbh->prepare($tbl_name, {
      rows => $rows || [],   # never hand the sponge an undef row set
      NAME => $names,
   });

   # On failure, propagate the sponge's error onto our own handle;
   # $sth is returned either way (undef signals the error to the caller).
   $dbh->set_err( $DBI::stderr, $sponge_dbh->errstr, $sponge_dbh->state ) unless $sth;
   return $sth;
}
739              
# Describes the four pseudo-types this driver emits (PID/ID keys, numbers,
# strings) in the standard DBI type_info_all format: a header hash mapping
# field names to column indexes, followed by one row per type.
sub type_info_all {
   # We are basically just translating Perl variable types to SQL,
   # though once everything has been flattened, it's basically just
   # string and number.

   # Perl's number size varies between 32/64-bit versions
   # NOTE(review): ptrsize*16-11 gives 53 (an IEEE double's mantissa bits) on
   # 32-bit perls but 117 on 64-bit ones -- confirm the intended formula.
   my $nbits = $Config{ptrsize} * 16 - 11;

   return [
      {
         TYPE_NAME          => 0,
         DATA_TYPE          => 1,
         COLUMN_SIZE        => 2,   # was PRECISION originally
         LITERAL_PREFIX     => 3,
         LITERAL_SUFFIX     => 4,
         CREATE_PARAMS      => 5,
         NULLABLE           => 6,
         CASE_SENSITIVE     => 7,
         SEARCHABLE         => 8,
         UNSIGNED_ATTRIBUTE => 9,
         FIXED_PREC_SCALE   => 10,  # was MONEY originally
         AUTO_UNIQUE_VALUE  => 11,  # was AUTO_INCREMENT originally
         LOCAL_TYPE_NAME    => 12,
         MINIMUM_SCALE      => 13,
         MAXIMUM_SCALE      => 14,
         SQL_DATA_TYPE      => 15,
         SQL_DATETIME_SUB   => 16,
         NUM_PREC_RADIX     => 17,
         INTERVAL_PRECISION => 18,
      },
      # Name     DataType             Max     Literals     Params  Null  Case  Search  Unsign  Fixed  Auto   LocalTypeName  M/M Scale   SQLDataType          DateTime_Sub  Radix  ItvPrec
      [ "PID",    DBI::SQL_INTEGER(),     32, undef, undef, undef,    0,    0,      3,      1,     1,     0, "PID",        0,      0, DBI::SQL_INTEGER(), undef,     2, undef],
      [ "ID",     DBI::SQL_INTEGER(),     32, undef, undef, undef,    1,    0,      3,      1,     1,     0, "ID",         0,      0, DBI::SQL_INTEGER(), undef,     2, undef],
      [ "NUMBER", DBI::SQL_NUMERIC(), $nbits, undef, undef, undef,    1,    0,      3,      0,     0,     0, "Number",     0, $nbits, DBI::SQL_NUMERIC(), undef,     2, undef],
      [ "STRING", DBI::SQL_VARCHAR(),  2**31,   "'",   "'", undef,    1,    1,      3,  undef, undef, undef, "String", undef,  undef, DBI::SQL_VARCHAR(), undef, undef, undef],
   ];
}
777              
778             1;
779              
780             ##############################################################################
781             # DBD::TreeData::st
782              
package # hide from PAUSE
   DBD::TreeData::st; # ====== STATEMENT ======

# Statement-handle class: everything is inherited from DBD::AnyData's
# statement class, which lives inside DBD/AnyData.pm itself.
our $imp_data_size = 0; # standard DBI driver global: no private XS data
use DBD::AnyData;
use parent qw(-norequire DBD::AnyData::st); # no such file as ::st.pm

1;
791              
792             ##############################################################################
793             # DBD::TreeData::Statement
794              
package # hide from PAUSE
   DBD::TreeData::Statement; # ====== SqlEngine::Statement ======

# SQL::Statement subclass hook: behavior is inherited wholesale from
# DBD::AnyData's Statement class (defined inside DBD/AnyData.pm).
our $imp_data_size = 0; # standard DBI driver global: no private XS data
use DBD::AnyData;
use parent qw(-norequire DBD::AnyData::Statement); # no such file as ::Statement.pm

1;
803              
804             ##############################################################################
805             # DBD::TreeData::Table
806              
package # hide from PAUSE
   DBD::TreeData::Table; # ====== SqlEngine::Table ======

# Table-access class: behavior is inherited wholesale from DBD::AnyData's
# Table class (defined inside DBD/AnyData.pm).
our $imp_data_size = 0; # standard DBI driver global: no private XS data
use DBD::AnyData;
use parent qw(-norequire DBD::AnyData::Table); # no such file as ::Table.pm

1;
815              
816             __END__
817             =pod
818              
819             =head1 NAME
820              
821             DBD::TreeData - DBI driver for any abstract hash/array tree
822              
823             =head1 SYNOPSIS
824              
825             use DBI;
826             use JSON::Any;
827             use LWP::Simple;
828            
829             # Example JSON object
830             my $json = get 'http://maps.googleapis.com/maps/api/geocode/json?address=1600+Pennsylvania+Ave+NW,+20500&region=us&language=en&sensor=false';
831             my $obj = JSON::Any->jsonToObj($json);
832            
833             my $dbh = DBI->connect('dbi:TreeData:', '', '', {
834             tree_table_name => 'geocode',
835             tree_data => $obj,
836             });
837            
838             # Informational dump
839             use Data::Dump;
840             dd ($dbh->table_info->fetchall_arrayref);
841             dd (map { [ @{$_}[2 .. 6] ] } @{
842             $dbh->column_info('','','','')->fetchall_arrayref
843             });
844            
845             # DBIC dump
846             use DBIx::Class::Schema::Loader 'make_schema_at';
847             make_schema_at(
848             'My::Schema', {
849             debug => 1,
850             dump_directory => './lib',
851             },
852             [ 'dbi:TreeData:geocode', '', '', { tree_data => $obj } ],
853             );
854              
855             =head1 DESCRIPTION
856              
857             DBD::TreeData provides a DBI driver to translate any sort of tree-based data set (encapsulated in a Perl object) into a flat set of tables,
858             complete with real SQL functionality. This module utilizes L<DBD::AnyData> to create the new tables, which uses L<SQL::Statement> to support
859             the SQL parsing. (Any caveats with those modules likely applies here.)
860              
861             This module can be handy to translate JSON, XML, YAML, and many other tree formats to be used in class sets like L<DBIx::Class>. Unlike
862             L<DBD::AnyData>, the format of the data doesn't have to be pre-flattened, and will be spread out into multiple tables.
863              
864             Also, this driver fully supports all of the C<<< *_info >>> methods, making it ideal to shove into modules like L<DBIx::Class::Schema::Loader>.
865             (The C<<< table_info >>> and C<<< column_info >>> filters use REs with beginE<sol>end bounds pre-set.)
866              
867             =encoding utf8
868              
869             =head1 CONNECT ATTRIBUTES
870              
871             =head2 tree_data
872              
873             The actual tree object. Of course, this attribute is required.
874              
875             =head2 tree_table_name
876              
877             The name of the starting table. Not required, but recommended. If not specified, defaults to 'tree_data', or the value of the driver
878             DSN string (after the C<<< dbi:TreeData: >>> part).
879              
880             =head2 tree_debug
881              
882             Boolean. Print debug information while translating the tree.
883              
884             =head2 tree_rename_tables
885              
886             Hashref of table names. If you don't like the name of an auto-created table, you can rename them while the database is being built. Within
887             the hashref, the keysE<sol>values are the oldE<sol>new names, respectively.
888              
889             =head1 TRANSLATION BEHAVIOR
890              
891             The tree translation into flat tables is done using a recursive descent algorithm. It starts with a check of the current node's reference
892             type, which dictates how it interprets the children. The goal is to create a fully L<4NF|http://en.wikipedia.org/wiki/Fourth_normal_form>
893             database from the tree.
894              
895             Arrays are interpreted as a list of rows, and typically get rolled up into "group" tables. Hashes are interpreted as a list of column names
896             and values. Non-references are considered values. Scalar refs and VStrings are de-referenced first. Other types of refs are processed as
best as possible, but the driver will complain. (Code ref blocks are currently NOT executed; they are discarded.)
898              
899             Nested arrays will create nested group tables with different suffixes, like C<<< matrix >>>, C<<< cube >>>, and C<<< hypercube >>>. If it has to go beyond
900             that (and you really shouldn't have structures like that), it'll start complaining (sarcastically).
901              
902             In almost all cases, the table name is derived from a previous key. Table names also use L<Lingua::EN::Inflect::Phrase> to create
903             pluralized names. Primary IDs will have singular names with a C<<< _id >>> suffix.
904              
905             For example, this tree:
906              
907             address_components => [
908             {
909             long_name => 1600,
910             short_name => 1600,
911             types => [ "street_number" ]
912             },
913             {
914             long_name => "President's Park",
915             short_name => "President's Park",
916             types => [ "establishment" ]
917             },
918             {
919             long_name => "Pennsylvania Avenue Northwest",
920             short_name => "Pennsylvania Ave NW",
921             types => [ "route" ]
922             },
923             {
924             long_name => "Washington",
925             short_name => "Washington",
926             types => [ "locality", "political" ]
927             },
928             ... etc ...,
929             ],
930              
931             Would create the following tables:
932              
933             <main_table>
934             address_component_groups
935             address_components
936             type_groups
937             types
938              
939             In this case, C<<< address_components >>> has most of the columns and data, but it also has a tie to an ID of C<<< address_component_groups >>>.
940              
941             Since C<<< types >>> points to an array, it will have its own dedicated table. That table would have data like:
942              
943             type_id │ type
944             ════════╪════════════════
945             1 │ street_number
946             2 │ establishment
947             3 │ route
948             4 │ locality
949             5 │ political
950             ... │ ...
951              
952             Most of the C<<< type_groups >>> table would be a 1:1 match. However, the last component entry has more than one value in the C<<< types >>> array, so the
953             C<<< type_group_id >>> associated to that component would have multiple entries (4 & 5). Duplicate values are also tracked, so that IDs are reused.
954              
955             =head1 CAVEATS
956              
957             =head2 DBI E<sol> DBD::AnyData Conflict
958              
959             As of the time of this writing, the latest version of L<DBI> (1.623) and the latest version of L<DBD::AnyData> (0.110) do not work together.
960             Since TreeData relies on L<DBD::AnyData> for table creation, you will need to downgrade to L<DBI> 1.622 to use this driver, until a new
961             version of L<DBD::AnyData> comes out.
962              
963             =head1 AVAILABILITY
964              
965             The project homepage is L<https://github.com/SineSwiper/DBD-TreeData/wiki>.
966              
967             The latest version of this module is available from the Comprehensive Perl
968             Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
969             site near you, or see L<https://metacpan.org/module/DBD::TreeData/>.
970              
971             =for :stopwords cpan testmatrix url annocpan anno bugtracker rt cpants kwalitee diff irc mailto metadata placeholders metacpan
972              
973             =head1 SUPPORT
974              
975             =head2 Internet Relay Chat
976              
977             You can get live help by using IRC ( Internet Relay Chat ). If you don't know what IRC is,
978             please read this excellent guide: L<http://en.wikipedia.org/wiki/Internet_Relay_Chat>. Please
979             be courteous and patient when talking to us, as we might be busy or sleeping! You can join
980             those networks/channels and get help:
981              
982             =over 4
983              
984             =item *
985              
986             irc.perl.org
987              
988             You can connect to the server at 'irc.perl.org' and join this channel: #dbi then talk to this person for help: SineSwiper.
989              
990             =back
991              
992             =head2 Bugs / Feature Requests
993              
Please report any bugs or feature requests via L<GitHub|https://github.com/SineSwiper/DBD-TreeData/issues>.
995              
996             =head1 AUTHOR
997              
998             Brendan Byrd <BBYRD@CPAN.org>
999              
1000             =head1 COPYRIGHT AND LICENSE
1001              
1002             This software is Copyright (c) 2013 by Brendan Byrd.
1003              
1004             This is free software, licensed under:
1005              
1006             The Artistic License 2.0 (GPL Compatible)
1007              
1008             =cut
1009