File Coverage

blib/lib/PDL/IO/DBI.pm
Criterion Covered Total %
statement 52 268 19.4
branch 3 188 1.6
condition 0 50 0.0
subroutine 17 24 70.8
pod 2 2 100.0
total 74 532 13.9


line stmt bran cond sub pod time code
1             package PDL::IO::DBI;
2              
3 5     5   738596 use strict;
  5         10  
  5         119  
4 5     5   16 use warnings;
  5         6  
  5         134  
5              
6 5     5   14 use Exporter 'import';
  5         10  
  5         271  
7             our @EXPORT_OK = qw(rdbi1D rdbi2D);
8             our %EXPORT_TAGS = (all => \@EXPORT_OK);
9              
10             our $VERSION = '0.010';
11              
12 5     5   15 use Config;
  5         46  
  5         202  
13 5 50   5   17 use constant NO64BITINT => ($Config{ivsize} < 8) ? 1 : 0;
  5         8  
  5         704  
14 5 50   5   21 use constant NODATETIME => eval { require PDL::DateTime; require Time::Moment; 1 } ? 0 : 1;
  5         7  
  5         7  
  5         1200  
  0         0  
  0         0  
15 5 50   5   21 use constant DEBUG => $ENV{PDL_IO_DBI_DEBUG} ? 1 : 0;
  5         5  
  5         204  
16              
17 5     5   696 use PDL;
  5         14  
  5         35  
18 5     5   187043 use DBI;
  5         15284  
  5         225  
19 5     5   2297 use Time::Moment;
  5         5597  
  5         130  
20              
21 5     5   28 use Carp;
  5         6  
  5         3727  
22             $Carp::Internal{ (__PACKAGE__) }++;
23              
24             my %pck = (
25             byte => "C",
26             short => "s",
27             ushort => "S",
28             long => "l",
29             longlong => "q",
30             float => "f",
31             double => "d",
32             );
33              
34             my %tmap = (
35             DBI::SQL_TINYINT => byte, # -6
36             DBI::SQL_BIGINT => longlong, # -5
37             DBI::SQL_NUMERIC => double, # 2
38             DBI::SQL_DECIMAL => double, # 3
39             DBI::SQL_INTEGER => long, # 4
40             DBI::SQL_SMALLINT => short, # 5
41             DBI::SQL_FLOAT => double, # 6
42             DBI::SQL_REAL => float, # 7
43             DBI::SQL_DOUBLE => double, # 8
44             DBI::SQL_DATETIME => '_dt_', # 9 == DBI::SQL_DATE
45             #DBI::SQL_INTERVAL => longlong, # 10 == DBI::SQL_TIME
46             DBI::SQL_TIMESTAMP => '_dt_', # 11
47             DBI::SQL_BOOLEAN => byte, # 16
48             DBI::SQL_TYPE_DATE => '_dt_', # 91
49             #DBI::SQL_TYPE_TIME # 92
50             DBI::SQL_TYPE_TIMESTAMP => '_dt_', # 93
51             #DBI::SQL_TYPE_TIME_WITH_TIMEZONE # 94
52             DBI::SQL_TYPE_TIMESTAMP_WITH_TIMEZONE => '_dt_', # 95
53             #DBI::SQL_INTERVAL_YEAR 101
54             #DBI::SQL_INTERVAL_MONTH 102
55             #DBI::SQL_INTERVAL_DAY 103
56             #DBI::SQL_INTERVAL_HOUR 104
57             #DBI::SQL_INTERVAL_MINUTE 105
58             #DBI::SQL_INTERVAL_SECOND 106
59             #DBI::SQL_INTERVAL_YEAR_TO_MONTH 107
60             #DBI::SQL_INTERVAL_DAY_TO_HOUR 108
61             #DBI::SQL_INTERVAL_DAY_TO_MINUTE 109
62             #DBI::SQL_INTERVAL_DAY_TO_SECOND 110
63             #DBI::SQL_INTERVAL_HOUR_TO_MINUTE 111
64             #DBI::SQL_INTERVAL_HOUR_TO_SECOND 112
65             #DBI::SQL_INTERVAL_MINUTE_TO_SECOND 113
66             ################## DBD::SQLite uses text values instead of numerical constants corresponding to DBI::SQL_*
67             'BIGINT' => longlong, # 8 bytes, -9223372036854775808 .. 9223372036854775807
68             'INT8' => longlong, # 8 bytes
69             'INTEGER' => long, # 4 bytes, -2147483648 .. 2147483647
70             'INT' => long, # 4 bytes
71             'INT4' => long, # 4 bytes
72             'MEDIUMINT' => long, # 3 bytes, -8388608 .. 8388607
73             'SMALLINT' => short, # 2 bytes, -32768 .. 32767
74             'INT2' => short, # 2 bytes
75             'TINYINT' => byte, # 1 byte, MySQL: -128 .. 127, MSSQL+Pg: 0 to 255
76             'REAL' => float, # 4 bytes
77             'FLOAT' => double, # 8 bytes
78             'NUMERIC' => double,
79             'DECIMAL' => double,
80             'DOUBLE' => double,
81             'DOUBLE PRECISION' => double,
82             'BOOLEAN' => byte,
83             'SMALLSERIAL' => short, # 2 bytes, 1 to 32767
84             'SERIAL' => long, # 4 bytes, 1 to 2147483647
85             'BIGSERIAL' => longlong, # 8 bytes, 1 to 9223372036854775807
86             'DATETIME' => '_dt_',
87             'DATE' => '_dt_',
88             'TIMESTAMP' => '_dt_',
89             );
90              
91             # https://www.sqlite.org/datatype3.html
92             # http://dev.mysql.com/doc/refman/5.7/en/integer-types.html
93             # http://www.postgresql.org/docs/9.3/static/datatype-numeric.html
94             # http://msdn.microsoft.com/en-us/library/ff848794.aspx
95              
96             sub _dt_to_double {
97 0     0     my $str = shift;
98 0 0         $str = $str."T00Z" if $str =~ /^\d\d\d\d-\d\d-\d\d$/;
99 0 0         $str = $str."Z" if $str !~ /(UTC|GMT|Z|\+)/;
100 0 0         my $t = eval { Time::Moment->from_string($str, lenient=>1) } or die "INVALID DATETIME: $str"; #XXX-FIXME PDL::dt2ll / dt2dbl ?
  0            
101 0           return $t->epoch * 1.0 + $t->millisecond / 1_000;
102             }
103              
104             sub _dt_to_longlong {
105 0     0     my $str = shift;
106 0 0         $str = $str."T00Z" if $str =~ /^\d\d\d\d-\d\d-\d\d$/;
107 0 0         $str = $str."Z" if $str !~ /(UTC|GMT|Z|\+)/;
108 0 0         my $t = eval { Time::Moment->from_string($str, lenient=>1) } or die "INVALID DATETIME: $str"; #XXX-FIXME PDL::dt2ll
  0            
109 0           return $t->epoch*1000000 + $t->microsecond;
110             }
111              
112             sub rdbi1D {
113 0     0 1   my ($dbh, $sql, $bind_values, $O) = _proc_args(@_);
114              
115             # reuse_sth (if defined) is a scalar reference to a statement handle to be reused
116 0           my $reuse_sth = $O->{reuse_sth};
117 0           my $sth;
118 0 0 0       if ($reuse_sth && $$reuse_sth) {
119 0           $sth = $$reuse_sth;
120             }
121             else {
122 0 0         $sth = $dbh->prepare($sql) or croak "FATAL: prepare failed: " . $dbh->errstr;
123 0 0         $sth->execute(@$bind_values) or croak "FATAL: execute failed: " . $sth->errstr;
124 0 0         $$reuse_sth = $sth if $reuse_sth;
125             }
126              
127 0           my ($c_type, $c_pack, $c_sizeof, $c_pdl, $c_bad, $c_dataref, $c_idx, $c_convert, $allocated, $cols) = _init_1D($sth->{TYPE}, $O);
128 0 0         warn "Initial size: '$allocated'\n" if $O->{debug};
129 0           my $null2bad = $O->{null2bad};
130 0           my $processed = 0;
131 0           my $headerline = $sth->{NAME_lc};
132              
133 0 0         warn "Fetching data (type=", join(',', @$c_type), ") ...\n" if $O->{debug};
134 0           while (my $data = $sth->fetchall_arrayref(undef, $O->{fetch_chunk})) { # limiting MaxRows
135 0           my $rows = scalar @$data;
136 0 0         if ($rows > 0) {
137 0           $processed += $rows;
138 0 0         if ($allocated < $processed) {
139 0           $allocated += $O->{reshape_inc};
140 0 0         warn "Reshape to: '$allocated'\n" if $O->{debug};
141 0           for (0..$cols-1) {
142 0           $c_pdl->[$_]->reshape($allocated);
143 0           $c_dataref->[$_] = $c_pdl->[$_]->get_dataref;
144             }
145             }
146 0 0         if ($null2bad) {
147 0           for my $tmp (@$data) {
148 0           for (0..$cols-1) {
149 0 0         unless (defined $tmp->[$_]) {
150 0           $tmp->[$_] = $c_bad->[$_];
151 0           $c_pdl->[$_]->badflag(1);
152             }
153             }
154             }
155             }
156 0 0         if (scalar @$c_convert > 0) {
157 0           for my $c (0..$cols-1) {
158 0 0         if (ref $c_convert->[$c] eq 'CODE') {
159 0           for my $r (0..$rows-1) {
160 0           $data->[$r]->[$c] = $c_convert->[$c]->($data->[$r]->[$c]);
161             }
162             }
163             }
164             }
165 0           for my $ci (0..$cols-1) {
166 0           my $bytes = '';
167             {
168 5     5   25 no warnings 'pack'; # intentionally disable all pack related warnings
  5         7  
  5         226  
  0            
169 5     5   19 no warnings 'numeric'; # disable: Argument ??? isn't numeric in pack
  5         5  
  5         133  
170 5     5   14 no warnings 'uninitialized'; # disable: Use of uninitialized value in pack
  5         5  
  5         2628  
171 0           $bytes .= pack($c_pack->[$ci], $data->[$_][$ci]) for(0..$rows-1);
172             }
173 0           my $len = length $bytes;
174 0           my $expected_len = $c_sizeof->[$ci] * $rows;
175 0 0         croak "FATAL: len mismatch $len != $expected_len" if $len != $expected_len;
176 0           substr(${$c_dataref->[$ci]}, $c_idx->[$ci], $len) = $bytes;
  0            
177 0           $c_idx->[$ci] += $expected_len;
178             }
179             }
180 0 0         last if $reuse_sth;
181             }
182 0 0         croak "FATAL: DB fetch failed: " . $sth->errstr if $sth->err;
183              
184 0 0         if ($processed != $allocated) {
185 0 0         warn "Reshape to: '$processed' (final)\n" if $O->{debug};
186 0           $c_pdl->[$_]->reshape($processed) for (0..$cols-1);
187             }
188 0           $c_pdl->[$_]->upd_data for (0..$cols-1);
189 0 0         if (ref $headerline eq 'ARRAY') {
190 0           for (0..$cols-1) {
191 0 0 0       $c_pdl->[$_]->hdr->{col_name} = $headerline->[$_] if $headerline->[$_] && $headerline->[$_] ne '';
192             };
193             }
194              
195 0 0         if ($processed == 0) {
196 0 0         if ($reuse_sth) {
197             # signal to callers that all chunks have been fetched
198 0           $$reuse_sth = undef;
199             }
200             else {
201 0           warn "rdbi1D: no data\n";
202             }
203             }
204              
205 0           return @$c_pdl;
206             }
207              
208             sub rdbi2D {
209 0     0 1   my ($dbh, $sql, $bind_values, $O) = _proc_args(@_);
210              
211 0 0         croak 'FATAL: reuse_sth not supported yet for rdbi2D' if $O->{reuse_sth};
212 0 0         my $sth = $dbh->prepare($sql) or croak "FATAL: prepare failed: " . $dbh->errstr;
213 0 0         $sth->execute(@$bind_values) or croak "FATAL: execute failed: " . $sth->errstr;
214              
215 0           my ($c_type, $c_pack, $c_sizeof, $c_pdl, $c_bad, $c_dataref, $c_convert, $allocated, $cols) = _init_2D($sth->{TYPE}, $O);
216 0 0         warn "Initial size: '$allocated'\n" if $O->{debug};
217 0           my $null2bad = $O->{null2bad};
218 0           my $processed = 0;
219 0           my $c_idx = 0;
220 0           my $pck = "$c_pack\[$cols\]";
221              
222 0 0         warn "Fetching data (type=$c_type) ...\n" if $O->{debug};
223 0           while (my $data = $sth->fetchall_arrayref(undef, $O->{fetch_chunk})) { # limiting MaxRows
224 0           my $rows = scalar @$data;
225 0 0         if ($rows > 0) {
226 0           $processed += $rows;
227 0 0         if ($allocated < $processed) {
228 0           $allocated += $O->{reshape_inc};
229 0 0         warn "Reshape to: '$allocated'\n" if $O->{debug};
230 0           $c_pdl->reshape($cols, $allocated);
231 0           $c_dataref = $c_pdl->get_dataref;
232             }
233 0           my $bytes = '';
234 0 0         if ($null2bad) {
235 0           for my $tmp (@$data) {
236 0           for (@$tmp) {
237 0 0         unless (defined $_) {
238 0           $_ = $c_bad;
239 0           $c_pdl->badflag(1);
240             }
241             }
242             }
243             }
244 0 0         if (scalar @$c_convert > 0) {
245 0           for my $c (0..$cols-1) {
246 0 0         if (ref $c_convert->[$c] eq 'CODE') {
247 0           for my $r (0..$rows-1) {
248 0           $data->[$r]->[$c] = $c_convert->[$c]->($data->[$r]->[$c]);
249             }
250             }
251             }
252             }
253             {
254 5     5   20 no warnings 'pack'; # intentionally disable all pack related warnings
  5         5  
  5         173  
  0            
255 5     5   18 no warnings 'numeric'; # disable: Argument ??? isn't numeric in pack
  5         4  
  5         101  
256 5     5   15 no warnings 'uninitialized'; # disable: Use of uninitialized value in pack
  5         4  
  5         6146  
257 0           $bytes .= pack($pck, @$_) for (@$data);
258             }
259 0           my $len = length $bytes;
260 0           my $expected_len = $c_sizeof * $cols * $rows;
261 0 0         croak "FATAL: len mismatch $len != $expected_len" if $len != $expected_len;
262 0           substr($$c_dataref, $c_idx, $len) = $bytes;
263 0           $c_idx += $len;
264             }
265             }
266 0 0         croak "FATAL: DB fetch failed: " . $sth->errstr if $sth->err;
267 0 0         if ($processed != $allocated) {
268 0 0         warn "Reshape to: '$processed' (final)\n" if $O->{debug};
269 0           $c_pdl->reshape($cols, $processed); # allocate the exact size
270             }
271 0           $c_pdl->upd_data;
272              
273 0 0         warn "rdbi2D: no data\n" unless $processed > 0;
274              
275 0           return $c_pdl->transpose;
276             }
277              
278             sub _proc_args {
279 0 0   0     my $options = ref $_[-1] eq 'HASH' ? pop : {};
280 0           my ($dsn_or_dbh, $sql, $bind_values) = @_;
281              
282 0 0         croak "FATAL: no SQL query" unless $sql;
283 0 0         croak "FATAL: no DBH or DSN" unless defined $dsn_or_dbh;
284 0           my $reuse_sth = $options->{reuse_sth};
285 0 0         if ($reuse_sth) {
286             croak "FATAL: reuse_sth must either be false, a reference to a false value, or a reference to a statement handle"
287 0 0 0       if $$reuse_sth && !eval { $$reuse_sth->isa('DBI::st') };
  0            
288             }
289 0           my $O = { %$options }; # make a copy
290              
291             # handle defaults for optional parameters
292 0 0         $O->{fetch_chunk} = 8_000 unless defined $O->{fetch_chunk};
293 0 0         my $alloc = $reuse_sth ? $O->{fetch_chunk} : 80_000;
294 0 0         $O->{reshape_inc} = $alloc unless defined $O->{reshape_inc};
295 0 0         $O->{type} = 'auto' unless defined $O->{type};
296 0 0         $O->{debug} = DEBUG unless defined $O->{debug};
297              
298             # reshape_inc cannot be lower than fetch_chunk
299 0 0         $O->{reshape_inc} = $O->{fetch_chunk} if $O->{reshape_inc} < $O->{fetch_chunk};
300              
301 0 0         $bind_values = [] unless ref $bind_values eq 'ARRAY';
302              
303             # launch db query
304 0 0         my $dbh = ref $dsn_or_dbh ? $dsn_or_dbh : DBI->connect($dsn_or_dbh) or croak "FATAL: connect failed: " . $DBI::errstr;
    0          
305              
306 0           return ($dbh, $sql, $bind_values, $O);
307             }
308              
309             sub _init_1D {
310 0     0     my ($sql_types, $O) = @_;
311              
312 0 0         croak "FATAL: no columns" unless ref $sql_types eq 'ARRAY';
313 0           my $cols = scalar @$sql_types;
314 0 0         croak "FATAL: no columns" unless $cols > 0;
315              
316 0           my @c_type;
317             my @c_pack;
318 0           my @c_sizeof;
319 0           my @c_pdl;
320 0           my @c_bad;
321 0           my @c_dataref;
322 0           my @c_idx;
323 0           my @c_convert;
324              
325 0 0         if (ref $O->{type} eq 'ARRAY') {
326 0           @c_type = @{$O->{type}};
  0            
327             }
328             else {
329 0           $c_type[$_] = $O->{type} for (0..$cols-1);
330             }
331 0 0         for (0..$cols-1) { $c_type[$_] = 'auto' if !$c_type[$_] }
  0            
332              
333 0 0         my @detected_type = map { $sql_types->[$_] ? $tmap{$sql_types->[$_]} : undef } (0..$cols-1);
  0            
334 0 0         if ($O->{debug}) {
335 0   0       $detected_type[$_] or warn "column $_ has unknown type '$sql_types->[$_]' gonna use Double\n" for (0..$cols-1);
336             }
337 0           my $allocated = $O->{reshape_inc};
338              
339 0           my @c_dt;
340 0           for (0..$cols-1) {
341 0 0 0       if ($detected_type[$_] && $detected_type[$_] eq '_dt_') {
342 0 0         if (!NODATETIME && ($c_type[$_] eq 'auto' || $c_type[$_] eq 'datetime')) {
343             croak "PDL::DateTime not installed" if NODATETIME;
344             $c_convert[$_] = \&_dt_to_longlong;
345             $c_type[$_] = longlong;
346             $c_dt[$_] = 'datetime';
347             }
348 0           elsif ($c_type[$_] eq longlong) {
349 0           $c_convert[$_] = \&_dt_to_longlong;
350             }
351             else {
352 0           $c_convert[$_] = \&_dt_to_double;
353 0           $c_type[$_] = double;
354             }
355             }
356 0 0 0       $c_type[$_] = $detected_type[$_] if !defined $c_type[$_] || $c_type[$_] eq 'auto';
357 0 0         $c_type[$_] = double if !$c_type[$_];
358 0           $c_pack[$_] = $pck{$c_type[$_]};
359 0 0 0       croak "FATAL: your perl does not support 64bitint (avoid using type longlong)" if $c_pack[$_] eq 'q' && NO64BITINT;
360 0 0         croak "FATAL: invalid type '$c_type[$_]' for column $_" if !$c_pack[$_];
361 0           $c_sizeof[$_] = length pack($c_pack[$_], 1);
362 0 0         $c_pdl[$_] = $c_dt[$_] ? PDL::DateTime->new(zeroes(longlong, $allocated)) : zeroes($c_type[$_], $allocated);
363 0           $c_dataref[$_] = $c_pdl[$_]->get_dataref;
364 0           $c_bad[$_] = $c_pdl[$_]->badvalue;
365 0           $c_idx[$_] = 0;
366 0           my $big = PDL::Core::howbig($c_pdl[$_]->get_datatype);
367 0 0         croak "FATAL: column $_ mismatch (type=$c_type[$_], sizeof=$c_sizeof[$_], big=$big)" if $big != $c_sizeof[$_];
368             }
369              
370 0           return (\@c_type, \@c_pack, \@c_sizeof, \@c_pdl, \@c_bad, \@c_dataref, \@c_idx, \@c_convert, $allocated, $cols);
371             }
372              
373             sub _init_2D {
374 0     0     my ($sql_types, $O) = @_;
375              
376 0 0         croak "FATAL: no columns" unless ref $sql_types eq 'ARRAY';
377 0           my $cols = scalar @$sql_types;
378 0 0         croak "FATAL: no columns" unless $cols > 0;
379              
380 0           my $c_type = $O->{type};
381 0 0 0       if (!$c_type || $c_type eq 'auto') {
382             # try to guess the best type
383 0 0         my @detected_type = map { $sql_types->[$_] ? $tmap{$sql_types->[$_]} : undef } (0..$cols-1);
  0            
384 0 0         if ($O->{debug}) {
385 0   0       $detected_type[$_] or warn "column $_ has unknown type '$sql_types->[$_]' gonna use Double\n" for (0..$cols-1);
386             }
387 0           for (0..$#detected_type) {
388             # DATETIME is auto-detected as double
389 0 0 0       $detected_type[$_] = 'double' if $detected_type[$_] && $detected_type[$_] eq '_dt_';
390 0   0       my $dt = $detected_type[$_] || 'double';
391 0 0         $c_type = double if $dt eq double;
392 0 0 0       $c_type = float if $dt eq float && $c_type ne double;
393 0 0 0       $c_type = longlong if $dt eq longlong && $c_type !~ /^(double|float)$/;
394 0 0 0       $c_type = long if $dt eq long && $c_type !~ /^(double|float|longlong)$/;
395 0 0 0       $c_type = short if $dt eq short && $c_type !~ /^(double|float|longlong|long)$/;
396 0 0 0       $c_type = byte if $dt eq byte && $c_type !~ /^(double|float|longlong|long|short)$/;
397             }
398 0 0         croak "FATAL: type detection failed" if !$c_type;
399             }
400 0           my $c_pack = $pck{$c_type};
401 0 0 0       croak "FATAL: your perl does not support 64bitint (avoid using type longlong)" if $c_pack eq 'q' && NO64BITINT;
402 0 0         croak "FATAL: invalid type '$c_type' for column $_" if !$c_pack;
403              
404 0           my @c_convert = ();
405 0           for (0..$cols-1) {
406 0   0       my $t = $tmap{$sql_types->[$_]} || '';
407 0 0         if ($t eq '_dt_') {
408 0 0         $c_convert[$_] = ($c_type eq 'longlong') ? \&_dt_to_longlong : \&_dt_to_double;
409             }
410             }
411              
412 0           my $allocated = $O->{reshape_inc};
413 0           my $c_sizeof = length pack($c_pack, 1);
414 0           my $c_pdl = zeroes($c_type, $cols, $allocated);
415 0           my $c_dataref = $c_pdl->get_dataref;
416 0           my $c_bad = $c_pdl->badvalue;
417              
418 0           my $howbig = PDL::Core::howbig($c_pdl->get_datatype);
419 0 0         croak "FATAL: column $_ size mismatch (type=$c_type, sizeof=$c_sizeof, howbig=$howbig)" unless $howbig == $c_sizeof;
420              
421 0           return ($c_type, $c_pack, $c_sizeof, $c_pdl, $c_bad, $c_dataref, \@c_convert, $allocated, $cols);
422             }
423              
424             1;
425              
426             __END__