File Coverage

lib/Org/ReadTables.pm
Criterion Covered Total %
statement 143 159 89.9
branch 58 78 74.3
condition 14 27 51.8
subroutine 12 13 92.3
pod 5 7 71.4
total 232 284 81.6


line stmt bran cond sub pod time code
1             package Org::ReadTables v0.0.2 {
2              
3 1     1   138705 use v5.38;
  1         4  
4 1     1   5 use List::Util qw(any);
  1         2  
  1         77  
5              
6 1     1   7 use feature 'signatures';
  1         2  
  1         209  
7 1     1   5 no warnings qw(experimental::signatures);
  1         2  
  1         56  
8              
9             ################
10             # Parse tables from an Org-mode file into our database
11             #
12             ################
13              
14 1     1   836 use Org::Parser;
  1         61004  
  1         2061  
15              
16 4     4 1 161513 sub new ($class, %args) {
  4         11  
  4         15  
  4         7  
17 4         13 my $self = bless {}, $class;
18 4         13 foreach my $arg (qw(dbh table tables cb cb_table)) {
19 20 100       65 $self->{$arg} = $args{$arg} if defined $args{$arg};
20             }
21 4 50       18 if (defined $self->{dbh}) {
22 0 0       0 if (ref $self->{dbh}) {
23 0 0       0 if (!$self->{dbh}->can('insert')) {
24 0         0 warn "Database handle should provide an 'insert' method";
25             }
26             } else {
27 0         0 warn "'dbh' should be a database handle";
28             }
29             }
30 4         11 return $self;
31             }
32              
33 0     0 1 0 sub errors ($self) {
  0         0  
  0         0  
34 0         0 return $self->{errors};
35             }
36              
37 1     1 1 6 sub inserted ($self, $table = undef) {
  1         3  
  1         2  
  1         1  
38 1 50       5 if (defined $table) {
39 0 0 0     0 return defined $self->{inserted} && exists $self->{inserted}->{$table} ? $self->{inserted}->{$table} : undef;
40             } else {
41 1         13 return $self->{inserted}; # All tables
42             }
43 0         0 return $self->{inserted};
44             }
45              
46 3     3 1 18 sub saved ($self, $table = undef) {
  3         6  
  3         7  
  3         6  
47 3 50       7 if (defined $table) {
48 0 0 0     0 return defined $self->{saved} && exists $self->{saved}->{$table} ? $self->{saved}->{$table} : undef;
49             } else {
50 3         51 return $self->{saved}; # All tables
51             }
52             }
53              
54 4     4 1 23 sub parse ($self, $text, $default_table = undef) {
  4         7  
  4         8  
  4         5  
  4         8  
55              
56 4         7 state $table; # as a 'state' so the nested 'walk' subroutine can use it
57 4         6 state $db;
58 4         7 state $errors = [];
59 4         6 state $inserted = 0; # 'state' so it can be seen by 'walk'
60 4         38 state $saved; # ...and by 'flush_row'
61 4         91 state $self_copy;
62 4         5 state @fixed_columns;
63 4         6 state @fixed_values;
64 4         5 state $data_column; # for pivot tables ~~~~~~~~ TODO
65 4         5 state $caption;
66              
67             sub flush_row {
68 101     101 0 152 my ($columns, $values) = @_;
69 101         125 my $save_fields = {};
70              
71 101 50 33     357 return unless defined $table && length($table);
72 101         120 @{$save_fields}{@{$columns}} = @{$values};
  101         430  
  101         126  
  101         134  
73 101 50       223 if ( defined $self_copy->{cb} ) {
    100          
    50          
74 64 100       154 if ( $self_copy->{cb}->($table, $save_fields) ) {
75 34         203 do { push @{$saved->{$table}}, $save_fields; 1; };
  34         37  
  34         63  
  34         39  
76 34         43 $inserted++;
77             }
78             } elsif ( defined $db ?
79 0         0 eval { $db->insert($table, $save_fields); 1; } :
  0         0  
80 37         41 do { push @{$saved->{$table}}, $save_fields; 1; } ) {
  37         71  
  37         95  
81 37         45 $inserted++;
82             } else {
83 0         0 push @{$errors} , [ $save_fields, $@ ];
  0         0  
84             }
85             }
86              
87             sub walk {
88             # Ignore leading TableHLine
89             # Find first row(s) with TableCell and accumulate column names
90             # Require a TableHLine to proceed
91             # For each TableRow, accumulate column values and save row.
92             #
93             # NOTE: Must call an extra time with a 'fake' TableRow afterwards,
94             # to flush the final row.
95              
96 767     767 0 11141 my ($el, $level) = @_;
97              
98 767         914 state $in_header;
99 767         818 state $found_header_row;
100 767         798 state $column;
101 767         783 state $is_data_row;
102 767         811 state @columns;
103 767         778 state @values;
104             # print sprintf("%4d %2d%-30s\n", $_[1], $column, ref $_[0]) ;
105 767 100       3607 if ((ref $el) =~ /Table\z/i) {
    100          
    100          
    100          
    50          
106 20         30 $in_header = 1;
107 20         26 $found_header_row = 0;
108 20         26 $inserted = 0;
109 20         50 @columns = @fixed_columns;
110 20         84 @values = @fixed_values;
111             } elsif ((ref $el) =~ /TableHLine\z/i) {
112 20 50       45 if ($found_header_row) {
113             #
114             # Skip this table if the callback returns a false value.
115             #
116 20 100 66     78 if ($in_header && defined $self_copy->{cb_table}) {
117             undef $table unless $self_copy->{cb_table}->({ name => $table,
118             nameref => \$table, # Modifiable
119             ( scalar @fixed_columns ? (
120 18 100       113 fixed => {map {($fixed_columns[$_], $fixed_values[$_])}
  12 50       137  
121             0..(scalar @fixed_columns -1)} ) : () ),
122             columns => \@columns,
123             data_column => $data_column,
124             caption => $caption,
125             });
126             }
127 20         1991 $in_header = 0;
128             }
129             } elsif ((ref $el) =~ /TableRow\z/i) {
130 141 100       293 if ($is_data_row) { # Flush previous row
131 101         219 flush_row (\@columns, \@values);
132 101         440 $is_data_row = 0;
133             }
134 141         190 $found_header_row = 1;
135 141         200 $column = scalar @fixed_columns - 1;
136 141         357 @values = @fixed_values;
137             } elsif ((ref $el) =~ /TableCell\z/i) {
138 293         492 $column++;
139             } elsif ((ref $el) =~ /Text\z/i) {
140 293 100       448 if ($in_header) {
141 55         129 $columns[$column] = lc($el->as_string);
142             } else {
143 238         435 $values[$column] = $el->as_string;
144 238         3032 $is_data_row = 1;
145             }
146             }
147             }
148              
149 4         50 my $orgp = Org::Parser->new;
150 4         159 my $org_doc = $orgp->parse($text);
151 4         231509 my @tables = $org_doc->find('Table');
152 4         23707 $self_copy = $self;
153 4         31 $saved = {};
154 4         98 $errors = [];
155 4         15 $db = $self->{dbh};
156              
157 4         22 foreach my $t (@tables) {
158             # Look for NAME property as Settings element in
159             # prev_siblings; also look for PROPERTIES drawer
160             # (containing NAME property) in prev_siblings, ignoring
161             # whitespace-only Text elements
162 32         49 undef $table;
163 32         107 my $prev_elem = $t->prev_sibling;
164 32         1210 @fixed_columns = ();
165 32         56 @fixed_values = ();
166              
167 32 50       77 if (defined $prev_elem) {
168             ELEMENT:
169 32         63 while (defined $prev_elem) {
170             # Only regard settings and drawers between headlines or tables.
171 120 100       2740 last ELEMENT if ref $prev_elem eq 'Org::Element::Table';
172             # Formerly we looked for a
173             # 'Org::Element::Headline' here, but actually that
174             # would always be the *parent* element. Perhaps
175             # we want to default to
176             # »$t->headline->title->as_string« ?
177              
178             # NOTE: Internal links (»#+NAME: sometable«) are
179             # always followed by all-whitespace Text (see
180             # t/drawer.t in Org::Parser). Ignore them.
181 100 100 100     295 next ELEMENT if ref $prev_elem eq 'Org::Element::Text' &&
182             $prev_elem->as_string =~ /^\s*\z/;
183              
184 60 100       195 if (ref $prev_elem eq 'Org::Element::Setting') {
    100          
185             # e.g., »#+NAME: sometable«
186 32 100 33     189 if ($prev_elem-> name =~ /^name\z/i) {
    50          
    100          
    50          
187 16         49 $table = $prev_elem->args->[0];
188             } elsif ($prev_elem-> name =~ /^data\z/i) {
189 0         0 $data_column = $prev_elem->args->[0];
190             } elsif ($prev_elem-> name =~ /^caption\z/i) {
191 8         37 $caption = $prev_elem->args->[0];
192 8         35 } elsif ($prev_elem->name =~ /^property\z/i && scalar @{$prev_elem->args} > 1) {
193 8         25 push @fixed_columns, $prev_elem->args->[0];
194             # Double quotes may be used around a
195             # string with spaces; otherwise this will
196             # add a single space between words.
197 8         16 push @fixed_values, join(' ',@{$prev_elem->args}[1..scalar @{$prev_elem->args}-1]);
  8         32  
  8         24  
198             }
199             } elsif (ref $prev_elem eq 'Org::Element::Drawer') {
200             # We regard the internal-link-target syntax
201             # «#+NAME: some_table» as meaning, Insert the
202             # following orgmode table into database table
203             # `some_table` (see orgmode manual §4.2
204             # "Internal Links")
205             #
206             # TODO: Verify column names and skip
207             # non-existent ones; otherwise, adding
208             # properties that do not map to sql column
209             # names will result in errors and no rows
210             # inserted!
211             #
212 12         18 foreach my $k ( keys %{$prev_elem->properties} ) {
  12         78  
213 20 100       73 if ($k =~ /^name\z/i) {
    100          
214 8         29 $table = $prev_elem->properties->{$k};
215             } elsif ($k =~ /^data\z/i) {
216 4         15 $data_column = $prev_elem->properties->{$k};
217             } else {
218 8         19 push @fixed_columns, $k;
219 8         35 push @fixed_values, $prev_elem->properties->{$k};
220             }
221             }
222 12 100       35 last ELEMENT unless !defined $table;
223             # Only process one drawer. If neither NAME nor
224             # default table (from new() or in parse()
225             # call) is given, this attached table will be
226             # skipped (see below).
227             }
228             }
229             continue {
230 92         835 $prev_elem = $prev_elem->prev_sibling;
231             }
232             }
233 32   66     162 $table //= $default_table // $self->{table};
      100        
234              
235             # TODO: When NAME property is encountered, check against our `table` or
236             # `tables` attributes to verify we can load this data
237 32 100       63 next unless defined $table;
238 26 100 66     94 if (defined $self->{tables} && ref $self->{tables} eq 'ARRAY') {
239 8 100   8   42 next unless any { $table eq $_ } @{$self->{tables}};
  8         39  
  8         37  
240             }
241              
242 20         73 $t->walk(\&walk); # Process all rows in this table
243 20         957 walk(Org::Element::TableRow->new, 0); # Flush last row in final table
244 20         56 $self->{errors} = $errors;
245 20         49 $self->{inserted}->{$table} += $inserted ;
246 20         51 $self->{saved} = $saved;
247             }
248 4         28 return $inserted;
249             }
250              
251             };
252              
253             1;
254              
255             =encoding utf8
256              
257             =head1 NAME
258              
259             Org::ReadTables - Import Org Mode tables into arrays, or
260             directly into database tables
261              
262             =head1 SYNOPSIS
263              
264             use Org::ReadTables;
265              
266             my $op = Org::ReadTables->new( dbh => $dbh,
267             table => 'example',
268             tables => ['a_table']
269             );
270             # or:
271             # When called without a 'dbh' argument, saves values
272             # which can be retrieved via the 'saved' method.
273             #
274             my $op = Org::ReadTables->new( cb => \&row_callback,
275             cb_table => \&table_callback,
276             );
277             # then:
278             $op->parse( $mojo_file->slurp );
279              
280             =head1 DESCRIPTION
281              
282             L<Org::ReadTables> loads data from one or more C<Emacs> Org Mode
283             tables in an org file into a L<DBI> style database which supports the
284             C<SQL::Abstract/insert> method. The underlying C<DBD> must also
285             support the C<returning> option for insertion.
286              
287             For example, given the following .org file:
288              
289             #+NAME: LCCN_Serial
290             | LCCN | Publication | City | Start_Date | End_Date |
291             |------------+-----------------------+---------+------------+------------|
292             | sn92024097 | Adahooniłigii | Phoenix | | |
293             | sn87062098 | Arizona Daily Citizen | Tucson | | |
294             | sn84020558 | Arizona Republican | Phoenix | | 1930-11-10 |
295             | sn83045137 | Arizona Republic | Phoenix | 1930-11-11 | |
296              
297             and a database containing a table called lccn_serial, the C<parse>
298             method would insert four rows into it, in the fields whose names are
299             given in the column headings.
300              
301             The C<NAME:> org attribute specifies the table name; a default value
302             may be passed in the C<table> parameter to the C<new> method.
303             Additionally, a C<tables> array may be passed by reference against
304             which the names of such tables will be validated; table names not
305             listed will have their org tables skipped.
306              
307             Table names may also be specified in a C<Name> property (not
308             case-sensitive) in an Orgmode Drawer preceding the table. For
309             example:
310              
311             :PROPERTIES:
312             :Name: Locos
313             :END:
314             | Wheel Arrangement | Locomotive Type |
315             |-------------------+-----------------|
316             | oo-oo> | American |
317             | ooo-oo> | Mogul |
318              
319             Additionally, with the Drawer format, fixed column values may
320             optionally be specified:
321              
322             :PROPERTIES:
323             :Name: Locos
324             :Country: .us
325             :END:
326             | Wheel Arrangement | Locomotive Type |
327             |-------------------+-----------------|
328             | oo-oo> | American |
329             | ooo-oo> | Mogul |
330              
331             which would have the effect of adding a 'country' column to the right
332             of each record, all having the value '.us'.
333              
334             =head2 Pivot Tables
335              
336             NOTE: This is a future feature, not yet fully implemented.
337              
338             When it is desirable to enter data two-dimensionally, a construct like
339             this may be used:
340              
341             :PROPERTIES:
342             :Name: sizes
343             :Data: size_desc
344             | class> | A | B | C |
345             | size_code | | | |
346             |------------+-------+-------+-----|
347             | 1 | 1-2 | 22-26 | |
348             | 2 | 3-4 | 26-30 | XS |
349             | 3 | 5-6 | 30-34 | S |
350              
351             where the `Data` property determines which field (column) is assigned
352             the pivoted value. The above table would generate eight data records
353             for the `sizes` table:
354              
355             size_code='1', class='A', size_desc='1-2'
356             size_code='2', class='A', size_desc='3-4'
357             size_code='3', class='A', size_desc='5-6'
358             size_code='1', class='B', size_desc='22-26'
359             size_code='2', class='B', size_desc='26-30'
360             size_code='3', class='B', size_desc='30-34'
361             size_code='2', class='C', size_desc='XS'
362             size_code='3', class='C', size_desc='S'
363              
364             Note that no record is created for class 'C' with size_code '1' as
365             that entry in the pivot table is blank.
366              
367             =head1 ATTRIBUTES
368              
369             L<Org::ReadTables> implements the following attributes.
370              
371             =head2 inserted
372              
373             Returns a hashref, each element's key being the name of a table into
374             which rows were inserted, and its value being the number of rows
375             inserted into that table.
376              
377             =head2 errors
378              
379             Returns a reference to an array, each entry in which itself be an
380             array whose values are:
381              
382             =over 4
383              
384             =item
385             a hash (the column names and values to be inserted), and
386              
387             =item
388             the resulting error report from that insertion
389              
390             =back
391              
392             =head1 METHODS
393              
394             =head2 new
395              
396             Creates a new Org::ReadTables object. Parameters include:
397              
398             =head3 dbh
399              
400             should be an open database handle from, e.g., L<DBD::SQLite>,
401             L<Mojo::SQLite> or L<Mojo::Pg>. Each row to be saved will invoke the
402             'insert' method of this handle (or, more generally, class instance);
403             no other methods will be called, so any object that has provides
404             'insert' may be used. For each found record, the insert() method of
405             this object will be called. Note that no protection is given here
406             against invalid column names or other database errors.
407              
408             =head3 cb
409              
410             Reference to a callback function to be called for each found record,
411             in tables which are processed. Parameters passed are the name of the
412             table, and a reference to a hash of the record's column-names and
413             values. The function should return the count of records successfully
414             saved (either 0 or 1, usually).
415              
416             =head3 cb_table
417              
418             Reference to a callback function to be called at the start of
419             processing of a new table, as they are found in the orgfile.
420             The callback will be passed one argument, a hash with keys:
421              
422             =over
423              
424             =item *
425              
426             name: A string with the name of the table
427              
428             =item *
429              
430             nameref: A reference to the table-name string. This may be changed by
431             the callback.
432              
433             =item *
434              
435             columns: A reference to the array of the names of the columns in the
436             table. The contents of the referred array may be maniuplated to match
437             the actual database field names, for example.
438              
439             =item *
440              
441             fixed: A reference to a hash of fixed column key/values. This may also
442             be changed by the callback.
443              
444             =item *
445              
446             caption: The caption, if any, attached above the table itself
447              
448             =item *
449              
450             data_column: The name of the data column in a pivot table
451              
452             =back
453              
454             ...and should return a true value if the table is to be processed or
455             saved, or a false or 'undef' value to skip the table (the 'cb'
456             callback will not be called for rows in such tables).
457              
458             =head3 table
459              
460             (optional) the default table name, which will be used for all unnamed
461             tables. Use an Orgmode property C<NAME> before each table to name it,
462             as:
463              
464             #+NAME: PostalAbbrev
465             | Code | State |
466             |------+---------|
467             | AZ | Arizona |
468             | FL | Florida |
469             | KS | Kansas |
470              
471             =head3 tables
472              
473             (optional) a reference to a list of valid table names to process
474             (others will be ignored). If C<tables> is not given, C<table> should
475             be present; otherwise for input not containing an Orgmode C<NAME>
476             property, no processing will occur.
477              
478             =head2 parse
479              
480             $op->parse($text, [$default_table]);
481              
482             Parses the given text which should be in Org Mode format. It is the
483             caller's responsibility to slurp a file or other data source. The
484             optional second parameter will be used as the default name of any
485             table not having an Orgmode C<NAME> property, overriding any C<table>
486             value provided via the C<new> method.
487              
488             =head2 saved
489              
490             $op->saved();
491             $op->saved->($selected_table);
492              
493             Returns a hash of the tables read; each key is a table name, the value
494             being an array of hashes of the rows. If a table name is passed,
495             returns the array of hashes only for that particular table, or undef
496             if no such table existed in any input.
497              
498             =head2 inserted
499              
500             $op->inserted();
501             $op->inserted->($selected_table);
502              
503             With no parameter, returns a hash of the tables processed and a count
504             of rows found (and presumably inserted) in each. With a parameter,
505             returns the count of rows for that table, or undef if no such table
506             was processed.
507              
508             =head1 BUGS
509              
510             Report any issues to the author.
511              
512             =head1 AUTHOR
513              
514             William Lindley, C<wlindley@cpan.org>
515              
516             =head1 COPYRIGHT AND LICENSE
517              
518             Copyright 2025, William Lindley.
519              
520             This library is free software; you may redistribute it and/or modify it under
521             the terms of the Artistic License version 2.0.
522              
523             =head1 SEE ALSO
524              
525             L<Org::Parser>, L<https://orgmode.org/>
526              
527             =cut