File Coverage

blib/lib/Org/Parser.pm
Criterion Covered Total %
statement 56 74 75.6
branch 23 34 67.6
condition 20 30 66.6
subroutine 6 6 100.0
pod 2 2 100.0
total 107 146 73.2


line stmt bran cond sub pod time code
1             package Org::Parser;
2              
3 24     24   1895647 use 5.014; # compilation failure in older perls, RT#141560
  24         103  
4 24     24   15544 use Moo;
  24         249835  
  24         193  
5              
6 24     24   59960 use Org::Document;
  24         110  
  24         1625  
7 24     24   274 use Scalar::Util qw(blessed);
  24         172  
  24         22494  
8              
9             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
10             our $DATE = '2023-11-06'; # DATE
11             our $DIST = 'Org-Parser'; # DIST
12             our $VERSION = '0.561'; # VERSION
13              
14             sub parse {
15 96     96 1 11576000 my ($self, $arg, $opts) = @_;
16 96 100       482 die "Please specify a defined argument to parse()\n" unless defined($arg);
17              
18 94   100     673 $opts //= {};
19              
20 94         176 my $str;
21 94         286 my $srclabel = $opts->{_srclabel};
22 94         244 my $r = ref($arg);
23 94 100 33     310 if (!$r) {
    100 66        
    100          
    100          
24 90         198 $str = $arg;
25 90   100     488 $srclabel //= "string";
26             } elsif ($r eq 'ARRAY') {
27 1         4 $str = join "", @$arg;
28 1   50     5 $srclabel //= "arrayref";
29             } elsif ($r eq 'GLOB' || blessed($arg) && $arg->isa('IO::Handle')) {
30 1         24 $str = join "", <$arg>;
31 1   50     6 $srclabel //= "filehandle";
32             } elsif ($r eq 'CODE') {
33 1         2 my @chunks;
34 1         4 while (defined(my $chunk = $arg->())) {
35 4         17 push @chunks, $chunk;
36             }
37 1         5 $str = join "", @chunks;
38 1   50     4 $srclabel //= "code";
39             } else {
40 1         5 die "Invalid argument, please supply a ".
41             "string|arrayref|coderef|filehandle\n";
42             }
43             Org::Document->new(
44             _srclabel=>$srclabel,
45             from_string=>$str,
46             time_zone=>$opts->{time_zone},
47             ignore_unknown_settings=>$opts->{ignore_unknown_settings},
48 93         3549 );
49             }
50              
51             sub parse_file {
52 13     13 1 6292139 require File::Slurper;
53 13         16163 my ($self, $filename, $opts) = @_;
54 13 100       64 $opts = {%$opts} if $opts; # shallow copy
55 13   100     176 $opts //= {};
56              
57 13         25 state $loaded;
58              
59 13         57 my $content = File::Slurper::read_text($filename);
60 13         2295 $opts->{_srclabel} = "file:$filename";
61              
62 13         43 my $cf = $opts->{cache_file}; # old option, new option is 'cache' (automatic setting of cache file)
63 13         32 my $doc;
64             my $cache; # undef = no caching; 0 = not cached, should cache; 1 = cached
65 13 50 33     240 if (!$cf && ($opts->{cache} // $ENV{PERL_ORG_PARSER_CACHE})) {
      66        
66 0         0 require Cwd;
67 0         0 require Digest::MD5;
68 0         0 my @dirs = ("$ENV{HOME}/.cache/perl-org-parser", $ENV{HOME});
69 0         0 my $dir;
70 0         0 for (@dirs) {
71 0 0       0 if (-d $_) { $dir = $_; last }
  0 0       0  
  0         0  
72 0         0 elsif (mkdir $_) { $dir = $_; last }
  0         0  
73             }
74 0 0       0 die "Can't find a suitable cache directory" unless $dir;
75 0 0       0 my $abs = Cwd::abs_path($filename) or die "Can't find $filename";
76 0         0 my $base = $abs; $base =~ s!.+/!!;
  0         0  
77 0         0 $cf = "$dir/$base.".Digest::MD5::md5_hex($abs).".storable";
78             }
79 13 100       75 if ($cf) {
80 3         26 require Storable;
81 3   66     108 $cache = !!((-e $cf) && (-M $cf) <= (-M $filename));
82 3 100       16 if ($cache) {
83 1         3 eval {
84 1         22 $doc = Storable::retrieve($cf);
85 1 50       244 $doc->load_element_modules unless $loaded++;
86 1         47 $doc->{_srclabel} = " (from cached file:$cf)";
87             };
88 1 50       6 if ($@) {
89 0         0 warn "Failed retrieving document from cache: $@, reparsing ...";
90 0         0 $cache = 0;
91             }
92             }
93             }
94              
95 13 100       96 $doc = $self->parse($content, $opts) unless $cache;
96 12 100 100     374 if (defined($cache) && !$cache) {
97 1         10 require Storable;
98 1         15 for ($doc->find('Timestamp')) {
99 0         0 $_->clear_parse_result;
100             }
101 1         9 Storable::store($doc, $cf);
102             }
103              
104 12         588 $doc;
105             }
106              
107             1;
108             # ABSTRACT: Parse Org documents
109              
110             __END__
111              
112             =pod
113              
114             =encoding UTF-8
115              
116             =head1 NAME
117              
118             Org::Parser - Parse Org documents
119              
120             =head1 VERSION
121              
122             This document describes version 0.561 of Org::Parser (from Perl distribution Org-Parser), released on 2023-11-06.
123              
124             =head1 SYNOPSIS
125              
126             use 5.010;
127             use Org::Parser;
128             my $orgp = Org::Parser->new();
129              
130             # parse a file
131             my $doc = $orgp->parse_file("$ENV{HOME}/todo.org");
132              
133             # parse a string
134             $doc = $orgp->parse(<<EOF);
135             #+TODO: TODO | DONE CANCELLED
136             <<<radio target>>>
137             * heading1a
138             ** TODO heading2a
139             SCHEDULED: <2011-03-31 Thu>
140             [[some][link]]
141             ** DONE heading2b
142             [2011-03-18 ]
143             this will become a link: radio target
144             * TODO heading1b *bold*
145             - some
146             - plain
147             - list
148             - [ ] with /checkbox/
149             * and
150             * sublist
151             * CANCELLED heading1c
152             + definition :: list
153             + another :: def
154             EOF
155              
156             # walk the document tree
157             $doc->walk(sub {
158             my ($el) = @_;
159             return unless $el->isa('Org::Element::Headline');
160             say "heading level ", $el->level, ": ", $el->title->as_string;
161             });
162              
163             will print something like:
164              
165             heading level 1: heading1a
166             heading level 2: heading2a
167             heading level 2: heading2b *bold*
168             heading level 1: heading1b
169             heading level 1: heading1c
170              
171             A command-line utility (in a separate distribution: L<App::OrgUtils>) is
172             available for debugging:
173              
174             % dump-org-structure ~/todo.org
175             Document:
176             Setting: "#+TODO: TODO | DONE CANCELLED\n"
177             RadioTarget: "<<<radio target>>>"
178             Text: "\n"
179             Headline: l=1
180             (title)
181             Text: "heading1a"
182             (children)
183             Headline: l=2 todo=TODO
184             (title)
185             Text: "heading2a"
186             (children)
187             Text: "SCHEDULED: "
188             ...
189              
190             =head1 DESCRIPTION
191              
192             This module parses Org documents. See http://orgmode.org/ for more details on
193             Org documents.
194              
195             See C<todo.org> in the distribution for the list of already- and not yet
196             implemented stuffs.
197              
198             =head1 ATTRIBUTES
199              
200             =head1 METHODS
201              
202             =head2 new()
203              
204             Create a new parser instance.
205              
206             =head2 $orgp->parse($str | $arrayref | $coderef | $filehandle, \%opts) => $doc
207              
208             Parse document (which can be contained in a scalar $str, an arrayref of lines
209             $arrayref, a subroutine which will be called for chunks until it returns undef,
210             or a filehandle).
211              
212             Returns L<Org::Document> object.
213              
214             If 'handler' attribute is specified, will call handler repeatedly during
215             parsing. See the 'handler' attribute for more details.
216              
217             Will die if there are syntax errors in documents.
218              
219             Known options:
220              
221             =over 4
222              
223             =item * time_zone => STR
224              
225             Will be passed to Org::Document's constructor.
226              
227             =back
228              
229             =head2 $orgp->parse_file($filename, \%opts) => $doc
230              
231             Just like parse(), but will load document from file instead.
232              
233             Known options (aside from those known by parse()):
234              
235             =over 4
236              
237             =item * cache => bool (default: from PERL_ORG_PARSER_CACHE, or 0)
238              
239             Since Org::Parser can spend some time to parse largish Org files, this is an
240             option to store the parse result (using L<Storable>). If caching is turned on,
241             then after the first parse, the result will be stored in:
242              
243             ~/.cache/perl-org-parser/<filename>.<md5-digest-of-file-absolute-path>.storable
244              
245             and subsequent calls to this function can directly use this cache, as long as
246             the cache is not stale.
247              
248             =back
249              
250             =head1 FAQ
251              
252             =head2 Why? Just as only perl can parse Perl, only org-mode can parse Org anyway!
253              
254             True. I'm only targetting good enough. As long as I can parse/process all my Org
255             notes and todo files, I have no complaints.
256              
257             =head2 It's too slow!
258              
259             Parser is completely regex-based at the moment (I plan to use L<Marpa> someday).
260             Performance is quite lousy but I'm not annoyed enough at the moment to overhaul
261             it.
262              
263             =head1 ENVIRONMENT
264              
265             =head2 PERL_ORG_PARSER_CACHE => bool
266              
267             Set default for C<cache> option in C<parse_file()>.
268              
269             =head1 HOMEPAGE
270              
271             Please visit the project's homepage at L<https://metacpan.org/release/Org-Parser>.
272              
273             =head1 SOURCE
274              
275             Source repository is at L<https://github.com/perlancar/perl-Org-Parser>.
276              
277             =head1 SEE ALSO
278              
279             L<Org::Document>
280              
281             =head1 AUTHOR
282              
283             perlancar <perlancar@cpan.org>
284              
285             =head1 CONTRIBUTORS
286              
287             =for stopwords Alex White Karl Williamson Steven Haryanto Tekki Trent Fisher William Lindley Wong Meng Weng
288              
289             =over 4
290              
291             =item *
292              
293             Alex White <VVu@geekfarm.org>
294              
295             =item *
296              
297             Karl Williamson <khw@cpan.org>
298              
299             =item *
300              
301             Steven Haryanto <stevenharyanto@gmail.com>
302              
303             =item *
304              
305             Tekki <tekki@tekki.ch>
306              
307             =item *
308              
309             Trent Fisher <trent@cs.pdx.edu>
310              
311             =item *
312              
313             William Lindley <wlindley@wlindley.com>
314              
315             =item *
316              
317             Wong Meng Weng <mengwong@pobox.com>
318              
319             =back
320              
321             =head1 CONTRIBUTING
322              
323              
324             To contribute, you can send patches by email/via RT, or send pull requests on
325             GitHub.
326              
327             Most of the time, you don't need to build the distribution yourself. You can
328             simply modify the code, then test via:
329              
330             % prove -l
331              
332             If you want to build the distribution (e.g. to try to install it locally on your
333             system), you can install L<Dist::Zilla>,
334             L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
335             L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
336             Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
337             that are considered a bug and can be reported to me.
338              
339             =head1 COPYRIGHT AND LICENSE
340              
341             This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2017, 2016, 2015, 2014, 2013, 2012, 2011 by perlancar <perlancar@cpan.org>.
342              
343             This is free software; you can redistribute it and/or modify it under
344             the same terms as the Perl 5 programming language system itself.
345              
346             =head1 BUGS
347              
348             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=Org-Parser>
349              
350             When submitting a bug or request, please include a test-file or a
351             patch to an existing test-file that illustrates the bug or desired
352             feature.
353              
354             =cut