File Coverage

blib/lib/App/Basis/ConvertText2.pm

Criterion	Covered	Total	%
statement	34	36	94.4
branch			n/a
condition			n/a
subroutine	12	12	100.0
pod			n/a
total	46	48	95.8

line	stmt	sub	time	code
1
2				=head1 NAME
3
4				App::Basis::ConvertText2
5
6				=head1 SYNOPSIS
7
8				TO be used in conjuction with the supplied ct2 script, which is part of this distribution.
9				Not really to be used on its own.
10
11				=head1 DESCRIPTION
12
13				This is a perl module and a script that makes use of %TITLE%
14
15				This is a wrapper for [pandoc] implementing extra fenced code-blocks to allow the
16				creation of charts and graphs etc.
17				Documents may be created a variety of formats. If you want to create nice PDFs
18				then it can use [PrinceXML] to generate great looking PDFs or you can use [wkhtmltopdf] to create PDFs that are almost as good, the default is to use pandoc which, for me, does not work as well.
19
20				HTML templates can also be used to control the layout of your documents.
21
22				The fenced code block handlers are implemented as plugins and it is a simple process to add new ones.
23
24				There are plugins to handle
25
26				* ditaa
27				* mscgen
28				* graphviz
29				* uml
30				* gnuplot
31				* gle
32				* sparklines
33				* charts
34				* barcodes and qrcodes
35				* and many others
36
37				See
38				https://github.com/27escape/App-Basis-ConvertText2/blob/master/README.md
39				for more information.
40
41				=head1 Todo
42
43				Consider adding plugins for
44
45				* http://blockdiag.com/en/index.html,
46				* https://metacpan.org/pod/Chart::Strip
47				* https://metacpan.org/pod/Chart::Clicker
48
49				=head1 Public methods
50
51				=over 4
52
53				=cut
54
55				# ----------------------------------------------------------------------------
56
57				package App::Basis::ConvertText2;
58				$App::Basis::ConvertText2::VERSION = '0.4';
59	1	1	121299	use 5.10.0;
	1		4
	1		83
60	1	1	8	use strict;
	1		1
	1		42
61	1	1	4	use warnings;
	1		8
	1		44
62	1	1	5	use feature 'state';
	1		2
	1		130
63	1	1	5902	use Moo;
	1		34280
	1		8
64	1	1	3587	use Data::Printer;
	1		1624929
	1		7
65	1	1	1085	use Try::Tiny;
	1		1378
	1		54
66	1	1	7	use Path::Tiny;
	1		2
	1		39
67	1	1	5	use Digest::MD5 qw(md5_hex);
	1		1
	1		42
68	1	1	48551	use Encode qw(encode_utf8);
	1		26288
	1		125
69	1	1	1916	use Text::Markdown qw(markdown);
	1		56975
	1		169
70	1	1	528	use GD;
	0
	0
71				use MIME::Base64;
72				use Furl;
73				use Module::Pluggable
74				require => 1,
75				on_require_error => sub {
76				my ( $plugin, $err ) = @_;
77				warn "$plugin, $err";
78				};
79				use App::Basis;
80				use App::Basis::ConvertText2::Support;
81
82				# ----------------------------------------------------------------------------
83				# this contents string is to be replaced with the body of the markdown file
84				# when it has been converted
85				use constant CONTENTS => '_CONTENTS_';
86				use constant PANDOC => 'pandoc';
87				use constant PRINCE => 'prince';
88				use constant WKHTML => 'wkhtmltopdf';
89
90				my %valid_tags;
91
92				# ----------------------------------------------------------------------------
93				my $TITLE = "%TITLE%";
94
95				# ----------------------------------------------------------------------------
96
97				has 'name' => ( is => 'ro', );
98				has 'basedir' => ( is => 'ro', );
99
100				has 'use_cache' => ( is => 'rw', default => sub { 0; } );
101
102				has 'cache_dir' => (
103				is => 'ro',
104				default => sub {
105				my $self = shift;
106				return "/tmp/" . get_program() . "/cache/";
107				},
108				writer => "_set_cache_dir"
109				);
110
111				has 'template' => (
112				is => 'rw',
113				default => sub {
114				"
115
116
117				$TITLE
118
121
122
123				%TITLE%
124
125				%_CONTENTS_%
126
127				\n";
128				},
129				);
130
131				has 'replace' => (
132				is => 'ro',
133				default => sub { {} },
134				);
135
136				has 'verbose' => (
137				is => 'ro',
138				default => sub {0},
139				);
140
141				has '_output' => (
142				is => 'ro',
143				default => sub {""},
144				init_arg => 0
145				);
146
147				has '_input' => (
148				is => 'ro',
149				writer => '_set_input',
150				default => sub {""},
151				init_arg => 0
152				);
153
154				has '_md5id' => (
155				is => 'ro',
156				writer => '_set_md5id',
157				default => sub {""},
158				init_arg => 0
159				);
160
161				has 'embed' => (
162				is => 'ro',
163				default => sub {0},
164				);
165
166				# ----------------------------------------------------------------------------
167
168				=item new
169
170				Create a new instance of a of a data formating object
171
172				B passed in a HASH
173				name - name of this formatting action - required
174				basedir - root directory of document being processed
175				cache_dir - place to store cache files - optional
176				use_cache - decide if you want to use a cache or not
177				template - HTML template to use, must contain %_CONTENTS_%
178				replace - hashref of extra keywords to use as replaceable variables
179				verbose - be verbose
180				embed - embed images, do not create links to them
181
182				=cut
183
184				sub BUILD {
185				my $self = shift;
186
187				die "No name provided" if ( !$self->name() );
188
189				if ( $self->use_cache() ) {
190
191				# need to add the name to the cache dirname to make it distinct
192				$self->_set_cache_dir( fix_filename( $self->cache_dir() . "/" . $self->name() ) );
193
194				if ( !-d $self->cache_dir() ) {
195
196				# create the cache dir if needed
197				try {
198				path( $self->cache_dir() )->mkpath;
199				}
200				catch {};
201				die "Could not create cache dir " . $self->cache_dir() if ( !-d $self->cache_dir() );
202				}
203				}
204
205				# work out what plugins do what
206				foreach my $plug ( $self->plugins() ) {
207				my $obj = $plug->new();
208				if ( !$obj ) {
209				warn "Plugin $plug does not instantiate";
210				next;
211				}
212
213				# the process method does the work for all the tag handlers
214				if ( !$obj->can('process') ) {
215				warn "Plugin $plug does not provide a process method";
216				next;
217				}
218				foreach my $h ( @{ $obj->handles } ) {
219				$h = lc($h);
220				if ( $h eq 'buffer' ) {
221				die "Plugin $plug cannot provide a handler for $h, as this is already provided for internally";
222				}
223				if ( $valid_tags{$h} ) {
224				die "Plugin $plug cannot provide a handler for $h, as this is already provided by $valid_tags{ $h }";
225				}
226
227				# all handlers are lower case
228				$valid_tags{$h} = $obj;
229				}
230				}
231
232				# buffer is a special internal handler
233				$valid_tags{buffer} = 1;
234				}
235
236				# ----------------------------------------------------------------------------
237
238				sub _append_output {
239				my $self = shift;
240				my $str = shift;
241
242				$self->{output} .= $str if ($str);
243				}
244
245				# ----------------------------------------------------------------------------
246				# store a file to the cache
247				# if the contents are empty then any existing cache file will be removed
248				sub _store_cache {
249				my $self = shift;
250				my ( $filename, $contents ) = @_;
251
252				# don't do any cleanup if we are not using a cache
253				return if ( !$self->use_cache() );
254
255				# for some reason sometimes the full cache dir is not created or
256				# something deletes part of it, cannot figure it out
257				path( $self->cache_dir() )->mkpath if ( !-d $self->cache_dir() );
258
259				# make sure we are working in the right dir
260				my $f = $self->cache_dir() . "/" . path($filename)->basename;
261
262				if ( !$contents && -f $f ) {
263				unlink($f);
264				}
265				else {
266				path($f)->spew_raw($contents);
267				}
268				}
269
270				# ----------------------------------------------------------------------------
271				# get a file from the cache
272				sub _get_cache {
273				my $self = shift;
274				my ($filename) = @_;
275
276				# don't do any cleanup if we are not using a cache
277				return if ( !$self->use_cache() );
278
279				# make sure we are working in the right dir
280				my $f = $self->cache_dir() . "/" . path($filename)->basename;
281
282				my $result;
283				$result = path($f)->slurp_raw if ( -f $f );
284
285				return $result;
286				}
287
288				# ----------------------------------------------------------------------------
289
290				=item clean_cache
291
292				Remove all files from the cache
293
294				=cut
295
296				sub clean_cache {
297				my $self = shift;
298
299				# don't do any cleanup if we are not using a cache
300				return if ( !$self->use_cache() );
301
302				try { path( $self->cache_dir() )->remove_tree } catch {};
303
304				# and make it fresh again
305				path( $self->cache_dir() )->mkpath();
306				}
307
308				# ----------------------------------------------------------------------------
309				# _extract_args
310				sub _extract_args {
311				my $buf = shift;
312				my ( %attr, $eaten );
313				return \%attr if ( !$buf );
314
315				while ( $buf =~ s\|^\s?(([a-zA-Z][a-zA-Z0-9\.\-_])\s)\|\| ) {
316				$eaten .= $1;
317				my $attr = lc $2;
318				my $val;
319
320				# The attribute might take an optional value (first we
321				# check for an unquoted value)
322				if ( $buf =~ s\|(^=\s([^\"\'>\s][^>\s])\s*)\|\| ) {
323				$eaten .= $1;
324				$val = $2;
325
326				# or quoted by " or '
327				}
328				elsif ( $buf =~ s\|(^=\s([\"\'])(.?)\2\s*)\|\|s ) {
329				$eaten .= $1;
330				$val = $3;
331
332				# truncated just after the '=' or inside the attribute
333				}
334				elsif ($buf =~ m\|^(=\s*)$\|
335				or $buf =~ m\|^(=\s[\"\'].)\|s )
336				{
337				$buf = "$eaten$1";
338				last;
339				}
340				else {
341				# assume attribute with implicit value
342				$val = $attr;
343				}
344				$attr{$attr} = $val;
345				}
346
347				return \%attr;
348				}
349
350				# ----------------------------------------------------------------------------
351				# add into the replacements list
352				sub _add_replace {
353				my $self = shift;
354				my ( $key, $val ) = @_;
355
356				$self->{replace}->{ uc($key) } = $val;
357				}
358
359				# ----------------------------------------------------------------------------
360				sub _do_replacements {
361				my $self = shift;
362				my ($content) = @_;
363
364				foreach my $k ( keys %{ $self->replace() } ) {
365				next if ( !$self->{replace}->{$k} );
366
367				# in the text the variables to be replaced are surrounded by %
368				# zero width look behind to make sure the variable name has
369				# not been escaped _%VARIABLE% should be left alone
370				$content =~ s/(?{replace}->{$k}/gsm;
371				}
372
373				return $content;
374				}
375
376				# ----------------------------------------------------------------------------
377				sub _call_function {
378				my $self = shift;
379				my ( $block, $params, $content, $linepos ) = @_;
380				my $out;
381
382				if ( !$valid_tags{$block} ) {
383				debug( "ERROR:", "no valid handler for $block" );
384				}
385				else {
386				try {
387
388				# buffer is a special construct to allow us to hold output of content
389				# for later, allows multiple use of content or adding things to
390				# markdown tables that otherwise we could not do
391
392				# over-ride content with buffered content
393				my $from = $params->{from} \|\| $params->{from_buffer};
394				if ($from) {
395				$content = $self->{replace}->{ uc($from) };
396				}
397
398				my $to = $params->{to} \|\| $params->{to_buffer};
399
400				if ( $block eq 'buffer' ) {
401				if ($to) {
402				$self->_add_replace( $to, $content );
403				}
404				}
405				else {
406				# do any replacements we know about in the content block
407				$content = $self->_do_replacements($content);
408
409				# run the plugin with the data we have
410				$out = $valid_tags{$block}->process( $block, $content, $params, $self->cache_dir() );
411
412				if ( !$out ) {
413
414				# if we could not generate any output, lets put the block back together
415				$out .= "~~~~{.$block " . join( " ", map {"$_='$params->{$_}'"} keys %{$params} ) . " }\n" . "~~~~\n";
416				}
417				elsif ($to) {
418
419				# do we want to buffer the output?
420				$self->_add_replace( $to, $out );
421
422				# option not to show the output
423				$out = "" if ( $params->{no_output} );
424				}
425				}
426				$self->_append_output("$out\n") if ( defined $out );
427				}
428				catch {
429				debug( "ERROR", "failed processing $block near line $linepos, $_" );
430				warn "Issue processing $block around line $linepos";
431				$out = "~~~~{.$block " . join( " ", map {"$_='$params->{$_}'"} keys %{$params} ) . " }\n" . "~~~~\n";
432				$self->_append_output($out);
433				};
434				}
435				}
436
437				# ----------------------------------------------------------------------------
438				### _parse_lines
439				# parse the passed data
440				sub _parse_lines {
441				my $self = shift;
442				my $lines = shift;
443				my $count = 0;
444
445				return if ( !$lines );
446
447				my ( $class, $block, $content, $attributes );
448				my ( $buildline, $simple );
449				try {
450				foreach my $line ( @{$lines} ) {
451				$count++;
452
453				# header lines may have been removed
454				next if ( !defined $line );
455
456				if ( defined $simple ) {
457				if ( $line =~ /^~{4,}\s?$/ ) {
458				$self->_append_output("~~~~\n$simple\n~~~~\n");
459				$simple = undef;
460				}
461				else {
462				$simple .= "$line\n";
463				}
464
465				next;
466				}
467
468				# we may need to add successive lines together to get a completed fenced code block
469				if ( !$block && $buildline ) {
470				$buildline .= " $line";
471				if ( $line =~ /\}\s*$/ ) {
472				$line = $buildline;
473
474				# make sure to clear the builder
475				$buildline = undef;
476				}
477				else {
478				# continue to build the line
479				next;
480				}
481				}
482
483				# a simple block does not have an identifying {.tag}
484				if ( $line =~ /^~{4,}\s?$/ && !$block ) {
485				$simple = "";
486				next;
487				}
488
489				if ( $line =~ /^~{4,}/ ) {
490
491				# does the fenced line wrap before its ended
492				if ( !$block && $line !~ /\}\s*$/ ) {
493
494				# we need to start adding lines till its completed
495				$buildline = $line;
496				next;
497				}
498
499				if ( $line =~ /\{(.?)\.(\w+)\s(.?)\}\s$/ ) {
500				$class = $1;
501				$block = lc($2);
502				$attributes = $3;
503				}
504				elsif ( $line =~ /\{\.(\w+)\s?\}\s*$/ ) {
505				$block = lc($1);
506				$attributes = {};
507				}
508				else {
509				my $params = _extract_args($attributes);
510
511				# must have reached the end of a block
512				if ( $block && $valid_tags{$block} ) {
513				chomp $content;
514				$self->_call_function( $block, $params, $content, $count );
515				}
516				else {
517				if ( !$block ) {
518
519				# put it back
520				$content \|\|= "";
521				$self->_append_output("~~~~\n$content\n~~~~\n");
522
523				}
524				else {
525				$content \|\|= "";
526				$attributes \|\|= "";
527				$block \|\|= "";
528
529				# put it back
530				$self->_append_output("~~~~{ $class .$block $attributes}\n$content\n~~~~\n");
531
532				}
533				}
534				$content = "";
535				$attributes = "";
536				$block = "";
537				}
538				}
539				else {
540				if ($block) {
541				$content .= "$line\n";
542				}
543				else {
544				$self->_append_output("$line\n");
545				}
546				}
547				}
548				}
549				catch {
550				die "Issue at line $count $_";
551				};
552				}
553
554				# ----------------------------------------------------------------------------
555				# fetch any img references and copy into the cache, if the image is already
556				# in the cache then nothing will happen, will rewrite other img uri's
557				sub _rewrite_imgsrc {
558				my $self = shift;
559				my ( $pre, $img, $post, $want_size ) = @_;
560				my $ext;
561				if ( $img =~ /\.(\w+)$/ ) {
562				$ext = $1;
563				}
564
565				# if its an image we have generated then it may already be here
566				# check to see if we have this in the cache
567				my $cachefile = cachefile( $self->cache_dir, $img );
568				if ( !-f $cachefile ) {
569				my $id = md5_hex($img);
570				$id .= ".$ext";
571
572				# this is what it will be named in the cache
573				$cachefile = cachefile( $self->cache_dir, $id );
574
575				# not in the cache so we must fetch it and store it local to the cache
576				# if we are a local file
577				if ( $img !~ m\|^\w+://\| \|\| $img =~ m\|^file://\| ) {
578				$img =~ s\|^file://\|\|;
579				$img = fix_filename($img);
580
581				if ( $img !~ m\|/\| ) {
582
583				# if file is relative, then we need to add the basedir
584				$img = $self->basedir . "/$img";
585				}
586
587				# copy it to the cache location
588				try {
589				path($img)->copy($cachefile);
590				}
591				catch {
592				debug( "ERROR", "failed to copy $img to $cachefile" );
593				};
594
595				$img = $cachefile if ( -f $cachefile );
596				}
597				else {
598				if ( $img =~ m\|^(\w+)://(.*)\| ) {
599
600				my $furl = Furl->new(
601				agent => get_program(),
602				timeout => 0.2,
603				);
604
605				my $res = $furl->get($img);
606				if ( $res->is_success ) {
607				path($cachefile)->spew_raw( $res->content );
608				$img = $cachefile;
609				}
610				else {
611				debug( "ERROR", "unknown could not fetch $img" );
612				}
613				}
614				else {
615				debug( "ERROR", "unknown protocol for $img" );
616				}
617				}
618				}
619				else {
620				$img = $cachefile;
621				}
622
623				# make sure we add the image size if its not already there
624				if ( $want_size && $pre !~ /width=\|height=/i && $post !~ /width=\|height=/i ) {
625				my $image = GD::Image->new($img);
626				if ($image) {
627				$post =~ s/\/>$//;
628				$post .= " height='" . $image->height() . "' width='" . $image->width() . "' />";
629				}
630				}
631
632				# do we need to embed the images, if we do this then libreoffice may be pants
633				# however 'prince' is happy
634				if ( $self->embed() ) {
635
636				# we encode the image as base64 so that the HTML document can be moved with all images
637				# intact
638				my $base64 = MIME::Base64::encode( path($img)->slurp_raw );
639				$img = "data:image/$ext;base64,$base64";
640				}
641				return $pre . $img . $post;
642				}
643
644				# ----------------------------------------------------------------------------
645				# grab all the h2/h3 elements and make them toc items
646
647				sub _build_toc {
648				my $html = shift;
649
650				my @items = ( $html =~ m\|(.*?)\|gsm );
651
652				my $toc = " Contents \n \n";
653				for ( my $i = 0; $i < scalar(@items); $i += 2 ) {
654				my $ref = $items[$i];
655
656				my $h = $items[ $i + 1 ];
657
658				# remove any href inside the header title
659				$h =~ s/<\/?a.*?>//g;
660
661				if ( $h =~ /^\d+\./ ) {
662				$h = " $h";
663				}
664
665				# make sure reference is in lower case
666				$toc .= " $h \n";
667				}
668
669				$toc .= "\n";
670
671				return $toc;
672				}
673
674				# ----------------------------------------------------------------------------
675				# rewrite the headers so that they are nice for the TOC
676				sub _rewrite_hdrs {
677				state $counters = { 2 => 0, 3 => 0, 4 => 0 };
678				state $last_lvl = 0;
679				my ( $head, $txt, $tail ) = @_;
680				my $pre;
681
682				my ($lvl) = ( $head =~ /
683				my $ref = $txt;
684
685				if ( $lvl < $last_lvl ) {
686				debug( "ERROR", "something odd happening in _rewrite_hdrs" );
687				}
688				elsif ( $lvl > $last_lvl ) {
689
690				# if we are stepping back up a level then we need to reset the counter below
691				if ( $lvl == 3 ) {
692				$counters->{4} = 0;
693				}
694				elsif ( $lvl == 2 ) {
695				$counters->{3} = 0;
696				$counters->{4} = 0;
697				}
698
699				}
700				$counters->{$lvl}++;
701
702				if ( $lvl == 2 ) { $pre = "$counters->{2}"; }
703				elsif ( $lvl == 3 ) { $pre = "$counters->{2}.$counters->{3}"; }
704				elsif ( $lvl == 4 ) { $pre = "$counters->{2}.$counters->{3}.$counters->{4}"; }
705
706				$ref =~ s/\s/_/gsm;
707
708				# remove things we don't like from the reference
709				$ref =~ s/[\s'"\[\]<>]//g;
710
711				my $out = "$head$pre $txt$tail";
712				return $out;
713				}
714
715				# ----------------------------------------------------------------------------
716				# use pandoc to parse markdown into nice HTML
717				# pandoc has extra features over and above markdown, eg syntax highlighting
718				# and tables
719				# pandoc must be in user path
720
721				sub _pandoc_html {
722				my $input = shift;
723
724				my $resp = execute_cmd(
725				command => PANDOC . " --email-obfuscation=none -S -R --normalize -t html5 --highlight-style='kate'",
726				timeout => 30,
727				child_stdin => $input
728				);
729
730				my $html;
731
732				debug( "Pandoc: " . $resp->{stderr} ) if ( $resp->{stderr} );
733				if ( !$resp->{exit_code} ) {
734				$html = $resp->{stdout};
735				}
736				else {
737				debug( "ERROR", "Could not parse with pandoc, using markdown" );
738				warn "Could not parse with pandoc, using markdown";
739				$html = markdown($input);
740				}
741
742				return $html;
743				}
744
745				# ----------------------------------------------------------------------------
746				# use pandoc to convert HTML into another format
747				# pandoc must be in user path
748
749				sub _pandoc_format {
750				my ( $input, $output ) = @_;
751				my $status = 1;
752
753				my $resp = execute_cmd(
754
755				command => PANDOC . " $input -o $output",
756				timeout => 30,
757				);
758
759				debug( "Pandoc: " . $resp->{stderr} ) if ( $resp->{stderr} );
760				if ( !$resp->{exit_code} ) {
761				$status = 0;
762				}
763				else {
764				debug( "ERROR", "Could not parse with pandoc" );
765				$status = 1;
766				}
767
768				return $status;
769				}
770
771				# ----------------------------------------------------------------------------
772				# convert_file
773				# convert the file to a different format from HTML
774				# parameters
775				# file - file to re-convert
776				# format - format to convert to
777				# pdfconvertor - use prince/wkhtmltopdf rather than pandoc to convert to PDF
778
779				sub _convert_file {
780				my $self = shift ;
781				my ( $file, $format, $pdfconvertor ) = @_;
782
783				# we work on the is that pandoc should be in your PATH
784				my $fmt_str = $format;
785				my ( $outfile, $exit );
786
787				$outfile = $file;
788				$outfile =~ s/\.(\w+)$/.pdf/;
789
790				# we can use prince to do PDF conversion, its faster and better, but not free for commercial use
791				# you would have to ignore the P symbol on the resultant document
792				if ( $format =~ /pdf/i && $pdfconvertor ) {
793				my $cmd;
794
795				if ( $pdfconvertor =~ /^prince/i ) {
796				$cmd = PRINCE . " " ;
797				$cmd.= "--pdf-title='$self->{replace}->{TITLE}' " if ($self->{replace}->{TITLE}) ;
798				$cmd.= "--pdf-subject='$self->{replace}->{SUBJECT}' " if ($self->{replace}->{SUBJECT}) ;
799				$cmd.= "--pdf-creator='$self->{replace}->{AUTHOR}' " if ($self->{replace}->{AUTHOR}) ;
800				$cmd.= "--pdf-keywords='$self->{replace}->{KEYWORDS}' " if ($self->{replace}->{KEYWORDS}) ;
801				$cmd .= " --media=print $file -o $outfile";
802				}
803				elsif ( $pdfconvertor =~ /^wkhtmltopdf/i ) {
804				$cmd = WKHTML . " -q --print-media-type " ;
805				$cmd.= "--title '$self->{replace}->{TITLE}' " if ($self->{replace}->{TITLE}) ;
806				# do we want to specify the size
807				$cmd .= "--page-size $self->{replace}->{PAGE_SIZE} " if( $self->{replace}->{PAGE_SIZE}) ;
808				$cmd .= "$file $outfile";
809				}
810				else {
811				warn "Unknown PDF converter ($pdfconvertor), using pandoc";
812
813				# otherwise lets use pandoc to create the file in the other formats
814				$exit = _pandoc_format( $file, $outfile );
815				}
816				if ($cmd) {
817				my ( $out, $err );
818				try {
819				# say "$cmd" ;
820				( $exit, $out, $err ) = run_cmd($cmd);
821				}
822				catch {
823				$err = "run_cmd($cmd) died - $_";
824				$exit = 1;
825				};
826
827				debug( "ERROR", $err ) if ($err); # only debug if return code is not 0
828				}
829				}
830				else {
831				# otherwise lets use pandoc to create the file in the other formats
832				$exit = _pandoc_format( $file, $outfile );
833				}
834
835				# if we failed to convert, then clear the filename
836				return $exit == 0 ? $outfile : undef;
837				}
838
839				# ----------------------------------------------------------------------------
840
841				=item parse
842
843				parse the markup into HTML and return it, HTML is also stored internally
844
845				B
846				markdown text
847
848				=cut
849
850				sub parse {
851				my $self = shift;
852				my ($data) = @_;
853
854				die "Nothing to parse" if ( !$data );
855
856				my $id = md5_hex( encode_utf8($data) );
857
858				# my $id = md5_hex( $data );
859				$self->_set_md5id($id);
860				$self->_set_input($data);
861
862				my $cachefile = cachefile( $self->cache_dir, "$id.html" );
863				if ( -f $cachefile ) {
864				my $cache = path($cachefile)->slurp_utf8;
865				$self->{output} = $cache; # put cached item into output
866				}
867				else {
868				$self->{output} = ""; # blank the output
869
870				my @lines = split( /\n/, $data );
871
872				# process top 20 lines for keywords
873				# maybe replace this with some YAML processor?
874				for ( my $i = 0; $i < 20; $i++ ) {
875				## if there is no keyword separator then we must have done the keywords
876				last if ( $lines[$i] !~ /:/ );
877
878				# allow keywords to be :keyword or keyword:
879				my ( $k, $v ) = ( $lines[$i] =~ /^:?(\w+):?\s+(.*?)\s?$/ );
880				next if ( !$k );
881
882				$self->_add_replace( $k, $v );
883				$lines[$i] = undef; # essentially remove the line
884				}
885
886				# parse the data find all fenced blocks we can handle
887				$self->_parse_lines( \@lines );
888
889				# store the markdown before parsing
890				$self->_store_cache( $self->cache_dir() . "/$id.md", encode_utf8( $self->{output} ) );
891
892				# fixup any markdown simple tables \| ------ \| -> \|---------\|
893
894				# my @tmp = split( /\n/, $self->{_output} );
895				# my $done = 0;
896				# for ( my $i = 0; $i < scalar @tmp; $i++ ) {
897				# if ( $tmp[$i] =~ /^\\|[\s\\|\-\+]+$/ ) {
898				# $tmp[$i] =~ s/\s/-/g;
899				# $done++;
900				# }
901				# }
902				# $self->{_output} = join( "\n", @tmp ) if ($done);
903
904				# we have created something so we can cache it, if use_cache is off
905				# then this will not happen lower down
906				# now we convert the parsed output into HTML
907				my $pan = _pandoc_html( $self->{output} );
908
909				# add the converted markdown into the template
910				my $html = $self->template;
911				my $rep = "%" . CONTENTS . "%";
912				$html =~ s/$rep/$pan/gsm;
913
914				# if the user has not used :title, the we need to grab the title from the page so far
915				if ( !$self->{replace}->{TITLE} ) {
916				my (@h1) = ( $html =~ m\|(.*?)\|gsmi );
917
918				# find the first header that does not contain %TITLE%
919				# I failed to get the zero width look-behind wotking
920				# my ($h) = ( $html =~ m\|.*?(?\|gsmi );
921				foreach my $h (@h1) {
922				if ( $h !~ /%TITLE/ ) {
923				$self->{replace}->{TITLE} = $h;
924				last;
925				}
926				}
927				}
928
929				# do we need to add a table of contents
930				if ( $html =~ /%TOC%/ ) {
931				$html =~ s\|()(.*?)()\|_rewrite_hdrs( $1, $2, $3)\|egsi;
932				$self->{replace}->{TOC} = _build_toc($html);
933				}
934
935				# replace things we have saved
936				$html = $self->_do_replacements($html);
937
938				# and remove any uppercased %word% things that are not processed
939				$html =~ s/(?
940				$html =~ s/_(%.*?%)/$1/gsm;
941
942				# fetch any images and store to the cache, make sure they have sizes too
943				$html =~ s/()/$self->_rewrite_imgsrc( $1, $2, $3, 1)/egs;
944
945				# write any css url images and store to the cache
946				$html =~ s/(url\s$['"]?)(.?)(['"]?$)/$self->_rewrite_imgsrc( $1, $2, $3, 0)/egs;
947
948				# strip out any HTML comments that may have come in from template
949				$html =~ s///gsm;
950
951				$self->{output} = $html;
952				$self->_store_cache( $cachefile, $html );
953				}
954				return $self->{output};
955				}
956
957				# ----------------------------------------------------------------------------
958
959				=item save_to_file
960
961				save the created html to a named file
962
963				B
964				filename filename to store/convert stored HTML into
965				pdfconvertor indicate that we should use prince or wkhtmltopdf to create PDF
966
967				=cut
968
969				sub save_to_file {
970				state $counter = 0;
971				my $self = shift;
972				my ( $filename, $pdfconvertor ) = @_;
973				my ($format) = ( $filename =~ /\.(\w+)$/ ); # get last thing after a '.'
974				if ( !$format ) {
975				warn "Could not determine outpout file format, using PDF";
976				$format = '.pdf';
977				}
978
979				my $f = $self->_md5id() . ".html";
980
981				# have we got the parsed data
982				my $cf = cachefile( $self->cache_dir, $f );
983				if ( !$self->{output} ) {
984				die "parse has not been run yet";
985				}
986
987				if ( !-f $cf ) {
988				if ( !$self->use_cache() ) {
989
990				# create a file name to store the output to
991				$cf = "/tmp/" . get_program() . "$$." . $counter++;
992				}
993
994				# either update the cache, or create temp file
995				path($cf)->spew_utf8( encode_utf8( $self->{output} ) );
996				}
997
998				my $outfile = $cf;
999				$outfile =~ s/\.html$/.$format/i;
1000
1001				# if the marked-up file is more recent than the converted one
1002				# then we need to convert it again
1003				if ( $format !~ /html/i ) {
1004
1005				# as we can generate PDF using a number of convertors we should
1006				# always regenerate PDF output incase the convertor used is different
1007				if ( !-f $outfile \|\| $format =~ /pdf/i \|\| ( ( stat($cf) )[9] > ( stat($outfile) )[9] ) ) {
1008				$outfile = $self->_convert_file( $cf, $format, $pdfconvertor );
1009
1010				# if we failed to convert, then clear the filename
1011				if ( !$outfile \|\| !-f $outfile ) {
1012				$outfile = undef;
1013				debug( "ERROR", "failed to create output file from cached file $cf" );
1014				}
1015				}
1016				}
1017
1018				my $status = 0;
1019
1020				# now lets copy it to its final resting place
1021				if ($outfile) {
1022				try {
1023				$status = path($outfile)->copy($filename);
1024				}
1025				catch {
1026				say STDERR "$_ ";
1027				debug( "ERROR", "failed to copy $outfile to $filename" );
1028				};
1029				}
1030				return $status;
1031				}
1032
1033				=back
1034
1035				=cut
1036
1037				# ----------------------------------------------------------------------------
1038
1039				1;
1040
1041				__END__

File Coverage

%TITLE%