blib/lib/Anki/Import.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 183 | 189 | 96.8 |
branch | 52 | 62 | 83.8 |
condition | 19 | 26 | 73.0 |
subroutine | 13 | 13 | 100.0 |
pod | 1 | 6 | 16.6 |
total | 268 | 296 | 90.5 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | package Anki::Import ; | ||||||
2 | $Anki::Import::VERSION = '0.030'; | ||||||
3 | 4 | 4 | 267057 | use strict; | |||
4 | 34 | ||||||
4 | 169 | ||||||
4 | 4 | 4 | 23 | use warnings; | |||
4 | 10 | ||||||
4 | 93 | ||||||
5 | 4 | 4 | 23 | use Cwd; | |||
4 | 7 | ||||||
4 | 257 | ||||||
6 | 4 | 4 | 2242 | use Getopt::Args; | |||
4 | 121110 | ||||||
4 | 31 | ||||||
7 | 4 | 4 | 2772 | use Log::Log4perl::Shortcuts 0.021 qw(:all); | |||
4 | 659238 | ||||||
4 | 761 | ||||||
8 | 4 | 4 | 40 | use Exporter qw(import); | |||
4 | 8 | ||||||
4 | 9030 | ||||||
9 | our @EXPORT = qw(anki_import); | ||||||
10 | |||||||
11 | # change log config to test for development for fine-tuned control over log output | ||||||
12 | set_log_config('anki-import.cfg'); | ||||||
13 | #set_log_config('test.cfg', __PACKAGE__); | ||||||
14 | |||||||
15 | # set up variables | ||||||
16 | my @lines; # lines from source file | ||||||
17 | my $line_count = 0; # count processed lines to give more helpful error msg | ||||||
18 | my $cline = ''; # current line getting processed | ||||||
19 | my $lline = ''; # last (previous) line processed | ||||||
20 | my $ntype = 'Basic'; # default note type | ||||||
21 | my @notes = (); # array for storing notes | ||||||
22 | my @autotags = (); # for storing automated tags | ||||||
23 | |||||||
24 | # argument processing | ||||||
25 | arg file => ( | ||||||
26 | isa => 'Str', | ||||||
27 | required => 1, | ||||||
28 | comment => 'the name of the source file' | ||||||
29 | ); | ||||||
30 | arg parent_dir => ( | ||||||
31 | isa => 'Str', | ||||||
32 | default => cwd, | ||||||
33 | comment => 'optional directory to save output files, defaults to current directory', | ||||||
34 | ); | ||||||
35 | opt quiet => ( | ||||||
36 | isa => 'Bool', | ||||||
37 | alias => 'q', | ||||||
38 | default => 1, | ||||||
39 | comment => 'On by default. Use --quiet to override this setting to suppress' | ||||||
40 | . ' the success message after a successful execution of the command.' | ||||||
41 | ); | ||||||
42 | opt verbose => ( | ||||||
43 | isa => 'Bool', | ||||||
44 | alias => 'v', | ||||||
45 | comment => 'provide details on progress of Anki::Import' | ||||||
46 | ); | ||||||
47 | opt vverbose => ( | ||||||
48 | isa => 'Bool', | ||||||
49 | alias => 'V', | ||||||
50 | comment => 'verbose information plus debug info' | ||||||
51 | ); | ||||||
52 | |||||||
53 | # start here | ||||||
54 | sub anki_import { | ||||||
55 | 9 | 9 | 1 | 17635 | my $args = optargs( @_ ); | ||
56 | |||||||
57 | 8 | 6475 | my $file = $args->{file}; | ||||
58 | 8 | 50 | 33 | if (!$file) { | |||
59 | 0 | 0 | logf('Aborting: No file passed to Anki::Import.'); | ||||
60 | } | ||||||
61 | |||||||
62 | # set parent directory | ||||||
63 | 8 | 42 | my $pd = $args->{parent_dir}; | ||||
64 | |||||||
65 | # set log level as appropriate | ||||||
66 | 8 | 50 | 35 | if ($args->{verbose}) { | |||
100 | |||||||
67 | 0 | 0 | set_log_level('info'); | ||||
68 | } elsif ($args->{vverbose}) { | ||||||
69 | 4 | 21 | set_log_level('debug'); | ||||
70 | } else { | ||||||
71 | 4 | 20 | set_log_level('error'); | ||||
72 | } | ||||||
73 | 8 | 92 | logi('Log level set'); | ||||
74 | |||||||
75 | # get and load the source file | ||||||
76 | 8 | 16347 | logi('Loading file'); | ||||
77 | 8 | 14517 | my $path = File::Spec->catfile($file); logd($path); | ||||
8 | 46 | ||||||
78 | 8 | 100 | 15218 | if (! -e $path) { | |||
79 | 1 | 11 | logf("Aborting: Source file named '$path' does not exist."); | ||||
80 | }; | ||||||
81 | 7 | 50 | 2 | 296 | open (my $handle, "<:encoding(UTF-8)", $path) or logf("Could not open $path");; | ||
2 | 17 | ||||||
2 | 3 | ||||||
2 | 14 | ||||||
82 | 7 | 2901 | chomp(@lines = <$handle>); | ||||
83 | 7 | 319 | close $handle; | ||||
84 | 7 | 45 | logi('Source file loaded.'); | ||||
85 | |||||||
86 | # pad data with a blank line to make it easier to process | ||||||
87 | 7 | 13809 | push @lines, ''; | ||||
88 | |||||||
89 | # do the stuff we came here for | ||||||
90 | 7 | 25 | validate_src_file(); logd(\@notes); | ||||
6 | 27 | ||||||
91 | 6 | 13719 | generate_importable_files($pd); | ||||
92 | |||||||
93 | # print a success message | ||||||
94 | 6 | 50 | 90 | unless ($args->{'quiet'}) { | |||
95 | 0 | 0 | set_log_level('info'); | ||||
96 | 0 | 0 | logi("Success! Your import files are in the $pd" | ||||
97 | . '/anki_import_files directory'); | ||||||
98 | } | ||||||
99 | |||||||
100 | # fin | ||||||
101 | } | ||||||
102 | |||||||
103 | # functions for first pass parsing of source data | ||||||
104 | sub validate_src_file { | ||||||
105 | 7 | 7 | 0 | 24 | logi('Validating source file'); | ||
106 | |||||||
107 | # throw error if file is empty | ||||||
108 | 7 | 50 | 14033 | logf('Source data file is empty.') if !$lines[0]; | |||
109 | |||||||
110 | # outer loop for parsing notes | ||||||
111 | 7 | 18 | my %fields; # keeps track of number of fields for each type of note | ||||
112 | 7 | 25 | while (next_line()) { | ||||
113 | |||||||
114 | # ignore blank lines | ||||||
115 | 32 | 50 | 222 | next if ($cline =~ /^$|^\s+$/); | |||
116 | |||||||
117 | 32 | 100 | 123 | if ($cline =~ /^#\s*(\S+)/) { | |||
118 | 9 | 34 | $ntype = $1; | ||||
119 | 9 | 35 | logi("Found note type"); | ||||
120 | 9 | 16759 | logd($ntype); | ||||
121 | 9 | 16870 | next; | ||||
122 | } | ||||||
123 | |||||||
124 | 23 | 79 | logi('Processing new note'); | ||||
125 | # get the note | ||||||
126 | 23 | 47506 | my $note = slurp_note(); | ||||
127 | 23 | 74 | logd($note); | ||||
128 | |||||||
129 | 23 | 48809 | logi('Checking number of note fields'); | ||||
130 | # validaate that notes of the same type have the same number of fields | ||||||
131 | 23 | 100 | 47741 | if (my $number_of_fields = $fields{$ntype}) { | |||
132 | 13 | 100 | 46 | if (scalar (@$note) != $number_of_fields) { | |||
133 | 1 | 3 | my $field_count = scalar(@$note); | ||||
134 | 1 | 20 | logf("A(n) $ntype note ending on line $line_count" | ||||
135 | . " has $field_count fields, a different amount than previous '$ntype' note types." | ||||||
136 | . " Notes of the same note type must have the same number of fields. One common reason" | ||||||
137 | . " for this error is that you did not indicate that you wanted to leave a field blank. To leave a field blank," | ||||||
138 | . " place a single '`' (backtick) on the line by itself in the source file. You may also" | ||||||
139 | . " have failed to separate notes with two or more blank lines." | ||||||
140 | . " Check your source file to ensure it is properly formatted.\n\n\tRefer to the" | ||||||
141 | . " Anki::Import documentation for more help with formatting your source file." | ||||||
142 | ); | ||||||
143 | } | ||||||
144 | } else { | ||||||
145 | 10 | 38 | $fields{$ntype} = scalar @$note; | ||||
146 | } | ||||||
147 | |||||||
148 | 22 | 67 | logi('Storing note'); | ||||
149 | 22 | 45534 | push @notes, {ntype => $ntype, note => $note}; | ||||
150 | } | ||||||
151 | |||||||
152 | } | ||||||
153 | |||||||
154 | sub slurp_note { | ||||||
155 | 23 | 23 | 0 | 50 | my @current_field; | ||
156 | my @note; | ||||||
157 | 23 | 48 | push @current_field, $cline; | ||||
158 | |||||||
159 | # loop over lines in the note | ||||||
160 | 23 | 47 | while (next_line()) { | ||||
161 | 143 | 448 | logd($cline, 'cline'); | ||||
162 | 143 | 100 | 305411 | if ($cline =~ /^$|^\s+$/) { | |||
163 | 76 | 230 | my @all_fields = @current_field; | ||||
164 | 76 | 100 | 211 | push (@note, \@all_fields) if @current_field; | |||
165 | 76 | 136 | @current_field = (); | ||||
166 | 76 | 100 | 425 | if ($lline =~ /^$|^\s+$/) { | |||
167 | 16 | 33 | last; | ||||
168 | } | ||||||
169 | } else { | ||||||
170 | 67 | 207 | push @current_field, $cline; | ||||
171 | } | ||||||
172 | } | ||||||
173 | 23 | 65 | return \@note; | ||||
174 | } | ||||||
175 | |||||||
176 | sub next_line { | ||||||
177 | 188 | 100 | 188 | 0 | 484 | return 0 if !@lines; # last line in file was made blank | |
178 | 175 | 277 | $lline = $cline; | ||||
179 | 175 | 100 | 536 | $cline = (shift @lines || ''); | |||
180 | |||||||
181 | # do some cleanup | ||||||
182 | 175 | 348 | chomp $cline; | ||||
183 | 175 | 404 | $cline =~ s/\t/ /g; # replace tabs with spaces | ||||
184 | |||||||
185 | 175 | 435 | ++$line_count; | ||||
186 | } | ||||||
187 | |||||||
188 | # functions for second pass parsing and formatting of source data | ||||||
189 | # and creation of import files | ||||||
190 | sub generate_importable_files { | ||||||
191 | 6 | 6 | 0 | 17 | my $pd = shift; logi('Generating files for import'); | ||
6 | 20 | ||||||
192 | |||||||
193 | 6 | 12177 | my %filenames; | ||||
194 | |||||||
195 | # loop over notes | ||||||
196 | 6 | 33 | foreach my $note (@notes) { | ||||
197 | 47 | 154 | logi('Looping over notes'); | ||||
198 | |||||||
199 | 47 | 108056 | my $line = process_note($note->{note}); | ||||
200 | |||||||
201 | # add our processed note to our data | ||||||
202 | 47 | 154 | my $filename = $note->{ntype} . '_notes_import.txt'; | ||||
203 | 47 | 208 | $filenames{$filename}{content} .= $line; | ||||
204 | } | ||||||
205 | |||||||
206 | 6 | 26 | logi('Writing notes out to file'); | ||||
207 | 6 | 14713 | foreach my $file ( keys %filenames ) { | ||||
208 | 10 | 198 | my $dir = File::Spec->catfile($pd, 'anki_import_files'); | ||||
209 | 10 | 33 | 517 | mkdir $dir || logf("Could not make directory: $dir, $!"); | |||
210 | 10 | 67 | logd($dir); | ||||
211 | 10 | 24782 | my $out_path = File::Spec->catfile($dir, $file); | ||||
212 | 10 | 50 | 720 | open (my $handle, ">>:encoding(UTF-8)", $out_path) or logf("Could not create file: $out_path"); | |||
213 | 10 | 967 | chomp $filenames{$file}{content}; | ||||
214 | 10 | 196 | print $handle $filenames{$file}{content}; | ||||
215 | 10 | 564 | close $handle; | ||||
216 | } | ||||||
217 | } | ||||||
218 | |||||||
219 | # the meat of the matter | ||||||
220 | # TODO: break up into shorter functions for readability | ||||||
221 | sub process_note { | ||||||
222 | 47 | 47 | 0 | 101 | my $note = shift; logd($note, 'note_2b_processed'); | ||
47 | 169 | ||||||
223 | |||||||
224 | 47 | 112295 | my @fields = (); | ||||
225 | 47 | 95 | my $new_autotags = 0; # flag raised if autotag line found | ||||
226 | |||||||
227 | # loop over note fields | ||||||
228 | 47 | 115 | foreach my $field (@$note) { | ||||
229 | 113 | 170 | my $ws_mode = 0; # tracks if we are preserving whitespace | ||||
230 | 113 | 175 | my $field_out = ''; | ||||
231 | |||||||
232 | # loop over lines in field and process accordingly | ||||||
233 | 113 | 225 | my @lines = (''); # can't take a reference to nothing | ||||
234 | 113 | 196 | foreach my $line (@$field) { | ||||
235 | 190 | 320 | my $last_line = \$lines[-1]; # just to make it easier to type | ||||
236 | |||||||
237 | # detect autotags | ||||||
238 | 190 | 600 | logd($line); | ||||
239 | 190 | 50 | 33 | 447539 | if ($line =~ /^\+\s*$/ && !$ws_mode) { | ||
240 | 0 | 0 | push @autotags, split (/\s+/, $$last_line); | ||||
241 | 0 | 0 | $new_autotags = 1; | ||||
242 | } | ||||||
243 | 190 | 100 | 66 | 552 | if ($line =~ /^\^\s*$/ && !$ws_mode) { | ||
244 | 2 | 16 | @autotags = split (/\s+/, $$last_line); | ||||
245 | 2 | 5 | $new_autotags = 1; | ||||
246 | 2 | 5 | next; | ||||
247 | } | ||||||
248 | |||||||
249 | # blanks lines not in non-whitespace mode | ||||||
250 | 188 | 100 | 100 | 621 | if ($line =~ /^`\s*$/ && !$ws_mode) { | ||
251 | 29 | 100 | 66 | 139 | if ($$last_line && $$last_line !~ /^ +$/) { |
||
252 | 9 | 33 | $$last_line .= ' '; |
||||
253 | } | ||||||
254 | 29 | 71 | next; | ||||
255 | } | ||||||
256 | |||||||
257 | # enter whitespace mode and adding appropriate HTML | ||||||
258 | 159 | 100 | 100 | 475 | if ($line =~ /^`{3,3}$/ && !$ws_mode) { | ||
259 | 11 | 26 | $ws_mode = 1; | ||||
260 | |||||||
261 | # add a couple of blank lines to previous line | ||||||
262 | 11 | 100 | 35 | if ($$last_line) { | |||
263 | 9 | 30 | $$last_line .= ' '; |
||||
264 | } | ||||||
265 | |||||||
266 | 11 | 28 | $$last_line .= ' '; |
||||
267 | 11 | 28 | next; | ||||
268 | } | ||||||
269 | |||||||
270 | # exit whitespace mode, close out HTML, add blank lines | ||||||
271 | 148 | 100 | 66 | 372 | if ($line =~ /^`{3,3}$/ && $ws_mode) { | ||
272 | 11 | 29 | $ws_mode = 0; | ||||
273 | 11 | 32 | $$last_line .= " "; |
||||
274 | 11 | 59 | next; | ||||
275 | } | ||||||
276 | |||||||
277 | # handle lines differently based on if we are preserving whitespace | ||||||
278 | 137 | 100 | 290 | if ($ws_mode) { | |||
279 | # escape characters in preserved text | ||||||
280 | 28 | 100 | 100 | if ($line =~ /^`\s*$/) { | |||
281 | 4 | 12 | $$last_line .= ' '; |
||||
282 | 4 | 39 | next; | ||||
283 | } | ||||||
284 | 24 | 107 | $line =~ s/(? | ||||
285 | 24 | 52 | $line =~ s/(? | ||||
286 | 24 | 47 | $line =~ s/(? | ||||
287 | 24 | 111 | $$last_line .= $line . " "; |
||||
288 | } else { | ||||||
289 | 109 | 268 | push @lines, $line; | ||||
290 | } | ||||||
291 | } | ||||||
292 | 113 | 50 | 216 | logf('A set of backticks (```) is unmatched or you failed to backtick a' | |||
293 | . ' blank line inside of a backtick set. Please correct the source' | ||||||
294 | . ' file and try again. Run "perldoc Anki::Import" for more help.') if $ws_mode; | ||||||
295 | |||||||
296 | 113 | 333 | logd($field_out, 'field_out'); | ||||
297 | |||||||
298 | 113 | 100 | 270742 | shift @lines if !$lines[0]; | |||
299 | 113 | 338 | my $field = join ' ', @lines; | ||||
300 | |||||||
301 | # clean up dangling breaks | ||||||
302 | 113 | 362 | $field =~ s/ <\/div>/<\/div>/g; |
||||
303 | |||||||
304 | # handle formatting codes in text, preserve escaped characters | ||||||
305 | |||||||
306 | # preserve angle brackets between backticks | ||||||
307 | 113 | 883 | my $parts = [ split /[^\\]`|^`/, $field, -1]; | ||||
308 | |||||||
309 | 113 | 198 | my $count = 0; | ||||
310 | 113 | 243 | foreach my $part (@$parts) { | ||||
311 | 120 | 164 | $count++; | ||||
312 | 120 | 100 | 321 | next if ($count % 2); # only substitute on odd number array items | |||
313 | 14 | 50 | $part =~ s/</g; | ||||
314 | } | ||||||
315 | |||||||
316 | 113 | 241 | $field = join '`', @$parts; | ||||
317 | |||||||
318 | # backticked characters | ||||||
319 | 113 | 404 | $field =~ s/(?$1<\/span>/gm; | ||||
320 | 113 | 245 | $field =~ s/\\`/`/g; | ||||
321 | |||||||
322 | # bold | ||||||
323 | 113 | 254 | $field =~ s/(?$1<\/span>/gm; | ||||
324 | 113 | 213 | $field =~ s/\\\*/*/g; | ||||
325 | |||||||
326 | # unordered lists | ||||||
327 | 113 | 286 | $field =~ s'(? |
||||
9 | 106 | ||||||
328 | 113 | 198 | $field =~ s/\\%/%/g; | ||||
329 | |||||||
330 | 113 | 306 | $field =~ s/( )+$//; |
||||
331 | 113 | 396 | push @fields, $field; | ||||
332 | |||||||
333 | } | ||||||
334 | |||||||
335 | # generate tag field | ||||||
336 | 47 | 100 | 100 | 179 | if (@autotags && !$new_autotags) { | ||
337 | |||||||
338 | # get tags from tag field | ||||||
339 | 3 | 12 | my @note_tags = split (/\s+/, $fields[-1]); logd(\@note_tags, 'raw_note_tags'); | ||||
3 | 13 | ||||||
340 | 3 | 9433 | my @new_tags = (); | ||||
341 | |||||||
342 | # add tags from tag field | ||||||
343 | 3 | 10 | foreach my $note_tag (@note_tags) { | ||||
344 | 1 | 2 | my $in_autotags = grep { $_ eq $note_tag } @autotags; | ||||
3 | 10 | ||||||
345 | 1 | 50 | 6 | push @new_tags, $note_tag unless $in_autotags; | |||
346 | } | ||||||
347 | |||||||
348 | # add autotags | ||||||
349 | 3 | 6 | foreach my $autotag (@autotags) { | ||||
350 | 9 | 15 | my $discard_autotag = grep { $_ eq $autotag } @note_tags; | ||||
3 | 7 | ||||||
351 | 9 | 100 | 24 | push @new_tags, $autotag if !$discard_autotag; | |||
352 | } | ||||||
353 | |||||||
354 | # add combined tags as a field | ||||||
355 | 3 | 12 | logd(\@new_tags, 'new_tags'); | ||||
356 | 3 | 9608 | my $new_tags = join (' ', @new_tags); | ||||
357 | 3 | 10 | $fields[-1] = $new_tags; | ||||
358 | } | ||||||
359 | 47 | 94 | $new_autotags = 0; | ||||
360 | |||||||
361 | 47 | 129 | my $out = join ("\t", @fields); | ||||
362 | |||||||
363 | # create cloze fields | ||||||
364 | 47 | 88 | my $cloze_count = 1; | ||||
365 | # TODO: should probably handle escaped braces just in case | ||||||
366 | 47 | 162 | while ($out =~ /\{\{\{(.*?)}}}/) { | ||||
367 | 2 | 22 | $out =~ s/\{\{\{(.*?)}}}/{{c${cloze_count}::$1}}/s; | ||||
368 | 2 | 10 | $cloze_count++; | ||||
369 | } | ||||||
370 | 47 | 190 | logd($out, 'out'); | ||||
371 | |||||||
372 | 47 | 113244 | $out .= "\n"; | ||||
373 | } | ||||||
374 | |||||||
375 | 1; # Magic true value | ||||||
376 | # ABSTRACT: Anki note generation made easy. | ||||||
377 | |||||||
378 | __END__ |