line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Amuse::Preprocessor::Footnotes; |
2
|
|
|
|
|
|
|
|
3
|
10
|
|
|
10
|
|
73
|
use strict; |
|
10
|
|
|
|
|
21
|
|
|
10
|
|
|
|
|
345
|
|
4
|
10
|
|
|
10
|
|
55
|
use warnings; |
|
10
|
|
|
|
|
21
|
|
|
10
|
|
|
|
|
290
|
|
5
|
10
|
|
|
10
|
|
57
|
use File::Spec; |
|
10
|
|
|
|
|
20
|
|
|
10
|
|
|
|
|
241
|
|
6
|
10
|
|
|
10
|
|
7276
|
use File::Temp; |
|
10
|
|
|
|
|
200529
|
|
|
10
|
|
|
|
|
772
|
|
7
|
10
|
|
|
10
|
|
5073
|
use File::Copy; |
|
10
|
|
|
|
|
24680
|
|
|
10
|
|
|
|
|
598
|
|
8
|
10
|
|
|
10
|
|
79
|
use Text::Amuse::Preprocessor::Parser; |
|
10
|
|
|
|
|
21
|
|
|
10
|
|
|
|
|
208
|
|
9
|
10
|
|
|
10
|
|
5354
|
use Text::Diff (); |
|
10
|
|
|
|
|
90476
|
|
|
10
|
|
|
|
|
12806
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=encoding utf8 |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Text::Amuse::Preprocessor::Footnotes - Rearrange footnote numbering in muse docs |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 DESCRIPTION |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
Given an input file, scan its footnotes and rearrange them. This means |
20
|
|
|
|
|
|
|
that such document: |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
#title test |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
Hello [1] There [1] Test [1] |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
[1] first |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
Hello hello |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
[1] second |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
[1] third |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
will become |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
#title test |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
Hello [1] There [2] Test [3] |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
Hello hello |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
[1] first |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
[2] second |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
[3] third |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
Given that the effects of the rearranging could be very destructive |
49
|
|
|
|
|
|
|
and mess up your documents, the module try to play on the safe side |
50
|
|
|
|
|
|
|
and will refuse to write out a file if there is a count mismatch |
51
|
|
|
|
|
|
|
between the footnotes and the number of references in the body. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
The core concept is that the module doesn't care about the number. |
54
|
|
|
|
|
|
|
Only order matters. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
This could be tricky if the document uses the number between square |
57
|
|
|
|
|
|
|
brackets for other than footnotes. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
Also used internally by L. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=head1 METHODS |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head2 new(input => $infile, output => $outfile, debug => 0); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Constructor with the following options: |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head3 input |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
The input file. It must exists. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head3 output |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
The output file. It will be written by the module if the parsing |
74
|
|
|
|
|
|
|
succeeds. If not specified, the module will run in dry-run mode. |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head3 debug |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Print some additional info. |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=head2 process |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
Do the job, write out C |
83
|
|
|
|
|
|
|
an arror and return false. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=head2 rewrite($type, $fh_in, $fh_out) |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Internal method to rewrite the footnotes. Type can be primary or secondary. |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=head2 error |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
Accesso to the error. If there is a error, an hashref with the |
92
|
|
|
|
|
|
|
following keys will be returned: |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=over 4 |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=item references |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
The total number of footnote references in the body. |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=item footnotes |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
The total number of footnotes. |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=item references_found |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
The reference's numbers found in the body as a long string. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=item footnotes_found |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
The footnote' numbers found in the body as a long string. |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=item differences |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
The unified diff between the footnotes and the references' list |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=back |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=cut |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
sub new { |
122
|
24
|
|
|
24
|
1
|
126
|
my ($class, %options) = @_; |
123
|
24
|
|
|
|
|
102
|
my $self = { |
124
|
|
|
|
|
|
|
input => undef, |
125
|
|
|
|
|
|
|
output => undef, |
126
|
|
|
|
|
|
|
debug => 0, |
127
|
|
|
|
|
|
|
}; |
128
|
24
|
|
|
|
|
92
|
foreach my $k (keys %$self) { |
129
|
72
|
100
|
|
|
|
168
|
if (exists $options{$k}) { |
130
|
48
|
|
|
|
|
119
|
$self->{$k} = delete $options{$k}; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
} |
133
|
24
|
|
|
|
|
72
|
$self->{_error} = ''; |
134
|
|
|
|
|
|
|
|
135
|
24
|
50
|
|
|
|
60
|
die "Unrecognized option: " . join(' ', keys %options) . "\n" if %options; |
136
|
24
|
50
|
|
|
|
72
|
die "Missing input" unless defined $self->{input}; |
137
|
|
|
|
|
|
|
# output is no checked. |
138
|
24
|
|
|
|
|
77
|
bless $self, $class; |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
sub debug { |
142
|
0
|
|
|
0
|
1
|
0
|
return shift->{debug}; |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
sub input { |
146
|
24
|
|
|
24
|
1
|
93
|
return shift->{input}; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
sub output { |
150
|
19
|
|
|
19
|
1
|
78
|
return shift->{output}; |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=head2 error |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Return a string with the errors caught, undef otherwise. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=cut |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub error { |
160
|
5
|
|
|
5
|
1
|
42
|
return shift->{_error}; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
sub _set_error { |
164
|
5
|
|
|
5
|
|
2233
|
my ($self, $error) = @_; |
165
|
5
|
50
|
|
|
|
21
|
$self->{_error} = $error if $error; |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=head2 tmpdir |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
Return the directory name used internally to hold the temporary files. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=cut |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
sub tmpdir { |
176
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
177
|
0
|
0
|
|
|
|
0
|
unless ($self->{_tmpdir}) { |
178
|
0
|
|
|
|
|
0
|
$self->{_tmpdir} = File::Temp->newdir(CLEANUP => !$self->debug); |
179
|
|
|
|
|
|
|
} |
180
|
0
|
|
|
|
|
0
|
return $self->{_tmpdir}->dirname; |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
sub process { |
184
|
24
|
|
|
24
|
1
|
42
|
my $self = shift; |
185
|
|
|
|
|
|
|
# auxiliary files |
186
|
24
|
|
|
|
|
66
|
my @body = Text::Amuse::Preprocessor::Parser::parse_text($self->_read_file($self->input)); |
187
|
24
|
100
|
|
|
|
108
|
if ($self->rewrite(primary => \@body)) { |
188
|
21
|
100
|
|
|
|
66
|
if ($self->rewrite(secondary => \@body)) { |
189
|
19
|
50
|
|
|
|
78
|
if (my $outfile = $self->output) { |
190
|
19
|
|
|
|
|
65
|
$self->_write_file($outfile, join('', map { $_->{string} } @body)); |
|
2083
|
|
|
|
|
3633
|
|
191
|
19
|
|
|
|
|
241
|
return $outfile; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
else { |
194
|
0
|
|
|
|
|
0
|
return 1; |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
} |
198
|
5
|
|
|
|
|
38
|
return; |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
sub rewrite { |
202
|
45
|
|
|
45
|
1
|
109
|
my ($self, $type, $body) = @_; |
203
|
|
|
|
|
|
|
# read the file. |
204
|
45
|
|
|
|
|
70
|
my $fn_counter = 0; |
205
|
45
|
|
|
|
|
69
|
my $body_fn_counter = 0; |
206
|
45
|
|
|
|
|
122
|
my @footnotes_found; |
207
|
|
|
|
|
|
|
my @references_found; |
208
|
45
|
|
|
|
|
0
|
my ($primary, $secondary, $open, $close); |
209
|
45
|
100
|
|
|
|
133
|
if ($type eq 'primary') { |
|
|
50
|
|
|
|
|
|
210
|
24
|
|
|
|
|
55
|
($open, $close) = ('[', ']'); |
211
|
24
|
|
|
|
|
42
|
$primary = 1; |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
elsif ($type eq 'secondary') { |
214
|
21
|
|
|
|
|
46
|
($open, $close) = ('{', '}'); |
215
|
21
|
|
|
|
|
32
|
$secondary = 1; |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
else { |
218
|
0
|
|
|
|
|
0
|
die "$type can only be 'primary' or 'secondary'"; |
219
|
|
|
|
|
|
|
} |
220
|
45
|
|
|
|
|
935
|
my $start_re = qr{^ \Q$open\E ( [0-9]+ ) \Q$close\E (?=\s) }x; |
221
|
45
|
|
|
|
|
530
|
my $inbody_re = qr{ \Q$open\E ( [0-9]+ ) \Q$close\E }x; |
222
|
45
|
|
|
|
|
158
|
my $secondary_re = qr{^ (\{ [0-9]+ \}) (?=\s) }x; |
223
|
|
|
|
|
|
|
|
224
|
45
|
|
|
|
|
89
|
my $in_footnote = 0; |
225
|
|
|
|
|
|
|
CHUNK: |
226
|
45
|
|
|
|
|
137
|
foreach my $el (@$body) { |
227
|
4263
|
100
|
|
|
|
8532
|
next CHUNK if $el->{type} ne 'text'; |
228
|
3119
|
|
|
|
|
4763
|
my $r = $el->{string}; |
229
|
|
|
|
|
|
|
# a footnote |
230
|
3119
|
100
|
100
|
|
|
20608
|
if ($r =~ s/$start_re/_check_and_replace_fn($1, |
|
461
|
100
|
100
|
|
|
1164
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
231
|
|
|
|
|
|
|
\$fn_counter, |
232
|
|
|
|
|
|
|
\@footnotes_found, $open, $close, \$in_footnote)/xe) { |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
elsif ($primary and $r =~ m/$secondary_re/) { |
235
|
|
|
|
|
|
|
# entering a secondary footnote. never matched if type is |
236
|
|
|
|
|
|
|
# secondary. Leave them alone. |
237
|
223
|
|
|
|
|
533
|
$in_footnote = length($1) + 1; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
elsif ($r =~ m/\A\s*\z/) { |
240
|
|
|
|
|
|
|
# ignore blank lines |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
# we are in a footnote if there is indentation going on |
243
|
|
|
|
|
|
|
elsif ($in_footnote and $r =~ m/\A(\s{4,})/) { |
244
|
|
|
|
|
|
|
# print "In footnote, shifting indentation on $r\n"; |
245
|
12
|
|
|
|
|
44
|
$r =~ s/\A(\s{4,})/' ' x $in_footnote/e; |
|
12
|
|
|
|
|
36
|
|
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
else { |
248
|
|
|
|
|
|
|
# not a continuation, not blank |
249
|
1381
|
|
|
|
|
2362
|
$in_footnote = 0; |
250
|
1381
|
|
|
|
|
4949
|
$r =~ s/$inbody_re/_check_and_replace_fn($1, |
|
459
|
|
|
|
|
1087
|
|
251
|
|
|
|
|
|
|
\$body_fn_counter, |
252
|
|
|
|
|
|
|
\@references_found, $open, $close, \$in_footnote)/gxe; |
253
|
|
|
|
|
|
|
} |
254
|
3119
|
|
|
|
|
6829
|
$el->{string} = $r; |
255
|
|
|
|
|
|
|
} |
256
|
45
|
100
|
|
|
|
106
|
if ($body_fn_counter == $fn_counter) { |
257
|
40
|
|
|
|
|
253
|
return 1; |
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
else { |
260
|
|
|
|
|
|
|
$self->_set_error({ |
261
|
|
|
|
|
|
|
references => $body_fn_counter, |
262
|
|
|
|
|
|
|
footnotes => $fn_counter, |
263
|
|
|
|
|
|
|
references_found => join(" ", |
264
|
11
|
|
|
|
|
42
|
map { $open . $_ . $close } |
265
|
|
|
|
|
|
|
@references_found), |
266
|
|
|
|
|
|
|
footnotes_found => join(" ", |
267
|
13
|
|
|
|
|
68
|
map { $open . $_ . $close } |
268
|
|
|
|
|
|
|
@footnotes_found), |
269
|
13
|
|
|
|
|
50
|
differences => Text::Diff::diff([ map { $open . $_ . $close . "\n" } @footnotes_found ], |
270
|
5
|
|
|
|
|
18
|
[ map { $open . $_ . $close . "\n" } @references_found ], |
|
11
|
|
|
|
|
66
|
|
271
|
|
|
|
|
|
|
{ STYLE => 'Unified' }), |
272
|
|
|
|
|
|
|
}); |
273
|
5
|
|
|
|
|
42
|
return; |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
sub _check_and_replace_fn { |
278
|
920
|
|
|
920
|
|
2185
|
my ($number, $current, $list, $open, $close, $in_footnote) = @_; |
279
|
920
|
100
|
|
|
|
2031
|
if ($number < ($$current + 100)) { |
280
|
912
|
|
|
|
|
1794
|
push @$list, $number; |
281
|
912
|
|
|
|
|
1365
|
$number = ++$$current; |
282
|
|
|
|
|
|
|
} |
283
|
920
|
|
|
|
|
1454
|
$$in_footnote = length($number) + 3; |
284
|
920
|
|
|
|
|
3572
|
return $open . $number . $close; |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
sub _write_file { |
288
|
19
|
|
|
19
|
|
67
|
my ($self, $file, $body) = @_; |
289
|
19
|
50
|
33
|
|
|
82
|
die unless $file && $body; |
290
|
19
|
50
|
|
|
|
1462
|
open (my $fh, '>:encoding(UTF-8)', $file) or die "opening $file $!"; |
291
|
19
|
|
|
|
|
1921
|
print $fh $body; |
292
|
19
|
50
|
|
|
|
947
|
close $fh or die "closing $file: $!"; |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
sub _read_file { |
297
|
24
|
|
|
24
|
|
53
|
my ($self, $file) = @_; |
298
|
24
|
50
|
|
|
|
55
|
die unless $file; |
299
|
24
|
50
|
|
|
|
850
|
open (my $fh, '<:encoding(UTF-8)', $file) or die "$file: $!"; |
300
|
24
|
|
|
|
|
1677
|
local $/ = undef; |
301
|
24
|
|
|
|
|
660
|
my $body = <$fh>; |
302
|
24
|
|
|
|
|
744
|
close $fh; |
303
|
24
|
|
|
|
|
231
|
return $body; |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
1; |