line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Amuse::Compile::Indexer; |
2
|
|
|
|
|
|
|
|
3
|
59
|
|
|
59
|
|
471
|
use strict; |
|
59
|
|
|
|
|
157
|
|
|
59
|
|
|
|
|
1890
|
|
4
|
59
|
|
|
59
|
|
333
|
use warnings; |
|
59
|
|
|
|
|
154
|
|
|
59
|
|
|
|
|
1442
|
|
5
|
59
|
|
|
59
|
|
370
|
use Moo; |
|
59
|
|
|
|
|
158
|
|
|
59
|
|
|
|
|
351
|
|
6
|
59
|
|
|
59
|
|
19626
|
use Types::Standard qw/Str ArrayRef Object CodeRef/; |
|
59
|
|
|
|
|
218
|
|
|
59
|
|
|
|
|
536
|
|
7
|
59
|
|
|
59
|
|
53507
|
use Data::Dumper; |
|
59
|
|
|
|
|
173
|
|
|
59
|
|
|
|
|
3727
|
|
8
|
59
|
|
|
59
|
|
25463
|
use Text::Amuse::Compile::Indexer::Specification; |
|
59
|
|
|
|
|
201
|
|
|
59
|
|
|
|
|
2225
|
|
9
|
59
|
|
|
59
|
|
449
|
use Text::Amuse::Functions qw/muse_format_line/; |
|
59
|
|
|
|
|
188
|
|
|
59
|
|
|
|
|
79804
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=encoding utf8 |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Text::Amuse::Compile::Indexer - Class for LaTeX indexes |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Everything here is pretty much private and used by L |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 ACCESSORS AND METHODS |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=over 4 |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=item latex_body |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
The body provided to the constructor |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=item index_specs |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
The raw indexes |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=item specifications |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
Lazy built, the L |
37
|
|
|
|
|
|
|
objects |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=item language_code |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
The language ISO code. To be passed to C |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=item indexed_tex_body |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
Method meant to be called from the L |
46
|
|
|
|
|
|
|
object. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=item interpolate_indexes |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
Main method to get the job done. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=item logger |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
Required coderef for logging. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=back |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=cut |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
has latex_body => (is => 'ro', required => 1, isa => Str); |
62
|
|
|
|
|
|
|
has index_specs => (is => 'ro', required => 1, isa => ArrayRef[Str]); |
63
|
|
|
|
|
|
|
has specifications => (is => 'lazy', isa => ArrayRef[Object]); |
64
|
|
|
|
|
|
|
has language_code => (is => 'ro'); |
65
|
|
|
|
|
|
|
has logger => (is => 'ro', isa => CodeRef, required => 1); |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
sub _build_specifications { |
68
|
8
|
|
|
8
|
|
3493
|
my $self = shift; |
69
|
8
|
|
|
|
|
17
|
my @specs; |
70
|
8
|
|
|
|
|
33
|
my $lang = $self->language_code; |
71
|
|
|
|
|
|
|
my $escape = sub { |
72
|
67
|
|
|
67
|
|
4861
|
return muse_format_line(ltx => $_[0], $lang); |
73
|
8
|
|
|
|
|
59
|
}; |
74
|
8
|
|
|
|
|
18
|
foreach my $str (@{$self->index_specs}) { |
|
8
|
|
|
|
|
36
|
|
75
|
11
|
|
|
|
|
114
|
my ($first, @lines) = grep { length($_) } split(/\n+/, $str); |
|
56
|
|
|
|
|
143
|
|
76
|
11
|
50
|
|
|
|
85
|
if ($first =~ m/^INDEX ([a-z]+): (.+)/) { |
77
|
11
|
|
|
|
|
49
|
my ($name, $label) = ($1, $2); |
78
|
11
|
|
|
|
|
23
|
my @patterns; |
79
|
|
|
|
|
|
|
# remove the comments and the white space |
80
|
11
|
|
|
|
|
31
|
foreach my $str (@lines) { |
81
|
45
|
|
|
|
|
17052
|
$str =~ s/\A\s*//g; |
82
|
45
|
|
|
|
|
279
|
$str =~ s/\s*\z//g; |
83
|
45
|
50
|
|
|
|
168
|
push @patterns, $escape->($str) if $str; |
84
|
|
|
|
|
|
|
} |
85
|
11
|
|
|
|
|
5274
|
push @specs, Text::Amuse::Compile::Indexer::Specification->new( |
86
|
|
|
|
|
|
|
index_name => $escape->($name), |
87
|
|
|
|
|
|
|
index_label => $escape->($label), |
88
|
|
|
|
|
|
|
patterns => \@patterns, |
89
|
|
|
|
|
|
|
); |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
else { |
92
|
0
|
|
|
|
|
0
|
die "Invalid index specification $first, expecting INDEX : |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
} |
95
|
8
|
|
|
|
|
195
|
return \@specs; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
sub indexed_tex_body { |
99
|
7
|
|
|
7
|
1
|
19
|
my $self = shift; |
100
|
7
|
50
|
|
|
|
16
|
if (@{$self->index_specs}) { |
|
7
|
|
|
|
|
81
|
|
101
|
7
|
|
|
|
|
29
|
return $self->interpolate_indexes; |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
else { |
104
|
0
|
|
|
|
|
0
|
return $self->latex_body; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub interpolate_indexes { |
109
|
8
|
|
|
8
|
1
|
524
|
my $self = shift; |
110
|
8
|
|
|
|
|
32
|
my $full_body = $self->latex_body; |
111
|
|
|
|
|
|
|
# remove the indexes |
112
|
8
|
|
|
|
|
1111
|
$full_body =~ s/\\begin\{comment\} |
113
|
|
|
|
|
|
|
\s* |
114
|
|
|
|
|
|
|
INDEX\x{20}+[a-z]+: |
115
|
|
|
|
|
|
|
.*? |
116
|
|
|
|
|
|
|
\\end\{comment\}//gsx; |
117
|
|
|
|
|
|
|
|
118
|
8
|
|
|
|
|
1046
|
my @paragraphs = split(/\n\n/, $full_body); |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# build a huge regexp with the matches |
121
|
8
|
|
|
|
|
28
|
my %labels; |
122
|
|
|
|
|
|
|
my @matches; |
123
|
8
|
|
|
|
|
36
|
for (my $i = 0; $i < @{$self->specifications}; $i++) { |
|
19
|
|
|
|
|
396
|
|
124
|
11
|
|
|
|
|
500
|
my $spec = $self->specifications->[$i]; |
125
|
|
|
|
|
|
|
MATCH: |
126
|
11
|
|
|
|
|
91
|
foreach my $match (@{$spec->matches}) { |
|
11
|
|
|
|
|
192
|
|
127
|
45
|
|
|
|
|
438
|
my $str = $match->{match}; |
128
|
45
|
50
|
|
|
|
116
|
if (my $exists = $labels{$str}) { |
129
|
0
|
|
|
|
|
0
|
$self->logger->("$str already has a label $exists->{label} " . $exists->{spec}->index_name . "\n"); |
130
|
0
|
|
|
|
|
0
|
next MATCH; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
$labels{$str} = { |
133
|
|
|
|
|
|
|
label => $match->{label}, |
134
|
45
|
|
|
|
|
171
|
matches => 0, |
135
|
|
|
|
|
|
|
spec => $spec, |
136
|
|
|
|
|
|
|
spec_index => $i, |
137
|
|
|
|
|
|
|
}; |
138
|
45
|
|
|
|
|
74
|
my @pieces; |
139
|
45
|
100
|
|
|
|
133
|
if ($match->{match} =~ m/\A\w/) { |
140
|
40
|
|
|
|
|
81
|
push @pieces, "\\b"; |
141
|
|
|
|
|
|
|
} |
142
|
45
|
|
|
|
|
91
|
push @pieces, quotemeta($match->{match}); |
143
|
45
|
100
|
|
|
|
146
|
if ($match->{match} =~ m/\w\z/) { |
144
|
41
|
|
|
|
|
74
|
push @pieces, "\\b"; |
145
|
|
|
|
|
|
|
} |
146
|
45
|
|
|
|
|
151
|
push @matches, join('', @pieces); |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
} |
149
|
8
|
|
|
|
|
101
|
my $re_string = join('|', @matches); |
150
|
8
|
|
|
|
|
257
|
my $re = qr{$re_string}; |
151
|
|
|
|
|
|
|
# print "Regex is $re\n"; |
152
|
8
|
|
|
|
|
24
|
my @out; |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
my $add_index = sub { |
155
|
138
|
|
|
138
|
|
305
|
my ($match) = @_; |
156
|
138
|
50
|
|
|
|
355
|
die "Cannot find belonging specification for $match. Bug?" unless $labels{$match}; |
157
|
138
|
|
|
|
|
345
|
my $index_name = $labels{$match}{spec}->index_name; |
158
|
138
|
|
|
|
|
248
|
my $label = $labels{$match}{label}; |
159
|
138
|
|
|
|
|
220
|
$labels{$match}{matches}++; |
160
|
138
|
|
|
|
|
14952
|
return "\\index[$index_name]{$label}"; |
161
|
8
|
|
|
|
|
92
|
}; |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
LINE: |
164
|
8
|
|
|
|
|
42
|
foreach my $p (@paragraphs) { |
165
|
|
|
|
|
|
|
# we index the inline comments as well, so we can index |
166
|
|
|
|
|
|
|
# what we want, where we want. |
167
|
266
|
100
|
|
|
|
864
|
if ($p =~ m/^%/m) { |
|
|
100
|
|
|
|
|
|
168
|
5
|
|
|
|
|
10
|
my @prepend; |
169
|
5
|
|
|
|
|
128
|
while ($p =~ m/($re)/g) { |
170
|
9
|
|
|
|
|
23
|
push @prepend, $add_index->($1); |
171
|
|
|
|
|
|
|
} |
172
|
5
|
100
|
|
|
|
22
|
if (@prepend) { |
173
|
3
|
|
|
|
|
13
|
$p = join("\n", @prepend) . "\n" . $p; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
elsif ($p =~ m/^\\(mark|part|chapter|section|subsection|subsubsection)/m) { |
177
|
26
|
|
|
|
|
49
|
my @append; |
178
|
26
|
|
|
|
|
372
|
while ($p =~ m/($re)/g) { |
179
|
10
|
|
|
|
|
26
|
push @append, $add_index->($1); |
180
|
|
|
|
|
|
|
} |
181
|
26
|
100
|
|
|
|
77
|
if (@append) { |
182
|
7
|
|
|
|
|
26
|
$p .= "\n" . join("\n", @append); |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
else { |
187
|
235
|
|
|
|
|
42447
|
$p =~ s/($re)/$add_index->($1) . $1/ge; |
|
119
|
|
|
|
|
334
|
|
188
|
|
|
|
|
|
|
} |
189
|
266
|
|
|
|
|
637
|
push @out, $p; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
# collect the stats |
192
|
8
|
|
|
|
|
29
|
my %stats; |
193
|
8
|
|
|
|
|
74
|
foreach my $regex (sort keys %labels) { |
194
|
45
|
|
|
|
|
654
|
my $stat = $labels{$regex}; |
195
|
45
|
|
100
|
|
|
166
|
$stats{$stat->{spec_index}} ||= 0; |
196
|
45
|
100
|
|
|
|
100
|
if ($stat->{matches} > 0) { |
197
|
41
|
|
|
|
|
77
|
$stats{$stat->{spec_index}} += $stat->{matches}; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
else { |
200
|
4
|
|
|
|
|
33
|
$self->logger->("No matches found for $regex\n"); |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
} |
203
|
8
|
|
|
|
|
403
|
foreach my $k (sort keys %stats) { |
204
|
11
|
|
|
|
|
301
|
$self->specifications->[$k]->total_found($stats{$k}); |
205
|
|
|
|
|
|
|
} |
206
|
8
|
|
|
|
|
2144
|
return join("\n\n", @out); |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
1; |