line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Amuse::Preprocessor::Parser; |
2
|
|
|
|
|
|
|
|
3
|
10
|
|
|
10
|
|
79410
|
use utf8; |
|
10
|
|
|
|
|
26
|
|
|
10
|
|
|
|
|
56
|
|
4
|
10
|
|
|
10
|
|
327
|
use strict; |
|
10
|
|
|
|
|
23
|
|
|
10
|
|
|
|
|
193
|
|
5
|
10
|
|
|
10
|
|
49
|
use warnings; |
|
10
|
|
|
|
|
19
|
|
|
10
|
|
|
|
|
2813
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 NAME |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
Text::Amuse::Preprocessor::Parser - Stripped down Muse parser for Text::Amuse::Preprocessor |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head2 FUNCTIONS |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=over 4 |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=item parse_text($body) |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Parse the string provided as argument and return a list of hashrefs |
18
|
|
|
|
|
|
|
with this structure: |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
{ |
21
|
|
|
|
|
|
|
type => "markup" || "text", |
22
|
|
|
|
|
|
|
string => $chunk |
23
|
|
|
|
|
|
|
} |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
the concatenation of the C values is equal to the original |
26
|
|
|
|
|
|
|
body (without carriage returns and null bytes, tabs normalized and |
27
|
|
|
|
|
|
|
final newline appended if missing). |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=back |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=cut |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
sub parse_text { |
34
|
109
|
|
|
109
|
1
|
4389
|
my $string = shift; |
35
|
109
|
|
|
|
|
2036
|
$string =~ s/[\r\0]//g; |
36
|
109
|
|
|
|
|
769
|
$string =~ s/\t/ /g; |
37
|
109
|
100
|
|
|
|
610
|
if ($string !~ m/\n\z/s) { |
38
|
10
|
|
|
|
|
32
|
$string .= "\n"; |
39
|
|
|
|
|
|
|
} |
40
|
|
|
|
|
|
|
# remove trailing space |
41
|
109
|
|
|
|
|
1997
|
$string =~ s/ +$//gm; |
42
|
109
|
|
|
|
|
200
|
my @list; |
43
|
109
|
|
|
|
|
207
|
my $last_position = 0; |
44
|
109
|
|
|
|
|
869
|
pos($string) = $last_position; |
45
|
109
|
|
|
|
|
1657
|
while ($string =~ m{\G # last match |
46
|
|
|
|
|
|
|
(?.*?) # something not greedy, even nothing |
47
|
|
|
|
|
|
|
(? |
48
|
|
|
|
|
|
|
(?^\{\{\{ \x{20}*?\n .*? \n\}\}\}\n) | |
49
|
|
|
|
|
|
|
(?^\\x{20}*?\n .*? \n\\n) | |
50
|
|
|
|
|
|
|
(? \n\n+?) | |
51
|
|
|
|
|
|
|
(? \ .*? \<\/verbatim\> ) | |
52
|
|
|
|
|
|
|
(? \ .*? \<\/code\> ) | |
53
|
|
|
|
|
|
|
(? (?
|
54
|
|
|
|
|
|
|
)}gcxms) { |
55
|
10
|
|
|
10
|
|
4741
|
my %captures = %+; |
|
10
|
|
|
|
|
3855
|
|
|
10
|
|
|
|
|
2749
|
|
|
2336
|
|
|
|
|
23393
|
|
56
|
2336
|
100
|
|
|
|
8740
|
if (length($captures{text})) { |
57
|
2188
|
|
|
|
|
7398
|
my @lines = split(/(\n)/, $captures{text}); |
58
|
2188
|
|
|
|
|
4452
|
push @list, map { +{ type => 'text', string => $_ } } grep { length($_) } @lines; |
|
4932
|
|
|
|
|
15764
|
|
|
5016
|
|
|
|
|
9301
|
|
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
push @list, { |
61
|
|
|
|
|
|
|
type => 'markup', |
62
|
|
|
|
|
|
|
string => $captures{markup}, |
63
|
2336
|
|
|
|
|
6461
|
}; |
64
|
2336
|
|
|
|
|
38841
|
$last_position = pos($string); |
65
|
|
|
|
|
|
|
} |
66
|
109
|
|
|
|
|
387
|
my $last_chunk = substr $string, $last_position; |
67
|
109
|
100
|
|
|
|
285
|
if (length($last_chunk)) { |
68
|
96
|
|
|
|
|
541
|
my @lines = split(/(\n)/, $last_chunk); |
69
|
96
|
|
|
|
|
223
|
push @list, map { +{ type => 'text', string => $_ } } grep { length($_) } @lines; |
|
400
|
|
|
|
|
1145
|
|
|
446
|
|
|
|
|
929
|
|
70
|
|
|
|
|
|
|
} |
71
|
109
|
|
|
|
|
255
|
my $full = join('', map { $_->{string} } @list); |
|
7668
|
|
|
|
|
13260
|
|
72
|
109
|
50
|
|
|
|
643
|
die "Chunks lost during processing <$string> vs. <$full>" unless $string eq $full; |
73
|
109
|
|
|
|
|
893
|
return @list; |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
1; |