line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
############################################################################# |
2
|
|
|
|
|
|
|
# (c) by Tels 2004. Part of Convert::Wiki |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# contains the from_txt() fuctionality for Convert::Wiki |
5
|
|
|
|
|
|
|
############################################################################# |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
package Convert::Wiki; |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
sub _from_txt |
10
|
|
|
|
|
|
|
{ |
11
|
7
|
|
|
7
|
|
17
|
my ($self,$txt) = @_; |
12
|
|
|
|
|
|
|
|
13
|
7
|
|
|
|
|
96
|
$self->clear(); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
######################################################################### |
16
|
|
|
|
|
|
|
# Stage 0: global normalization |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# convert "\n \n" to "\n\n" |
19
|
7
|
|
|
|
|
62
|
$txt =~ s/\n\s+\n/\n\n/; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# convert "foo:\nbah" to "foo:\n\nbah" (but not headlines ending in ":") |
22
|
7
|
|
|
|
|
39
|
$txt =~ s/:\s*\n([^=_-])/:\n\n$1/g; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# remove leading \n: |
25
|
7
|
|
|
|
|
35
|
$txt =~ s/^\n+//g; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# remove leading lines: |
28
|
7
|
|
|
|
|
24
|
$txt =~ s/^\s*[=_-]+\n+//; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# take the text, recognize parts at it's beginning until we no longer have |
31
|
|
|
|
|
|
|
# anything left |
32
|
7
|
|
|
|
|
11
|
my ($opt); |
33
|
7
|
|
|
|
|
12
|
my $tries = 0; |
34
|
7
|
|
|
|
|
9
|
my $node_nr = 0; |
35
|
|
|
|
|
|
|
|
36
|
7
|
|
|
|
|
11
|
my $last_node = $self->{nodes}; |
37
|
|
|
|
|
|
|
|
38
|
7
|
|
66
|
|
|
56
|
while ((length($txt) > 0) && ($tries++ < 16)) |
39
|
|
|
|
|
|
|
{ |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
######################################################################### |
42
|
|
|
|
|
|
|
# Stage 1: local normalization |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# remove superflous newlines |
45
|
47
|
|
|
|
|
135
|
$txt =~ s/^\n+//g; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# remove "=========" and similiar stray delimiters |
48
|
47
|
|
|
|
|
85
|
$txt =~ s/^[=]+\n+//; |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
######################################################################### |
51
|
|
|
|
|
|
|
# Stage 2: conversion to internal format |
52
|
|
|
|
|
|
|
|
53
|
47
|
|
|
|
|
54
|
$opt = undef; # reset |
54
|
|
|
|
|
|
|
|
55
|
47
|
50
|
|
|
|
161
|
if ($self->{debug}) |
56
|
|
|
|
|
|
|
{ |
57
|
0
|
|
|
|
|
0
|
$txt =~ /^(.*)\n(.*)\n(.*)/; |
58
|
0
|
|
0
|
|
|
0
|
my $a = $1 || ''; |
59
|
0
|
|
0
|
|
|
0
|
my $b = $2 || ''; |
60
|
0
|
|
0
|
|
|
0
|
my $c = $3 || ''; |
61
|
0
|
|
|
|
|
0
|
print STDERR "# at node $node_nr\n"; |
62
|
0
|
|
|
|
|
0
|
print STDERR "# Text start is now:\n# '$a'\n# '$b'\n# '$c'\n"; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# "Foo\n===" looks like a headline |
66
|
47
|
100
|
|
|
|
708
|
if ($txt =~ s/^([^=_-].+)\n[=_-]+\n+//) |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
67
|
|
|
|
|
|
|
{ |
68
|
13
|
50
|
|
|
|
34
|
print STDERR "# case 1\n" if $self->{debug}; |
69
|
13
|
|
|
|
|
64
|
$opt = { txt => $1, type => 'head1' }; |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
# '-----' or '_____' to rulers |
72
|
|
|
|
|
|
|
elsif ($txt =~ s/^[-_]+\n//) |
73
|
|
|
|
|
|
|
{ |
74
|
2
|
50
|
|
|
|
8
|
print STDERR "# case 2\n" if $self->{debug}; |
75
|
2
|
|
|
|
|
7
|
$opt = { type => 'line' }; |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
# "1. Foo\n" looks like a bullet |
78
|
|
|
|
|
|
|
elsif ($txt =~ s/^([\d\.]+) (.+)\n//) |
79
|
|
|
|
|
|
|
{ |
80
|
0
|
0
|
|
|
|
0
|
print STDERR "# case 3\n" if $self->{debug}; |
81
|
0
|
|
|
|
|
0
|
$opt = { txt => $2, name => $1, type => 'item' }; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
# "* Foo\n" looks like a bullet |
84
|
|
|
|
|
|
|
elsif ($txt =~ s/^(\s*[*+-](\s+(.|\n)+?))(\n\n|\n\s*[*+-])/$4/) |
85
|
|
|
|
|
|
|
{ |
86
|
7
|
50
|
|
|
|
20
|
print STDERR "# case 4\n" if $self->{debug}; |
87
|
7
|
|
|
|
|
18
|
my $t = $1; |
88
|
7
|
|
|
|
|
22
|
$t =~ s/^\s*[*+-]\s+//; # "- Foo" => "Foo" |
89
|
7
|
|
|
|
|
17
|
$t =~ s/\n\s+/\n/g; # "\n Boo" => "\nBoo" |
90
|
|
|
|
|
|
|
|
91
|
7
|
|
|
|
|
25
|
$t =~ s/\s+\z//g; # remove trailing space |
92
|
7
|
|
|
|
|
13
|
$t =~ s/\n/ /g; # remove newlines entirely |
93
|
|
|
|
|
|
|
|
94
|
7
|
|
|
|
|
23
|
$opt = { txt => $t, type => 'item' }; |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
# " some text\nmore text" is one paragraph and not monospaced |
97
|
|
|
|
|
|
|
elsif ($txt =~ s/^\s+(([^\s].+\n){2,})//) |
98
|
|
|
|
|
|
|
{ |
99
|
2
|
50
|
|
|
|
7
|
print STDERR "# case 5\n" if $self->{debug}; |
100
|
2
|
|
|
|
|
11
|
$opt = { txt => $1, type => 'para' }; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
# " some text\n\n" is one monospaced line |
103
|
|
|
|
|
|
|
elsif ($txt =~ s/^\s+(((.+)\n\n))//) |
104
|
|
|
|
|
|
|
{ |
105
|
2
|
50
|
|
|
|
7
|
print STDERR "# case 6\n" if $self->{debug}; |
106
|
2
|
|
|
|
|
6
|
my $t = $1; |
107
|
2
|
|
|
|
|
11
|
$t =~ s/\n\s+/\n/g; # "\n Boo" => "\nBoo" |
108
|
2
|
|
|
|
|
6
|
$t =~ s/\n\z//g; # remove trailing \n |
109
|
|
|
|
|
|
|
|
110
|
2
|
|
|
|
|
8
|
$opt = { txt => $t, type => 'mono' }; |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
# " Foo\n" looks like a monospaced text |
113
|
|
|
|
|
|
|
elsif ($txt =~ s/^(([ \t]+[^\s\n*+=-].+\n){2,})//) |
114
|
|
|
|
|
|
|
{ |
115
|
0
|
0
|
|
|
|
0
|
print STDERR "# case 7 '$1'\n" if $self->{debug}; |
116
|
0
|
|
|
|
|
0
|
my $t = $1; |
117
|
|
|
|
|
|
|
|
118
|
0
|
|
|
|
|
0
|
$t =~ s/^\s+//g; # " Boo" => "Boo" |
119
|
0
|
|
|
|
|
0
|
$t =~ s/\n\s+/\n/g; # "\n Boo" => "\nBoo" |
120
|
0
|
|
|
|
|
0
|
$t =~ s/\n\z//g; # remove trailing \n |
121
|
|
|
|
|
|
|
|
122
|
0
|
|
|
|
|
0
|
$opt = { txt => $t, type => 'mono' }; |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
# Also: "$ Foo\n" and "# bah" look like a monospaced text |
125
|
|
|
|
|
|
|
elsif ($txt =~ s/^([\$\#])\s+(((.+)\n){1,})//) |
126
|
|
|
|
|
|
|
{ |
127
|
1
|
50
|
|
|
|
6
|
print STDERR "# case 8\n" if $self->{debug}; |
128
|
1
|
|
|
|
|
6
|
my $t = "$1 $2"; |
129
|
1
|
|
|
|
|
3
|
$t =~ s/\n\s+/\n/g; # "\n Boo" => "\nBoo" |
130
|
1
|
|
|
|
|
5
|
$t =~ s/\n\z//g; # remove trailing \n |
131
|
|
|
|
|
|
|
|
132
|
1
|
|
|
|
|
4
|
$opt = { txt => $t, type => 'mono' }; |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
# "Foo\n" looks like a text |
135
|
|
|
|
|
|
|
elsif ($txt =~ s/^([^\s](([^*+\n-].+)\n){1,})//) |
136
|
|
|
|
|
|
|
{ |
137
|
18
|
50
|
|
|
|
44
|
print STDERR "# case 9\n" if $self->{debug}; |
138
|
18
|
|
|
|
|
76
|
$opt = { txt => $1, type => 'para' }; |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
|
141
|
47
|
100
|
|
|
|
117
|
if (defined $opt) |
142
|
|
|
|
|
|
|
{ |
143
|
45
|
|
|
|
|
49
|
$tries = 0; |
144
|
45
|
50
|
|
|
|
102
|
if ($self->{debug}) |
145
|
|
|
|
|
|
|
{ |
146
|
0
|
|
|
|
|
0
|
require Data::Dumper; |
147
|
0
|
|
|
|
|
0
|
print STDERR Data::Dumper::Dumper($opt); |
148
|
|
|
|
|
|
|
} |
149
|
45
|
|
|
|
|
89
|
$opt->{interlink} = $self->{interlink}; |
150
|
45
|
|
|
|
|
155
|
my $node = Convert::Wiki::Node->new( $opt ); |
151
|
45
|
100
|
|
|
|
99
|
if ($last_node) |
152
|
|
|
|
|
|
|
{ |
153
|
|
|
|
|
|
|
# link node to last node |
154
|
38
|
|
|
|
|
137
|
$last_node->link( $node ); |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
else |
157
|
|
|
|
|
|
|
{ |
158
|
7
|
|
|
|
|
11
|
$self->{nodes} = $node; |
159
|
|
|
|
|
|
|
} |
160
|
45
|
|
|
|
|
65
|
$last_node = $node; |
161
|
45
|
|
|
|
|
226
|
$node_nr++; |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
} |
164
|
7
|
50
|
66
|
|
|
36
|
if ($tries > 0 && length($txt) > 0) |
165
|
|
|
|
|
|
|
{ |
166
|
|
|
|
|
|
|
# something was left over |
167
|
0
|
|
|
|
|
0
|
$self->error( "Cannot recognize text ahead of me. Giving up." ); |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
7
|
|
|
|
|
27
|
$self; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
1; |
174
|
|
|
|
|
|
|
__END__ |