| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Markapl::FromHTML; |
|
2
|
|
|
|
|
|
|
|
|
3
|
6
|
|
|
6
|
|
254409
|
use warnings; |
|
|
6
|
|
|
|
|
16
|
|
|
|
6
|
|
|
|
|
225
|
|
|
4
|
6
|
|
|
6
|
|
33
|
use strict; |
|
|
6
|
|
|
|
|
12
|
|
|
|
6
|
|
|
|
|
200
|
|
|
5
|
6
|
|
|
6
|
|
159
|
use 5.008; |
|
|
6
|
|
|
|
|
28
|
|
|
|
6
|
|
|
|
|
294
|
|
|
6
|
6
|
|
|
6
|
|
11823
|
use Rubyish; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use HTML::PullParser; |
|
8
|
|
|
|
|
|
|
# use Data::Dump qw(pp); |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
my $indent_offset = 4; |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
def load($html) { |
|
15
|
|
|
|
|
|
|
$self->{html} = $html; |
|
16
|
|
|
|
|
|
|
$self; |
|
17
|
|
|
|
|
|
|
} |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
def dump { |
|
20
|
|
|
|
|
|
|
return $_ if (defined($_ = $self->{markapl})); |
|
21
|
|
|
|
|
|
|
return $self->convert; |
|
22
|
|
|
|
|
|
|
} |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
def convert { |
|
25
|
|
|
|
|
|
|
return "" unless $self->{html}; |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
my $p = HTML::PullParser->new( |
|
28
|
|
|
|
|
|
|
doc => $self->{html}, |
|
29
|
|
|
|
|
|
|
start => '"S", tagname, @attr', |
|
30
|
|
|
|
|
|
|
text => '"T", text', |
|
31
|
|
|
|
|
|
|
end => '"E", tagname', |
|
32
|
|
|
|
|
|
|
); |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
my $current_tag = ""; |
|
35
|
|
|
|
|
|
|
my @stack = (); |
|
36
|
|
|
|
|
|
|
my $indent = 0; |
|
37
|
|
|
|
|
|
|
while(my $token = $p->get_token) { |
|
38
|
|
|
|
|
|
|
# warn $token->[0],"\n";; |
|
39
|
|
|
|
|
|
|
if ($token->[0] eq 'S') { |
|
40
|
|
|
|
|
|
|
push @stack, { tag => $token->[1], attr => [@$token[2..$#$token]]}; |
|
41
|
|
|
|
|
|
|
$indent += 1; |
|
42
|
|
|
|
|
|
|
} |
|
43
|
|
|
|
|
|
|
elsif ($token->[0] eq 'T') { |
|
44
|
|
|
|
|
|
|
unless($token->[1] =~ /^\s*$/s ) { |
|
45
|
|
|
|
|
|
|
push @stack, { text => $token->[1] } |
|
46
|
|
|
|
|
|
|
} |
|
47
|
|
|
|
|
|
|
} |
|
48
|
|
|
|
|
|
|
elsif ($token->[0] eq 'E') { |
|
49
|
|
|
|
|
|
|
# pp $token; |
|
50
|
|
|
|
|
|
|
my @content; |
|
51
|
|
|
|
|
|
|
my $content = pop @stack; |
|
52
|
|
|
|
|
|
|
while (!$content->{tag} || $content->{tag} ne $token->[1]) { |
|
53
|
|
|
|
|
|
|
push @content, $content; |
|
54
|
|
|
|
|
|
|
$content = pop @stack; |
|
55
|
|
|
|
|
|
|
} |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
my $start_tag = $content; |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
my $indent_str = " " x ($indent * $indent_offset); |
|
60
|
|
|
|
|
|
|
my $indent_str2 = " " x ( ($indent + 1) * $indent_offset); |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
my $attr = ""; |
|
63
|
|
|
|
|
|
|
my @attr = @{$start_tag->{attr}}; |
|
64
|
|
|
|
|
|
|
if (@attr) { |
|
65
|
|
|
|
|
|
|
while (my ($k, $v) = splice(@attr, 0, 2)) { |
|
66
|
|
|
|
|
|
|
$attr .= qq{ $k => "$v"}; |
|
67
|
|
|
|
|
|
|
} |
|
68
|
|
|
|
|
|
|
$attr = "($attr )"; |
|
69
|
|
|
|
|
|
|
} |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
if (@content == 1) { |
|
72
|
|
|
|
|
|
|
my $content_text = $content[0]->{code}; |
|
73
|
|
|
|
|
|
|
if (!$content_text && $content[0]->{text}) { |
|
74
|
|
|
|
|
|
|
$content_text = "\"$content[0]->{text}\"" |
|
75
|
|
|
|
|
|
|
} |
|
76
|
|
|
|
|
|
|
$content_text ||= ''; |
|
77
|
|
|
|
|
|
|
push @stack, { |
|
78
|
|
|
|
|
|
|
code => "\n${indent_str}$start_tag->{tag}${attr} {\n${indent_str2}$content_text\n${indent_str}};\n" |
|
79
|
|
|
|
|
|
|
}; |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
else { |
|
82
|
|
|
|
|
|
|
for (@content) { |
|
83
|
|
|
|
|
|
|
if ($_->{text}) { |
|
84
|
|
|
|
|
|
|
$_->{code} = "outs \"$_->{text}\";"; |
|
85
|
|
|
|
|
|
|
$_->{text} = undef; |
|
86
|
|
|
|
|
|
|
} |
|
87
|
|
|
|
|
|
|
} |
|
88
|
|
|
|
|
|
|
my $content_code = join "\n", map { $_->{code}||"" } reverse @content; |
|
89
|
|
|
|
|
|
|
# pp $start_tag->{tag}, $start_tag->{indent}; |
|
90
|
|
|
|
|
|
|
push @stack, { |
|
91
|
|
|
|
|
|
|
code => "\n${indent_str}$start_tag->{tag}${attr} {\n${indent_str2}$content_code\n${indent_str}};\n" |
|
92
|
|
|
|
|
|
|
}; |
|
93
|
|
|
|
|
|
|
} |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
$indent -= 1; |
|
96
|
|
|
|
|
|
|
} |
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
my $ret = join "\n", "sub {", (map { $_->{code} || $_->{text} } @stack), "\n}\n"; |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# Squeeze empty lines. |
|
102
|
|
|
|
|
|
|
$ret =~ s/\n\s*\n/\n/g; |
|
103
|
|
|
|
|
|
|
$ret =~ s/\{\n\s+\}(;?)\n/{}$1\n/g; |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# Re-org all text only blocks to a single line. |
|
106
|
|
|
|
|
|
|
$ret =~ s/\{\n\s+(".+")\n\s+\}(;?)\n/{ $1 }$2\n/g; |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
return $ret; |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
1; |
|
112
|
|
|
|
|
|
|
__END__ |