line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Markapl::FromHTML; |
2
|
|
|
|
|
|
|
|
3
|
6
|
|
|
6
|
|
254409
|
use warnings; |
|
6
|
|
|
|
|
16
|
|
|
6
|
|
|
|
|
225
|
|
4
|
6
|
|
|
6
|
|
33
|
use strict; |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
200
|
|
5
|
6
|
|
|
6
|
|
159
|
use 5.008; |
|
6
|
|
|
|
|
28
|
|
|
6
|
|
|
|
|
294
|
|
6
|
6
|
|
|
6
|
|
11823
|
use Rubyish; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use HTML::PullParser; |
8
|
|
|
|
|
|
|
# use Data::Dump qw(pp); |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
my $indent_offset = 4; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
def load($html) { |
15
|
|
|
|
|
|
|
$self->{html} = $html; |
16
|
|
|
|
|
|
|
$self; |
17
|
|
|
|
|
|
|
} |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
def dump { |
20
|
|
|
|
|
|
|
return $_ if (defined($_ = $self->{markapl})); |
21
|
|
|
|
|
|
|
return $self->convert; |
22
|
|
|
|
|
|
|
} |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
def convert { |
25
|
|
|
|
|
|
|
return "" unless $self->{html}; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
my $p = HTML::PullParser->new( |
28
|
|
|
|
|
|
|
doc => $self->{html}, |
29
|
|
|
|
|
|
|
start => '"S", tagname, @attr', |
30
|
|
|
|
|
|
|
text => '"T", text', |
31
|
|
|
|
|
|
|
end => '"E", tagname', |
32
|
|
|
|
|
|
|
); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
my $current_tag = ""; |
35
|
|
|
|
|
|
|
my @stack = (); |
36
|
|
|
|
|
|
|
my $indent = 0; |
37
|
|
|
|
|
|
|
while(my $token = $p->get_token) { |
38
|
|
|
|
|
|
|
# warn $token->[0],"\n";; |
39
|
|
|
|
|
|
|
if ($token->[0] eq 'S') { |
40
|
|
|
|
|
|
|
push @stack, { tag => $token->[1], attr => [@$token[2..$#$token]]}; |
41
|
|
|
|
|
|
|
$indent += 1; |
42
|
|
|
|
|
|
|
} |
43
|
|
|
|
|
|
|
elsif ($token->[0] eq 'T') { |
44
|
|
|
|
|
|
|
unless($token->[1] =~ /^\s*$/s ) { |
45
|
|
|
|
|
|
|
push @stack, { text => $token->[1] } |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
elsif ($token->[0] eq 'E') { |
49
|
|
|
|
|
|
|
# pp $token; |
50
|
|
|
|
|
|
|
my @content; |
51
|
|
|
|
|
|
|
my $content = pop @stack; |
52
|
|
|
|
|
|
|
while (!$content->{tag} || $content->{tag} ne $token->[1]) { |
53
|
|
|
|
|
|
|
push @content, $content; |
54
|
|
|
|
|
|
|
$content = pop @stack; |
55
|
|
|
|
|
|
|
} |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
my $start_tag = $content; |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
my $indent_str = " " x ($indent * $indent_offset); |
60
|
|
|
|
|
|
|
my $indent_str2 = " " x ( ($indent + 1) * $indent_offset); |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
my $attr = ""; |
63
|
|
|
|
|
|
|
my @attr = @{$start_tag->{attr}}; |
64
|
|
|
|
|
|
|
if (@attr) { |
65
|
|
|
|
|
|
|
while (my ($k, $v) = splice(@attr, 0, 2)) { |
66
|
|
|
|
|
|
|
$attr .= qq{ $k => "$v"}; |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
$attr = "($attr )"; |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
if (@content == 1) { |
72
|
|
|
|
|
|
|
my $content_text = $content[0]->{code}; |
73
|
|
|
|
|
|
|
if (!$content_text && $content[0]->{text}) { |
74
|
|
|
|
|
|
|
$content_text = "\"$content[0]->{text}\"" |
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
$content_text ||= ''; |
77
|
|
|
|
|
|
|
push @stack, { |
78
|
|
|
|
|
|
|
code => "\n${indent_str}$start_tag->{tag}${attr} {\n${indent_str2}$content_text\n${indent_str}};\n" |
79
|
|
|
|
|
|
|
}; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
else { |
82
|
|
|
|
|
|
|
for (@content) { |
83
|
|
|
|
|
|
|
if ($_->{text}) { |
84
|
|
|
|
|
|
|
$_->{code} = "outs \"$_->{text}\";"; |
85
|
|
|
|
|
|
|
$_->{text} = undef; |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
my $content_code = join "\n", map { $_->{code}||"" } reverse @content; |
89
|
|
|
|
|
|
|
# pp $start_tag->{tag}, $start_tag->{indent}; |
90
|
|
|
|
|
|
|
push @stack, { |
91
|
|
|
|
|
|
|
code => "\n${indent_str}$start_tag->{tag}${attr} {\n${indent_str2}$content_code\n${indent_str}};\n" |
92
|
|
|
|
|
|
|
}; |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
$indent -= 1; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
my $ret = join "\n", "sub {", (map { $_->{code} || $_->{text} } @stack), "\n}\n"; |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# Squeeze empty lines. |
102
|
|
|
|
|
|
|
$ret =~ s/\n\s*\n/\n/g; |
103
|
|
|
|
|
|
|
$ret =~ s/\{\n\s+\}(;?)\n/{}$1\n/g; |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# Re-org all text only blocks to a single line. |
106
|
|
|
|
|
|
|
$ret =~ s/\{\n\s+(".+")\n\s+\}(;?)\n/{ $1 }$2\n/g; |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
return $ret; |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
1; |
112
|
|
|
|
|
|
|
__END__ |