line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# ------------------------------------------------------------------------------------- |
2
|
|
|
|
|
|
|
# MKDoc::XML::Tagger::Preserve |
3
|
|
|
|
|
|
|
# ------------------------------------------------------------------------------------- |
4
|
|
|
|
|
|
|
# Author : Jean-Michel Hiver. |
5
|
|
|
|
|
|
|
# Copyright : (c) MKDoc Holdings Ltd, 2003 |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# This module uses MKDoc::XML::Tagger, except it preserves specific tags to prevent |
8
|
|
|
|
|
|
|
# them from being tagged twice. At the moment the module uses regexes to do that so it |
9
|
|
|
|
|
|
|
# might not be very generic but it should at least work for XHTML tags. |
10
|
|
|
|
|
|
|
# ------------------------------------------------------------------------------------- |
11
|
|
|
|
|
|
|
package MKDoc::XML::Tagger::Preserve; |
12
|
5
|
|
|
5
|
|
303345
|
use MKDoc::XML::Tagger; |
|
5
|
|
|
|
|
18
|
|
|
5
|
|
|
|
|
261
|
|
13
|
5
|
|
|
5
|
|
179
|
use strict; |
|
5
|
|
|
|
|
10
|
|
|
5
|
|
|
|
|
167
|
|
14
|
5
|
|
|
5
|
|
27
|
use warnings; |
|
5
|
|
|
|
|
12
|
|
|
5
|
|
|
|
|
140
|
|
15
|
5
|
|
|
5
|
|
97
|
use utf8; |
|
5
|
|
|
|
|
9
|
|
|
5
|
|
|
|
|
19
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
our @Preserve = (); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
## |
21
|
|
|
|
|
|
|
# $class->process_data ($xml, @expressions); |
22
|
|
|
|
|
|
|
# ------------------------------------------ |
23
|
|
|
|
|
|
|
# Tags $xml with @expressions, where expression is a list of hashes. |
24
|
|
|
|
|
|
|
# |
25
|
|
|
|
|
|
|
# For example: |
26
|
|
|
|
|
|
|
# |
27
|
|
|
|
|
|
|
# MKDoc::XML::Tagger::Preserve->process ( |
28
|
|
|
|
|
|
|
# [ 'i_will_be_preserved', 'a' ], |
29
|
|
|
|
|
|
|
# 'I like oranges and bananas', |
30
|
|
|
|
|
|
|
# { _expr => 'oranges', _tag => 'a', href => 'http://www.google.com?q=oranges' }, |
31
|
|
|
|
|
|
|
# { _expr => 'bananas', _tag => 'a', href => 'http://www.google.com?q=bananas' }, |
32
|
|
|
|
|
|
|
# |
33
|
|
|
|
|
|
|
# Will return |
34
|
|
|
|
|
|
|
# |
35
|
|
|
|
|
|
|
# 'I like oranges and \ |
36
|
|
|
|
|
|
|
# bananas. |
37
|
|
|
|
|
|
|
## |
38
|
|
|
|
|
|
|
sub process_data |
39
|
|
|
|
|
|
|
{ |
40
|
2
|
|
|
2
|
0
|
1233
|
my $class = shift; |
41
|
2
|
|
|
|
|
5
|
local @Preserve = @{shift()}; |
|
2
|
|
|
|
|
15
|
|
42
|
2
|
|
|
|
|
14
|
my $text = shift; |
43
|
2
|
|
|
|
|
5
|
my @list = (); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
|
46
|
2
|
|
|
|
|
8
|
($text, @list) = _preserve_encode ($text); |
47
|
2
|
|
|
|
|
19
|
$text = MKDoc::XML::Tagger->process_data ($text, @_); |
48
|
2
|
|
|
|
|
10
|
$text = _preserve_decode ($text, @list); |
49
|
|
|
|
|
|
|
|
50
|
2
|
|
|
|
|
14
|
return $text; |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
sub process_file |
55
|
|
|
|
|
|
|
{ |
56
|
0
|
|
|
0
|
0
|
0
|
my $class = shift; |
57
|
0
|
|
|
|
|
0
|
my $file = shift; |
58
|
0
|
|
0
|
|
|
0
|
open FP, "<$file" || do { |
59
|
|
|
|
|
|
|
warn "Cannot read-open $file"; |
60
|
|
|
|
|
|
|
return []; |
61
|
|
|
|
|
|
|
}; |
62
|
|
|
|
|
|
|
|
63
|
0
|
|
|
|
|
0
|
my $data = ''; |
64
|
0
|
|
|
|
|
0
|
while () { $data .= $_ } |
|
0
|
|
|
|
|
0
|
|
65
|
0
|
|
|
|
|
0
|
close FP; |
66
|
|
|
|
|
|
|
|
67
|
0
|
|
|
|
|
0
|
return $class->process_data ($data); |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
sub _preserve_encode |
72
|
|
|
|
|
|
|
{ |
73
|
3
|
|
|
3
|
|
1849
|
my $text = shift; |
74
|
3
|
|
|
|
|
8
|
my @list = (); |
75
|
3
|
|
|
|
|
9
|
for my $tag (@Preserve) |
76
|
|
|
|
|
|
|
{ |
77
|
3
|
|
|
|
|
70
|
my @tags = $text =~ /(<$tag\s.*?<\/$tag>)/gs; |
78
|
3
|
|
|
|
|
12
|
for my $tag (@tags) { while ($text =~ s/\Q$tag\E/_compute_unique_string ($text, $tag, \@list)/e) {} } |
|
2
|
|
|
|
|
20
|
|
|
2
|
|
|
|
|
8
|
|
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
|
81
|
3
|
|
|
|
|
16
|
return $text, @list; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
sub _preserve_decode |
86
|
|
|
|
|
|
|
{ |
87
|
2
|
|
|
2
|
|
4
|
my $text = shift; |
88
|
2
|
|
|
|
|
7
|
my @tsil = reverse (@_); |
89
|
|
|
|
|
|
|
|
90
|
2
|
|
|
|
|
9
|
while (@tsil) |
91
|
|
|
|
|
|
|
{ |
92
|
1
|
|
|
|
|
3
|
my $val = shift (@tsil); |
93
|
1
|
|
|
|
|
3
|
my $id = shift (@tsil); |
94
|
1
|
|
|
|
|
15
|
$text =~ s/$id/$val/; |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
|
97
|
2
|
|
|
|
|
8
|
return $text; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
sub _compute_unique_string |
102
|
|
|
|
|
|
|
{ |
103
|
3
|
|
|
3
|
|
21
|
my $text = shift; |
104
|
3
|
|
|
|
|
5
|
my $str = shift; |
105
|
3
|
|
|
|
|
6
|
my $list = shift; |
106
|
3
|
|
|
|
|
9
|
my $id = join '', map { chr (ord ('a') + int (rand (26))) } 1..10; |
|
30
|
|
|
|
|
142
|
|
107
|
3
|
|
|
|
|
52
|
while ($text =~ /\Q$id\E/) |
108
|
|
|
|
|
|
|
{ |
109
|
0
|
|
|
|
|
0
|
$id = join '', map { chr (ord ('a') + int (rand (26))) } 1..10; |
|
0
|
|
|
|
|
0
|
|
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
3
|
|
|
|
|
5
|
push @{$list}, $id => $str; |
|
3
|
|
|
|
|
8
|
|
113
|
3
|
|
|
|
|
27
|
return $id; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
1; |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
__END__ |