| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# See copyright, etc in below POD section. |
|
2
|
|
|
|
|
|
|
###################################################################### |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package SystemC::Vregs::Input::TableExtract; |
|
5
|
|
|
|
|
|
|
|
|
6
|
4
|
|
|
4
|
|
28643
|
use base qw(HTML::TableExtract); |
|
|
4
|
|
|
|
|
6
|
|
|
|
4
|
|
|
|
|
10219
|
|
|
7
|
|
|
|
|
|
|
$VERSION = '1.470'; |
|
8
|
|
|
|
|
|
|
|
|
9
|
4
|
|
|
4
|
|
66241
|
use strict; |
|
|
4
|
|
|
|
|
12
|
|
|
|
4
|
|
|
|
|
132
|
|
|
10
|
4
|
|
|
4
|
|
27
|
use vars qw($Debug %Find_Start_Headers %Find_Headers); |
|
|
4
|
|
|
|
|
21
|
|
|
|
4
|
|
|
|
|
226
|
|
|
11
|
4
|
|
|
4
|
|
23
|
use IO::File; |
|
|
4
|
|
|
|
|
8
|
|
|
|
4
|
|
|
|
|
709
|
|
|
12
|
4
|
|
|
4
|
|
23
|
use HTML::TableExtract; |
|
|
4
|
|
|
|
|
6
|
|
|
|
4
|
|
|
|
|
32
|
|
|
13
|
4
|
|
|
4
|
|
150
|
use HTML::Entities (); |
|
|
4
|
|
|
|
|
8
|
|
|
|
4
|
|
|
|
|
10641
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
%Find_Start_Headers = (Class=>1, |
|
16
|
|
|
|
|
|
|
Package=>1, |
|
17
|
|
|
|
|
|
|
Defines=>1, |
|
18
|
|
|
|
|
|
|
Enum=>1, |
|
19
|
|
|
|
|
|
|
Register=>1, |
|
20
|
|
|
|
|
|
|
Vregs_End_Of_Decl=>1, |
|
21
|
|
|
|
|
|
|
); |
|
22
|
|
|
|
|
|
|
%Find_Headers = (%Find_Start_Headers, |
|
23
|
|
|
|
|
|
|
Address=>1, |
|
24
|
|
|
|
|
|
|
Attributes=>1, |
|
25
|
|
|
|
|
|
|
); |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
###################################################################### |
|
28
|
|
|
|
|
|
|
#### Parsing |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
sub parse_file { |
|
31
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
32
|
0
|
|
|
|
|
0
|
my $filename = shift; |
|
33
|
|
|
|
|
|
|
|
|
34
|
0
|
0
|
|
|
|
0
|
print "parse_file $filename\n" if $Debug; |
|
35
|
0
|
0
|
|
|
|
0
|
my $fh = IO::File->new ($filename) or die "%Error: $! $filename\n"; |
|
36
|
0
|
|
|
|
|
0
|
$self->{_fh} = $fh; |
|
37
|
0
|
|
|
|
|
0
|
$self->{_vregs_filename} = $filename; |
|
38
|
0
|
|
|
|
|
0
|
$self->{_vregs_num_tables} = -1; |
|
39
|
0
|
|
|
|
|
0
|
$self->{_vregs_line} = 1; |
|
40
|
0
|
|
|
|
|
0
|
$self->{_format} = ''; #latex2html |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Track line numbers |
|
43
|
0
|
|
|
|
|
0
|
$self->handler(start => "start", "self,tagname,attr,attrseq,text,line"); |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# I've had lots of problems with parse_file missing stuff and not handling |
|
46
|
|
|
|
|
|
|
# chunks breaking in the middle of HTML objects. So, we read it ourself. |
|
47
|
|
|
|
|
|
|
# $self->SUPER::parse_file ($fh); |
|
48
|
0
|
|
|
|
|
0
|
$self->parse(join('',$fh->getlines())); |
|
49
|
|
|
|
|
|
|
|
|
50
|
0
|
|
|
|
|
0
|
$self->eof(); |
|
51
|
0
|
|
|
|
|
0
|
$self->start("p"); |
|
52
|
0
|
|
|
|
|
0
|
$self->text("Vregs_End_Of_Decl"); |
|
53
|
0
|
|
|
|
|
0
|
$fh->close(); |
|
54
|
0
|
0
|
|
|
|
0
|
print "parse_file DONE\n" if $Debug; |
|
55
|
|
|
|
|
|
|
} |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
sub clean_html_text { |
|
58
|
0
|
|
|
0
|
0
|
0
|
$_ = shift; |
|
59
|
|
|
|
|
|
|
# HTML escapes to normal Latin1 ASCII |
|
60
|
0
|
|
|
|
|
0
|
$_ = HTML::Entities::decode($_); |
|
61
|
|
|
|
|
|
|
# Hack: Decode two-byte UTF-8 chars without checking that the HTML body |
|
62
|
|
|
|
|
|
|
# uses UTF-8 encoding. wvHtml writes HTML in UTF-8 encoding when the |
|
63
|
|
|
|
|
|
|
# .doc file comes from StarOffice7. |
|
64
|
0
|
|
|
|
|
0
|
s{[\302\303].}{ |
|
65
|
0
|
|
|
|
|
0
|
my ($a, $b) = unpack("CC", $&); |
|
66
|
0
|
0
|
|
|
|
0
|
(($b & 0xC0) != 0x80 ? $& # Not a 2-byte UTF-8 char. |
|
67
|
|
|
|
|
|
|
: pack("C", ($a & 0x3)<<6 | ($b & 0x3f) )) |
|
68
|
|
|
|
|
|
|
}eg; |
|
69
|
|
|
|
|
|
|
# Latin1 decoding |
|
70
|
|
|
|
|
|
|
# Substituting 2 or 3 periods for 'elipses' causes Vspecs to truncate |
|
71
|
|
|
|
|
|
|
# the field description at the periods, so use dashes. |
|
72
|
0
|
|
|
|
|
0
|
s/\205/-/g; # ISO-Latin1 horizontal elipses (0x85) |
|
73
|
0
|
|
|
|
|
0
|
s/\221/\'/g; # ISO-Latin1 left single-quote |
|
74
|
0
|
|
|
|
|
0
|
s/\222/\'/g; # ISO-Latin1 right single-quote |
|
75
|
0
|
|
|
|
|
0
|
s/\223/\"/g; # ISO-Latin1 left double-quote |
|
76
|
0
|
|
|
|
|
0
|
s/\224/\"/g; # ISO-Latin1 right double-quote |
|
77
|
0
|
|
|
|
|
0
|
s/\225/\*/g; # ISO-Latin1 bullet |
|
78
|
0
|
|
|
|
|
0
|
s/\226/-/g; # ISO-Latin1 en-dash (0x96) |
|
79
|
0
|
|
|
|
|
0
|
s/\227/-/g; # ISO-Latin1 em-dash (0x97) |
|
80
|
0
|
|
|
|
|
0
|
s/\230/~/g; # ISO-Latin1 small tilde |
|
81
|
0
|
|
|
|
|
0
|
s/\233/>/g; # ISO-Latin1 single right angle quote |
|
82
|
0
|
|
|
|
|
0
|
s/\240/ /g; # ISO-Latin1 nonbreaking space |
|
83
|
0
|
|
|
|
|
0
|
s/\246/\|/g; # ISO-Latin1 broken vertical bar |
|
84
|
0
|
|
|
|
|
0
|
s/\255//g; # ISO-Latin1 soft hyphen (0xAD) |
|
85
|
0
|
|
|
|
|
0
|
s/\264/\'/g; # ISO-Latin1 spacing acute |
|
86
|
0
|
|
|
|
|
0
|
s/\267/\*/g; # ISO-Latin1 middle dot (0xB7) |
|
87
|
0
|
|
|
|
|
0
|
s/\327/\*/g;# x multiplication |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
# These are automatically removed by HTML::Entities in Perl 5.7 and greater |
|
90
|
|
|
|
|
|
|
# Also decode unicode sequences decoded by perl 5.8.* |
|
91
|
0
|
|
|
|
|
0
|
s/\&\#8209;/-/g; # ISOpub nonbreaking hyphen (U+2011, ‑) |
|
92
|
0
|
|
|
|
|
0
|
s/\&\#8211;/-/g; s/\–/-/g; s/\342\200\041/\-/g; # ISOpub en-dash (U+2013, –) |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
93
|
0
|
|
|
|
|
0
|
s/\&\#8212;/-/g; s/\—/-/g; s/\342\200\042/\-/g; # ISOpub em-dash (U+2014, —) |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
94
|
0
|
|
|
|
|
0
|
s/\&\#8216;/\'/g; s/\‘/\'/g; s/\342\200\176/\`/g; # ISOnum left single-quote (U+2018, ‘) |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
95
|
0
|
|
|
|
|
0
|
s/\&\#8217;/\'/g; s/\’/\'/g; s/\342\200\177/\`/g; # ISOnum right single-quote (U+2019, ’) |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
96
|
0
|
|
|
|
|
0
|
s/\&\#8230;/-/g; s/\…/-/g; s/\342\200\174/-/g; # ISOpub horizontal elipses (U+2026, …) |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
97
|
0
|
|
|
|
|
0
|
; s/(\342\200\230|\x{2018})/\`/g; # ISOnum left single-quote (U+2018, ‘) |
|
98
|
0
|
|
|
|
|
0
|
; s/(\342\200\231|\x{2019})/\`/g; # ISOnum right single-quote (U+2019, ’) |
|
99
|
0
|
|
|
|
|
0
|
s/\&\#8220;/\"/g; s/\“/\"/g; s/(\342\200\234|\x{201c})/\"/g; # ISOnum left double-quote (U+201C, “) |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
100
|
0
|
|
|
|
|
0
|
s/\&\#8221;/\"/g; s/\”/\"/g; s/(\342\200\235|\x{201d})/\"/g; # ISOnum right double-quote (U+201D, ”) |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
101
|
0
|
|
|
|
|
0
|
; s/(\342\210\222|\x{2212})/-/g; # ISOpub horizontal elipses (U+2026, …) |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# Compress spacing |
|
104
|
0
|
|
|
|
|
0
|
s/\`/\'/g; |
|
105
|
0
|
|
|
|
|
0
|
s/[\t\n\r ]+/ /g; |
|
106
|
0
|
|
|
|
|
0
|
s/^ *//g; |
|
107
|
0
|
|
|
|
|
0
|
return $_; |
|
108
|
|
|
|
|
|
|
} |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub start { |
|
111
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
112
|
0
|
0
|
0
|
|
|
0
|
$self->{_vregs_line} = $_[4] if $_[4] && !$self->{_vregs_forced_line}; # As we have a handler requesting it |
|
113
|
0
|
0
|
0
|
|
|
0
|
if ($_[0] eq 'p' || $_[0] =~ /^h[0-9]/) { |
|
114
|
|
|
|
|
|
|
#print " \n" if $Debug; |
|
115
|
0
|
|
|
|
|
0
|
$self->{_vregs_first_word_in_p} = 1; |
|
116
|
0
|
0
|
0
|
|
|
0
|
if ($self->{_format} eq 'latex2html' # Latex2html doesn't do any 's |
|
|
|
|
0
|
|
|
|
|
|
117
|
|
|
|
|
|
|
&& $self->{_vregs_next_tag} |
|
118
|
|
|
|
|
|
|
&& $self->{_vregs_next}{$self->{_vregs_next_tag}} ne "") { |
|
119
|
0
|
|
|
|
|
0
|
$self->{_vregs_next_tag} = undef; |
|
120
|
|
|
|
|
|
|
} |
|
121
|
|
|
|
|
|
|
} |
|
122
|
0
|
0
|
0
|
|
|
0
|
if ($_[0] eq 'meta' && $_[1]->{content} && $_[1]->{content} =~ /latex2html/i) { |
|
|
|
|
0
|
|
|
|
|
|
123
|
0
|
|
|
|
|
0
|
$self->{_format} = 'latex2html'; |
|
124
|
0
|
0
|
|
|
|
0
|
print "Format = latex2html\n" if $Debug; |
|
125
|
|
|
|
|
|
|
} |
|
126
|
0
|
|
|
|
|
0
|
$self->SUPER::start (@_); |
|
127
|
|
|
|
|
|
|
} |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
sub comment { |
|
130
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
131
|
0
|
|
|
|
|
0
|
my $text = shift; |
|
132
|
0
|
0
|
|
|
|
0
|
if ($text =~ /^\s*line\s+(\d+)\s+"(.*)"/) { |
|
133
|
0
|
|
|
|
|
0
|
$self->{_vregs_forced_line} = $1; |
|
134
|
0
|
|
|
|
|
0
|
$self->{_vregs_line} = $1; |
|
135
|
0
|
|
|
|
|
0
|
$self->{_vregs_filename} = $2; |
|
136
|
|
|
|
|
|
|
} |
|
137
|
|
|
|
|
|
|
} |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub end { |
|
140
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
141
|
0
|
0
|
|
|
|
0
|
if ($_[0] eq 'p') { |
|
142
|
0
|
0
|
|
|
|
0
|
print "<\/$_[0]>\n" if $Debug; |
|
143
|
0
|
0
|
|
|
|
0
|
if (!$self->{_vregs_first_endp}) { |
|
144
|
0
|
|
|
|
|
0
|
$self->{_vregs_first_endp} = 1; |
|
145
|
|
|
|
|
|
|
} else { |
|
146
|
0
|
|
|
|
|
0
|
$self->{_vregs_next_tag} = undef; |
|
147
|
|
|
|
|
|
|
} |
|
148
|
|
|
|
|
|
|
} |
|
149
|
0
|
|
|
|
|
0
|
$self->SUPER::end (@_); |
|
150
|
|
|
|
|
|
|
} |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
sub text { |
|
153
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
154
|
|
|
|
|
|
|
# Don't bother if we are in a table |
|
155
|
0
|
0
|
|
|
|
0
|
if (! $self->{_in_a_table}) { |
|
156
|
0
|
|
|
|
|
0
|
my $text = $_[0]; |
|
157
|
0
|
0
|
|
|
|
0
|
if ($Debug) { |
|
158
|
0
|
|
|
|
|
0
|
(my $printtext = $text) =~ s/[\n \t]+/ /g; |
|
159
|
0
|
0
|
|
|
|
0
|
printf +("Text %s: %s:|%s|\n", $self->{_vregs_line}, |
|
160
|
|
|
|
|
|
|
($self->{_vregs_first_word_in_p}?"FW":" "),$printtext); |
|
161
|
|
|
|
|
|
|
} |
|
162
|
0
|
|
|
|
|
0
|
$text = clean_html_text($text); |
|
163
|
0
|
0
|
0
|
|
|
0
|
if ($text !~ /^\s*$/ |
|
164
|
|
|
|
|
|
|
&& $text !~ /^) { |
|
165
|
0
|
|
|
|
|
0
|
my $nosp_text = $text; |
|
166
|
0
|
0
|
|
|
|
0
|
if ($self->{_vregs_first_word_in_p}) { |
|
167
|
0
|
|
|
|
|
0
|
$nosp_text =~ s/^\s+//; $nosp_text =~ s/\s+$//; |
|
|
0
|
|
|
|
|
0
|
|
|
168
|
|
|
|
|
|
|
# Per W3C HTML spec, "SGML specifies that a line break immediately |
|
169
|
|
|
|
|
|
|
# following a start tag must be ignored, as must a line break |
|
170
|
|
|
|
|
|
|
# immediately before an end tag. This applies to all HTML elements |
|
171
|
|
|
|
|
|
|
# without exception." |
|
172
|
|
|
|
|
|
|
# And, "For all HTML elements except PRE, sequences of white space |
|
173
|
|
|
|
|
|
|
# separate words." |
|
174
|
|
|
|
|
|
|
# Sadly, the HTML parser did not take care of this for us. |
|
175
|
0
|
|
|
|
|
0
|
$text = $nosp_text; |
|
176
|
|
|
|
|
|
|
} |
|
177
|
0
|
0
|
0
|
|
|
0
|
if ($self->{_vregs_first_word_in_p} |
|
|
|
0
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
&& $Find_Headers{$nosp_text}) { |
|
179
|
0
|
0
|
|
|
|
0
|
print "Keyword '$text'\n" if $Debug; |
|
180
|
0
|
0
|
0
|
|
|
0
|
if ($Find_Start_Headers{$text} |
|
181
|
|
|
|
|
|
|
&& $self->{_vregs_next}) { |
|
182
|
|
|
|
|
|
|
# Process previous entry, and prepare for next discovery |
|
183
|
0
|
|
|
|
|
0
|
my $inp = $self->{_vregs_inp}; |
|
184
|
0
|
|
|
|
|
0
|
$inp->new_item([], $self->{_vregs_next}); # note no table |
|
185
|
0
|
|
|
|
|
0
|
$self->{_vregs_next} = undef; |
|
186
|
|
|
|
|
|
|
} |
|
187
|
0
|
|
|
|
|
0
|
$self->{_vregs_next}{at} = ($self->{_vregs_filename}.":".$self->{_vregs_line}); |
|
188
|
0
|
|
|
|
|
0
|
$self->{_vregs_next_tag} = $text; |
|
189
|
0
|
|
|
|
|
0
|
$self->{_vregs_next}{$self->{_vregs_next_tag}} = ""; |
|
190
|
0
|
|
|
|
|
0
|
$self->{_vregs_first_endp} = 0; |
|
191
|
0
|
0
|
|
|
|
0
|
$self->{_vregs_next} = undef if $text eq 'Vregs_End_Of_Decl'; |
|
192
|
0
|
0
|
|
|
|
0
|
$self->{_vregs_next_tag} = undef if $text eq 'Vregs_End_Of_Decl'; |
|
193
|
|
|
|
|
|
|
} |
|
194
|
|
|
|
|
|
|
elsif ($self->{_vregs_next_tag}) { |
|
195
|
0
|
0
|
|
|
|
0
|
print "TAG '$self->{_vregs_next_tag}' '$text'\n" if $Debug; |
|
196
|
0
|
|
|
|
|
0
|
$self->{_vregs_next}{$self->{_vregs_next_tag}} .= $text; |
|
197
|
|
|
|
|
|
|
} |
|
198
|
|
|
|
|
|
|
} |
|
199
|
0
|
0
|
|
|
|
0
|
$self->{_vregs_first_word_in_p} = 0 if ($text !~ /^\s*$/); |
|
200
|
0
|
|
|
|
|
0
|
my @tables = $self->table_states(); |
|
201
|
0
|
|
|
|
|
0
|
my $numtables = (); |
|
202
|
0
|
0
|
|
|
|
0
|
if ($#tables != $self->{_vregs_num_tables}) { |
|
203
|
0
|
|
|
|
|
0
|
$self->{_vregs_num_tables} = $#tables; |
|
204
|
0
|
0
|
|
|
|
0
|
if ($self->{_vregs_next}) { # else a table outside of a register decl |
|
205
|
0
|
|
|
|
|
0
|
my $table = $tables[$#tables]; |
|
206
|
|
|
|
|
|
|
# Clean up the table to remove extra spaces in the tables |
|
207
|
0
|
|
|
|
|
0
|
my @bittable = ($table->rows); |
|
208
|
0
|
|
|
|
|
0
|
foreach my $row (@bittable) { |
|
209
|
0
|
0
|
|
|
|
0
|
@$row = map { |
|
210
|
0
|
|
|
|
|
0
|
$_ = '' if !defined $_; # TableExtract may return '' if empty column |
|
211
|
0
|
|
|
|
|
0
|
$_ = clean_html_text($_); |
|
212
|
|
|
|
|
|
|
# The below isn't quite right, as a < will become a < by the extractor |
|
213
|
0
|
|
|
|
|
0
|
$_ =~ s/<[^>]+>//ig; #$_="" if /SupportEmptyParas/i; # Microsloth Word junk |
|
214
|
0
|
|
|
|
|
0
|
$_ =~ s/^[ \t\n\r]+//sig; |
|
215
|
0
|
|
|
|
|
0
|
$_ =~ s/[ \t\n\r]+$//sig; |
|
216
|
0
|
|
|
|
|
0
|
$_; |
|
217
|
|
|
|
|
|
|
} @$row; |
|
218
|
|
|
|
|
|
|
} |
|
219
|
|
|
|
|
|
|
# Process this, and prepare for next discovery |
|
220
|
0
|
|
|
|
|
0
|
my $inp = $self->{_vregs_inp}; |
|
221
|
0
|
0
|
|
|
|
0
|
print ::Dumper(\@bittable, $self->{_vregs_next}) if $Debug; |
|
222
|
0
|
|
|
|
|
0
|
$inp->new_item(\@bittable, $self->{_vregs_next}); |
|
223
|
0
|
|
|
|
|
0
|
$self->{_vregs_next} = undef; |
|
224
|
0
|
|
|
|
|
0
|
$self->{_vregs_next_tag} = undef; |
|
225
|
|
|
|
|
|
|
} |
|
226
|
|
|
|
|
|
|
} |
|
227
|
|
|
|
|
|
|
} |
|
228
|
0
|
|
|
|
|
0
|
$self->SUPER::text (@_); |
|
229
|
|
|
|
|
|
|
} |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
###################################################################### |
|
232
|
|
|
|
|
|
|
#### HTML cleaning |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
sub clean_html_file { |
|
235
|
1
|
|
|
1
|
0
|
103
|
my $filename = shift; |
|
236
|
|
|
|
|
|
|
# Word puts so much XML &*(#@$ in the file... Strip it so it's more obvious. |
|
237
|
|
|
|
|
|
|
|
|
238
|
1
|
|
|
|
|
2
|
my $wholefile; |
|
239
|
|
|
|
|
|
|
{ |
|
240
|
1
|
50
|
|
|
|
2
|
my $fh = IO::File->new($filename) or die "%Error: $! $filename\n"; |
|
|
1
|
|
|
|
|
11
|
|
|
241
|
1
|
|
|
|
|
112
|
local $/; |
|
242
|
1
|
|
|
|
|
2
|
undef $/; |
|
243
|
1
|
|
|
|
|
116
|
$wholefile = <$fh>; |
|
244
|
1
|
|
|
|
|
10
|
$fh->close; |
|
245
|
|
|
|
|
|
|
} |
|
246
|
|
|
|
|
|
|
|
|
247
|
1
|
|
|
|
|
61
|
$wholefile =~ s%\r%%smg; |
|
248
|
1
|
|
|
|
|
40
|
$wholefile =~ s%%%smg; |
|
249
|
1
|
|
|
|
|
28
|
$wholefile =~ s%[ \n]style='.*?'%%smg; |
|
250
|
1
|
|
|
|
|
94
|
$wholefile =~ s%[ \n]style=".*?"%%smg; |
|
251
|
1
|
|
|
|
|
28
|
$wholefile =~ s% border=0 % border=1 %smg; |
|
252
|
1
|
|
|
|
|
44
|
$wholefile =~ s%<\/?span\s*>%%smg; |
|
253
|
1
|
|
|
|
|
47
|
$wholefile =~ s%<\/?o:p>%%smg; |
|
254
|
1
|
|
|
|
|
33
|
$wholefile =~ s% width=\d+ % %smg; |
|
255
|
1
|
|
|
|
|
12
|
$wholefile =~ s%\ %%smg; |
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
# Microsoft Word changebars |
|
258
|
1
|
|
|
|
|
16
|
$wholefile =~ s%\[Author\s+\S+:\s+at\s+\S+\s+\S+\s+\d+\s+\S+\s+\d+\s*\]\s*%%smg; |
|
259
|
|
|
|
|
|
|
|
|
260
|
1
|
50
|
|
|
|
6
|
my $fh = IO::File->new($filename,"w") or die "%Error: $! writing $filename\n"; |
|
261
|
1
|
|
|
|
|
348
|
print $fh $wholefile; |
|
262
|
1
|
|
|
|
|
5
|
$fh->close; |
|
263
|
|
|
|
|
|
|
} |
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
###################################################################### |
|
266
|
|
|
|
|
|
|
#### Package return |
|
267
|
|
|
|
|
|
|
1; |
|
268
|
|
|
|
|
|
|
__END__ |