blib/lib/MsOffice/Word/HTML/Writer.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 146 | 172 | 84.8 |
branch | 25 | 52 | 48.0 |
condition | 11 | 20 | 55.0 |
subroutine | 24 | 26 | 92.3 |
pod | 10 | 10 | 100.0 |
total | 216 | 280 | 77.1 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | package MsOffice::Word::HTML::Writer; | ||||||
2 | 4 | 4 | 278431 | use utf8; | |||
4 | 57 | ||||||
4 | 23 | ||||||
3 | 4 | 4 | 129 | use warnings; | |||
4 | 7 | ||||||
4 | 95 | ||||||
4 | 4 | 4 | 19 | use strict; | |||
4 | 9 | ||||||
4 | 84 | ||||||
5 | 4 | 4 | 1958 | use MIME::Base64 qw/encode_base64/; | |||
4 | 2816 | ||||||
4 | 231 | ||||||
6 | 4 | 4 | 1971 | use MIME::Types; | |||
4 | 26908 | ||||||
4 | 186 | ||||||
7 | 4 | 4 | 29 | use Carp; | |||
4 | 8 | ||||||
4 | 199 | ||||||
8 | 4 | 4 | 2377 | use Params::Validate qw/validate SCALAR HASHREF/; | |||
4 | 38240 | ||||||
4 | 305 | ||||||
9 | 4 | 4 | 29 | use Scalar::Util qw/looks_like_number/; | |||
4 | 9 | ||||||
4 | 12003 | ||||||
10 | |||||||
11 | our $VERSION = '1.09'; | ||||||
12 | |||||||
13 | sub new { | ||||||
14 | 4 | 4 | 1 | 285 | my $class = shift; | ||
15 | |||||||
16 | # validate named parameters | ||||||
17 | 4 | 50 | my $param_spec = { | ||||
18 | title => {type => SCALAR, optional => 1}, | ||||||
19 | head => {type => SCALAR, optional => 1}, | ||||||
20 | hf_head => {type => SCALAR, optional => 1}, | ||||||
21 | WordDocument => {type => HASHREF, optional => 1}, | ||||||
22 | charset => {type => SCALAR, optional => 1, default => 'utf-8'}, | ||||||
23 | }; | ||||||
24 | 4 | 116 | my %params = validate(@_, $param_spec); | ||||
25 | |||||||
26 | # create instance | ||||||
27 | my $self = { | ||||||
28 | MIME_parts => [], | ||||||
29 | sections => [{}], | ||||||
30 | title => $params{title} | ||||||
31 | || "Document generated by MsOffice::Word::HTML::Writer", | ||||||
32 | head => $params{head} || "", | ||||||
33 | hf_head => $params{hf_head} || "", | ||||||
34 | WordDocument => $params{WordDocument}, | ||||||
35 | charset => $params{charset}, | ||||||
36 | 4 | 100 | 85 | }; | |||
50 | |||||||
50 | |||||||
37 | |||||||
38 | 4 | 26 | bless $self, $class; | ||||
39 | } | ||||||
40 | |||||||
41 | |||||||
42 | sub create_section { | ||||||
43 | 4 | 4 | 1 | 19 | my $self = shift; | ||
44 | |||||||
45 | # validate named parameters | ||||||
46 | 4 | 14 | my $param_spec = {page => {type => HASHREF, optional => 1}}; | ||||
47 | $param_spec->{$_} = {type => SCALAR, optional => 1} | ||||||
48 | 4 | 60 | for qw/header footer first_header first_footer new_page/; | ||||
49 | 4 | 65 | my %params = validate(@_, $param_spec); | ||||
50 | |||||||
51 | # if first automatic section is empty, delete it | ||||||
52 | $self->{sections} = [] | ||||||
53 | 4 | 50 | 66 | 17 | if scalar(@{$self->{sections}}) == 1 && !$self->{sections}[0]{content}; | ||
4 | 22 | ||||||
54 | |||||||
55 | # add the new section | ||||||
56 | 4 | 7 | push @{$self->{sections}}, \%params; | ||||
4 | 23 | ||||||
57 | } | ||||||
58 | |||||||
59 | |||||||
60 | sub write { | ||||||
61 | 22 | 22 | 1 | 88 | my $self = shift; | ||
62 | |||||||
63 | # add html arguments to current section content | ||||||
64 | 22 | 99 | $self->{sections}[-1]{content} .= join ("", @_); | ||||
65 | } | ||||||
66 | |||||||
67 | |||||||
68 | |||||||
69 | sub save_as { | ||||||
70 | 4 | 4 | 1 | 10490 | my ($self, $filename) = @_; | ||
71 | |||||||
72 | # default extension is ".doc" | ||||||
73 | 4 | 50 | 31 | $filename .= ".doc" unless $filename =~ /\.\w{1,5}$/; | |||
74 | |||||||
75 | # open the file | ||||||
76 | 4 | 50 | 490 | open my $fh, ">:raw:encoding($self->{charset}):crlf", $filename | |||
77 | or croak "could not open >$filename: $!"; | ||||||
78 | |||||||
79 | # write content and close | ||||||
80 | 4 | 942 | print $fh $self->content; | ||||
81 | 4 | 467 | close $fh; | ||||
82 | |||||||
83 | 4 | 342 | return $filename; | ||||
84 | } | ||||||
85 | |||||||
86 | |||||||
87 | sub attach { | ||||||
88 | 0 | 0 | 1 | 0 | my ($self, $name, $open1, $open2, @other) = @_; | ||
89 | |||||||
90 | # open a handle to the attachment (need to dispatch according to number | ||||||
91 | # of args, because perlfunc/open() has complex prototyping behaviour) | ||||||
92 | 0 | 0 | my $fh; | ||||
93 | 0 | 0 | 0 | if (@other) { | |||
0 | |||||||
94 | 0 | 0 | 0 | open $fh, $open1, $open2, @other | |||
95 | or croak "open $open1, $open2, @other : $!"; | ||||||
96 | } | ||||||
97 | elsif ($open2) { | ||||||
98 | 0 | 0 | 0 | open $fh, $open1, $open2 | |||
99 | or croak "open $open1, $open2 : $!"; | ||||||
100 | } | ||||||
101 | else { | ||||||
102 | 0 | 0 | 0 | open $fh, $open1 | |||
103 | or croak "open $open1 : $!"; | ||||||
104 | } | ||||||
105 | |||||||
106 | # slurp the content | ||||||
107 | 0 | 0 | 0 | binmode($fh) unless $name =~ /\.(html?|css|te?xt|rtf)$/i; | |||
108 | 0 | 0 | local $/; | ||||
109 | 0 | 0 | my $attachment = <$fh>; | ||||
110 | |||||||
111 | # add the attachment (filename and content) | ||||||
112 | 0 | 0 | push @{$self->{MIME_parts}}, ["files/$name", $attachment]; | ||||
0 | 0 | ||||||
113 | } | ||||||
114 | |||||||
115 | |||||||
116 | sub page_break { | ||||||
117 | 2 | 2 | 1 | 11 | my ($self) = @_; | ||
118 | 2 | 6 | return qq{ \n}; |
||||
119 | } | ||||||
120 | |||||||
121 | |||||||
122 | sub tab { | ||||||
123 | 0 | 0 | 1 | 0 | my ($self, $n_tabs) = @_; | ||
124 | 0 | 0 | 0 | $n_tabs ||= 1; | |||
125 | 0 | 0 | return qq{}; | ||||
126 | } | ||||||
127 | |||||||
128 | sub field { | ||||||
129 | 2 | 2 | 1 | 8 | my ($self, $fieldname, $args, $content, $prevent_html_entity_encoding) = @_; | ||
130 | |||||||
131 | 2 | 7 | for ($args, $content) { | ||||
132 | 4 | 50 | 12 | $_ ||= ""; # undef replaced by empty string | |||
133 | 4 | 50 | 13 | s/&/&/g, s/</g, s/>/>/g # replace HTML entities | |||
134 | unless $prevent_html_entity_encoding; | ||||||
135 | } | ||||||
136 | |||||||
137 | 2 | 4 | my $field; | ||||
138 | |||||||
139 | # when args : long form of field encoding | ||||||
140 | 2 | 50 | 7 | if ($args) { | |||
141 | 2 | 4 | my $space = qq{ }; | ||||
142 | 2 | 16 | $field = qq{} | ||||
143 | . $space . $fieldname . $space . $args | ||||||
144 | . qq{} | ||||||
145 | . $content | ||||||
146 | . qq{}; | ||||||
147 | } | ||||||
148 | # otherwise : short form of field encoding | ||||||
149 | else { | ||||||
150 | 0 | 0 | $field = qq{$content}; | ||||
151 | } | ||||||
152 | |||||||
153 | 2 | 22 | return $field; | ||||
154 | } | ||||||
155 | |||||||
156 | sub quote { | ||||||
157 | 2 | 2 | 1 | 16 | my ($self, $text, $prevent_html_entity_encoding) = @_; | ||
158 | 2 | 5 | my $args = $text; | ||||
159 | 2 | 6 | $args =~ s/"/\\"/g; | ||||
160 | 2 | 7 | $args = qq{"$args"}; | ||||
161 | 2 | 10 | $args =~ s/"/"/g; | ||||
162 | 2 | 6 | return $self->field('QUOTE', $args, $text, $prevent_html_entity_encoding); | ||||
163 | } | ||||||
164 | |||||||
165 | |||||||
166 | |||||||
167 | sub content { | ||||||
168 | 8 | 8 | 1 | 27 | my ($self) = @_; | ||
169 | |||||||
170 | # separator for parts in MIME document | ||||||
171 | 8 | 18 | my $boundary = qw/__NEXT_PART__/; | ||||
172 | |||||||
173 | # MIME multipart header | ||||||
174 | 8 | 31 | my $mime = qq{MIME-Version: 1.0\n} | ||||
175 | . qq{Content-Type: multipart/related; boundary="$boundary"\n\n} | ||||||
176 | . qq{MIME document generated by MsOffice::Word::HTML::Writer\n\n}; | ||||||
177 | |||||||
178 | # generate each part (main document must be first) | ||||||
179 | 8 | 27 | my @parts = $self->_MIME_parts; | ||||
180 | 8 | 20 | my $filelist = $self->_filelist(@parts); | ||||
181 | 8 | 25 | for my $pair ($self->_main, @parts, $filelist) { | ||||
182 | 16 | 47 | my ($filename, $content) = @$pair; | ||||
183 | 16 | 50 | 75 | my $mime_type = MIME::Types->new->mimeTypeOf($filename) || ''; | |||
184 | 16 | 197416 | my ($encoding, $encoded); | ||||
185 | 16 | 50 | 37 | if ($mime_type =~ /^text|xml$/) { | |||
186 | # no need for Windows-style end-of-lines of shape CRLF | ||||||
187 | 16 | 167 | $content =~ s/\r\n/\n/g; | ||||
188 | |||||||
189 | # if charset is not utf-8, wide chars are encoded as numerical HTML entities | ||||||
190 | 16 | 100 | 205 | $content =~ s/([^\x{0}-\x{FF}])/''.ord($1).';'/eg unless $self->{charset} eq 'utf-8'; | |||
18 | 70 | ||||||
191 | |||||||
192 | # simple-minded MIME quoted-printable encoding | ||||||
193 | 16 | 37 | $encoding = 'quoted-printable'; | ||||
194 | 16 | 177 | ($encoded = $content) =~ s/=/=3D/g; | ||||
195 | 16 | 63 | $mime_type .= qq{; charset="$self->{charset}"}; | ||||
196 | } | ||||||
197 | else { | ||||||
198 | 0 | 0 | $encoding = 'base64'; | ||||
199 | 0 | 0 | $encoded = encode_base64($content); | ||||
200 | } | ||||||
201 | |||||||
202 | 16 | 186 | $mime .= qq{--$boundary\n} | ||||
203 | . qq{Content-Location: file:///C:/foo/$filename\n} | ||||||
204 | . qq{Content-Transfer-Encoding: $encoding\n} | ||||||
205 | . qq{Content-Type: $mime_type\n\n} | ||||||
206 | . $encoded | ||||||
207 | . "\n"; | ||||||
208 | } | ||||||
209 | |||||||
210 | # close last MIME part | ||||||
211 | 8 | 29 | $mime .= "--$boundary--\n"; | ||||
212 | |||||||
213 | 8 | 198 | return $mime; | ||||
214 | } | ||||||
215 | |||||||
216 | |||||||
217 | #====================================================================== | ||||||
218 | # PRIVATE METHODS | ||||||
219 | #====================================================================== | ||||||
220 | |||||||
221 | sub _main { | ||||||
222 | 8 | 8 | 16 | my ($self) = @_; | |||
223 | |||||||
224 | # body : concatenate content from all sections | ||||||
225 | 8 | 16 | my $body = ""; | ||||
226 | 8 | 15 | my $i = 1; | ||||
227 | 8 | 12 | foreach my $section (@{$self->{sections}}) { | ||||
8 | 19 | ||||||
228 | |||||||
229 | # section break | ||||||
230 | 16 | 100 | 43 | if ($i > 1) { | |||
231 | # type of break | ||||||
232 | 8 | 19 | my $break = $section->{new_page}; | ||||
233 | 8 | 100 | 66 | 44 | $break = 'always' if $break && looks_like_number($break); # if true but not a word | ||
234 | 8 | 50 | 21 | $break ||= 'auto'; # if false | |||
235 | # otherwise, type of break will just be the word given in {new_page} | ||||||
236 | |||||||
237 | # insert into body | ||||||
238 | 8 | 20 | my $style = qq{page-break-before:$break;mso-break-type:section-break}; | ||||
239 | 8 | 20 | $body .= qq{ \n}; |
||||
240 | } | ||||||
241 | |||||||
242 | # section content | ||||||
243 | 16 | 84 | $body .= qq{ \n$section->{content}\n \n};
|
||||
244 | |||||||
245 | 16 | 34 | $i += 1; | ||||
246 | } | ||||||
247 | |||||||
248 | # assemble head and body into a full document | ||||||
249 | 8 | 28 | my $html | ||||
250 | = qq{ | ||||||
251 | . qq{ xmlns:o="urn:schemas-microsoft-com:office:office"\n} | ||||||
252 | . qq{ xmlns:w="urn:schemas-microsoft-com:office:word"\n} | ||||||
253 | . qq{ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml"\n} | ||||||
254 | . qq{ xmlns="http://www.w3.org/TR/REC-html40">\n} | ||||||
255 | . $self->_head | ||||||
256 | . qq{\n$body\n} | ||||||
257 | . qq{\n}; | ||||||
258 | 8 | 41 | return ["main.htm", $html]; | ||||
259 | } | ||||||
260 | |||||||
261 | |||||||
262 | sub _head { | ||||||
263 | 8 | 8 | 17 | my ($self) = @_; | |||
264 | |||||||
265 | # HTML head : link to filelist, title, view format and styles | ||||||
266 | my $head | ||||||
267 | = qq{\n} | ||||||
268 | . qq{\n} | ||||||
269 | . qq{\n} | ||||||
270 | . qq{ |
||||||
271 | . $self->_xml_WordDocument | ||||||
272 | . qq{\n} | ||||||
273 | . $self->{head} | ||||||
274 | 8 | 47 | . qq{\n}; | ||||
275 | 8 | 74 | return $head; | ||||
276 | } | ||||||
277 | |||||||
278 | |||||||
279 | |||||||
280 | sub _xml_WordDocument { | ||||||
281 | 8 | 8 | 15 | my ($self) = @_; | |||
282 | 8 | 100 | 33 | my $xml_root = $self->{WordDocument} or return ""; | |||
283 | 4 | 11 | return " |
||||
284 | . _w_xml($xml_root) | ||||||
285 | . "\n"; | ||||||
286 | } | ||||||
287 | |||||||
288 | |||||||
289 | sub _w_xml { | ||||||
290 | 8 | 8 | 16 | my $node = shift; | |||
291 | 8 | 13 | my $xml = ""; | ||||
292 | 8 | 33 | while (my ($k, $v) = each %$node) { | ||||
293 | 12 | 100 | 68 | $xml .= $v ? ( # node with content | |||
100 | |||||||
294 | " |
||||||
295 | . (ref $v ? _w_xml($v) : $v) | ||||||
296 | . "\n" ) | ||||||
297 | : " |
||||||
298 | } | ||||||
299 | 8 | 63 | return $xml; | ||||
300 | } | ||||||
301 | |||||||
302 | |||||||
303 | sub _section_styles { | ||||||
304 | 8 | 8 | 20 | my ($self) = @_; | |||
305 | |||||||
306 | 8 | 16 | my $styles = ""; | ||||
307 | 8 | 14 | my $i = 1; | ||||
308 | 8 | 22 | foreach my $section (@{$self->{sections}}) { | ||||
8 | 24 | ||||||
309 | |||||||
310 | 16 | 22 | my $properties = ""; | ||||
311 | |||||||
312 | # page properties (size and margin) | ||||||
313 | 16 | 44 | foreach my $prop (qw/size margin/) { | ||||
314 | 32 | 50 | 80 | my $val = $section->{page}{$prop} or next; | |||
315 | 0 | 0 | $properties .= qq{ $prop:$val;\n}; | ||||
316 | } | ||||||
317 | |||||||
318 | # headers and footers | ||||||
319 | 16 | 28 | my $has_first_page; | ||||
320 | 16 | 41 | foreach my $prop (qw/header_margin footer_margin | ||||
321 | page_numbers paper_source/) { | ||||||
322 | 64 | 50 | 126 | my $val = $section->{page}{$prop} or next; | |||
323 | 0 | 0 | (my $property = $prop) =~ s/_/-/g; | ||||
324 | 0 | 0 | $properties .= qq{ mso-$property:$val;\n}; | ||||
325 | } | ||||||
326 | 16 | 32 | foreach my $hf (qw/header footer first_header first_footer/) { | ||||
327 | 64 | 50 | 127 | $section->{$hf} or next; | |||
328 | 0 | 0 | 0 | $has_first_page = 1 if $hf =~ /^first/; | |||
329 | 0 | 0 | (my $property = $hf) =~ s/_/-/; | ||||
330 | 0 | 0 | $properties | ||||
331 | .= qq{ mso-$property:url("files/header_footer.htm") $hf$i;\n}; | ||||||
332 | } | ||||||
333 | 16 | 50 | 45 | $properties .= qq{ mso-title-page:yes;\n} if $has_first_page; | |||
334 | |||||||
335 | # style definitions for this section | ||||||
336 | 16 | 58 | $styles .= qq[\@page Section$i {\n$properties}\n] | ||||
337 | . qq[div.Section$i {page:Section$i}\n]; | ||||||
338 | 16 | 35 | $i += 1; | ||||
339 | } | ||||||
340 | |||||||
341 | 8 | 31 | return $styles; | ||||
342 | } | ||||||
343 | |||||||
344 | |||||||
345 | sub _MIME_parts { | ||||||
346 | 8 | 8 | 30 | my ($self) = @_; | |||
347 | |||||||
348 | # attachments supplied by user | ||||||
349 | 8 | 18 | my @parts = @{$self->{MIME_parts}}; | ||||
8 | 22 | ||||||
350 | |||||||
351 | # additional attachment : computed file with headers and footers | ||||||
352 | 8 | 26 | my $hf_content = $self->_header_footer; | ||||
353 | 8 | 50 | 35 | unshift @parts, ["files/header_footer.htm", $hf_content] if $hf_content; | |||
354 | |||||||
355 | 8 | 19 | return @parts; | ||||
356 | } | ||||||
357 | |||||||
358 | |||||||
359 | sub _header_footer { | ||||||
360 | 8 | 8 | 19 | my ($self) = @_; | |||
361 | |||||||
362 | # create a div for each header/footer in each section | ||||||
363 | 8 | 15 | my $hf_divs = ""; | ||||
364 | 8 | 15 | my $i = 1; | ||||
365 | 8 | 12 | foreach my $section (@{$self->{sections}}) { | ||||
8 | 26 | ||||||
366 | |||||||
367 | # deal with headers/footers defined in that section | ||||||
368 | 16 | 30 | foreach my $hf (qw/header footer first_header first_footer/) { | ||||
369 | 64 | 50 | 143 | $section->{$hf} or next; | |||
370 | 0 | 0 | (my $style = $hf) =~ s/^first_//; | ||||
371 | $hf_divs .= qq{ \n}
|
||||||
372 | 0 | 0 | . $section->{$hf} . "\n" | ||||
373 | . qq{\n}; | ||||||
374 | } | ||||||
375 | |||||||
376 | 16 | 31 | $i += 1; | ||||
377 | } | ||||||
378 | |||||||
379 | # if at least one such div, need to create an attached file | ||||||
380 | my $header_footer = !$hf_divs ? "" : | ||||||
381 | qq{\n} | ||||||
382 | . qq{\n} | ||||||
383 | . qq{\n} | ||||||
384 | . qq{\n} | ||||||
385 | . $self->{hf_head} | ||||||
386 | 8 | 50 | 25 | . qq{\n} | |||
387 | . qq{\n} . $hf_divs . qq{\n} | ||||||
388 | . qq{\n}; | ||||||
389 | |||||||
390 | 8 | 27 | return $header_footer; | ||||
391 | } | ||||||
392 | |||||||
393 | |||||||
394 | |||||||
395 | sub _filelist { | ||||||
396 | 8 | 8 | 28 | my ($self, @parts) = @_; | |||
397 | |||||||
398 | # xml header | ||||||
399 | 8 | 18 | my $xml = qq{ |
||||
400 | . qq{ |
||||||
401 | |||||||
402 | # refer to each attached file | ||||||
403 | 8 | 18 | foreach my $part (@parts) { | ||||
404 | 0 | 0 | $xml .= qq{ |
||||
405 | } | ||||||
406 | |||||||
407 | # the filelist is itself an attached file | ||||||
408 | 8 | 20 | $xml .= qq{ |
||||
409 | |||||||
410 | # closing tag; | ||||||
411 | 8 | 17 | $xml .= qq{\n}; | ||||
412 | |||||||
413 | 8 | 25 | return ["files/filelist.xml", $xml]; | ||||
414 | } | ||||||
415 | |||||||
416 | |||||||
417 | |||||||
418 | 1; | ||||||
419 | |||||||
420 | __END__ |