blib/lib/MsOffice/Word/HTML/Writer.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 159 | 187 | 85.0 |
branch | 27 | 58 | 46.5 |
condition | 9 | 17 | 52.9 |
subroutine | 27 | 29 | 93.1 |
pod | 10 | 10 | 100.0 |
total | 232 | 301 | 77.0 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | package MsOffice::Word::HTML::Writer; | ||||||
2 | 5 | 5 | 350364 | use utf8; | |||
5 | 84 | ||||||
5 | 36 | ||||||
3 | 5 | 5 | 159 | use warnings; | |||
5 | 13 | ||||||
5 | 128 | ||||||
4 | 5 | 5 | 40 | use strict; | |||
5 | 11 | ||||||
5 | 121 | ||||||
5 | 5 | 5 | 2474 | use MIME::Base64 qw/encode_base64/; | |||
5 | 3953 | ||||||
5 | 302 | ||||||
6 | 5 | 5 | 2296 | use MIME::Types; | |||
5 | 32983 | ||||||
5 | 242 | ||||||
7 | 5 | 5 | 36 | use Carp; | |||
5 | 11 | ||||||
5 | 262 | ||||||
8 | 5 | 5 | 2839 | use Params::Validate qw/validate SCALAR HASHREF CODEREF UNDEF/; | |||
5 | 47071 | ||||||
5 | 410 | ||||||
9 | 5 | 5 | 40 | use Scalar::Util qw/looks_like_number openhandle/; | |||
5 | 11 | ||||||
5 | 289 | ||||||
10 | 5 | 5 | 2966 | use Encode 2.12 qw/encode/; | |||
5 | 52577 | ||||||
5 | 836 | ||||||
11 | |||||||
12 | our $VERSION = '1.10'; | ||||||
13 | |||||||
14 | sub new { | ||||||
15 | 6 | 6 | 1 | 2901 | my $class = shift; | ||
16 | |||||||
17 | # specification for validating named parameters | ||||||
18 | 6 | 97 | my $param_spec = { | ||||
19 | title => {type => SCALAR, default => "Document generated by MsOffice::Word::HTML::Writer"}, | ||||||
20 | head => {type => SCALAR, default => "" }, | ||||||
21 | hf_head => {type => SCALAR, default => "" }, | ||||||
22 | WordDocument => {type => HASHREF, optional => 1 }, | ||||||
23 | charset => {type => SCALAR, default => 'utf-8'}, | ||||||
24 | encode_fallback => {type => SCALAR|CODEREF|UNDEF, optional => 1 }, | ||||||
25 | }; | ||||||
26 | |||||||
27 | # build instance | ||||||
28 | 6 | 192 | my $self = {validate(@_, $param_spec)}; | ||||
29 | |||||||
30 | # default encode fallback : let native characters pass through if charset is not utf-8 | ||||||
31 | 5 | 5 | 46 | $self->{encode_fallback} = sub {no utf8; sprintf "%c", shift} # see L |
|||
5 | 2 | 11 | |||||
5 | 49 | ||||||
2 | 87 | ||||||
32 | 6 | 100 | 66 | 89 | unless exists $self->{encode_fallback} or $self->{charset} eq 'utf-8'; | ||
33 | |||||||
34 | # initializing internal slots | ||||||
35 | 6 | 21 | $self->{MIME_parts} = []; | ||||
36 | 6 | 77 | $self->{sections} = [{}]; | ||||
37 | |||||||
38 | # returning a blessed object | ||||||
39 | 6 | 41 | bless $self, $class; | ||||
40 | } | ||||||
41 | |||||||
42 | |||||||
43 | sub create_section { | ||||||
44 | 4 | 4 | 1 | 19 | my $self = shift; | ||
45 | |||||||
46 | # validate named parameters | ||||||
47 | 4 | 13 | my $param_spec = {page => {type => HASHREF, optional => 1}}; | ||||
48 | $param_spec->{$_} = {type => SCALAR, optional => 1} | ||||||
49 | 4 | 33 | for qw/header footer first_header first_footer new_page/; | ||||
50 | 4 | 66 | my %params = validate(@_, $param_spec); | ||||
51 | |||||||
52 | # if first automatic section is empty, delete it | ||||||
53 | $self->{sections} = [] | ||||||
54 | 4 | 50 | 66 | 16 | if scalar(@{$self->{sections}}) == 1 && !$self->{sections}[0]{content}; | ||
4 | 24 | ||||||
55 | |||||||
56 | # add the new section | ||||||
57 | 4 | 6 | push @{$self->{sections}}, \%params; | ||||
4 | 22 | ||||||
58 | } | ||||||
59 | |||||||
60 | |||||||
61 | sub write { | ||||||
62 | 24 | 24 | 1 | 100 | my $self = shift; | ||
63 | |||||||
64 | # add html arguments to current section content | ||||||
65 | 24 | 102 | $self->{sections}[-1]{content} .= join ("", @_); | ||||
66 | } | ||||||
67 | |||||||
68 | |||||||
69 | |||||||
70 | sub save_as { | ||||||
71 | 2 | 2 | 1 | 1184 | my ($self, $file_or_handle) = @_; | ||
72 | |||||||
73 | 2 | 50 | 7 | croak '->save_as($target) : $target is empty' if !$file_or_handle; | |||
74 | |||||||
75 | 2 | 5 | my $fh; | ||||
76 | 2 | 50 | 10 | if (openhandle($file_or_handle)) { | |||
77 | 2 | 5 | $fh = $file_or_handle; | ||||
78 | 2 | 8 | binmode($fh, ':raw'); | ||||
79 | } | ||||||
80 | else { | ||||||
81 | # default extension is ".doc" | ||||||
82 | 0 | 0 | 0 | $file_or_handle .= ".doc" unless $file_or_handle =~ /\.\w{1,5}$/; | |||
83 | |||||||
84 | 0 | 0 | 0 | open $fh, ">:raw", $file_or_handle | |||
85 | or croak "could not open >$file_or_handle: $!"; | ||||||
86 | } | ||||||
87 | |||||||
88 | # write file contents | ||||||
89 | 2 | 9 | my $octets = encode($self->{charset}, $self->content, $self->{encode_fallback}); | ||||
90 | 2 | 234 | print $fh $octets; | ||||
91 | 2 | 7 | close $fh; | ||||
92 | |||||||
93 | 2 | 6 | return $file_or_handle; | ||||
94 | } | ||||||
95 | |||||||
96 | |||||||
97 | sub attach { | ||||||
98 | 0 | 0 | 1 | 0 | my ($self, $name, $open1, $open2, @other) = @_; | ||
99 | |||||||
100 | # open a handle to the attachment (need to dispatch according to number | ||||||
101 | # of args, because perlfunc/open() has complex prototyping behaviour) | ||||||
102 | 0 | 0 | my $fh; | ||||
103 | 0 | 0 | 0 | if (@other) { | |||
0 | |||||||
104 | 0 | 0 | 0 | open $fh, $open1, $open2, @other | |||
105 | or croak "open $open1, $open2, @other : $!"; | ||||||
106 | } | ||||||
107 | elsif ($open2) { | ||||||
108 | 0 | 0 | 0 | open $fh, $open1, $open2 | |||
109 | or croak "open $open1, $open2 : $!"; | ||||||
110 | } | ||||||
111 | else { | ||||||
112 | 0 | 0 | 0 | open $fh, $open1 | |||
113 | or croak "open $open1 : $!"; | ||||||
114 | } | ||||||
115 | |||||||
116 | # slurp the content | ||||||
117 | 0 | 0 | 0 | binmode($fh) unless $name =~ /\.(html?|css|te?xt|rtf)$/i; | |||
118 | 0 | 0 | local $/; | ||||
119 | 0 | 0 | my $attachment = <$fh>; | ||||
120 | |||||||
121 | # add the attachment (filename and content) | ||||||
122 | 0 | 0 | push @{$self->{MIME_parts}}, ["files/$name", $attachment]; | ||||
0 | 0 | ||||||
123 | } | ||||||
124 | |||||||
125 | |||||||
126 | sub page_break { | ||||||
127 | 2 | 2 | 1 | 14 | my ($self) = @_; | ||
128 | 2 | 7 | return qq{ \n}; |
||||
129 | } | ||||||
130 | |||||||
131 | |||||||
132 | sub tab { | ||||||
133 | 0 | 0 | 1 | 0 | my ($self, $n_tabs) = @_; | ||
134 | 0 | 0 | 0 | $n_tabs ||= 1; | |||
135 | 0 | 0 | return qq{}; | ||||
136 | } | ||||||
137 | |||||||
138 | sub field { | ||||||
139 | 2 | 2 | 1 | 8 | my ($self, $fieldname, $args, $content, $prevent_html_entity_encoding) = @_; | ||
140 | |||||||
141 | 2 | 6 | for ($args, $content) { | ||||
142 | 4 | 50 | 10 | $_ ||= ""; # undef replaced by empty string | |||
143 | 4 | 50 | 10 | s/&/&/g, s/</g, s/>/>/g # replace HTML entities | |||
144 | unless $prevent_html_entity_encoding; | ||||||
145 | } | ||||||
146 | |||||||
147 | 2 | 5 | my $field; | ||||
148 | |||||||
149 | # when args : long form of field encoding | ||||||
150 | 2 | 50 | 7 | if ($args) { | |||
151 | 2 | 5 | my $space = qq{ }; | ||||
152 | 2 | 15 | $field = qq{} | ||||
153 | . $space . $fieldname . $space . $args | ||||||
154 | . qq{} | ||||||
155 | . $content | ||||||
156 | . qq{}; | ||||||
157 | } | ||||||
158 | # otherwise : short form of field encoding | ||||||
159 | else { | ||||||
160 | 0 | 0 | $field = qq{$content}; | ||||
161 | } | ||||||
162 | |||||||
163 | 2 | 10 | return $field; | ||||
164 | } | ||||||
165 | |||||||
166 | sub quote { | ||||||
167 | 2 | 2 | 1 | 15 | my ($self, $text, $prevent_html_entity_encoding) = @_; | ||
168 | 2 | 4 | my $args = $text; | ||||
169 | 2 | 6 | $args =~ s/"/\\"/g; | ||||
170 | 2 | 7 | $args = qq{"$args"}; | ||||
171 | 2 | 8 | $args =~ s/"/"/g; | ||||
172 | 2 | 9 | return $self->field('QUOTE', $args, $text, $prevent_html_entity_encoding); | ||||
173 | } | ||||||
174 | |||||||
175 | |||||||
176 | |||||||
177 | sub content { | ||||||
178 | 8 | 8 | 1 | 33 | my ($self) = @_; | ||
179 | |||||||
180 | # separator for parts in MIME document | ||||||
181 | 8 | 18 | my $boundary = qw/__NEXT_PART__/; | ||||
182 | |||||||
183 | # MIME multipart header | ||||||
184 | 8 | 33 | my $mime = qq{MIME-Version: 1.0\n} | ||||
185 | . qq{Content-Type: multipart/related; boundary="$boundary"\n\n} | ||||||
186 | . qq{MIME document generated by MsOffice::Word::HTML::Writer\n\n}; | ||||||
187 | |||||||
188 | # generate each part (main document must be first) | ||||||
189 | 8 | 29 | my @parts = $self->_MIME_parts; | ||||
190 | 8 | 28 | my $filelist = $self->_filelist(@parts); | ||||
191 | 8 | 45 | for my $pair ($self->_main, @parts, $filelist) { | ||||
192 | 16 | 51 | my ($filename, $content) = @$pair; | ||||
193 | 16 | 50 | 91 | my $mime_type = MIME::Types->new->mimeTypeOf($filename) || ''; | |||
194 | 16 | 219904 | my ($encoding, $encoded); | ||||
195 | 16 | 50 | 42 | if ($mime_type =~ /^text|xml$/) { | |||
196 | # no need for Windows-style end-of-lines of shape CRLF | ||||||
197 | 16 | 181 | $content =~ s/\r\n/\n/g; | ||||
198 | |||||||
199 | # if charset is not utf-8, wide chars are encoded as numerical HTML entities | ||||||
200 | 16 | 100 | 354 | $content =~ s/([^\x{0}-\x{FF}])/''.ord($1).';'/eg unless $self->{charset} eq 'utf-8'; | |||
13 | 61 | ||||||
201 | |||||||
202 | # simple-minded MIME quoted-printable encoding | ||||||
203 | 16 | 36 | $encoding = 'quoted-printable'; | ||||
204 | 16 | 183 | ($encoded = $content) =~ s/=/=3D/g; | ||||
205 | 16 | 74 | $mime_type .= qq{; charset="$self->{charset}"}; | ||||
206 | } | ||||||
207 | else { | ||||||
208 | 0 | 0 | $encoding = 'base64'; | ||||
209 | 0 | 0 | $encoded = encode_base64($content); | ||||
210 | } | ||||||
211 | |||||||
212 | 16 | 206 | $mime .= qq{--$boundary\n} | ||||
213 | . qq{Content-Location: file:///C:/foo/$filename\n} | ||||||
214 | . qq{Content-Transfer-Encoding: $encoding\n} | ||||||
215 | . qq{Content-Type: $mime_type\n\n} | ||||||
216 | . $encoded | ||||||
217 | . "\n"; | ||||||
218 | } | ||||||
219 | |||||||
220 | # close last MIME part | ||||||
221 | 8 | 30 | $mime .= "--$boundary--\n"; | ||||
222 | |||||||
223 | 8 | 71 | return $mime; | ||||
224 | } | ||||||
225 | |||||||
226 | |||||||
227 | #====================================================================== | ||||||
228 | # PRIVATE METHODS | ||||||
229 | #====================================================================== | ||||||
230 | |||||||
231 | sub _main { | ||||||
232 | 8 | 8 | 19 | my ($self) = @_; | |||
233 | |||||||
234 | # body : concatenate content from all sections | ||||||
235 | 8 | 18 | my $body = ""; | ||||
236 | 8 | 12 | my $i = 1; | ||||
237 | 8 | 16 | foreach my $section (@{$self->{sections}}) { | ||||
8 | 22 | ||||||
238 | |||||||
239 | # section break | ||||||
240 | 12 | 100 | 31 | if ($i > 1) { | |||
241 | # type of break | ||||||
242 | 4 | 10 | my $break = $section->{new_page}; | ||||
243 | 4 | 100 | 66 | 44 | $break = 'always' if $break && looks_like_number($break); # if true but not a word | ||
244 | 4 | 50 | 11 | $break ||= 'auto'; # if false | |||
245 | # otherwise, type of break will just be the word given in {new_page} | ||||||
246 | |||||||
247 | # insert into body | ||||||
248 | 4 | 13 | my $style = qq{page-break-before:$break;mso-break-type:section-break}; | ||||
249 | 4 | 18 | $body .= qq{ \n}; |
||||
250 | } | ||||||
251 | |||||||
252 | # section content | ||||||
253 | 12 | 64 | $body .= qq{ \n$section->{content}\n \n};
|
||||
254 | |||||||
255 | 12 | 29 | $i += 1; | ||||
256 | } | ||||||
257 | |||||||
258 | # assemble head and body into a full document | ||||||
259 | 8 | 26 | my $html | ||||
260 | = qq{ | ||||||
261 | . qq{ xmlns:o="urn:schemas-microsoft-com:office:office"\n} | ||||||
262 | . qq{ xmlns:w="urn:schemas-microsoft-com:office:word"\n} | ||||||
263 | . qq{ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml"\n} | ||||||
264 | . qq{ xmlns="http://www.w3.org/TR/REC-html40">\n} | ||||||
265 | . $self->_head | ||||||
266 | . qq{\n$body\n} | ||||||
267 | . qq{\n}; | ||||||
268 | 8 | 38 | return ["main.htm", $html]; | ||||
269 | } | ||||||
270 | |||||||
271 | |||||||
272 | sub _head { | ||||||
273 | 8 | 8 | 21 | my ($self) = @_; | |||
274 | |||||||
275 | # HTML head : link to filelist, title, view format and styles | ||||||
276 | my $head | ||||||
277 | = qq{\n} | ||||||
278 | . qq{\n} | ||||||
279 | . qq{\n} | ||||||
280 | . qq{ |
||||||
281 | . $self->_xml_WordDocument | ||||||
282 | . qq{\n} | ||||||
283 | . $self->{head} | ||||||
284 | 8 | 61 | . qq{\n}; | ||||
285 | 8 | 78 | return $head; | ||||
286 | } | ||||||
287 | |||||||
288 | |||||||
289 | |||||||
290 | sub _xml_WordDocument { | ||||||
291 | 8 | 8 | 18 | my ($self) = @_; | |||
292 | 8 | 100 | 42 | my $xml_root = $self->{WordDocument} or return ""; | |||
293 | 2 | 9 | return " |
||||
294 | . _w_xml($xml_root) | ||||||
295 | . "\n"; | ||||||
296 | } | ||||||
297 | |||||||
298 | |||||||
299 | sub _w_xml { | ||||||
300 | 4 | 4 | 8 | my $node = shift; | |||
301 | 4 | 8 | my $xml = ""; | ||||
302 | 4 | 25 | while (my ($k, $v) = each %$node) { | ||||
303 | 6 | 100 | 40 | $xml .= $v ? ( # node with content | |||
100 | |||||||
304 | " |
||||||
305 | . (ref $v ? _w_xml($v) : $v) | ||||||
306 | . "\n" ) | ||||||
307 | : " |
||||||
308 | } | ||||||
309 | 4 | 32 | return $xml; | ||||
310 | } | ||||||
311 | |||||||
312 | |||||||
313 | sub _section_styles { | ||||||
314 | 8 | 8 | 22 | my ($self) = @_; | |||
315 | |||||||
316 | 8 | 18 | my $styles = ""; | ||||
317 | 8 | 26 | my $i = 1; | ||||
318 | 8 | 16 | foreach my $section (@{$self->{sections}}) { | ||||
8 | 22 | ||||||
319 | |||||||
320 | 12 | 23 | my $properties = ""; | ||||
321 | |||||||
322 | # page properties (size and margin) | ||||||
323 | 12 | 27 | foreach my $prop (qw/size margin/) { | ||||
324 | 24 | 50 | 74 | my $val = $section->{page}{$prop} or next; | |||
325 | 0 | 0 | $properties .= qq{ $prop:$val;\n}; | ||||
326 | } | ||||||
327 | |||||||
328 | # headers and footers | ||||||
329 | 12 | 21 | my $has_first_page; | ||||
330 | 12 | 27 | foreach my $prop (qw/header_margin footer_margin | ||||
331 | page_numbers paper_source/) { | ||||||
332 | 48 | 50 | 131 | my $val = $section->{page}{$prop} or next; | |||
333 | 0 | 0 | (my $property = $prop) =~ s/_/-/g; | ||||
334 | 0 | 0 | $properties .= qq{ mso-$property:$val;\n}; | ||||
335 | } | ||||||
336 | 12 | 34 | foreach my $hf (qw/header footer first_header first_footer/) { | ||||
337 | 48 | 50 | 110 | $section->{$hf} or next; | |||
338 | 0 | 0 | 0 | $has_first_page = 1 if $hf =~ /^first/; | |||
339 | 0 | 0 | (my $property = $hf) =~ s/_/-/; | ||||
340 | 0 | 0 | $properties | ||||
341 | .= qq{ mso-$property:url("files/header_footer.htm") $hf$i;\n}; | ||||||
342 | } | ||||||
343 | 12 | 50 | 25 | $properties .= qq{ mso-title-page:yes;\n} if $has_first_page; | |||
344 | |||||||
345 | # style definitions for this section | ||||||
346 | 12 | 55 | $styles .= qq[\@page Section$i {\n$properties}\n] | ||||
347 | . qq[div.Section$i {page:Section$i}\n]; | ||||||
348 | 12 | 28 | $i += 1; | ||||
349 | } | ||||||
350 | |||||||
351 | 8 | 54 | return $styles; | ||||
352 | } | ||||||
353 | |||||||
354 | |||||||
355 | sub _MIME_parts { | ||||||
356 | 8 | 8 | 21 | my ($self) = @_; | |||
357 | |||||||
358 | # attachments supplied by user | ||||||
359 | 8 | 15 | my @parts = @{$self->{MIME_parts}}; | ||||
8 | 27 | ||||||
360 | |||||||
361 | # additional attachment : computed file with headers and footers | ||||||
362 | 8 | 26 | my $hf_content = $self->_header_footer; | ||||
363 | 8 | 50 | 39 | unshift @parts, ["files/header_footer.htm", $hf_content] if $hf_content; | |||
364 | |||||||
365 | 8 | 20 | return @parts; | ||||
366 | } | ||||||
367 | |||||||
368 | |||||||
369 | sub _header_footer { | ||||||
370 | 8 | 8 | 19 | my ($self) = @_; | |||
371 | |||||||
372 | # create a div for each header/footer in each section | ||||||
373 | 8 | 17 | my $hf_divs = ""; | ||||
374 | 8 | 14 | my $i = 1; | ||||
375 | 8 | 18 | foreach my $section (@{$self->{sections}}) { | ||||
8 | 26 | ||||||
376 | |||||||
377 | # deal with headers/footers defined in that section | ||||||
378 | 12 | 28 | foreach my $hf (qw/header footer first_header first_footer/) { | ||||
379 | 48 | 50 | 123 | $section->{$hf} or next; | |||
380 | 0 | 0 | (my $style = $hf) =~ s/^first_//; | ||||
381 | $hf_divs .= qq{ \n}
|
||||||
382 | 0 | 0 | . $section->{$hf} . "\n" | ||||
383 | . qq{\n}; | ||||||
384 | } | ||||||
385 | |||||||
386 | 12 | 26 | $i += 1; | ||||
387 | } | ||||||
388 | |||||||
389 | # if at least one such div, need to create an attached file | ||||||
390 | my $header_footer = !$hf_divs ? "" : | ||||||
391 | qq{\n} | ||||||
392 | . qq{\n} | ||||||
393 | . qq{\n} | ||||||
394 | . qq{\n} | ||||||
395 | . $self->{hf_head} | ||||||
396 | 8 | 50 | 31 | . qq{\n} | |||
397 | . qq{\n} . $hf_divs . qq{\n} | ||||||
398 | . qq{\n}; | ||||||
399 | |||||||
400 | 8 | 21 | return $header_footer; | ||||
401 | } | ||||||
402 | |||||||
403 | |||||||
404 | |||||||
405 | sub _filelist { | ||||||
406 | 8 | 8 | 21 | my ($self, @parts) = @_; | |||
407 | |||||||
408 | # xml header | ||||||
409 | 8 | 15 | my $xml = qq{ |
||||
410 | . qq{ |
||||||
411 | |||||||
412 | # refer to each attached file | ||||||
413 | 8 | 21 | foreach my $part (@parts) { | ||||
414 | 0 | 0 | $xml .= qq{ |
||||
415 | } | ||||||
416 | |||||||
417 | # the filelist is itself an attached file | ||||||
418 | 8 | 20 | $xml .= qq{ |
||||
419 | |||||||
420 | # closing tag; | ||||||
421 | 8 | 17 | $xml .= qq{\n}; | ||||
422 | |||||||
423 | 8 | 25 | return ["files/filelist.xml", $xml]; | ||||
424 | } | ||||||
425 | |||||||
426 | |||||||
427 | |||||||
428 | 1; | ||||||
429 | |||||||
430 | __END__ |