File Coverage

blib/lib/MsOffice/Word/Surgeon/PackagePart.pm
Criterion Covered Total %
statement 297 346 85.8
branch 64 106 60.3
condition 36 54 66.6
subroutine 55 59 93.2
pod 18 19 94.7
total 470 584 80.4


line stmt bran cond sub pod time code
1             package MsOffice::Word::Surgeon::PackagePart;
2 4     4   71 use 5.24.0;
  4         14  
3 4     4   18 use Moose;
  4         8  
  4         25  
4 4     4   23464 use MooseX::StrictConstructor;
  4         7  
  4         32  
5 4     4   11696 use MsOffice::Word::Surgeon::Carp;
  4         9  
  4         37  
6 4     4   267 use MsOffice::Word::Surgeon::Utils qw(maybe_preserve_spaces is_at_run_level parse_attrs decode_entities encode_entities);
  4         7  
  4         319  
7 4     4   1987 use MsOffice::Word::Surgeon::Run;
  4         54  
  4         219  
8 4     4   2251 use MsOffice::Word::Surgeon::Text;
  4         31  
  4         180  
9 4     4   2216 use MsOffice::Word::Surgeon::Field;
  4         40  
  4         211  
10 4     4   2639 use MsOffice::Word::Surgeon::BookmarkBoundary;
  4         35  
  4         173  
11 4     4   3462 use XML::LibXML ();;
  4         147608  
  4         189  
12 4     4   35 use List::Util qw(max);
  4         7  
  4         290  
13 4     4   2014 use match::simple qw(match);
  4         30632  
  4         31  
14              
15             # syntactic sugar for attributes
16             sub has_inner ($@) {my $attr = shift; has($attr => @_, lazy => 1, builder => "_$attr", init_arg => undef)}
17              
18              
19             # constant integers to specify indentation modes -- see L<XML::LibXML>
20 4     4   1327 use constant XML_NO_INDENT => 0;
  4         8  
  4         275  
21 4     4   23 use constant XML_SIMPLE_INDENT => 1;
  4         9  
  4         197  
22              
23 4     4   39 use namespace::clean -except => 'meta';
  4         8  
  4         46  
24              
25             #======================================================================
26             # ATTRIBUTES
27             #======================================================================
28              
29             # attributes passed to the constructor
30             has 'surgeon' => (is => 'ro', isa => 'MsOffice::Word::Surgeon', required => 1, weak_ref => 1);
31             has 'part_name' => (is => 'ro', isa => 'Str', required => 1);
32              
33             # attributes constructed by the module -- not received through the constructor
34             has_inner 'contents' => (is => 'rw', isa => 'Str', trigger => \&_on_new_contents);
35             has_inner 'runs' => (is => 'ro', isa => 'ArrayRef', clearer => 'clear_runs');
36             has_inner 'relationships' => (is => 'ro', isa => 'ArrayRef');
37             has_inner 'images' => (is => 'ro', isa => 'HashRef');
38              
39             has 'contents_has_changed' => (is => 'bare', isa => 'Bool', default => 0);
40             has 'was_cleaned_up' => (is => 'bare', isa => 'Bool', default => 0);
41              
42             #======================================================================
43             # GLOBAL VARIABLES
44             #======================================================================
45              
46             # Various regexes for removing uninteresting XML information
47             my %noise_reduction_regexes = (
48             proof_checking => qr(<w:(?:proofErr[^>]+|noProof/)>),
49             revision_ids => qr(\sw:rsid\w+="[^"]+"),
50             complex_script_bold => qr(<w:bCs/>),
51             page_breaks => qr(<w:lastRenderedPageBreak/>),
52             language => qr(<w:lang w:val="[^/>]+/>),
53             empty_run_props => qr(<w:rPr></w:rPr>),
54             soft_hyphens => qr(<w:softHyphen/>),
55             );
56             my @noise_reduction_list = qw/proof_checking revision_ids
57             complex_script_bold page_breaks language
58             empty_run_props soft_hyphens/;
59              
60             #======================================================================
61             # LAZY ATTRIBUTE CONSTRUCTORS AND TRIGGERS
62             #======================================================================
63              
64              
65             sub _runs {
66 14     14   25 my $self = shift;
67              
68 14         24 state $run_regex = qr[
69             <w:r> # opening tag for the run
70             (?:<w:rPr>(.*?)</w:rPr>)? # run properties -- capture in $1
71             (.*?) # run contents -- capture in $2
72             </w:r> # closing tag for the run
73             ]x;
74              
75 14         26 state $txt_regex = qr[
76             <w:t(?:\ xml:space="preserve")?> # opening tag for the text contents
77             (.*?) # text contents -- capture in $1
78             </w:t> # closing tag for text
79             ]x;
80              
81             # split XML content into run fragments
82 14         661 my $contents = $self->contents;
83 14         14031 my @run_fragments = split m[$run_regex], $contents, -1; # -1 : don't strip trailing items
84 14         43 my @runs;
85              
86             # build internal RUN objects
87             RUN:
88 14         78 while (my ($xml_before_run, $props, $run_contents) = splice @run_fragments, 0, 3) {
89 2727   100     4148001 $run_contents //= '';
90              
91             # split XML of this run into text fragmentsn
92 2727         41887 my @txt_fragments = split m[$txt_regex], $run_contents, -1; # -1 : don't strip trailing items
93 2727         5596 my @texts;
94              
95             # build internal TEXT objects
96             TXT:
97 2727         9746 while (my ($xml_before_text, $txt_contents) = splice @txt_fragments, 0, 2) {
98 5307 100 100     2919452 next TXT if !$xml_before_text && !length($txt_contents);
99 2686   100     9763 $_ //= '' for $xml_before_text, $txt_contents;
100 2686         12233 decode_entities($txt_contents);
101 2686         10090 push @texts, MsOffice::Word::Surgeon::Text->new(xml_before => $xml_before_text,
102             literal_text => $txt_contents);
103             }
104              
105             # assemble TEXT objects into a RUN object
106 2727 100 100     57144 next RUN if !$xml_before_run && !@texts;
107 2686   100     10263 $_ //= '' for $xml_before_run, $props;
108 2686         10600 push @runs, MsOffice::Word::Surgeon::Run->new(xml_before => $xml_before_run,
109             props => $props,
110             inner_texts => \@texts);
111             }
112              
113 14         21020 return \@runs;
114             }
115              
116              
117             sub _relationships {
118 4     4   11 my $self = shift;
119              
120             # xml that describes the relationships for this package part
121 4         22 my $rel_xml = $self->_rels_xml;
122              
123             # parse the relationships and assemble into a sparse array indexed by relationship ids
124 4         76 my @relationships;
125 4         78 while ($rel_xml =~ m[<Relationship\s+(.*?)/>]g) {
126 132         307 my %attrs = parse_attrs($1);
127 132   50     581 $attrs{$_} or croak "missing attribute '$_' in <Relationship> node" for qw/Id Type Target/;
128 132         519 ($attrs{num} = $attrs{Id}) =~ s[^\D+][];
129 132         581 ($attrs{short_type} = $attrs{Type}) =~ s[^.*/][];
130 132         947 $relationships[$attrs{num}] = \%attrs;
131             }
132              
133 4         212 return \@relationships;
134             }
135              
136              
137             sub _images {
138 0     0   0 my $self = shift;
139              
140             # get relationship ids associated with images
141 0         0 my %rel_image = map {$_->{Id} => $_->{Target}}
142 0 0       0 grep {$_ && $_->{short_type} eq 'image'}
  0         0  
143             $self->relationships->@*;
144              
145             # get titles and relationship ids of images found within the part contents
146 0         0 my %image;
147 0         0 my @drawings = $self->contents =~ m[<w:drawing>(.*?)</w:drawing>]g;
148             DRAWING:
149 0         0 foreach my $drawing (@drawings) {
150 0 0       0 if ($drawing =~ m[<wp:docPr \s+ (.*?) />
151             .*?
152             <a:blip \s+ r:embed="(\w+)"]x) {
153 0         0 my ($lst_attrs, $rId) = ($1, $2);
154 0         0 my %attrs = parse_attrs($lst_attrs);
155             my $img_id = $attrs{title} || $attrs{descr}
156 0 0 0     0 or next DRAWING;
157              
158 0 0       0 $image{$img_id} = "word/$rel_image{$rId}"
159             or die "couldn't find image for relationship '$rId' associated with image '$img_id'";
160             # NOTE: targets in the rels XML miss the "word/" prefix, I don't know why.
161             }
162             }
163              
164 0         0 return \%image;
165             }
166              
167              
168 10     10   34 sub _contents {shift->original_contents}
169              
170             sub _on_new_contents {
171 23     23   42 my $self = shift;
172              
173 23         1026 $self->clear_runs;
174 23         263712 $self->{contents_has_changed} = 1;
175 23         1283 $self->{was_cleaned_up} = 0;
176             }
177              
178             #======================================================================
179             # GENERAL METHODS
180             #======================================================================
181              
182              
183             sub _rels_xml { # rw accessor
184 4     4   11 my ($self, $new_xml) = @_;
185 4         253 my $rels_name = sprintf "word/_rels/%s.xml.rels", $self->part_name;
186 4         182 return $self->surgeon->xml_member($rels_name, $new_xml);
187             }
188              
189              
190             sub zip_member_name {
191 10     10 0 20 my $self = shift;
192 10         382 return sprintf "word/%s.xml", $self->part_name;
193             }
194              
195              
196             sub original_contents {
197 10     10 1 20 my $self = shift;
198              
199 10         373 return $self->surgeon->xml_member($self->zip_member_name);
200             }
201              
202              
203             #======================================================================
204             # CONTENTS RESTITUTION
205             #======================================================================
206              
207             sub indented_contents {
208 0     0 1 0 my $self = shift;
209              
210 0         0 my $dom = XML::LibXML->load_xml(string => $self->contents);
211 0         0 return $dom->toString(XML_SIMPLE_INDENT); # returned as bytes sequence, not a Perl string
212             }
213              
214              
215             sub plain_text {
216 4     4 1 45 my $self = shift;
217              
218             # XML contents
219 4         165 my $txt = $self->contents;
220              
221             # replace opening paragraph tags by newlines
222 4         2446 $txt =~ s/(<w:p[ >])/\n$1/g;
223              
224             # replace break tags by newlines
225 4         1517 $txt =~ s[<w:br/>][\n]g;
226              
227             # replace tab nodes by ASCII tabs
228 4         3101 $txt =~ s/<w:tab[^s][^>]*>/\t/g;
229              
230             # remove all remaining XML tags
231 4         24078 $txt =~ s/<[^>]+>//g;
232              
233             # decode entities
234 4         46 decode_entities($txt);
235              
236 4         129 return $txt;
237             }
238              
239              
240             #======================================================================
241             # MODIFYING CONTENTS
242             #======================================================================
243              
244             sub cleanup_XML {
245 3     3 1 10 my ($self, @merge_args) = @_;
246              
247             # avoid doing it twice
248 3 100       14 return if $self->{was_cleaned_up};
249              
250             # start the cleanup
251 2         10 $self->reduce_all_noises;
252              
253 2         72 my $contents = $self->contents;
254              
255             # unlink fields, suppress bookmarks, merge runs
256 2         10 $self->unlink_fields;
257 2         481 $self->suppress_bookmarks;
258 2         4559 $self->merge_runs(@merge_args);
259              
260             # flag the fact that the cleanup was done
261 2         175891 $self->{was_cleaned_up} = 1;
262             }
263              
264             sub reduce_noise {
265 2     2 1 11 my ($self, @noises) = @_;
266              
267             # gather regexes to apply, given either directly as regex refs, or as names of builtin regexes
268 2 50       6 my @regexes = map {ref $_ eq 'Regexp' ? $_ : $self->noise_reduction_regex($_)} @noises;
  14         32  
269              
270             # get contents, apply all regexes, put back the modified contents.
271 2         90 my $contents = $self->contents;
272 4     4   9167 no warnings 'uninitialized'; # for regexes without capture groups, $1 will be undef
  4         9  
  4         13241  
273 2         8367 $contents =~ s/$_/$1/g foreach @regexes;
274 2         113 $self->contents($contents);
275             }
276              
277             sub noise_reduction_regex {
278 14     14 1 21 my ($self, $regex_name) = @_;
279 14 50       39 my $regex = $noise_reduction_regexes{$regex_name}
280             or croak "->noise_reduction_regex('$regex_name') : unknown regex name";
281 14         28 return $regex;
282             }
283              
284             sub reduce_all_noises {
285 2     2 1 4 my $self = shift;
286              
287 2         11 $self->reduce_noise(@noise_reduction_list);
288             }
289              
290              
291             sub merge_runs {
292 2     2 1 9 my ($self, %args) = @_;
293              
294             # check validity of received args
295 2         7 state $is_valid_arg = {no_caps => 1};
296 2         9 my @invalid_args = grep {!$is_valid_arg->{$_}} keys %args;
  1         5  
297 2 50       8 croak "merge_runs(): invalid arg(s): " . join ", ", @invalid_args if @invalid_args;
298              
299 2         6 my @new_runs;
300             # loop over internal "run" objects
301 2         4 foreach my $run (@{$self->runs}) {
  2         133  
302              
303 1098 100       4081 $run->remove_caps_property if $args{no_caps};
304              
305             # check if the current run can be merged with the previous one
306 1098 100 66     50646 if ( !$run->xml_before # no other XML markup between the 2 runs
      100        
307             && @new_runs # there was a previous run
308             && $new_runs[-1]->props eq $run->props # both runs have the same properties
309             ) {
310             # conditions are OK, so merge this run with the previous one
311 54         181 $new_runs[-1]->merge($run);
312             }
313             else {
314             # conditions not OK, just push this run without merging
315 1044         3281 push @new_runs, $run;
316             }
317             }
318              
319             # reassemble the whole stuff and inject it as new contents
320 2         17 $self->contents(join "", map {$_->as_xml} @new_runs);
  1044         3251  
321             }
322              
323             sub replace {
324 12     12 1 58 my ($self, $pattern, $replacement_callback, %replacement_args) = @_;
325              
326             # shared initial string for error messages
327 12         23 my $error_msg = '->replace($pattern, $callback, %args)';
328              
329             # default value for arg 'cleanup_XML', possibly from deprecated arg 'keep_xml_as_is'
330 12 50       53 if (delete $replacement_args{keep_xml_as_is}) {
331             not exists $replacement_args{cleanup_XML}
332 0 0       0 or croak "$error_msg: deprecated arg 'keep_xml_as_is' conflicts with arg 'cleanup_XML'";
333 0         0 carp "$error_msg: arg 'keep_xml_as_is' is deprecated, use 'cleanup_XML' instead";
334 0         0 $replacement_args{cleanup_XML} = 0;
335             }
336             else {
337 12   100     45 $replacement_args{cleanup_XML} //= 1; # default
338             }
339              
340             # cleanup the XML structure so that replacements work better
341 12 100       42 if (my $cleanup_args = $replacement_args{cleanup_XML}) {
342 2 50       11 $cleanup_args = {} if ! ref $cleanup_args;
343 2 50       8 ref $cleanup_args eq 'HASH'
344             or croak "$error_msg: arg 'cleanup_XML' should be a hashref";
345 2         10 $self->cleanup_XML(%$cleanup_args);
346             }
347              
348             # check for presences of a special option to avoid modying contents
349 12         25 my $dont_overwrite_contents = delete $replacement_args{dont_overwrite_contents};
350              
351             # apply replacements and generate new XML
352             my $xml = join "",
353 12         467 map {$_->replace($pattern, $replacement_callback, %replacement_args)} $self->runs->@*;
  1588         5774  
354              
355             # overwrite previous contents
356 12 50       760 $self->contents($xml) unless $dont_overwrite_contents;
357              
358 12         107 return $xml;
359             }
360              
361             sub _update_contents_in_zip { # called for each part before saving the zip file
362 7     7   8 my $self = shift;
363              
364             $self->surgeon->xml_member($self->zip_member_name, $self->contents)
365 7 50       16 if $self->{contents_has_changed};
366             }
367              
368              
369              
370             #======================================================================
371             # OPERATIONS ON BOOKMARKS
372             #======================================================================
373              
374             sub bookmark_boundaries {
375 3     3 1 13 my ($self) = @_;
376              
377             # regex to find bookmark tags
378 3         9 state $bookmark_rx = qr{
379             ( # $1: the whole tag
380             <w:bookmark(Start|End) # $2: kind of bookmark boundary
381             \h* ([^>]*?) # $3: node attributes
382             /> # end of tag
383             ) # end of capture 1
384             }sx;
385              
386             # split the whole xml according to the regex. Captured groups are also added to the list
387 3         111 my @xml_chunks = split /$bookmark_rx/, $self->contents;
388 3         10 my $final_xml = pop @xml_chunks;
389              
390             # walk through the list of fragments and build BookmarkBoundary objects
391 3         7 my @bookmark_boundaries;
392 3         16 while (my @chunk = splice @xml_chunks, 0, 4) {
393 79         127523 my %bkmk_args; @bkmk_args{qw/xml_before node_xml kind attrs/} = @chunk;
  79         302  
394 79   50     321 my %attrs = parse_attrs(delete $bkmk_args{attrs} // "");
395 79         197 $bkmk_args{id} = $attrs{'w:id'};
396 79 100       172 $bkmk_args{name} = $attrs{'w:name'} if $attrs{'w:name'};
397 79         315 push @bookmark_boundaries, MsOffice::Word::Surgeon::BookmarkBoundary->new(%bkmk_args);
398             }
399              
400 3 50       5665 return wantarray ? (\@bookmark_boundaries, $final_xml) : \@bookmark_boundaries;
401             }
402              
403              
404              
405             sub suppress_bookmarks {
406 2     2 1 8 my ($self, %options) = @_;
407              
408             # check if options are valid and supply defaults
409 2         6 my @invalid_opt = grep {!/^(full_range|markup_only)$/} keys %options;
  0         0  
410 2 50       8 croak "suppress_bookmarks: invalid options: " . join(", ", @invalid_opt) if @invalid_opt;
411 2 50       17 %options = (markup_only => qr/./) if ! keys %options;
412              
413             # parse bookmark boundaries
414 2         7 my ($bookmark_boundaries, $final_xml) = $self->bookmark_boundaries;
415            
416             # loop on bookmark boundaries
417 2         6 my %boundary_ix_by_id;
418 2         12 while (my ($ix, $boundary) = each @$bookmark_boundaries) {
419              
420             # for starting boundaries, just remember the starting index
421 37 100       1970 if ($boundary->kind eq 'Start') {
    50          
422 18         1289 $boundary_ix_by_id{$boundary->id} = $ix;
423             }
424              
425             # for ending boundaries, do the suppression
426             elsif ($boundary->kind eq 'End') {
427              
428             # try to find the corresponding bookmarkStart node.
429 19         1000 my $start_ix = $boundary_ix_by_id{$boundary->id};
430              
431             # if not found, this is because the start was within a field that has been erased. So just clear the bookmarkEnd
432 19 100       57 if (!defined $start_ix) {
433 1         52 $boundary->node_xml("");
434             }
435              
436             # if found, do the normal suppression
437             else {
438 18         43 my $bookmark_start = $bookmark_boundaries->[$start_ix];
439 18         2272 my $bookmark_name = $bookmark_start->name;
440 18         196 my $should_erase_markup = match($bookmark_name, $options{markup_only});
441 18         200 my $should_erase_range = match($bookmark_name, $options{full_range});
442 18 50 33     71 if ($should_erase_markup || $should_erase_range) {
443              
444             # erase markup (start and end bookmarks)
445 18         1017 $_->node_xml("") for $boundary, $bookmark_start;
446              
447             # if required, also erase inner range
448 18 50       116 if ($should_erase_range) {
449 0         0 for my $erase_ix ($start_ix+1 .. $ix) {
450 0         0 my $inner_boundary = $bookmark_boundaries->[$erase_ix];
451 0 0       0 !$inner_boundary->node_xml
452             or die "cannot erase contents of bookmark '$bookmark_name' "
453             . "because it contains the start of bookmark '". $inner_boundary->name . "'";
454 0         0 $inner_boundary->xml_before("");
455             }
456             }
457             }
458             }
459             }
460             }
461              
462             # re-build the whole XML from all remaining fragments, and inject it back
463 2         114 my $new_contents = join "", (map {$_->xml_before, $_->node_xml} @$bookmark_boundaries), $final_xml;
  37         2103  
464 2         144 $self->contents($new_contents);
465             }
466              
467              
468             sub reveal_bookmarks {
469 1     1 1 4 my ($self, @marking_args) = @_;
470              
471             # auxiliary objects
472 1         16 my $marker = MsOffice::Word::Surgeon::PackagePart::_BookmarkMarker->new(@marking_args);
473 1         6 my $paragraph_tracker = MsOffice::Word::Surgeon::PackagePart::_ParaTracker->new;
474              
475             # parse bookmark boundaries
476 1         4 my ($bookmark_boundaries, $final_xml) = $self->bookmark_boundaries;
477              
478             # loop on bookmark boundaries
479 1         3 my @bookmark_name_by_id;
480 1         3 foreach my $boundary (@$bookmark_boundaries) {
481              
482             # count opening and closing paragraphs in xml before this node
483 42         1043 $paragraph_tracker->count_paragraphs($boundary->xml_before);
484              
485             # add visible runs before or after bookmark nodes
486 42 100       964 if ($boundary->kind eq 'Start') {
    50          
487 21         503 $bookmark_name_by_id[$boundary->id] = $boundary->name;
488 21         490 $boundary->prepend_xml($paragraph_tracker->maybe_add_paragraph($marker->mark($boundary->name, 0)));
489             }
490             elsif ($boundary->kind eq 'End') {
491 21         449 my $bookmark_name = $bookmark_name_by_id[$boundary->id];
492 21         30 $boundary->append_xml($paragraph_tracker->maybe_add_paragraph($marker->mark($bookmark_name, 1)));
493             }
494             }
495              
496             # re-build the whole XML and inject it back
497 1         3 my $new_contents = join "", (map {$_->xml_before, $_->node_xml} @$bookmark_boundaries), $final_xml;
  42         954  
498 1         41 $self->contents($new_contents);
499             }
500              
501              
502             #======================================================================
503             # OPERATIONS ON FIELDS
504             #======================================================================
505              
506             sub fields {
507 3     3 1 7 my ($self) = @_;
508              
509             # regex to find field nodes
510 3         52 state $field_rx = qr{
511             < w:fld # initial prefix for a field node
512             (Simple|Char) # $1 : distinguish between simple fields and complex fields
513             \h* ([^>]*?) # $2 : node attributes
514             (?: # either ..
515             /> # .. the end of an empty XML element
516             | # or ..
517             > # .. the end of the opening tag
518             (.*?) # .. $3: some node content
519             </w:fld\g1> # .. the closing tag
520             )
521             }sx;
522              
523             # split the whole xml according to the regex. Captured groups are also added to the list
524 3         147 my @xml_chunks = split /$field_rx/, $self->contents;
525 3         16 my $final_xml = pop @xml_chunks;
526              
527             # walk through the list of fragments and build a stack of field objects
528 3         7 my @field_stack;
529              
530             NODE:
531 3         21 while (my @chunk = splice @xml_chunks, 0, 4) {
532              
533             # initialize a node hash
534 144         3856 my %node; @node{qw/xml_before field_kind attrs node_content/} = @chunk;
  144         691  
535 144   100     984 $node{$_} //= "" for qw/xml_before field_kind attrs node_content/;
536              
537             # node attributes
538 144         481 my %attrs = parse_attrs($node{attrs});
539              
540 144 50       563 if ($node{field_kind} eq 'Simple') {
    50          
541             # for a simple field, all information is within the XML node
542             push @field_stack, MsOffice::Word::Surgeon::Field->new(
543             xml_before => $node{xml_before},
544             code => $attrs{'w:instr'},
545             result => $node{node_content},
546 0         0 );
547             }
548            
549             elsif ($node{field_kind} eq 'Char') {
550             # for a complex field, we need an auxiliary subroutine to handle the begin/separate/end parts
551 144         408 _handle_fldChar_node(\@field_stack, \%node, \%attrs);
552             }
553            
554 144         90836 $self->_maybe_embed_last_field(\@field_stack);
555             }
556            
557 3 50       22 return wantarray ? (\@field_stack, $final_xml) : \@field_stack;
558             }
559              
560              
561             sub replace_fields {
562 3     3 1 9 my ($self, $field_replacer) = @_;
563              
564 3         14 my ($fields, $final_xml) = $self->fields;
565 3         10 my @xml_parts = map {$_->xml_before, $field_replacer->($_)} @$fields;
  16         551  
566              
567 3         1489 $self->contents(join "", @xml_parts, $final_xml);
568             }
569              
570              
571             sub reveal_fields {
572 1     1 1 2 my $self = shift;
573              
574             # replace all fields by a textual representatio of their "code" part
575 1     8   7 my $revealer = sub {my $code = shift->code; encode_entities($code); return "<w:t>{$code}</w:t>"};
  8         382  
  8         77  
  8         30  
576 1         24 $self->replace_fields($revealer);
577             }
578              
579              
580             sub unlink_fields {
581 2     2 1 6 my $self = shift;
582              
583             # replace all fields by just their "result" part (in other words, ignore the "code" part).
584             # ASK fields return an empty string (because they have a special treatment in Word, where
585             # their 'result' part is hidden, unlike all other fields.
586             my $unlinker = sub {
587 8     8   12 my $field = shift;
588 8 100       152 return $field->type eq 'ASK' ? '' : $field->result;
589 2         12 };
590 2         9 $self->replace_fields($unlinker);
591             }
592              
593              
594             # below: auxiliary methods or subroutines for field handling
595              
596             sub _decode_instr_text {
597 50     50   87 my ($xml) = @_;
598              
599 50         434 my @instr_text = $xml =~ m{<w:instrText.*?>(.*?)</w:instrText>}g;
600 50         135 my $instr = join "", @instr_text;
601 50         161 decode_entities($instr);
602 50         258 return $instr;
603             }
604              
605             sub _handle_fldChar_node {
606 144     144   273 my ($field_stack, $node, $attrs) = @_;
607              
608 144         284 my $fldChar_type = $attrs->{"w:fldCharType"};
609              
610             # if this is the beginning a of a field : push a new field object on top of the stack
611 144 100       289 if ($fldChar_type eq 'begin') {
612             push @$field_stack, MsOffice::Word::Surgeon::Field->new(
613             xml_before => $node->{xml_before},
614 48         244 code => '',
615             result => '',
616             status => "begin",
617             );
618             }
619              
620             # otherwise this is the continuation of the current field (eiter "separate" or "end") : update it
621             else {
622 96 50       234 my $current_field = $field_stack->[-1]
623             or croak qq{met <w:fldChar w:fldCharType="$fldChar_type"> but there is no current field};
624 96         3490 my $current_status = $current_field->status;
625              
626 96 100       256 if ($current_status eq "begin") {
    50          
    0          
627 48         114 $current_field->append_to_code(_decode_instr_text($node->{xml_before}));
628             }
629              
630             elsif ($current_status eq "separate") {
631 48 50       105 $fldChar_type eq "end"
632             or croak qq{after a "separate" node, w:fldCharType cannot be "$fldChar_type"};
633 48         218 $current_field->append_to_result($node->{xml_before});
634             }
635              
636             elsif ($current_status eq "end") {
637 0         0 croak qq{met <w:fldChar w:fldCharType="$fldChar_type"> but last field is not open};
638             }
639              
640              
641 96         3373 $current_field->status($fldChar_type);
642             }
643             }
644              
645             sub _maybe_embed_last_field {
646 144     144   274 my ($self, $field_stack) = @_;
647              
648 144         254 my $last_field = $field_stack->[-1];
649 144         246 my $prev_field = $field_stack->[-2];
650              
651 144 100 66     5544 if ($last_field && $prev_field && $last_field->status eq 'end') {
      100        
652              
653 46         1533 my $prev_status = $prev_field->status;
654              
655 46 100       203 if ($prev_status eq 'begin') {
    100          
656             # the last field is embedded within the "code" part of the previous field
657 2         68 $prev_field->append_to_code(_decode_instr_text($last_field->xml_before)
658             . sprintf $self->surgeon->show_embedded_field, $last_field->code);
659 2         18 pop @$field_stack;
660             }
661              
662             elsif ($prev_status eq 'separate') {
663             # the last field is embedded within the "result" part of the previous field
664 30         1027 $prev_field->append_to_result($last_field->xml_before . $last_field->result);
665 30         234 pop @$field_stack;
666             }
667              
668             # elsif ($prev_status eq 'end') : $last_field is an independend field, just leave it on top of stack
669             }
670             }
671              
672              
673             #======================================================================
674             # OPERATIONS ON IMAGES
675             #======================================================================
676              
677              
678             sub replace_image {
679 0     0 1 0 my ($self, $image_title, $image_PNG_content) = @_;
680              
681 0 0       0 my $member_name = $self->images->{$image_title}
682             or die "could not find an image with title: $image_title";
683 0         0 $self->surgeon->zip->contents($member_name, $image_PNG_content);
684             }
685              
686              
687              
688             sub add_image {
689 0     0 1 0 my ($self, $image_PNG_content) = @_;
690              
691             # compute a fresh image number and a fresh relationship id
692 0         0 my @image_members = $self->surgeon->zip->membersMatching(qr[^word/media/image]);
693 0         0 my @image_nums = map {$_->fileName =~ /(\d+)/} @image_members;
  0         0  
694 0   0     0 my $last_img_num = max @image_nums // 0;
695 0         0 my $target = sprintf "media/image%d.png", $last_img_num + 1;
696 0         0 my $last_rId_num = $self->relationships->$#*;
697 0         0 my $rId = sprintf "rId%d", $last_rId_num + 1;
698              
699             # assemble XML for the new relationship
700 0         0 my $type = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";
701 0         0 my $new_rel_xml = qq{<Relationship Id="$rId" Type="$type" Target="$target"/>};
702              
703             # update the rels member
704 0         0 my $xml = $self->_rels_xml;
705 0         0 $xml =~ s[</Relationships>][$new_rel_xml</Relationships>];
706 0         0 $self->_rels_xml($xml);
707              
708             # add the image as a new member into the archive
709 0         0 my $member_name = "word/$target";
710 0         0 $self->surgeon->zip->addString(\$image_PNG_content, $member_name);
711              
712             # update the global content_types if it doesn't include PNG
713 0         0 my $ct = $self->surgeon->_content_types;
714 0 0       0 if ($ct !~ /Extension="png"/) {
715 0         0 $ct =~ s[(<Types[^>]+>)][$1<Default Extension="png" ContentType="image/png"/>];
716 0         0 $self->surgeon->_content_types($ct);
717             }
718              
719             # return the relationship id
720 0         0 return $rId;
721             }
722              
723              
724              
725              
726             #======================================================================
727             # INTERNAL CLASS FOR TRACKING PARAGRAPHS
728             #======================================================================
729              
730             package # hide from PAUSE
731             MsOffice::Word::Surgeon::PackagePart::_ParaTracker;
732 4     4   31 use strict;
  4         8  
  4         148  
733 4     4   18 use warnings;
  4         8  
  4         1222  
734              
735             sub new {
736 1     1   2 my $class = shift;
737 1         2 my $nb_para = 0;
738 1         2 bless \$nb_para, $class;
739             }
740              
741             sub count_paragraphs {
742 42     42   53 my ($self, $xml) = @_;
743              
744             # count opening and closing paragraph nodes
745 42         120 while ($xml =~ m[<(/)?w:p.*?(/)?>]g) {
746 794 100       1086 next if $2; # self-ending node -- doesn't change the number of paragraphs
747 777 100       3839 $$self += $1 ? -1 : +1;
748             }
749             }
750              
751             sub maybe_add_paragraph {
752 42     42   53 my ($self, $xml) = @_;
753              
754             # add paragraph nodes only if the ParaTracker is currently outside of any paragraph
755 42         46 my $is_outside_para = !$$self;
756 42 50 33     109 return $is_outside_para && $xml ? "<w:p>$xml</w:p>" : $xml;
757             };
758            
759              
760             #======================================================================
761             # INTERNAL CLASS FOR INTRODUCING BOOKMARK MARKERS
762             #======================================================================
763              
764             package # hide from PAUSE
765             MsOffice::Word::Surgeon::PackagePart::_BookmarkMarker;
766 4     4   32 use strict;
  4         14  
  4         85  
767 4     4   15 use warnings;
  4         8  
  4         224  
768 4     4   23 use MsOffice::Word::Surgeon::Utils qw(encode_entities);
  4         6  
  4         234  
769 4     4   21 use Carp qw(croak carp);
  4         6  
  4         1468  
770              
771             sub new {
772 1     1   3 my $class = shift;
773 1         4 my %self = @_;
774              
775 1   50     3 $self{color} //= "yellow";
776 1   50     7 $self{props} //= qq{<w:highlight w:val="%s"/>};
777 1   50     4 $self{start} //= "<%s>";
778 1   50     6 $self{end} //= "</%s>";
779 1 50       4 $self{ignore} = qr/^_/ if not exists $self{ignore};
780              
781 1 50       6 $self{color} =~ m{ ^( black | blue | cyan | darkBlue | darkCyan |
782             darkGray | darkGreen | darkMagenta | darkRed | darkYellow | green |
783             lightGray | magenta | none | red | white | yellow )$}x
784             or carp "invalid color : $self{color}";
785            
786 1         2 bless \%self, $class;
787             }
788              
789             sub mark {
790 42     42   62 my ($self, $bookmark_name, $is_end_node) = @_;
791              
792             # some bookmarks are just ignored
793             return ""
794 42 100 66     210 if $self->{ignore} and $bookmark_name =~ $self->{ignore};
795              
796             # build the visible text
797 4     4   29 no warnings 'redundant'; # because sprintf templates may decide not to use their arguments
  4         11  
  4         571  
798 12 100       24 my $sprintf_node = $is_end_node ? $self->{end} : $self->{start};
799 12         24 my $text = sprintf $sprintf_node, $bookmark_name;
800 12         21 my $props = sprintf $self->{props}, $self->{color};
801 12         49 encode_entities($text);
802              
803             # full xml for a visible run before or after the boookmark node
804 12         28 return "<w:r><w:rPr>$props</w:rPr><w:t>$text</w:t></w:r>";
805             }
806              
807              
808             1;
809              
810             __END__
811              
812             =encoding ISO-8859-1
813              
814             =head1 NAME
815              
816             MsOffice::Word::Surgeon::PackagePart - Operations on a single part within the ZIP package of a docx document
817              
818             =head1 SYNOPSIS
819              
820             my $part = $surgeon->document;
821             print $part->plain_text;
822             $part->replace(qr[$pattern], $replacement_callback);
823             $part->replace_image($image_alt_text, $image_PNG_content);
824             $part->unlink_fields;
825             $part->reveal_bookmarks;
826              
827              
828             =head1 DESCRIPTION
829              
830             This class is part of L<MsOffice::Word::Surgeon>; it encapsulates operations for a single
831             I<package part> within the ZIP package of a C<.docx> document.
832             It is mostly used for the I<document> part, that contains the XML representation of the
833             main document body. However, other parts such as headers, footers, footnotes, etc. have the
834             same internal representation and therefore the same operations can be invoked.
835              
836              
837             =head1 METHODS
838              
839             =head2 new
840              
841             my $part = MsOffice::Word::Surgeon::PackagePart->new(
842             surgeon => $surgeon,
843             part_name => $name,
844             );
845              
846             Constructor for a new part object. This is called internally from
847             L<MsOffice::Word::Surgeon>; it is not meant to be called directly
848             by clients.
849              
850             =head3 Constructor arguments
851              
852              
853             =over
854              
855             =item surgeon
856              
857             a weak reference to the main surgeon object
858              
859             =item part_name
860              
861             ZIP member name of this part
862              
863             =back
864              
865             =head3 Other attributes
866              
867             Other attributes, not passed through the constructor but generated lazily on demand, are :
868              
869             =over
870              
871             =item contents
872              
873             the XML contents of this part
874              
875             =item runs
876              
877             a decomposition of the XML contents into a collection of
878             L<MsOffice::Word::Surgeon::Run> objects.
879              
880             =item relationships
881              
882             an arrayref of Office relationships associated with this part. This information comes from
883             a C<.rels> member in the ZIP archive, named after the name of the package part.
884             Array indices correspond to relationship numbers. Array values are hashrefs with
885             keys
886              
887             =over
888              
889             =item Id
890              
891             the full relationship id
892              
893             =item num
894              
895             the numeric part of C<rId>
896              
897             =item Type
898              
899             the full reference to the XML schema for this relationship
900              
901             =item short_type
902              
903             only the last word of the type, e.g. 'image', 'style', etc.
904              
905             =item Target
906              
907             designation of the target within the ZIP file. The prefix 'word/' must be
908             added for having a complete Zip member name.
909              
910             =back
911              
912              
913              
914             =item images
915              
916             a hashref of images within this package part. Keys of the hash are image I<alternative texts>.
917             If present, the alternative I<title> will be preferred; otherwise the alternative I<description> will be taken
918             (note : the I<title> field was displayed in Office 2013 and 2016, but more recent versions only display
919             the I<description> field -- see
920             L<MsOffice documentation|https://support.microsoft.com/en-us/office/add-alternative-text-to-a-shape-picture-chart-smartart-graphic-or-other-object-44989b2a-903c-4d9a-b742-6a75b451c669>).
921              
922             Images without alternative text will not be accessible through the current Perl module.
923              
924             Values of the hash are zip member names for the corresponding
925             image representations in C<.png> format.
926              
927              
928             =back
929              
930              
931             =head2 Contents restitution
932              
933             =head3 contents
934              
935             Returns a Perl string with the current internal XML representation of the part
936             contents.
937              
938             =head3 original_contents
939              
940             Returns a Perl string with the XML representation of the
941             part contents, as it was in the ZIP archive before any
942             modification.
943              
944             =head3 indented_contents
945              
946             Returns an indented version of the XML contents, suitable for inspection in a text editor.
947             This is produced by L<XML::LibXML::Document/toString> and therefore is returned as an encoded
948             byte string, not a Perl string.
949              
950             =head3 plain_text
951              
952             Returns the text contents of the part, without any markup.
953             Paragraphs and breaks are converted to newlines, all other formatting instructions are ignored.
954              
955              
956             =head3 runs
957              
958             Returns a list of L<MsOffice::Word::Surgeon::Run> objects. Each of
959             these objects holds an XML fragment; joining all fragments
960             restores the complete document.
961              
962             my $contents = join "", map {$_->as_xml} $self->runs;
963              
964              
965             =head2 Modifying contents
966              
967              
968             =head3 cleanup_XML
969              
970             $part->cleanup_XML(%args);
971              
972             Apply several other methods for removing unnecessary nodes within the internal
973             XML. This method successively calls L</reduce_all_noises>, L</unlink_fields>,
974             L</suppress_bookmarks> and L</merge_runs>.
975              
976             Currently there is only one legal arg :
977              
978             =over
979              
980             =item C<no_caps>
981              
982             If true, the method L<MsOffice::Word::Surgeon::Run/remove_caps_property> is automatically
983             called for each run object. As a result, all texts within runs with the C<caps> property are automatically
984             converted to uppercase.
985              
986             =back
987              
988              
989              
990             =head3 reduce_noise
991              
992             $part->reduce_noise($regex1, $regex2, ...);
993              
994             This method is used for removing unnecessary information in the XML
995             markup. It applies the given list of regexes to the whole document,
996             suppressing matches. The final result is put back into
997             C<< $self->contents >>. Regexes may be given either as C<< qr/.../ >>
998             references, or as names of builtin regexes (described below). Regexes
999             are applied to the whole XML contents, not only to run nodes.
1000              
1001              
1002             =head3 noise_reduction_regex
1003              
1004             my $regex = $part->noise_reduction_regex($regex_name);
1005              
1006             Returns the builtin regex corresponding to the given name.
1007             Known regexes are :
1008              
1009             proof_checking => qr(<w:(?:proofErr[^>]+|noProof/)>),
1010             revision_ids => qr(\sw:rsid\w+="[^"]+"),
1011             complex_script_bold => qr(<w:bCs/>),
1012             page_breaks => qr(<w:lastRenderedPageBreak/>),
1013             language => qr(<w:lang w:val="[^/>]+/>),
1014             empty_run_props => qr(<w:rPr></w:rPr>),
1015             soft_hyphens => qr(<w:softHyphen/>),
1016              
1017             =head3 reduce_all_noises
1018              
1019             $part->reduce_all_noises;
1020              
1021             Applies all regexes from the previous method.
1022              
1023              
1024             =head3 merge_runs
1025              
1026             $part->merge_runs(no_caps => 1); # optional arg
1027              
1028             Walks through all runs of text within the document, trying to merge
1029             adjacent runs when possible (i.e. when both runs have the same
1030             properties, and there is no other XML node inbetween).
1031              
1032             This operation is a prerequisite before performing replace operations, because
1033             documents edited in MsWord often have run boundaries across sentences or
1034             even in the middle of words; so regex searches can only be successful if those
1035             artificial boundaries have been removed.
1036              
1037             If the argument C<< no_caps => 1 >> is present, the merge operation
1038             will also convert runs with the C<w:caps> property, putting all letters
1039             into uppercase and removing the property; this makes more merges possible.
1040              
1041              
1042              
1043             =head3 replace
1044              
1045             $part->replace($pattern, $replacement, %replacement_args);
1046              
1047             Replaces all occurrences of C<$pattern> regex within the text nodes by the
1048             given C<$replacement>. This is not exactly like a search-replace
1049             operation performed within MsWord, because the search does not cross boundaries
1050             of text nodes. In order to maximize the chances of successful replacements,
1051             the L</cleanup_XML> method is automatically called before starting the operation.
1052              
1053             The argument C<$pattern> can be either a string or a reference to a regular expression.
1054             It should not contain any capturing parentheses, because that would perturb text
1055             splitting operations.
1056              
1057             The argument C<$replacement> can be either a fixed string, or a reference to
1058             a callback subroutine that will be called for each match.
1059              
1060              
1061             The C<< %replacement_args >> hash can be used to pass information to the callback
1062             subroutine. That hash will be enriched with three entries :
1063              
1064             =over
1065              
1066             =item matched
1067              
1068             The string that has been matched by C<$pattern>.
1069              
1070             =item run
1071              
1072             The run object in which this text resides.
1073              
1074             =item xml_before
1075              
1076             The XML fragment (possibly empty) found before the matched text .
1077              
1078             =back
1079              
1080             The callback subroutine may return either plain text or structured XML.
1081             See L<MsOffice::Word::Surgeon::Run/SYNOPSIS> for an example of a replacement callback.
1082              
1083             The following special keys within C<< %replacement_args >> are interpreted by the
1084             C<replace()> method itself, and therefore are not passed to the callback subroutine :
1085              
1086             =over
1087              
1088             =item keep_xml_as_is
1089              
1090             if true, no call is made to the L</cleanup_XML> method before performing the replacements
1091              
1092             =item dont_overwrite_contents
1093              
1094             if true, the internal XML contents is not modified in place; the new XML after performing
1095             replacements is merely returned to the caller.
1096              
1097             =item cleanup_args
1098              
1099             the argument should be an arrayref and will be passed to the L</cleanup_XML> method. This
1100             is typically used as
1101              
1102             $part->replace($pattern, $replacement, cleanup_args => [no_caps => 1]);
1103              
1104             =back
1105              
1106              
1107             =head2 Operations on bookmarks
1108              
1109              
1110             =head3 bookmark_boundaries
1111              
1112             my $boundaries = part->bookmark_boundaries;
1113             my ($boundaries, $final_xml) = part->bookmark_boundaries;
1114              
1115             Parses the XML content to discover bookmark boundaries.
1116             In scalar context, returns an arrayref of L<MsOffice::Word::Surgeon::BookmarkBoundary> objects.
1117             In list context, returns the arrayref followed by a plain string containing the final XML fragment.
1118              
1119              
1120              
1121             =head3 suppress_bookmarks
1122              
1123             $part->suppress_bookmarks(full_range => [qw/foo bar/], markup_only => qr/^_/);
1124              
1125             Suppresses bookmarks according to the specified options :
1126              
1127             =over
1128              
1129             =item full_range
1130              
1131             For bookmark names matching this option, the bookmark will be fully
1132             suppressed (not only the start and end markers, but also any
1133             content inbetween).
1134              
1135              
1136             =item markup_only
1137              
1138             For bookmark names matching this option, start and end markers
1139             are suppressed, but the inner content remains.
1140              
1141             =back
1142              
1143             Options may be specified as lists of strings, or regexes, or coderefs ... anything suitable
1144             to be compared through L<match::simple>. In absence of any options, the default
1145             is C<< markup_only => qr/./ >>, meaning that all bookmarks markup is suppressed.
1146              
1147             Removing bookmarks is useful because
1148             MsWord may silently insert bookmarks in unexpected places; therefore
1149             some searches within the text may fail because of such bookmarks.
1150              
1151             The C<full_range> option is especially convenient for removing bookmarks associated
1152             with ASK fields. Such bookmarks contain ranges of text that are
1153             never displayed by MsWord.
1154              
1155              
1156             =head3 reveal_bookmarks
1157              
1158             $part->reveal_bookmarks(color => 'green');
1159              
1160             Usually bookmarks boundaries in MsWord are not visible; the only way to have a visual clue is to turn on
1161             an option in
1162             L<Advanced / Show document content / Show bookmarks|https://support.microsoft.com/en-gb/office/troubleshoot-bookmarks-9cad566f-913d-49c6-8d37-c21e0e8d6db0> -- but this only displays where bookmarks start and end, without the names of the bookmarks.
1163              
1164             The C<reveal_bookmarks()> method will insert a visible run before each bookmark start and after each bookmark end, showing
1165             the bookmark name. This is an interesting tool for documenting where bookmarks are located in an existing document.
1166              
1167             Options to this method are :
1168              
1169             =over
1170              
1171             =item color
1172              
1173             The highlighting color for visible marks. This should be a valid
1174             highlighting color, i.e black, blue, cyan, darkBlue, darkCyan,
1175             darkGray, darkGreen, darkMagenta, darkRed, darkYellow, green,
1176             lightGray, magenta, none, red, white or yellow. Default is yellow.
1177              
1178             =item props
1179              
1180             A string in C<sprintf> format for building the XML to be inserted in C<< <w:rPr> >> node
1181             when displaying bookmarks marks, i.e. the style for displaying such marks.
1182             The default is just a highlighting property : C<< <w:highlight w:val="%s"/> >>.
1183              
1184             =item start
1185              
1186             A string in C<sprintf> format for generating text before a bookmark start.
1187             Default is C<< <%s> >>.
1188              
1189             =item end
1190              
1191             A string in C<sprintf> format for generating text after a bookmark end.
1192             Default is C<< </%s> >>.
1193              
1194             =item ignore
1195              
1196             A regexp for deciding which bookmarks will not be revealed. Default is C<< qr/^_/ >>,
1197             because bookmarks with an initial underscore are usually technical bookmarks inserted
1198             automatically by MsWord, such as C<_GoBack> or C<_Toc53196147>.
1199              
1200              
1201             =back
1202              
1203              
1204             =head2 Operations on fields
1205              
1206             =head3 fields
1207              
1208             my $fields = part->fields;
1209             my ($fields, $final_xml) = part->fields;
1210              
1211             Parses the XML content to discover MsWord fields.
1212             In scalar context, returns an arrayref of L<MsOffice::Word::Surgeon::Field> objects.
1213             In list context, returns the arrayref followed by a plain string containing the final XML fragment.
1214              
1215              
1216              
1217             =head3 replace_fields
1218              
1219             my $field_replacer = sub {my ($code, $result) = @_; return "...";};
1220             $part->replace_fields($field_replacer);
1221              
1222             Replaces MsWord fields by the product of the C<< $field_replacer >> callback.
1223             The callback receives two arguments :
1224              
1225             =over
1226              
1227             =item C<$code>
1228              
1229             A plain string containing the field's full code instruction, i.e a keyword followed by optional arguments and switches,
1230             including initial and final spaces. Embedded fields are represented in curly braces, like for example
1231              
1232             C<< IF { DOCPROPERTY foo } = "bar" "is bar" "is not bar" >>.
1233              
1234             =item C<$result>
1235              
1236             An XML fragment containing the current value for the field.
1237              
1238             =back
1239              
1240             The callback should return an XML fragment suitable to be inserted within an MsWord I<run>.
1241              
1242              
1243             =head3 reveal_fields
1244              
1245             $part->reveal_fields;
1246              
1247             Replaces each field with a textual representation of its code instruction, embedded in curly braces.
1248              
1249              
1250             =head3 unlink_fields
1251              
1252             $part->unlink_fields;
1253              
1254             Replaces each field with its current result, i.e removing the code instruction.
1255             This is the equivalent of performing Ctrl-Shift-F9 in MsWord on the whole document.
1256              
1257              
1258              
1259             =head2 Operations on images
1260              
1261              
1262             =head3 replace_image
1263              
1264             $part->replace_image($image_alt_text, $image_PNG_content);
1265              
1266             Replaces an existing PNG image by a new image. All features of the old image will
1267             be preserved (size, positioning, border, etc.) -- only the image itself will be
1268             replaced. The C<$image_alt_text> must correspond to the I<alternative text> set in Word
1269             for this image.
1270              
1271             This operation replaces a ZIP member within the C<.docx> file. If several XML
1272             nodes refer to the I<same> ZIP member, i.e. if the same image is displayed at several
1273             locations, the new image will appear at all locations, even if they do not have the
1274             same alternative text -- unfortunately this module currently has no facility for
1275             duplicating an existing image into separate instances. So if your intent is to only replace
1276             one instance of the image, your original document should contain several distinct copies
1277             of the C<.PNG> file.
1278              
1279              
1280             =head3 add_image
1281              
1282             my $rId = $part->add_image($image_PNG_content);
1283              
1284             Stores the given PNG image within the ZIP file, adds it as a relationship to the
1285             current part, and returns the relationship id. This operation is not sufficient
1286             to make the image visible in Word : it just stores the image, but you still
1287             have to insert a proper C<drawing> node in the contents XML, using the C<$rId>.
1288             Future versions of this module may offer helper methods for that purpose;
1289             currently it must be done by hand.
1290              
1291              
1292             =head1 AUTHOR
1293              
1294             Laurent Dami, E<lt>dami AT cpan DOT org<gt>
1295              
1296             =head1 COPYRIGHT AND LICENSE
1297              
1298             Copyright 2019-2024 by Laurent Dami.
1299              
1300             This program is free software, you can redistribute it and/or modify it under the terms of the Artistic License version 2.0.
1301              
1302              
1303