File Coverage

blib/lib/MsOffice/Word/Surgeon/Text.pm
Criterion Covered Total %
statement 72 79 91.1
branch 19 24 79.1
condition 7 10 70.0
subroutine 14 15 93.3
pod 4 4 100.0
total 116 132 87.8


line stmt bran cond sub pod time code
1             package MsOffice::Word::Surgeon::Text;
2 4     4   62 use 5.24.0;
  4         14  
3 4     4   21 use Moose;
  4         10  
  4         30  
4 4     4   19224 use MooseX::StrictConstructor;
  4         9  
  4         33  
5 4     4   13585 use MsOffice::Word::Surgeon::Carp;
  4         11  
  4         59  
6 4     4   292 use MsOffice::Word::Surgeon::Utils qw(maybe_preserve_spaces is_at_run_level encode_entities);
  4         41  
  4         363  
7              
8 4     4   25 use namespace::clean -except => 'meta';
  4         98  
  4         45  
9              
10              
11             #======================================================================
12             # ATTRIBUTES
13             #======================================================================
14              
15             has 'xml_before' => (is => 'ro', isa => 'Str');
16             has 'literal_text' => (is => 'ro', isa => 'Str', required => 1);
17              
18             #======================================================================
19             # METHODS
20             #======================================================================
21              
22              
23             sub as_xml {
24 2613     2613 1 4452 my $self = shift;
25              
26 2613   100     109743 my $xml = $self->xml_before // '';
27 2613         104068 my $lit_txt = $self->literal_text;
28 2613 100 66     11644 if (defined $lit_txt && $lit_txt ne '') {
29 2605         9581 encode_entities($lit_txt);
30 2605         6258 my $space_attr = maybe_preserve_spaces($lit_txt);
31 2605         8021 $xml .= "<w:t$space_attr>$lit_txt</w:t>";
32             }
33 2613         9128 return $xml;
34             }
35              
36             sub merge {
37 49     49 1 112 my ($self, $next_text) = @_;
38              
39 49 50       2204 !$next_text->xml_before
40             or croak "cannot merge -- next text contains xml before the text : "
41             . $next_text->xml_before;
42              
43 49         2284 $self->{literal_text} .= $next_text->literal_text;
44              
45             }
46              
47             sub replace {
48 1589     1589 1 4541 my ($self, $pattern, $replacement, %args) = @_;
49              
50 1589         2791 my $xml = "";
51 1589         2464 my $current_text_node;
52 1589         59787 my $xml_before = $self->xml_before;
53              
54             # closure to make sure that $xml_before is used only once
55             my $maybe_xml_before = sub {
56 1567 100   1567   3478 my @r = $xml_before ? (xml_before => $xml_before) : ();
57 1567         2857 $xml_before = undef;
58 1567         6496 return @r;
59 1589         5626 };
60              
61             # closure to create a new text node
62             my $mk_new_text = sub {
63 1565     1565   2876 my ($literal_text) = @_;
64 1565         2929 return MsOffice::Word::Surgeon::Text->new(
65             $maybe_xml_before->(),
66             literal_text => $literal_text,
67             );
68 1589         5362 };
69              
70             # closure to create a new run node for enclosing a text node
71             my $add_new_run = sub {
72 0     0   0 my ($text_node) = @_;
73             my $run = MsOffice::Word::Surgeon::Run->new(
74             xml_before => '',
75             props => $args{run}->props,
76 0         0 inner_texts => [$text_node],
77             );
78 0         0 $xml .= $run->as_xml;
79 1589         4746 };
80              
81             # closure to add text to the current text node
82             my $add_to_current_text_node = sub {
83 1575     1575   3459 my ($txt_to_add) = @_;
84 1575   66     5407 $current_text_node //= $mk_new_text->('');
85 1575         1418967 $current_text_node->{literal_text} .= $txt_to_add;
86 1589         4024 };
87              
88             # closure to clear the current text node
89             my $maybe_clear_current_text_node = sub {
90 1589 100   1589   3768 if ($current_text_node) {
91 1565 50       6138 if (is_at_run_level($xml)) {
92 0         0 $add_new_run->($current_text_node);
93             }
94             else {
95 1565         4003 $xml .= $current_text_node->as_xml;
96             }
97 1565         6917 $current_text_node = undef;
98             }
99 1589         4727 };
100              
101             # find pattern within $self, each match becomes a fragment to handle
102 1589         15767 my @fragments = split qr[($pattern)], $self->{literal_text}, -1;
103 1589         4778 my $txt_after_last_match = pop @fragments;
104              
105             # loop to handle each match
106 1589         6979 while (my ($txt_before, $matched) = splice (@fragments, 0, 2)) {
107              
108             # new contents to replace the matched fragment
109 16 100       139 my $replacement_contents
    50          
110             = !ref $replacement ? $replacement
111             : $replacement->(matched => $matched,
112             (!$txt_before ? $maybe_xml_before->() : ()),
113             %args);
114              
115 16         113 my $replacement_is_xml = $replacement_contents =~ /^<w:/;
116 16 100       36 if ($replacement_is_xml) {
117             # if there was text before the match, add it as a new run
118 1 50       5 if ($txt_before) {
119 0         0 $maybe_clear_current_text_node->();
120 0         0 $add_new_run->($mk_new_text->($txt_before));
121             }
122              
123             # add the xml that replaces the match
124 1         7 $xml .= $replacement_contents;
125             }
126             else { # $replacement_contents is not xml but just literal text
127 15   50     74 $add_to_current_text_node->(($txt_before // '') . $replacement_contents);
128             }
129             }
130              
131             # handle remaining contents after the last match
132 1589 100       3587 if ($txt_after_last_match) {
133 1560         3376 $add_to_current_text_node->($txt_after_last_match);
134             }
135 1589         5013 $maybe_clear_current_text_node->();
136 1589 100       183748 if ($xml_before) {
137 24 50       54 !$xml or croak "internal error : Text::xml_before was ignored during replacements";
138 24         48 $xml = $xml_before;
139             }
140              
141 1589         24073 return $xml;
142             }
143              
144              
145              
146             sub to_uppercase {
147 1     1 1 3 my $self = shift;
148              
149             # split text fragments around HTML entities
150 1         9 my @fragments = split /(&\w+?;)/, $self->{literal_text};
151 1         4 my $txt_after_last_entity = pop @fragments;
152 1         4 my $txt_upcase = "";
153              
154             # assemble upcased text fragments
155 1         7 while (my ($txt_before, $entity) = splice (@fragments, 0, 2)) {
156 0         0 $txt_upcase .= uc($txt_before) . $entity;
157             }
158 1         7 $txt_upcase .= uc($txt_after_last_entity);
159              
160             # return the upcased text
161 1         6 $self->{literal_text} = $txt_upcase;
162             }
163              
164              
165             1;
166              
167             __END__
168              
169             =encoding ISO-8859-1
170              
171             =head1 NAME
172              
173             MsOffice::Word::Surgeon::Text - internal representation for a node of literal text
174              
175             =head1 DESCRIPTION
176              
177             This is used internally by L<MsOffice::Word::Surgeon> for storing
178             a chunk of literal text in a MsWord document. It loosely corresponds to
179             a C<< <w:t> >> node in OOXML, but may also contain an anonymous XML
180             fragment which is the part of the document just before the C<< <w:t> >>
181             node -- used for reconstructing the complete document after having changed
182             the contents of some text nodes.
183              
184              
185             =head1 METHODS
186              
187             =head2 new
188              
189             my $text_node = MsOffice::Word::Surgeon::Text(
190             xml_before => $xml_string,
191             literal_text => $text_string,
192             );
193              
194             Constructor for a new text object. Arguments are :
195              
196             =over
197              
198             =item xml_before
199              
200             A string containing arbitrary XML preceding that text node in the complete document.
201             The string may be empty but must be present.
202              
203              
204             =item literal_text
205              
206             A string of literal text.
207              
208             =back
209              
210              
211              
212             =head2 as_xml
213              
214             my $xml = $text_node->as_xml;
215              
216             Returns the XML representation of that text node.
217             The attribute C<< xml:space="preserve" >> is automatically added
218             if the literal text starts of ends with a space character.
219              
220              
221             =head2 merge
222              
223             $text_node->merge($next_text_node);
224              
225             Merge the contents of C<$next_text_node> together with the current text node.
226             This is only possible if the next text node has
227             an empty C<xml_before> attribute; if this condition is not met,
228             an exception is raised.
229              
230             =head2 replace
231              
232             my $xml = $text_node->replace($pattern, $replacement_callback, %args);
233              
234             Replaces all occurrences of C<$pattern> within the text node by
235             a new string computed by C<$replacement_callback>, and returns a new xml
236             string corresponding to the result of all these replacements. This is the
237             internal implementation for public method
238             L<MsOffice::Word::Surgeon/replace>.
239              
240             =head2 to_uppercase
241              
242             Puts the literal text within the node into uppercase letters.
243              
244              
245             =head1 AUTHOR
246              
247             Laurent Dami, E<lt>dami AT cpan DOT org<gt>
248              
249             =head1 COPYRIGHT AND LICENSE
250              
251             Copyright 2019-2024 by Laurent Dami.
252              
253             This program is free software, you can redistribute it and/or modify it under the terms of the Artistic License version 2.0.
254