File Coverage

blib/lib/MsOffice/Word/Surgeon/Utils.pm
Criterion Covered Total %
statement 30 30 100.0
branch 2 2 100.0
condition 6 6 100.0
subroutine 10 10 100.0
pod 5 5 100.0
total 53 53 100.0


line stmt bran cond sub pod time code
1             package MsOffice::Word::Surgeon::Utils;
2 4     4   49 use 5.24.0;
  4         16  
3 4     4   20 use strict;
  4         5  
  4         83  
4 4     4   15 use warnings;
  4         8  
  4         230  
5 4     4   19 use MsOffice::Word::Surgeon::Carp;
  4         5  
  4         25  
6 4     4   223 use Exporter qw/import/;
  4         9  
  4         2968  
7              
8             our @EXPORT = qw/maybe_preserve_spaces is_at_run_level parse_attrs decode_entities encode_entities/;
9              
10             sub maybe_preserve_spaces {
11 2605     2605 1 5326 my ($txt) = @_;
12 2605 100 100     16249 return $txt =~ /^\s/ || $txt =~ /\s$/ ? ' xml:space="preserve"' : '';
13             }
14              
15             sub is_at_run_level {
16 3154     3154 1 6761 my ($xml) = @_;
17 3154         15811 return $xml =~ m[</w:(?:r|del|ins)>$];
18             }
19              
20             sub parse_attrs { # cheap parsing of attribute lists in an XML node
21 355     355 1 741 my ($lst_attrs) = @_;
22              
23 355         500 state $attr_pair_regex = qr[
24             ([^=\s"'&<>]+) # attribute name
25             \h* = \h* # Eq
26             (?: # attribute value
27             " ([^<"]*) " # .. enclosed in double quotes
28             |
29             ' ([^<']*) ' # .. or enclosed in single quotes
30             )
31             ]x;
32              
33 355         504 my %attr;
34 355         3003 while ($lst_attrs =~ /$attr_pair_regex/g) {
35 730   100     2510 my ($name, $val) = ($1, $2 // $3);
36 730         1619 decode_entities($val);
37 730         3964 $attr{$name} = $val;
38             }
39              
40 355         1954 return %attr;
41             }
42              
43              
44              
45              
46             # Cheap version for encoding/decoding XML Entities.
47             # We just need 4 of them, so no need for a module with complete support.
48             my %entities = (quot => '"', amp => '&', 'lt' => '<', gt => '>');
49             my $entity_names = join "|", keys %entities;
50             my $entity_chars = "[" . join("", values %entities) . "]";
51             my %entity_for_char = reverse %entities;
52              
53 3470     3470 1 18904 sub decode_entities { $_[0] =~ s{&($entity_names);}{$entities{$1} }eg; }
  639         3002  
54 2625     2625 1 15685 sub encode_entities { $_[0] =~ s{($entity_chars)} {'&'.$entity_for_char{$1}.';'}eg; }
  431         2325  
55              
56             1;
57              
58             __END__
59              
60             =encoding ISO-8859-1
61              
62             =head1 NAME
63              
64             MsOffice::Word::Surgeon::Utils - utility functions for MsOffice::Word::Surgeon
65              
66             =head1 SYNOPSIS
67              
68             use MsOffice::Word::Surgeon::Utils qw(maybe_preserve_spaces);
69             my $attr = maybe_preserve_spaces($some_text);
70              
71              
72             =head1 DESCRIPTION
73              
74             Functions in this module are used internally by L<MsOffice::Word::Surgeon>.
75              
76             =head1 FUNCTIONS
77              
78             =head2 maybe_preserve_spaces
79              
80             my $attr = maybe_preserve_spaces($some_text);
81              
82             Returns the XML attribute to be inserted into C<< <w:t> >> nodes and
83             C<< <w:delText> >> nodes when the literal text within the node starts
84             or ends with a space -- in that case the XML should contain the
85             attribute C<< xml:space="preserve" >>
86              
87             =head2 is_at_run_level
88              
89             if (is_at_run_level($xml)) {...}
90              
91             Returns true if the given XML fragment ends with a C<< </w:r> >>,
92             C<< </w:del> >> or C<< </w:ins> >> node.
93              
94             =head2 parse_attrs
95              
96             my %attrs = parse_attrs($lst_attrs)
97              
98             Returns a hash of name-value pairs parsed from the input string.
99             Values may be enclosed in single or in double quotes.
100             Values are entity-decoded.
101              
102             =head2 decode_entities
103              
104             decode_entities($string)
105              
106             Decodes XML entities within the supplied string (in-place decoding).
107              
108             =head2 encode_entities
109              
110             encode_entities($string)
111              
112             Encodes XML entities within the supplied string (in-place encoding).
113              
114              
115             =head1 COPYRIGHT AND LICENSE
116              
117             Copyright 2019-2024 by Laurent Dami.
118              
119             This program is free software, you can redistribute it and/or modify it under the terms of the Artistic License version 2.0.