| line | stmt | bran | cond | sub | pod | time | code | 
| 1 | 2 |  |  | 2 |  | 178682 | use utf8; | 
|  | 2 |  |  |  |  | 17 |  | 
|  | 2 |  |  |  |  | 14 |  | 
| 2 |  |  |  |  |  |  | package Document::OOXML; | 
| 3 | 2 |  |  | 2 |  | 653 | use Moose; | 
|  | 2 |  |  |  |  | 775411 |  | 
|  | 2 |  |  |  |  | 16 |  | 
| 4 | 2 |  |  | 2 |  | 13860 | use namespace::autoclean; | 
|  | 2 |  |  |  |  | 12070 |  | 
|  | 2 |  |  |  |  | 7 |  | 
| 5 |  |  |  |  |  |  |  | 
| 6 |  |  |  |  |  |  | # ABSTRACT: Manipulation of Office Open XML files | 
| 7 |  |  |  |  |  |  | our $VERSION = '0.172650'; # VERSION | 
| 8 |  |  |  |  |  |  |  | 
| 9 | 2 |  |  | 2 |  | 545 | use Archive::Zip qw( :ERROR_CODES :CONSTANTS ); | 
|  | 2 |  |  |  |  | 64485 |  | 
|  | 2 |  |  |  |  | 289 |  | 
| 10 | 2 |  |  | 2 |  | 15 | use Carp; | 
|  | 2 |  |  |  |  | 2 |  | 
|  | 2 |  |  |  |  | 87 |  | 
| 11 | 2 |  |  | 2 |  | 120 | use XML::LibXML; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | use Document::OOXML::ContentTypes; | 
| 14 |  |  |  |  |  |  | use Document::OOXML::Document::Wordprocessor; | 
| 15 |  |  |  |  |  |  | use Document::OOXML::PartParser; | 
| 16 |  |  |  |  |  |  | use Document::OOXML::Rels; | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  |  | 
# Relationship types that identify the main document part of a package,
# keyed by OOXML flavour.  read_document() looks these up in the
# package-level '_rels/.rels':
#   transitionalDocument - the "old"/transitional namespace
#                          (schemas.openxmlformats.org)
#   strictDocument       - the "new"/ISO strict namespace
#                          (purl.oclc.org/ooxml)
my %ROOT_PART_REL_TYPES = (
    'transitionalDocument' =>
        'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument',
    'strictDocument' =>
        'http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument',
);
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  |  | 
# Open the OOXML package stored in $filename and build a document object
# for it.
#
# Called as a class method:
#
#     my $doc = Document::OOXML->read_document($filename);
#
# Steps, in order:
#   1. read the ZIP archive;
#   2. parse '[Content_Types].xml' and the package-level '_rels/.rels';
#   3. find the main document part through its relationship type
#      (transitional first, then strict);
#   4. parse that part and wrap it in the matching document class.
#
# Returns the new document object (the only class currently selected here
# is Document::OOXML::Document::Wordprocessor).
#
# Croaks when the archive cannot be read, when a required member or the
# main document relationship is missing, or when the main part is not a
# supported document type.
sub read_document {
    my ($class, $filename) = @_;

    my $zip        = Archive::Zip->new();
    my $zip_status = $zip->read($filename);
    croak("Cannot read: $zip_status") unless $zip_status == AZ_OK;

    my $ct_xml = $zip->contents('[Content_Types].xml')
        or croak("No member named '/[Content_Types].xml'. Is it OOXML?");
    my $content_types = Document::OOXML::ContentTypes->new_from_xml($ct_xml);

    my $base_rels_data = $zip->contents('_rels/.rels')
        or croak("No member named '_rels/.rels' in document. Is it OOXML?");
    my $rels = Document::OOXML::Rels->new_from_xml($base_rels_data, '');

    # The "old"/transitional XML uses schemas.openxmlformats.org
    # "New"/ISO standard/strict XML uses purl.oclc.org/ooxml
    #
    # Keep the relation as a reference and check it before use: blindly
    # dereferencing the lookup result would die with "Can't use an undefined
    # value as a HASH reference" when neither relationship is present.
    # NOTE(review): assumes get_part_relation_by_type() returns a false value
    # when no relationship of the given type exists -- the '||' fallback
    # already relies on that; confirm against Document::OOXML::Rels.
    my $document_part_relation
        = $rels->get_part_relation_by_type($ROOT_PART_REL_TYPES{transitionalDocument})
       || $rels->get_part_relation_by_type($ROOT_PART_REL_TYPES{strictDocument})
        or croak("No officeDocument relationship found in '_rels/.rels'. Is it OOXML?");

    my $type      = $document_part_relation->{type};
    my $part_name = $document_part_relation->{part_name};

    # The flavour of the package follows from which relationship type matched.
    my $strict = $type eq $ROOT_PART_REL_TYPES{strictDocument} ? 1 : 0;

    my $part_contents = $zip->contents($part_name)
        or croak("No member named '$part_name' in document. Is it OOXML?");

    my $doc_part = Document::OOXML::PartParser->parse_part(
        content_type => $content_types->get_content_type_for_part($part_name),
        contents     => $part_contents,
        part_name    => $part_name,
        is_strict    => $strict,
    );

    # Only WordprocessingML main parts are supported at the moment.
    croak("Unsupported document type")
        unless $doc_part->isa('Document::OOXML::Part::WordprocessingML');
    my $document_class = 'Document::OOXML::Document::Wordprocessor';

    my $ooxml = $document_class->new(
        content_types => $content_types,
        filename      => $filename,
        source        => $zip,
        is_strict     => $strict,
    );

    # Parts have weak references to the document they're in, so they don't
    # create reference loops.
    #
    # They can use this reference to find or add other parts (images,
    # headers, footers, etc.) referenced by the main document.
    $doc_part->document($ooxml);
    $ooxml->set_document_part($doc_part);

    return $ooxml;
}
| 98 |  |  |  |  |  |  |  | 
# Make the Moose metaclass immutable now that the class is fully defined.
__PACKAGE__->meta()->make_immutable();
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | __END__ | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | =pod | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | =encoding UTF-8 | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | =head1 NAME | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | Document::OOXML - Manipulation of Office Open XML files | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | =head1 VERSION | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | version 0.172650 | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  | my $doc = Document::OOXML->read_document('some.docx'); | 
| 118 |  |  |  |  |  |  |  | 
| 119 |  |  |  |  |  |  | $doc->replace_text('old', 'new'); | 
| 120 |  |  |  |  |  |  |  | 
| 121 |  |  |  |  |  |  | $doc->save_to_file('some_other.docx'); | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | This module provides a way to open, modify and save Office Open XML files | 
| 126 |  |  |  |  |  |  | (also known as OOXML or Microsoft Office XML). | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | =head1 METHODS | 
| 129 |  |  |  |  |  |  |  | 
| 130 |  |  |  |  |  |  | =head2 read_document($filename) | 
| 131 |  |  |  |  |  |  |  | 
| 132 |  |  |  |  |  |  | Opens the file named C<$filename> and parses it. | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | If the file doesn't appear to be a valid package, it will croak. | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | Returns an instance of a subclass of L<Document::OOXML::Document> that can | 
| 137 |  |  |  |  |  |  | be used to manipulate the contents of the document: | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | =over | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | =item * L<Document::OOXML::Document::Wordprocessor> | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | =back | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 146 |  |  |  |  |  |  |  | 
| 147 |  |  |  |  |  |  | The format of Office Open XML files is described in the | 
| 148 |  |  |  |  |  |  | L<ISO/IEC 29500|https://www.iso.org/standard/71691.html> and | 
| 149 |  |  |  |  |  |  | L<ECMA-376|https://www.ecma-international.org/publications/standards/Ecma-376.htm> | 
| 150 |  |  |  |  |  |  | standards. | 
| 151 |  |  |  |  |  |  |  | 
| 152 |  |  |  |  |  |  | =head1 AUTHOR | 
| 153 |  |  |  |  |  |  |  | 
| 154 |  |  |  |  |  |  | Martijn van de Streek <martijn@vandestreek.net> | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 157 |  |  |  |  |  |  |  | 
| 158 |  |  |  |  |  |  | This software is copyright (c) 2017 by Martijn van de Streek. | 
| 159 |  |  |  |  |  |  |  | 
| 160 |  |  |  |  |  |  | This is free software; you can redistribute it and/or modify it under | 
| 161 |  |  |  |  |  |  | the same terms as the Perl 5 programming language system itself. | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  | =cut |