File Coverage

blib/lib/Catmandu/XSD.pm
Criterion Covered Total %
statement 31 34 91.1
branch 2 2 100.0
condition n/a
subroutine 9 10 90.0
pod 0 4 0.0
total 42 50 84.0


line stmt bran cond sub pod time code
1             package Catmandu::XSD;
2              
3 3     3   378974 use Moo;
  3         11  
  3         19  
4 3     3   816 use Catmandu::Util;
  3         4  
  3         133  
5 3     3   1065 use XML::Compile;
  3         217337  
  3         96  
6 3     3   1213 use XML::Compile::Cache;
  3         325113  
  3         124  
7 3     3   22 use XML::Compile::Util 'pack_type';
  3         8  
  3         1964  
8              
9             our $VERSION = '0.05';
10              
11             has 'root' => (is => 'ro' , required => 1);
12             has 'schemas' => (is => 'ro' , required => 1 , coerce => sub {
13             my ($value) = @_;
14             if (Catmandu::Util::is_array_ref($value)) {
15             return $value;
16             }
17             elsif ($value =~ /\*/) {
18             my @files = glob($value);
19             \@files;
20             }
21             else {
22             my @files = split(/,/,$value);
23             \@files;
24             }
25             });
26              
27             has 'mixed' => (is => 'ro' , default => sub { 'ATTRIBUTES' });
28             has 'any_element' => (is => 'ro' , default => sub { 'TAKE_ALL' } , coerce => sub {
29             my $val = $_[0];
30             if (defined $val && $val eq 'XML_STRING') {
31             return sub {
32             my ($path, $node , $handler) = @_;
33             if ($node && ref($node)) {
34             my $str = '';
35             for (@$node) {
36             $str .= $_->toString;
37             }
38             ('_',$str);
39             }
40             else {
41             $node;
42             }
43             };
44             }
45             else {
46             $val;
47             }
48             });
49             has 'prefixes' => (is => 'ro' , coerce => sub {
50             my ($value) = @_;
51             if (Catmandu::Util::is_array_ref($value)) {
52             return $value;
53             }
54             elsif (defined($value)) {
55             my $ret = [];
56             for (split(/,/,$value)) {
57             my ($ns,$url) = split(/:/,$_,2);
58             push @$ret , { $ns => $url };
59             }
60             return $ret;
61             }
62             else {
63             undef;
64             }
65             });
66              
67             has '_reader' => (is => 'ro');
68             has '_writer' => (is => 'ro');
69              
70             sub BUILD {
71 11     11 0 134 my ($self) = @_;
72              
73 11         142 my $schema = XML::Compile::Cache->new($self->schemas);
74              
75             $schema->addHook(
76             action => 'READER' ,
77             after => sub {
78 2327     2327   3127972 my ($xml, $data, $path) = @_;
79 2327 100       28757 delete $data->{_MIXED_ELEMENT_MODE} if Catmandu::Util::is_hash_ref($data);
80 2327         4061 $data;
81             }
82 11         64239 );
83              
84 11         482 $self->{_reader} = $schema->compile(
85             READER => $self->root,
86             mixed_elements => $self->mixed ,
87             any_element => $self->any_element ,
88             sloppy_floats => 'true',
89             sloppy_integers => 'true' ,
90             );
91              
92 11         7259954 $self->{_writer} = $schema->compile(
93             WRITER => $self->root,
94             prefixes => $self->prefixes,
95             sloppy_floats => 'true',
96             sloppy_integers => 'true' ,
97             );
98              
99 11         7797285 $schema = undef;
100             }
101              
102             sub template {
103 0     0 0 0 my ($self) = @_;
104 0         0 my $schema = XML::Compile::Cache->new($self->schemas);
105 0         0 $schema->template('PERL', $self->root , show => 'ALL');
106             }
107              
108             sub parse {
109 10     10 0 27967 my ($self,$input) = @_;
110 10         61 $self->_reader->($input);
111             }
112              
113             sub to_xml {
114 5     5 0 2715 my ($self,$data) = @_;
115 5         46 my $doc = XML::LibXML::Document->new('1.0', 'UTF-8');
116 5         28 my $xml = $self->_writer->($doc, $data);
117 5         7747 $doc->setDocumentElement($xml);
118 5         87 $doc->toString(1);
119             }
120              
121             1;
122              
123             __END__
124              
125             =encoding utf8
126              
127             =head1 NAME
128              
129             Catmandu::XSD - Modules for handling XML data with XSD compilation
130              
131             =head1 SYNOPSIS
132              
133             ## Converting XML to YAML/JSON/CSV/etc
134              
135             # Compile an XSD schema file and parse one shiporder.xml file
136             catmandu convert XSD --root '{}shiporder'
137             --schemas demo/order/*.xsd
138             to YAML < shiporder.xml
139              
140             # Same as above but parse more than one file into an array of records
141             catmandu convert XSD --root '{}shiporder'
142             --schemas demo/order/*.xsd
143             --files 'data/*.xml'
144             to YAML
145              
146             # Same as above but all records are in an XML container file
147             catmandu convert XSD --root '{}shiporder'
148             --schemas demo/order/*.xsd
149             --xpath '/Container/List//Record/Payload/*'
150             to YAML < data/container.xml
151              
152             ## Convert YAML/JSON/CSV into XML validated against an XSD schema
153              
154             # Convert one shiporder YAML to XML
155             catmandu convert YAML to XSD --root '{}shiporder'
156             --schemas demo/order/*.xsd < shiporder.YAML
157              
158             # Same as above but store multiple shiporders in the YAML into a separate file
159             catmandu convert YAML to XSD --root '{}shiporder'
160             --schemas demo/order/*.xsd
161             --split 1
162             < shiporder.YAML
163              
164             # Same as above but use Template Toolkit to pack the XML into a container
165             # (The xml record is stored in the 'xml' key which can be retrieved in the
166             # template by [% xml %])
167             catmandu convert YAML to XSD --root '{}shiporder'
168             --schemas demo/order/*.xsd
169             --template_before t/xml_header.tt
170             --template t/xml_record.tt
171             --template t/xml_footer.tt
172             < shiporder.YAML
173              
174             ## Example documents
175              
176             # Show an example of how a valid XML document needs to be structured for an
177             # XSD schema.
178             catmandu convert XSD --root {}shiporder
179             --schemas "t/demo/order/*xsd"
180             --example 1 to YAML
181              
182             =head1 DESCRIPTION
183              
184             L<Catmandu::XSD> contains modules for handling XML data within the L<Catmandu>
185             framework. Parsing and serializing is based on L<XML::Compile>.
186              
187             There are two modules available for handling XML data in the Catmandu framework:
188             L<Catmandu::XML> and L<Catmandu::XSD>. The former one can be used when no XML schema
189             is available for the data. It provides a simple interface to read in XML data and
190             transform it to other formats. Because L<Catmandu::XML> doesn't depend on an
191             XSD schema, it can't know which fields in the input XML files are sequences or
192             single value elements. Each record is parsed on its own. A record with content:
193              
194             <foo>
195             <bar>test</bar>
196             </foo>
197              
198             will be parsed into a YAML output like:
199              
200             catmandu convert XML to YAML < test.xml
201             --
202             bar: test
203              
204             A record with content:
205              
206             <foo>
207             <bar>test</bar>
208             <bar>test</bar>
209             </foo>
210              
211             will be parsed into a YAML output like:
212              
213             catmandu convert XML to YAML < test2.xml
214             --
215             bar:
216             - test
217             - test
218              
219             In the first case 'bar' will contain a string, in the second case an array. This
220             might not be what you want in some programming projects. E.g. when you need the 'bar'
221             field to be always an array of values, then an XSD schema file is required
222             containing the exact structure of the XML document:
223              
224             test.xsd:
225             <?xml version="1.0" encoding="UTF-8" ?>
226             <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
227             <xs:element name="foo">
228             <xs:complexType>
229             <xs:sequence>
230             <xs:element name="bar" type="xs:string" maxOccurs="unbounded"/>
231             </xs:sequence>
232             </xs:complexType>
233             </xs:element>
234             </xs:schema>
235              
236             And now the test.xml and test2.xml can be parsed with help of Catmandu::XSD:
237              
238             catmandu convert XSD --root '{}foo' --schemas test.xsd to YAML < test.xml
239             --
240             bar:
241             - test
242              
243             catmandu convert XSD --root '{}foo' --schemas test.xsd to YAML < test2.xml
244             --
245             bar:
246             - test
247             - test
248              
249             =head1 WILDCARDS
250              
251             Some XSD schemas allow for C<any> or C<anyAttribute> specifications in the schema.
252             The L<Catmandu::XSD> modules can't guess in these cases what the schema implementation
253             is. These nodes will be parsed as L<XML::LibXML::Node>s in the
254             resulting documents. Catmandu output formats such as L<Catmandu::Exporter::JSON>
255             can't handle these XML::LibXML::Node nodes. You have to implement yourself a
256             L<Catmandu::Fix> to translate these values into plain string, array or hash elements.
257              
258             But in general a round trip should be problematic:
259              
260             catmandu convert XSD --root ... --schemas wildcard.xsd to XSD --root ... --schemas wildcard.xsd < data.xml
261              
262             =head1 MIXED ELEMENTS
263              
264             ComplexType and ComplexContent in the XSD schema can be declared with the C<< mixed="true" >> attribute.
265             This means that in the XML documents simple text and XML elements can be mixed as in:
266              
267             Hello, I'm <name>John</name> how can I <bold>help</bold> you?
268              
269             In these cases it is not known if the elements are required as a hash or should be ignored. By
270             default L<Catmandu::XSD> will parse these elements as L<XML::LibXML::Node> documents.
271             This behaviour can be changed by setting the 'mixed' flag:
272              
273             # All mixed elements will be XML::LibXML::Node-s
274             catmandu convert XSD --root ... --schemas mixed.xsd < data.xml
275              
276             # The mixed elements will be ignored, only the text will survive
277             #
278             # Hello, I'm <name>John</name> how can I <bold>help</bold> you?
279             #
280             # => Hello, I'm John how can I help you?
281             catmandu convert XSD --root ... --schemas mixed.xsd --mixed TEXTUAL < data.xml
282              
283             # The mixed text will be ignored, only the elements will survive
284             #
285             # Hello, I'm <name>John</name> how can I <bold>help</bold> you?
286             #
287             # => { name => 'John' , bold => 'help' }
288             catmandu convert XSD --root ... --schemas mixed.xsd --mixed STRUCTURAL < data.xml
289              
290             # The mixed elements will be a plain XML fragment string
291             #
292             # Hello, I'm <name>John</name> how can I <bold>help</bold> you?
293             #
294             # => $r = 'Hello, I'm <name>John</name> how can I <bold>help</bold> you?'
295             catmandu convert XSD --root ... --schemas mixed.xsd --mixed XML_STRING < data.xml
296              
297             =head1 MODULES
298              
299             =over
300              
301             =item L<Catmandu::Importer::XSD>
302              
303             Parse and validate XML data using an XSD file for structural data.
304              
305             =item L<Catmandu::Exporter::XSD>
306              
307             Serialize and validate XML data using an XSD file for structural data.
308              
309             =item L<Catmandu::Fix::xpath_map>
310              
311             Map XML from XSD-any elements into data fields using XPath expressions.
312              
313             =back
314              
315             =head1 BUGS, QUESTIONS, HELP
316              
317             Use the github issue tracker for any bug reports or questions on this module:
318             https://github.com/LibreCat/Catmandu-XSD/issues
319              
320             =head1 DISCLAIMER
321              
322             This module is based on L<XML::Compile> and the L<Catmandu> framework.
323              
324             L<XML::Compile> is the workhorse that forms the core of this module to
325             compile XSD files into parsers and serializers.
326              
327             L<Catmandu> is used to transform parsed XML into any format you like.
328             Catmandu contains a simple DSL called L<Catmandu::Fix> to create
329             small scripts to manipulate data. The L<Catmandu> toolkit is used by many
330             university libraries to process metadata collections.
331              
332             For more information on Catmandu visit: http://librecat.org/Catmandu/
333             or follow the blog posts at: https://librecatproject.wordpress.com/
334              
335             =head1 AUTHOR
336              
337             Patrick Hochstenbach , C<< patrick.hochstenbach at ugent.be >>
338              
339             =head1 LICENSE AND COPYRIGHT
340              
341             This program is free software; you can redistribute it and/or modify it
342             under the terms of either: the GNU General Public License as published
343             by the Free Software Foundation; or the Artistic License.
344              
345             See L<http://dev.perl.org/licenses/> for more information.
346              
347             =head1 SEE ALSO
348              
349             L<XML::Compile> , L<Catmandu> , L<Template> , L<Catmandu::XML>
350              
351             =cut