File Coverage

blib/lib/Treex/PML/Schema.pm
Criterion Covered Total %
statement 344 522 65.9
branch 131 282 46.4
condition 116 254 45.6
subroutine 62 83 74.7
pod 31 34 91.1
total 684 1175 58.2


line stmt bran cond sub pod time code
1             # -*- cperl -*-
2             package Treex::PML::Schema;
3              
4              
5 8     8   55 use strict;
  8         16  
  8         348  
6 8     8   42 use warnings;
  8         19  
  8         537  
7 8     8   55 no warnings 'uninitialized';
  8         14  
  8         363  
8              
9 8     8   4096 use UNIVERSAL::DOES;
  8         5369  
  8         448  
10              
11 8     8   89 use Carp;
  8         13  
  8         496  
12 8     8   3820 use Treex::PML::Schema::Constants;
  8         23  
  8         877  
13 8     8   3830 use Treex::PML::Resource::URI;
  8         31  
  8         911  
14              
15             BEGIN {
16 8     8   25 our $VERSION = '2.28'; # version template
17 8         71 require Exporter;
18 8         111 import Exporter qw(import);
19 8         53 our @EXPORT = (
20             @Treex::PML::Schema::Constants::EXPORT,
21             qw(PML_VERSION_SUPPORTED),
22             );
23 8         304 our %EXPORT_TAGS = (
24             'constants' => [ @EXPORT ],
25             );
26             } # BEGIN
27              
28 8     8   48 use constant PML_VERSION_SUPPORTED => "1.2";
  8         17  
  8         670  
29              
30 8     8   4621 use Treex::PML::Schema::XMLNode;
  8         28  
  8         380  
31 8     8   5007 use Treex::PML::Schema::Decl;
  8         29  
  8         343  
32 8     8   3928 use Treex::PML::Schema::Root;
  8         30  
  8         278  
33 8     8   3861 use Treex::PML::Schema::Template;
  8         29  
  8         277  
34 8     8   3973 use Treex::PML::Schema::Derive;
  8         31  
  8         336  
35 8     8   4078 use Treex::PML::Schema::Copy;
  8         29  
  8         323  
36 8     8   4245 use Treex::PML::Schema::Import;
  8         36  
  8         327  
37 8     8   9781 use Treex::PML::Schema::Type;
  8         30  
  8         364  
38 8     8   4229 use Treex::PML::Schema::Struct;
  8         33  
  8         410  
39 8     8   4246 use Treex::PML::Schema::Container;
  8         33  
  8         409  
40 8     8   4186 use Treex::PML::Schema::Seq;
  8         31  
  8         326  
41 8     8   3823 use Treex::PML::Schema::List;
  8         34  
  8         309  
42 8     8   4183 use Treex::PML::Schema::Alt;
  8         31  
  8         294  
43 8     8   3756 use Treex::PML::Schema::Choice;
  8         27  
  8         275  
44 8     8   4599 use Treex::PML::Schema::CDATA;
  8         42  
  8         1101  
45 8     8   4996 use Treex::PML::Schema::Constant;
  8         36  
  8         329  
46 8     8   4179 use Treex::PML::Schema::Member;
  8         34  
  8         298  
47 8     8   3999 use Treex::PML::Schema::Element;
  8         28  
  8         312  
48 8     8   4059 use Treex::PML::Schema::Attribute;
  8         28  
  8         340  
49 8     8   4281 use Treex::PML::Schema::Reader;
  8         31  
  8         366  
50 8     8   5733 use Treex::PML::IO;
  8         53  
  8         762  
51 8     8   5785 use XML::Writer;
  8         73445  
  8         393  
52              
53 8     8   80 use base qw(Treex::PML::Schema::Template);
  8         19  
  8         1404  
54              
55 8     8   62 use Scalar::Util qw(weaken isweak);
  8         24  
  8         10310  
56             require Treex::PML;
57              
58             =head1 NAME
59              
60             Treex::PML::Schema - Perl implements a PML schema.
61              
62             =head2 DESCRIPTION
63              
64             This class implements PML schemas. PML schema consists of a set of
65             type declarations of several kinds, represented by objects inheriting
66             from a common base class C.
67              
68             =head2 INHERITANCE
69              
70             This class inherits from L.
71              
72             =head3 Attribute Paths
73              
74             Some methods use so called 'attribute paths' to navigate through
75             nested and referenced type declarations. An attribute path is a
76             '/'-separated sequence of steps, where step can be one of the
77             following:
78              
79             =over 3
80              
81             =item CI
82              
83             '!' followed by name of a named type (this step can only occur
84             as the very first step
85              
86             =item I
87              
88             name (of a member of a structure, element of a sequence or attribute
89             of a container), specifying the type declaration of the specified
90             named component
91              
92             =item C<#content>
93              
94             the string '#content', specifying the content type declaration
95             of a container
96              
97             =item C
98              
99             specifying the type declaration of a list
100              
101             =item C
102              
103             specifying the type declaration of an alt
104              
105             =item C<[>IC<]>
106              
107             where I is a decimal number (ignored) are an equivalent of LM or AM
108              
109             =back
110              
111             Steps of the form LM, AM, and [NNN] (except when occuring at the end
112             of an attribute path) may be omitted.
113              
114             =head2 EXPORT
115              
116             This module exports constants for declaration types.
117              
118             =head2 EXPORT TAGS
119              
120             =over 3
121              
122             =item :constants
123              
124             Export constant symbols (exported by default, too).
125              
126             =back
127              
128             =head2 CONSTANTS
129              
130             See Treex::PML::Schema::Constants.
131              
132             =cut
133              
134             =head1 METHODS
135              
136             =over 3
137              
138             =item Treex::PML::Schema->new ({ option => value, ... })
139              
140             NOTE: Don't call this constructor directly, use Treex::PML::Factory->createPMLSchema() instead!
141              
142             Parses an XML representation of a PML Schema
143             from a string, filehandle, local file, or URL,
144             processing the modular instructions as described in
145              
146             L
147              
148             and returns the corresponding C object.
149              
150             One of the following options must be given:
151              
152             =over 5
153              
154             =item C
155              
156             a XML string to parse
157              
158             =item C
159              
160             a file name or URL
161              
162             =item C
163              
164             a file-handle (IO::File, IO::Pipe, etc.) open for reading
165              
166             =back
167              
168             The following options are optional:
169              
170             =over 5
171              
172             =item C
173              
174             base URL for referred schemas (usefull when parsing from a file-handle or a string)
175              
176             =item C
177              
178             if this option is used with a true value, the parser will attempt to
179             locate referred schemas also in L resource paths.
180              
181             =item C, C, C
182              
183             constraints to the revision number of the schema.
184              
185             =item C
186              
187             if this option is used with a true value, the parser will validate the
188             schema on the fly using a RelaxNG grammar given using the
189             C parameter; if C is not given, the
190             file 'pml_schema_inline.rng' searched for in L resource paths
191             is assumed.
192              
193             =item C
194              
195             a particular RelaxNG grammar to validate against. The value may be an
196             URL or filename for the grammar in the RelaxNG XML format, or a
197             XML::LibXML::RelaxNG object representation. The compact format is not
198             supported.
199              
200             =back
201              
202             =cut
203              
204 0         0 BEGIN{
205             my %parse_opts = (
206             KeyAttr => {
207             "member" => "name",
208             "attribute" => "name",
209             "element" => "name",
210             "type" => "name",
211             "template" => "name",
212             "derive" => "name",
213             "let" => "param",
214             "param" => "name",
215             },
216             TextOnly => {
217             description => 'content',
218             revision => 'content',
219             value => 'content',
220             delete => 'content',
221             constant => 'value',
222             },
223             Stringify => {
224             description => 'content',
225             revision => 'content',
226             value => 'content',
227             delete => 'content',
228             },
229             Solitary => {
230 8     8   162 map { $_ => 1 }
  88         57462  
231             qw(description revision root cdata structure container sequence constant list alt choice)
232             },
233             Bless => {
234             member => 'Treex::PML::Schema::Member',
235             attribute => 'Treex::PML::Schema::Attribute',
236             element => 'Treex::PML::Schema::Element',
237             type => 'Treex::PML::Schema::Type',
238             root => 'Treex::PML::Schema::Root',
239             structure => 'Treex::PML::Schema::Struct',
240             container => 'Treex::PML::Schema::Container',
241             sequence => 'Treex::PML::Schema::Seq',
242             list => 'Treex::PML::Schema::List',
243             alt => 'Treex::PML::Schema::Alt',
244             cdata => 'Treex::PML::Schema::CDATA',
245             constant => 'Treex::PML::Schema::Constant',
246             choice => 'Treex::PML::Schema::Choice',
247             template => 'Treex::PML::Schema::Template',
248             copy => 'Treex::PML::Schema::Copy',
249             import => 'Treex::PML::Schema::Import',
250             derive => 'Treex::PML::Schema::Derive',
251             '*' => 'Treex::PML::Schema::XMLNode',
252             },
253             DefaultNs => PML_SCHEMA_NS,
254             );
255              
256             sub new {
257 66     66 1 433 my ($class,$opts, $more_opts)=@_;
258 66 50       228 if (!ref $opts) {
259             # compatibility with older API
260 0   0     0 $more_opts ||= {};
261 0         0 $opts = { %$more_opts, string => $opts };
262             }
263              
264 66         200 my $file = $opts->{filename};
265              
266 66         168 my $base = $opts->{base_url};
267 66 100 66     389 if (defined $base and length $base) {
    100          
268 16         135 $file = Treex::PML::ResolvePath($base,$file,$opts->{use_resources});
269             } elsif ($opts->{use_resources}) {
270 27         102 $file = Treex::PML::FindInResources($file);
271             }
272 66         260 my $schema;
273             my $revision_opts = {
274 66         168 map { $_ => delete($opts->{$_}) }
  264         960  
275             qw(revision_error revision minimal_revision maximal_revision)
276             };
277 66 50 33     497 if (defined($file) and ref($schema = $opts->{schemas}{$file})) {
278 0 0       0 print STDERR "schema $file already hashed\n" if $Treex::PML::Debug;
279 0         0 $schema->check_revision($revision_opts);
280 0         0 return $schema;
281             }
282 66         940 my $parse_opts = {%parse_opts,%$opts};
283 66         257 $parse_opts->{Bless}{pml_schema}=$class;
284 66 100 100     607 $parse_opts->{URL} = (ref $file && $file->isa('Treex::PML::Resource::URI')) ? $file->file : $file;
285              
286 66         2139 my $pml_reader = Treex::PML::Schema::Reader->new($parse_opts);
287 66         329 my $reader = $pml_reader->reader;
288 66         135 my $version;
289 66         128 eval {
290 66 50       4664 unless ( $reader->nextElement('pml_schema', PML_SCHEMA_NS)==1 ) {
291 0         0 die "Not a PML schema: $file!\n";
292             }
293 66         610 $version = $reader->getAttribute('version');
294 66         251 $reader->moveToElement;
295 66         281 $schema = $pml_reader->parse_element();
296             };
297 66 50       260 if ($@) {
298 0         0 die "Treex::PML::Schema::Reader error while parsing: $file near line ".$reader->lineNumber."\n$@\n";
299 0         0 return;
300             }
301 66 50 33     540 if (defined $version and length $version) {
302 66 50       308 unless (cmp_revisions($version,PML_VERSION_SUPPORTED)<=0) {
303 0         0 die "Unsupported version of PML schema '$file': this module supports versions up to ".PML_VERSION_SUPPORTED."\n";
304             }
305             } else {
306 0         0 warn "WARNING: PML schema '$file' does not specify version! Assuming ".PML_VERSION_SUPPORTED."\n";
307             }
308 66         357 $schema->check_revision($revision_opts);
309 66         247 $schema->{-VERSION}=$Treex::PML::Schema::VERSION;
310 66         347 return $schema;
311             }
312             } # BEGIN
313              
314              
315             =item Treex::PML::Schema->readFrom (filename,opts)
316              
317             An obsolete alias for Treex::PML::Schema->new({%$opts, filename=>$filename}).
318              
319             =cut
320              
321             sub readFrom {
322 0     0 1 0 my ($self,$file,$opts)=@_;
323 0         0 return $self->new({%$opts, filename=>$file});
324             }
325              
326             =item $schema->write ({option => value})
327              
328             This method serializes the Treex::PML::Schema object to XML. See Treex::PML::Schema::XMLNode->write for implementation.
329              
330             IMPORTANT: The resulting schema is simplified, that is all modular instructions
331             are processed and removed from it, see L
332              
333             One of the following options must be given:
334              
335             =over 5
336              
337             =item C
338              
339             a scalar reference to which the XML is to be stored as a string
340              
341             =item C
342              
343             a file name
344              
345             =item C
346              
347             a file-handle (IO::File, IO::Pipe, etc.) open for writing
348              
349             =back
350              
351             One of the following options are optional:
352              
353             =over 5
354              
355             =item C
356              
357             if this option is used with a true value, the writer will not attempt
358             to create backup (tilda) files when overwriting an existing file.
359              
360             =item C
361              
362             if this option is used with a true value, the writer will not add
363             additional newlines and indentatin white-space to the result XML.
364              
365             =back
366              
367             =cut
368              
369             # for implementation see XMLNode.pm
370              
371              
372             =item $schema->get_url ()
373              
374             Return location of the PML schema file.
375              
376             =cut
377              
378 1     1 1 6 sub get_url { return $_[0]->{URL}; }
379              
380             =item $schema->set_url ($URI)
381              
382             Set location of the PML schema file.
383              
384             =cut
385              
386 22     22 1 5596 sub set_url { return $_[0]->{URL} = Treex::PML::IO::make_URI($_[1]) }
387              
388              
389             =item $schema->get_pml_version ()
390              
391             Return PML version the schema conforms to.
392              
393             =cut
394              
395 0     0 1 0 sub get_pml_version { return $_[0]->{version}; }
396              
397              
398             =item $schema->get_revision ()
399              
400             Return PML schema revision.
401              
402             =cut
403              
404 0     0 1 0 sub get_revision { return $_[0]->{revision}; }
405              
406             =item $schema->get_description ()
407              
408             Return PML schema description.
409              
410             =cut
411              
412 0     0 1 0 sub get_description { return $_[0]->{description}; }
413              
414             =item $schema->get_root_decl ()
415              
416             Return the root type declaration (see C).
417              
418             =cut
419              
420 150     150 1 8282 sub get_root_decl { return $_[0]->{root}; }
421              
422             =item $schema->get_root_type ()
423              
424             Like $schema->get_root_decl->get_content_decl.
425              
426             =cut
427              
428             sub get_root_type {
429 0     0 1 0 my ($self,$name) = @_;
430 0         0 return $self->{root}->get_content_decl;
431             }
432             *get_root_type_obj = \&get_root_type;
433              
434              
435 0     0   0 sub _internal_api_version { return $_[0]->{'-api_version'} }
436              
437             =item $decl->get_decl_type ()
438              
439             Return the constant PML_SCHEMA_DECL (for compatibility with the Treex::PML::Schema::Decl interface).
440              
441             =item $decl->get_decl_type_str ()
442              
443             Return the string 'schema' (for compatibility with the Treex::PML::Schema::Decl interface).
444              
445             =cut
446              
447 196     196 1 809 sub get_decl_type { return(PML_SCHEMA_DECL); }
448 0     0 1 0 sub get_decl_type_str { return('schema'); }
449              
450             =item $schema->get_root_name ()
451              
452             Return name of the root element for PML instance.
453              
454             =cut
455              
456             sub get_root_name {
457 1     1 1 4 my $root = $_[0]->{root};
458 1 50       12 return $root ? $root->{name} : undef;
459             }
460              
461             =item $schema->get_type_names ()
462              
463             Return names of all named type declarations.
464              
465             =cut
466              
467             sub get_type_names {
468 0     0 1 0 my $types = $_[0]->{type};
469 0 0       0 return $types ? keys(%$types) : ();
470             }
471              
472             =item $schema->get_named_references ()
473              
474             This method returns a list of HASHrefs containing
475             information about a named references to PML instances
476             (each hash will currently have the keys 'name' and 'readas').
477              
478             =cut
479              
480             sub get_named_references {
481 106     106 1 319 my ($self, $name) = @_;
482 106 100       448 if ($self->{reference}) {
483 56         138 return map { my $r=$_; my $h = { map { ($_=>$r->{$_}) } @{$r->{'-attributes'}} }; $h }
  56         108  
  112         608  
  56         211  
  56         304  
484 52         135 @{$self->{reference}} ;
  52         216  
485             }
486 54         168 return;
487             }
488              
489             =item $schema->get_named_reference_info (name)
490              
491             This method retrieves information about a specific named instance
492             reference as a hash (currently with keys 'name' and 'readas').
493              
494             =cut
495              
496             sub get_named_reference_info {
497 0     0 1 0 my ($self, $name) = @_;
498 0 0       0 if ($self->{reference}) {
499 0         0 return { map { my $r=$_; map { $_=>$r->{$_} } @{$r->{'-attributes'}} }
  0         0  
  0         0  
  0         0  
500 0 0       0 grep { defined($_->{name}) and $_->{name} eq $name } @{$self->{reference}} };
  0         0  
  0         0  
501             }
502 0         0 return;
503             }
504              
505             =item Treex::PML::Schema::cmp_revisions($A, $B)
506              
507             This function compares two schema revision strings according to the
508             ruls described in the PML specification. Returns -1 if revision $A
509             precedes revision $B, 0 if the revisions are equal (equivalent), and 1
510             if revision $A follows revision $B.
511              
512             =cut
513              
514             # compare two revision numbers
515             sub cmp_revisions {
516 77     77 1 236 my ($my_revision,$revision)=@_;
517 77         431 my @my_revision = split(/\./,$my_revision);
518 77         308 my @revision = split(/\./,$revision);
519 77         200 my $cmp=0;
520 77   66     483 while ($cmp==0 and (@my_revision or @revision)) {
      66        
521 156         621 $cmp = (shift(@my_revision) <=> shift(@revision));
522             }
523 77         386 return $cmp;
524             }
525              
526             # compare schema revision number with a given revision number
527             sub _match_revision {
528 11     11   37 my ($self,$revision)=@_;
529 11   50     40 my $my_revision=$self->{revision} || 0;
530 11   50     63 return cmp_revisions($self->{revision} || 0, $revision);
531             }
532              
533             # for internal use only
534             sub _resolve_type {
535 9     9   23 my ($self,$type)=@_;
536 9 50       21 return $type unless ref($type);
537 9         21 my $ref = $type->{type};
538 9 50       26 if ($ref) {
539 9         20 my $rtype = $self->{type}{$ref};
540 9 50       23 if (ref($rtype)) {
541 9         25 return $rtype;
542             } else {
543             # couldn't resolve
544 0         0 warn "No declaration for type '$ref' in schema '".$self->get_url."'\n";
545 0         0 return $type->{type};
546             }
547             } else {
548 0         0 return $type;
549             }
550             }
551              
552             =item $schema->for_each_decl (sub{...})
553              
554             This method traverses all nested declarations and sub-declarations and
555             calls a given subroutine passing the sub-declaration object as a
556             parameter.
557              
558             =cut
559              
560             sub for_each_decl {
561 150     150 1 499 my ($self,$sub) = @_;
562 150 100       627 if (ref $self->{root}) {
563 149         917 $self->{root}->for_each_decl($sub);
564             }
565 150         360 for my $d (qw(template type)) {
566 300 100       817 if (ref $self->{$d}) {
567 136         228 foreach (values %{$self->{$d}}) {
  136         633  
568 942         2379 $_->for_each_decl($sub);
569             }
570             }
571             }
572             }
573              
574             # traverse type data structure and collect types referred via
575             # type="type-name" declarations in the refferred hash
576             sub _get_referred_types {
577 22     22   84 my ($self,$type,$referred) = @_;
578             $type->for_each_decl(
579             sub {
580 93     93   153 my ($type)=@_;
581 93 50       184 return unless ref($type);
582 93 50 66     315 if (defined($type->{type}) and length($type->{type}) and !exists($referred->{$type->{type}})) {
      66        
583             # this type declaration reffers to another type - get it
584 9         27 my $resolved = $self->_resolve_type($type);
585 9         32 $referred->{$type->{type}} = $resolved;
586 9 50       41 $self->_get_referred_types($resolved,$referred) if ref $resolved;
587             }
588 22         157 });
589             }
590              
591             # import given named type and all named types it requires
592             # from src_schema into the current schema (self)
593             sub _import_type {
594 0     0   0 my ($self,$src_schema, $name) = @_;
595 0 0       0 unless (exists $src_schema->{type}{$name}) {
596 0         0 croak "Cannot import type '$name' from '$src_schema->{URL}' to '$self->{URL}': type not declared in the source schema\n";
597             }
598 0         0 my $type = $src_schema->{type}{$name};
599 0         0 my %referred = ($name => $type);
600 0         0 $src_schema->_get_referred_types($type,\%referred);
601 0         0 foreach my $n (keys %referred) {
602 0 0       0 unless (exists $self->{type}{$n}) {
603 0         0 my $parent = $referred{$n}->{-parent};
604 0 0       0 if (defined $parent) {
605 0         0 $self->{type}{$n}=Treex::PML::CloneValue($referred{$n},[$parent], [$self]);
606             } else {
607 0         0 $self->{type}{$n}=Treex::PML::CloneValue($referred{$n});
608             }
609             } else {
610            
611             }
612             }
613             }
614              
615             sub __fmt {
616 0     0   0 my ($string,$fmt) =@_;
617 0         0 $string =~ s{%(.)}{ $1 eq "%" ? "%" :
618 0 0       0 exists($fmt->{$1}) ? $fmt->{$1} : "%$1" }eg;
    0          
619 0         0 return $string;
620             }
621              
622             =item $schema->check_revision({ option=>value })
623              
624             Check that schema revision satisfies given constraints. The following options are suported:
625              
626             C: exact revision number to match
627              
628             C: minimal revision number to match
629              
630             C: maximal revision number to match
631              
632             C: an optional error message format string with %f
633             mark for the schema filename or URL and %e for the error
634             string. Defaults to 'Error: wrong schema revision of %f: %e';
635              
636             =cut
637              
638             sub check_revision {
639 66     66 1 178 my ($self,$opts)=@_;
640              
641 66   100     364 my $error = $opts->{revision_error} || 'Error: wrong schema revision of %f: %e';
642 66 50 66     304 if ($opts->{revision} and
643             $self->_match_revision($opts->{revision})!=0) {
644             croak(__fmt($error, { 'e' => "required $opts->{revision}, got $self->{revision}",
645 0         0 'f' => $self->{URL}}));
646             } else {
647 66 50 66     300 if ($opts->{minimal_revision} and
648             $self->_match_revision($opts->{minimal_revision})<0) {
649             croak(__fmt($error, { 'e' => "required at least $opts->{minimal_revision}, got $self->{revision}",
650 0         0 'f' => $self->{URL}}));
651             }
652 66 50 66     287 if ($opts->{maximal_revision} and
653             $self->_match_revision($opts->{maximal_revision})>0) {
654             croak(__fmt($error, { 'e' => "required at most $opts->{maximal_revision}, got $self->{revision}",
655 0         0 'f' => $self->{URL}}));
656             }
657             }
658             }
659              
660             =item $schema->convert_from_hash
661              
662             Compatibility method building the schema object from a nested hash
663             structure created by XML::Simple which was used in older
664             implementations. This is useful for upgrading objects stored in old
665             binary dumps.
666              
667             =cut
668              
669             sub convert_from_hash {
670 2     2 1 4 my $class = shift;
671 2         3 my $schema_hash;
672 2 50       5 if (ref($class)) {
673 0         0 $schema_hash = $class;
674 0         0 $class = ref( $schema_hash );
675             } else {
676 2         3 $schema_hash = shift;
677 2         4 bless $schema_hash,$class;
678             }
679 2   50     52 $schema_hash->{-api_version} ||= '2.0';
680 2         6 $schema_hash->{'-xml_name'}='pml_schema';
681 2         8 $schema_hash->{-attributes}=[qw(xmlns version)];
682 2 50       7 if (ref $schema_hash->{reference}) {
683 0         0 for my $ref (@{$schema_hash->{reference}}) {
  0         0  
684 0         0 $ref->{'-xml_name'}='reference';
685 0         0 $ref->{'-attributes'}=[qw(name readas)];
686 0         0 bless $ref,'Treex::PML::Schema::XMLNode';
687 0         0 weaken($ref->{-parent}=$schema_hash);
688             }
689             }
690 2         5 my $root = $schema_hash->{root};
691 2 50       5 if (defined($root)) {
692 2         10 bless $root, 'Treex::PML::Schema::Root';
693 2         11 weaken($root->{-parent}=$schema_hash);
694 2         3 $root->{'-xml_name'}='root';
695 2         5 $root->{'-attributes'}=['name','type'];
696 2         13 Treex::PML::Schema::Decl->convert_from_hash($root,
697             $schema_hash,
698             undef # path = '' for root
699             );
700             }
701 2         3 my $types = $schema_hash->{type};
702 2 50       5 if ($types) {
703 2         3 my ($name, $decl);
704 2         6 while (($name, $decl) = each %$types) {
705 2         8 bless $decl, 'Treex::PML::Schema::Type';
706 2         7 $decl->{'-xml_name'}='type';
707 2         4 $decl->{'-attributes'}=['name'];
708 2         6 Treex::PML::Schema::Decl->convert_from_hash($decl,
709             $schema_hash,
710             '!'.$name
711             );
712             }
713             }
714 2         7 return $schema_hash;
715             }
716              
717              
718             =item $schema->find_type_by_path (attribute-path,noresolve,decl)
719              
720             Locate a declaration specified by C starting from
721             declaration C. If C is undefined the root type declaration
722             is used. (Note that attribute paths starting with '/' are always
723             evaluated startng from the root declaration and paths starting with
724             '!' followed by a name of a named type are evaluated starting from
725             that type.) All references to named types are transparently resolved
726             in each step.
727              
728             The caller should pass a true value in C to enforce Member,
729             Attribute, Element, Type, or Root declaration objects to be returned
730             rather than declarations of their content.
731              
732             Attribute path is a '/'-separated sequence of steps (member,
733             attribute, element names or strings matching [\d*]) which identifying
734             a certain nested type declaration. A step of the aforementioned form
735             [\d*] is match the content declaration of a List or Alt. Note however, that
736             named stepsdive into List or Alt declarations automatically, too.
737              
738             =cut
739              
740             sub find_type_by_path {
741 99     99 1 1036 my ($schema, $path, $noresolve, $decl) = @_;
742 99 50 33     417 if (defined($path) and length($path)) {
    0          
743 99 100 33     618 if ($path=~s{^!([^/]+)/?}{}) {
    50          
744 2         10 $decl = $schema->get_type_by_name($1);
745 2 50       8 if (defined $decl) {
746 2         24 $decl = $decl->get_content_decl;
747             } else {
748 0         0 return;
749             }
750             } elsif ($path=~s{^/}{} or !$decl) {
751 97         313 $decl = $schema->get_root_decl->get_content_decl;
752             }
753 99         639 for my $step (split /\//, $path,-1) {
754 495 50       1120 next if $step eq '.';
755 495 50       995 if (ref($decl)) {
756 495         1498 my $decl_is = $decl->get_decl_type;
757 495 100 66     2922 if ($decl_is == PML_ATTRIBUTE_DECL ||
      100        
      66        
758             $decl_is == PML_MEMBER_DECL ||
759             $decl_is == PML_ELEMENT_DECL ||
760             $decl_is == PML_TYPE_DECL ) {
761 139         488 $decl = $decl->get_knit_content_decl;
762 139 50 33     587 next unless defined($step) and length($step);
763 139         331 redo;
764             }
765 356 100 66     1230 if ($decl_is == PML_LIST_DECL ||
766             $decl_is == PML_ALT_DECL ) {
767 54         184 $decl = $decl->get_knit_content_decl;
768 54 50 33     399 next if ($step =~ /^\[[-+]?\d+\]$/ or
    50 0        
      33        
769             (($decl_is == PML_LIST_DECL) ?
770             ($step eq 'LM' or $step eq '[LIST]')
771             :($step eq 'AM' or $step eq '[ALT]')));
772 0         0 redo;
773             }
774 302 100       876 if ($decl_is == PML_STRUCTURE_DECL) {
    100          
    50          
    0          
775 42         157 my $member = $decl->get_member_by_name($step);
776 42 50       113 if ($member) {
777 42         96 $decl = $member;
778             } else {
779 0         0 $member = $decl->get_member_by_name($step.'.rf');
780 0 0       0 return unless $member;
781 0 0       0 if ($member->get_knit_name eq $step) {
782 0         0 $decl = $member;
783             } else {
784 0         0 return;
785             }
786             }
787             } elsif ($decl_is == PML_CONTAINER_DECL) {
788 97 50       238 if ($step eq '#content') {
789 97         250 $decl = $decl->get_content_decl;
790 97         240 next;
791             }
792 0         0 my $attr = $decl->get_attribute_by_name($step);
793 0         0 $decl = $attr;
794             } elsif ($decl_is == PML_SEQUENCE_DECL) {
795 163         360 $step =~ s/^\[\d+\]//; # name must follow
796 163         457 $decl = $decl->get_element_by_name($step);
797             } elsif ($decl_is == PML_ROOT_DECL) {
798 0 0 0     0 if (!(defined($step) and length($step)) or ($step eq $decl->get_name)) {
      0        
799 0         0 $decl = $decl->get_content_decl;
800             } else {
801 0         0 return;
802             }
803             } else {
804 0         0 return;
805             }
806             } else {
807             # warn "Can't follow type path '$path' (step '$step')\n";
808 0         0 return(undef); # ERROR
809             }
810             }
811             } elsif (!$decl) {
812 0   0     0 $decl ||= $schema->get_root_decl->get_content_decl;
813             }
814 99   33     535 my $decl_is = $decl && $decl->get_decl_type;
815 99 100 66     1090 return $noresolve ? $decl :
    50          
816             $decl && (
817             $decl_is == PML_ATTRIBUTE_DECL ||
818             $decl_is == PML_MEMBER_DECL ||
819             $decl_is == PML_ELEMENT_DECL ||
820             $decl_is == PML_TYPE_DECL ||
821             $decl_is == PML_ROOT_DECL
822             )
823             ? ($decl->get_knit_content_decl) : $decl;
824             }
825              
826              
827             =item $schema->find_types_by_role (role,start_decls)
828              
829             Return a list of declarations (objects derived from Treex::PML::Schema::Decl)
830             that have role equal to C.
831              
832             If C is specified, it must be an ARRAY reference of
833             declarations; in that case, only declarations nested below the listed
834             ones are considered.
835              
836             =cut
837              
838             sub find_types_by_role {
839 0     0 1 0 my ($self,$role,$start_decls)=@_;
840 0         0 my @decls;
841 0 0   0   0 my $sub = sub { push @decls, $_[0] if $_[0]->{role} eq $role };
  0         0  
842 0 0       0 if (defined($start_decls)) {
843 0         0 for (@$start_decls) {
844 0         0 $_->for_each_decl($sub);
845             }
846             } else {
847 0         0 $self->for_each_decl($sub);
848             }
849 0         0 return @decls;
850             }
851              
852             =item $schema->find_role (role,start_decl,opts)
853              
854             WARINING: this function can be very slow, esp. if the type
855             declarations are recursive.
856              
857             Return a list of attribute paths leading to nested type declarations
858             of C with role equal to C.
859              
860             This is equivalent to
861              
862             $schema->find_decl($decl,sub{ $_[0]->{role} eq $role },$opts);
863              
864             Please, see the documentation for C for more information.
865              
866             =cut
867              
868             sub find_role {
869 22     22 1 82 my ($self, $role, $decl, $opts)=@_;
870 22 50 33     136 if (!$decl and wantarray()) {
871 22   100     194 $self->{-ROLE_CACHE}{$role} ||= [ $self->_find_role($role,$decl,$opts) ];
872 22         103 return @{$self->{-ROLE_CACHE}{$role}};
  22         217  
873             }
874 0         0 return $self->_find_role($role,$decl,$opts);
875             }
876              
877             sub _find_role {
878 12     12   34 my ($self, $role, $decl, $opts)=@_;
879 12 100   1826   107 return $self->find_decl(sub{ defined($_[0]->{role}) and $_[0]->{role} eq $role },$decl,$opts);
  1826         6275  
880             }
881              
882             =item $schema->find_decl (callback,start_decl,opts)
883              
884             WARINING: this function can be very slow, esp. if the type
885             declarations are recursive.
886              
887             Return a list of attribute paths leading to nested type declarations
888             of C for which a given callback returns a true value. The tested
889             type declaration is passed to the callback as the first (and only)
890             argument.
891              
892             If C is specified, it must be an ARRAY reference of
893             declarations; in that case, only declarations nested or referred to
894             from the listed ones are considered.
895              
896             In array context return all matching nested declarations are
897             returned. In scalar context only the first one is returned (with early
898             stopping).
899              
900             The last argument C can be used to pass some flags to the
901             algorithm. Currently only the flag C is available. If
902             true, then the function never recurses into content declaration of
903             declarations with the role #CHILDNODES.
904              
905             =cut
906              
907             sub find_decl {
908 12     12 1 35 my ($self, $sub, $decl, $opts)=@_;
909 12   33     85 $decl ||= $self->{root};
910 12         30 my $first = not(wantarray);
911 12         95 my @res = grep { defined } $self->_find($decl,$sub,$first,{},$opts);
  49         119  
912 12 50       188 return $first ? $res[0] : @res;
913             }
914              
915              
916             sub _find {
917 1859     1859   4250 my ($self, $decl, $test, $first, $cache, $opts)=@_;
918              
919 1859         2784 my @result = ();
920              
921 1859 50       4209 return () unless ref $decl;
922              
923              
924 1859 100       5445 if ($cache->{'#RECURSE'}{ $decl }) {
925             return ()
926 33         120 }
927 1826         5855 local $cache->{'#RECURSE'}{ $decl } = 1;
928              
929 1826 0 66     6581 if ( ref $opts and $opts->{no_childnodes} and defined($decl->{role}) and $decl->{role} eq '#CHILDNODES') {
      33        
      33        
930 0         0 return ();
931             }
932              
933 1826 100       3888 if ( $test->($decl) ) {
934 76 50       170 if ($first) {
935 0         0 return '';
936             } else {
937 76         180 push @result, '';
938             }
939             }
940 1826         5170 my $type_ref = $decl->get_type_ref;
941 1826         5499 my $decl_is = $decl->get_decl_type;
942 1826 50       3888 my $seq_bracket = $opts->{with_Seq_brackets} ? '[0]' : '';
943              
944 1826 100       3742 if ($type_ref) {
945 367         934 my $cached = $cache->{ $type_ref };
946 367 100       876 unless ($cached) {
947 109         312 $cached = $cache->{ $type_ref } = [ $self->_find( $self->get_type_by_name($type_ref),
948             $test, $first, $cache, $opts ) ];
949             }
950 367 50       1108 if ($decl_is == PML_CONTAINER_DECL) {
    100          
    100          
951 0 0 0     0 push @result, map { (defined($_) and length($_)) ? '#content/'.$_ : '#content' } @$cached;
  0         0  
952             } elsif ($decl_is == PML_LIST_DECL) {
953 81 100 66     191 push @result, map { (defined($_) and length($_)) ? 'LM/'.$_ : 'LM' } @$cached;
  34         202  
954             } elsif ($decl_is == PML_ALT_DECL) {
955 24 0 0     60 push @result, map { (defined($_) and length($_)) ? 'AM/'.$_ : 'AM' } @$cached;
  0         0  
956             } else {
957 262         507 push @result, @$cached;
958             }
959 367 50 33     1038 return $result[0] if ($first and @result);
960             }
961 1826 100 100     13235 if ($decl_is == PML_STRUCTURE_DECL) {
    100 100        
    100 100        
    100 100        
    100          
    100          
962 132         378 foreach my $member ($decl->get_members) {
963 624 50 33     1715 my @res = map { (defined($_) and length($_)) ? $member->get_name.'/'.$_ : $member->get_name }
  37         237  
964             $self->_find($member, $test, $first, $cache, $opts);
965 624 50 33     1574 return $res[0] if ($first and @res);
966 624         1398 push @result,@res;
967             }
968             } elsif ($decl_is == PML_CONTAINER_DECL) {
969 48         135 my $cdecl = $decl->get_content_decl;
970 48         155 foreach my $attr ($decl->get_attributes) {
971 28 0 0     164 my @res = map { (defined($_) and length($_)) ? $attr->get_name.'/'.$_ : $attr->get_name }
  0         0  
972             $self->_find($attr, $test, $first, $cache, $opts);
973 28 50 33     94 return $res[0] if ($first and @res);
974 28         95 push @result,@res;
975             }
976 48 50       142 if ($cdecl) {
977 48 50 33     135 push @result, map { (defined($_) and length($_)) ? '#content/'.$_ : '#content' }
  51         311  
978             $self->_find($cdecl, $test, $first, $cache, $opts);
979 48 50 33     149 return $result[0] if ($first and @result);
980             }
981             } elsif ($decl_is == PML_SEQUENCE_DECL) {
982 34         141 foreach my $element ($decl->get_elements) {
983 55 100 66     167 my @res = map { (defined($_) and length($_)) ? $element->get_name.$seq_bracket.'/'.$_ : $element->get_name.$seq_bracket }
  97         487  
984             $self->_find($element, $test, $first, $cache, $opts);
985 55 50 33     177 return $res[0] if ($first and @res);
986 55         164 push @result,@res;
987             }
988             } elsif ($decl_is == PML_LIST_DECL) {
989 134 100 66     421 push @result, map { (defined($_) and length($_)) ? 'LM/'.$_ : 'LM' }
  34         179  
990             $self->_find($decl->get_content_decl, $test, $first, $cache, $opts);
991             } elsif ($decl_is == PML_ALT_DECL) {
992 28 0 0     118 push @result, map { (defined($_) and length($_)) ? 'AM/'.$_ : 'AM' }
  0         0  
993             $self->_find($decl->get_content_decl, $test, $first, $cache, $opts);
994             } elsif ($decl_is == PML_TYPE_DECL ||
995             $decl_is == PML_ROOT_DECL ||
996             $decl_is == PML_ATTRIBUTE_DECL ||
997             $decl_is == PML_MEMBER_DECL ||
998             $decl_is == PML_ELEMENT_DECL ) {
999 821         2310 push @result, $self->_find($decl->get_content_decl, $test, $first, $cache, $opts);
1000             }
1001 1826         3501 my %uniq;
1002             return $first ? (@result ? $result[0] : ())
1003 1826 0       6441 : grep { !$uniq{$_} && ($uniq{$_}=1) } @result;
  633 100       3667  
    50          
1004             }
1005              
1006             =item $schema->node_types ()
1007              
1008             Return a list of all type declarations with the role C<#NODE>.
1009              
1010             =cut
1011              
1012             sub node_types {
1013 0     0 1 0 my ($self) = @_;
1014 0         0 my @result;
1015 0         0 return $self->find_types_by_role('#NODE');
1016             }
1017              
1018              
1019              
1020             =item $schema->get_type_by_name (name)
1021              
1022             Return the declaration of the named type with a given name (see
1023             C).
1024              
1025             =cut
1026              
1027             sub get_type_by_name {
1028 111     111 1 294 my ($self,$name) = @_;
1029 111         501 return $self->{type}{$name};
1030             }
1031             *get_type_by_name_obj = \&get_type_by_name;
1032              
1033              
1034             # OBSOLETE: for backward compatibility only
1035             sub type {
1036 0     0 1 0 my ($self,$decl)=@_;
1037 0 0       0 if (UNIVERSAL::DOES::does($decl,'Treex::PML::Schema::Decl')) {
1038 0         0 return $decl
1039             } else {
1040 0         0 return Treex::PML::Type->new($self,$decl);
1041             }
1042             }
1043              
1044             =item $schema->validate_object (object, type_decl, log, flags)
1045              
1046             Validates the data content of the given object against a specified
1047             type declaration. The type_decl argument must either be an object
1048             derived from the C class or the name of a named
1049             type.
1050              
1051             An array reference may be passed as the optional 3rd argument C
1052             to obtain a detailed report of all validation errors.
1053              
1054             The C argument can specify flags that influance the
1055             validation. The following constants can binary-OR'ed to obtain the
1056             fags:
1057              
1058             PML_VALIDATE_NO_TREES - do not validate nested data with roles
1059             #CHIDLNODES or #TREES and do not require that objects with the role
1060             #NODE implement the Treex::PML::Node role.
1061              
1062             PML_VALIDATE_NO_CHILDNODES - do not validate nested data with the
1063             role #CHIDLNODES.
1064              
1065             Returns: 1 if the content conforms, 0 otherwise.
1066              
1067             =cut
1068              
1069             sub validate_object { # (path, base_type)
1070 0     0 1 0 my ($schema, $object, $type,$log)=@_;
1071 0 0 0     0 if (defined $log and UNIVERSAL::isa($log,'ARRAY')) {
1072 0         0 croak "Treex::PML::Schema::validate_object: log must be an ARRAY reference";
1073             }
1074 0   0     0 $type ||= $schema->get_type_by_name($type);
1075 0 0       0 if (!ref($type)) {
1076 0         0 croak "Treex::PML::Schema::validate_object: Cannot determine data type";
1077             }
1078 0         0 return $type->validate_object($object,{log => $log});
1079             }
1080              
1081              
1082             =item $schema->validate_field (object, attr-path, type, log)
1083              
1084             This method is similar to C, but in this case the
1085             validation is restricted to the data substructure of C
1086             specified by the C argument.
1087              
1088             C is the type of C specified either by the name of a
1089             named type, or as a Treex::PML::Type, or a type declaration.
1090              
1091             An array reference may be passed as the optional 3rd argument C
1092             to obtain a detailed report of all validation errors.
1093              
1094             Returns: 1 if the content conforms, 0 otherwise.
1095              
1096             =cut
1097              
1098             sub validate_field {
1099 0     0 1 0 my ($schema, $object, $path, $type, $log) = @_;
1100 0 0 0     0 if (defined $log and UNIVERSAL::isa($log,'ARRAY')) {
1101 0         0 croak "Treex::PML::Schema::validate_field: log must be an ARRAY reference";
1102             }
1103 0 0       0 if (!ref($type)) {
1104 0         0 my $named_type = $schema->get_type_by_name($type);
1105 0 0       0 croak "Treex::PML::Schema::validate_field: Cannot find type '$type'"
1106             unless $named_type;
1107 0         0 $type = $named_type;
1108             }
1109 0 0 0     0 if (!(defined($path) and length($path))) {
1110 0         0 return $type->validate_object($object, { log => $log });
1111             }
1112 0         0 $type = $type->find($path);
1113 0 0       0 croak "Treex::PML::Schema::validate_field: Cannot determine data type for attribute-path '$path'" unless $type;
1114             return
1115 0         0 $type->validate_object(Treex::PML::Instance::get_data($object,$path),{ path => $path,
1116             log => $log
1117             });
1118             }
1119              
1120              
1121             =item $schema->get_paths_to_atoms (\@decls, \%opts)
1122              
1123             This method returns a list of all non-periodic canonical paths leading
1124             from given types to atomic values. Currently only the following options
1125             are supported:
1126              
1127             no_childnodes => $bool
1128              
1129             If true, the method does not descent to member types with the role
1130             #CHILDNODES.
1131              
1132             no_nodes => $bool
1133              
1134             If true, the method does not descent to member types with the role
1135             #NODE (except for the starting types).
1136              
1137             with_LM => $bool
1138              
1139             If true, the paths will include a LM step for each List type on the path.
1140              
1141             with_AM => $bool
1142              
1143             If true, the paths will include a AM step for each Alt type on the path.
1144              
1145             with_Seq_brackets => $bool
1146              
1147             If true, the paths will append a [0] after each step representing a sequence element
1148              
1149             =cut
1150              
1151             sub get_paths_to_atoms {
1152 0     0 1 0 my ($self,$types,$opts) = @_;
1153             # find node type
1154              
1155 0 0       0 unless (defined $types) {
1156 0         0 $types = [ $self->node_types ];
1157             }
1158 0   0     0 $opts||={};
1159 0         0 return $self->_get_paths_to_atoms($types,{},$opts);
1160             }
1161              
1162             sub _get_paths_to_atoms {
1163 0     0   0 my ($self,$types,$seen,$opts)=@_;
1164 0         0 my @result;
1165 0         0 my $no_children = $opts->{no_childnodes};
1166 0         0 my $no_nodes = $opts->{no_nodes};
1167 0         0 my $with_LM = $opts->{with_LM};
1168 0         0 my $with_AM = $opts->{with_AM};
1169 0         0 my $with_Seq_brackets = $opts->{with_Seq_brackets};
1170 0         0 foreach my $type (@$types) {
1171 0 0       0 next if $seen->{$type};
1172 0         0 my $decl_is = $type->get_decl_type;
1173 0 0 0     0 next if $no_children and $type->get_role eq '#CHILDNODES';
1174 0 0 0     0 if ($decl_is == PML_TYPE_DECL ||
      0        
      0        
      0        
      0        
      0        
      0        
      0        
1175             $decl_is == PML_ROOT_DECL ||
1176             $decl_is == PML_ATTRIBUTE_DECL ||
1177             $decl_is == PML_MEMBER_DECL ||
1178             $decl_is == PML_ELEMENT_DECL ||
1179             (!$with_LM && $decl_is == PML_LIST_DECL) ||
1180             (!$with_AM && $decl_is == PML_ALT_DECL)) {
1181 0         0 $type = $type->get_knit_content_decl;
1182 0 0 0     0 next if $no_nodes and $type->get_role eq '#NODE';
1183 0         0 redo;
1184             }
1185 0 0       0 next unless ref($type);
1186 0         0 my @members;
1187 0 0       0 if ($decl_is == PML_STRUCTURE_DECL) {
    0          
    0          
    0          
    0          
1188 0         0 @members = map { [$_,$_->get_knit_name] } $type->get_members;
  0         0  
1189             } elsif ($decl_is == PML_CONTAINER_DECL) {
1190 0         0 my $cdecl = $type->get_knit_content_decl;
1191 0 0       0 @members = ((map { [ $_, $_->get_name ] } $type->get_attributes),
  0         0  
1192             ($cdecl ? [$cdecl, '#content'] : ()));
1193             } elsif ($decl_is == PML_SEQUENCE_DECL) {
1194 0 0       0 if ($with_Seq_brackets) {
1195 0         0 @members = map { [ $_, $_->get_name.'[0]' ] } $type->get_elements;
  0         0  
1196             } else {
1197 0         0 @members = map { [ $_, $_->get_name ] } $type->get_elements;
  0         0  
1198             }
1199             } elsif ($decl_is == PML_LIST_DECL) {
1200 0         0 @members = [$type->get_knit_content_decl,'LM'];
1201             } elsif ($decl_is == PML_ALT_DECL) {
1202 0         0 @members = [$type->get_knit_content_decl,'AM'];
1203             } else {
1204 0         0 push @result, qq{};
1205             }
1206 0 0       0 if (@members) {
1207 0         0 for my $m (@members) {
1208 0         0 my ($mdecl,$name) = @$m;
1209 0         0 local $seen->{$type}=1;
1210 0 0 0     0 push @result, map { (defined($_) and length($_)) ? $name."/".$_ : $name }
  0         0  
1211             $self->_get_paths_to_atoms([$mdecl],$seen,$opts);
1212             }
1213             }
1214             }
1215 0         0 my %uniq;
1216 0 0       0 return grep { !$uniq{$_} && ($uniq{$_}=1) } @result;
  0         0  
1217             }
1218              
1219              
1220             =item $schema->attributes (decl...)
1221              
1222             This function tries to emulate the behavior of
1223             C<<< Treex::PML::FSFormat->attributes >>> to some extent.
1224              
1225             Return attribute paths to all atomic subtypes of given type
1226             declarations. If no type declaration objects are given, then types
1227             with role C<#NODE> are assumed. This function never descends to
1228             subtypes with role C<#CHILDNODES>.
1229              
1230             =cut
1231              
1232             sub attributes {
1233 0     0 1 0 my ($self,@types) = @_;
1234             # find node type
1235 0 0       0 return $self->get_paths_to_atoms(@types ? \@types : undef, { no_childnodes => 1 });
1236             }
1237              
1238              
1239              
1240             sub init {
1241 66     66 0 187 my ($schema,$opts)=@_;
1242 66         255 $schema->{URL} = $opts->{URL};
1243 66         245 $schema->{-api_version} = '2.0';
1244             }
1245              
1246              
1247             # these functions are used internally by the serializer
1248             sub serialize_exclude_keys {
1249 11     11 0 114 return qw(URL revision description);
1250             }
1251             sub serialize_get_children {
1252 11     11 0 33 my ($self,$opts)=@_;
1253 11         52 my @children = $self->SUPER::serialize_get_children($opts);
1254             return (
1255 22 100       147 (grep { defined($_->[1]) and length($_->[1]) } (
1256             ['revision',$self->{revision}],
1257             ['description',$self->{description}]
1258             )
1259             ),
1260 62         143 (grep { $_->[0] eq 'reference' } @children),
1261 62         135 (grep { $_->[0] eq 'root' } @children),
1262 11         98 (grep { $_->[0] !~ /^(?:root|reference)$/ } @children)
  62         213  
1263             );
1264             }
1265              
1266             =item $schema->post_process($options)
1267              
1268             Auxiliary method used internally by the PML Schema parser. It
1269             simplifies the schema and for each declaration object creates back
1270             references to its parent declaration and schema and pre-computes the
1271             type attribute path returned by $decl->get_decl_path().
1272              
1273             =cut
1274              
1275             sub post_process {
1276 66     66 1 199 my ($schema,$opts)=@_;
1277 66         499 $schema->simplify($opts);
1278             $schema->for_each_decl(sub{
1279 2326     2326   3216 my ($decl)=@_;
1280 2326         4742 weaken( $decl->{-schema} = $schema );
1281 2326         3396 my $parent = $decl->{-parent};
1282 2326         5849 my $decl_is = $decl->get_decl_type;
1283 2326 100 100     18651 if (
    100 100        
      100        
      100        
      100        
      100        
      100        
1284             $decl_is == PML_STRUCTURE_DECL ||
1285             $decl_is == PML_CONTAINER_DECL ||
1286             $decl_is == PML_SEQUENCE_DECL ||
1287             $decl_is == PML_LIST_DECL ||
1288             $decl_is == PML_ALT_DECL ||
1289             $decl_is == PML_CHOICE_DECL ||
1290             $decl_is == PML_CONSTANT_DECL ||
1291             $decl_is == PML_CDATA_DECL
1292             ) {
1293 1032         2223 my $parent_is = $parent->get_decl_type;
1294 1032 100 100     3779 if ($parent_is == PML_TYPE_DECL) {
    100 100        
    100 66        
    100          
    100          
    50          
1295 390         837 $decl->{-path} = '!'.$parent->get_name;
1296             } elsif ($parent_is == PML_ROOT_DECL) {
1297 28         114 $decl->{-path} = '';
1298             } elsif ($parent_is == PML_ATTRIBUTE_DECL ||
1299             $parent_is == PML_MEMBER_DECL ||
1300             $parent_is == PML_ELEMENT_DECL) {
1301 470         1220 $decl->{-path} = $parent->{-parent}{-path}.'/'.$parent->get_name;
1302             } elsif ($parent_is == PML_CONTAINER_DECL and $decl_is != PML_ATTRIBUTE_DECL) {
1303 96         286 $decl->{-path} = $parent->{-path}.'/#content';
1304             } elsif ($parent_is == PML_LIST_DECL) {
1305 47         140 $decl->{-path} = $parent->{-path}.'/LM';
1306             } elsif ($parent_is == PML_ALT_DECL) {
1307 1         5 $decl->{-path} = $parent->{-path}.'/AM';
1308             }
1309 1032 50 100     3330 if ($decl_is == PML_LIST_DECL and !$decl->{-decl} and $decl->{role} eq '#KNIT') {
      66        
1310             # warn ("List $decl->{-path} with role=\"#KNIT\" must have a content type declaration: assuming !\n");
1311 0         0 __fix_knit_type($schema,$decl,$decl->{-path}.'/LM');
1312             }
1313             } elsif ($decl_is == PML_MEMBER_DECL) {
1314 587 50 66     2149 if (!$decl->{-decl} and $decl->{role} eq '#KNIT') {
1315             # warn ("Member $decl->{-parent}{-path}/$decl->{-name} with role=\"#KNIT\" must have a content type declaration: assuming !\n");
1316 0         0 __fix_knit_type($schema,$decl);
1317             }
1318             }
1319 66         900 });
1320             }
1321              
1322             sub __fix_knit_type {
1323 0     0   0 my ($schema,$decl,$path)=@_;
1324 0         0 $decl->{-decl}='cdata';
1325 0         0 my $cdata = $decl->{cdata}= bless {
1326             format => 'PMLREF',
1327             -xml_name => 'cdata',
1328             -attributes => [ 'format' ],
1329             }, 'Treex::PML::Schema::CDATA';
1330 0         0 weaken( $cdata->{-schema} = $schema );
1331 0         0 weaken( $cdata->{-parent} = $decl );
1332 0 0 0     0 if (defined $path) {
    0          
1333 0         0 $cdata->{-path} = $path;
1334             } elsif ($decl->{-parent} and $decl->{-name}) {
1335 0         0 $cdata->{-path} = "$decl->{-parent}{-path}/$decl->{-name}";
1336             }
1337             }
1338              
1339             sub _traverse_data {
1340 707     707   1253 my ($data,$sub,$seen,$hashes_only)=@_;
1341 707         1626 $seen->{$data}=1;
1342 707 100       2279 if (UNIVERSAL::isa($data,'ARRAY')) {
    50          
1343 337 50       635 $sub->($data,0) unless $hashes_only;
1344 337         563 foreach my $val (@$data) {
1345 499 50 33     1336 if (ref($val) and !exists $seen->{$val}) {
1346 0         0 _traverse_data($val,$sub,$seen,$hashes_only);
1347             }
1348             }
1349             } elsif (UNIVERSAL::isa($data,'HASH')) {
1350 370         904 $sub->($data,1);
1351 370         900 foreach my $val (values %$data) {
1352 2948 100 100     7301 if (ref($val) and !exists $seen->{$val}) {
1353 625         1147 _traverse_data($val,$sub,$seen,$hashes_only);
1354             }
1355             }
1356             }
1357             }
1358              
1359              
1360              
1361             =back
1362              
1363             =head1 CLASSES FOR TYPE DECLARATIONS
1364              
1365             =over 3
1366              
1367             =item L
1368              
1369             =item L
1370              
1371             =item L
1372              
1373             =item L
1374              
1375             =item L
1376              
1377             =item L
1378              
1379             =item L
1380              
1381             =item L
1382              
1383             =item L
1384              
1385             =item L
1386              
1387             =item L
1388              
1389             =item L
1390              
1391             =item L
1392              
1393             =item L
1394              
1395             =back
1396              
1397             =cut
1398              
1399             1;
1400              
1401             __END__