File Coverage

blib/lib/Treex/PML/Schema.pm
Criterion Covered Total %
statement 344 522 65.9
branch 131 282 46.4
condition 116 254 45.6
subroutine 62 83 74.7
pod 31 34 91.1
total 684 1175 58.2


line stmt bran cond sub pod time code
1             # -*- cperl -*-
2             package Treex::PML::Schema;
3              
4              
5 9     9   48 use strict;
  9         16  
  9         296  
6 9     9   33 use warnings;
  9         13  
  9         432  
7 9     9   78 no warnings 'uninitialized';
  9         20  
  9         347  
8              
9 9     9   3657 use UNIVERSAL::DOES;
  9         4976  
  9         433  
10              
11 9     9   47 use Carp;
  9         14  
  9         420  
12 9     9   3673 use Treex::PML::Schema::Constants;
  9         34  
  9         841  
13 9     9   3457 use Treex::PML::Resource::URI;
  9         26  
  9         791  
14              
15             BEGIN {
16 9     9   24 our $VERSION = '2.29'; # version template
17 9         59 require Exporter;
18 9         101 import Exporter qw(import);
19 9         44 our @EXPORT = (
20             @Treex::PML::Schema::Constants::EXPORT,
21             qw(PML_VERSION_SUPPORTED),
22             );
23 9         213 our %EXPORT_TAGS = (
24             'constants' => [ @EXPORT ],
25             );
26             } # BEGIN
27              
28 9     9   55 use constant PML_VERSION_SUPPORTED => "1.2";
  9         15  
  9         647  
29              
30 9     9   4130 use Treex::PML::Schema::XMLNode;
  9         21  
  9         284  
31 9     9   3771 use Treex::PML::Schema::Decl;
  9         24  
  9         289  
32 9     9   3570 use Treex::PML::Schema::Root;
  9         28  
  9         252  
33 9     9   3637 use Treex::PML::Schema::Template;
  9         27  
  9         231  
34 9     9   3985 use Treex::PML::Schema::Derive;
  9         26  
  9         287  
35 9     9   3739 use Treex::PML::Schema::Copy;
  9         23  
  9         291  
36 9     9   3918 use Treex::PML::Schema::Import;
  9         30  
  9         303  
37 9     9   3751 use Treex::PML::Schema::Type;
  9         24  
  9         258  
38 9     9   3844 use Treex::PML::Schema::Struct;
  9         29  
  9         289  
39 9     9   3769 use Treex::PML::Schema::Container;
  9         30  
  9         340  
40 9     9   4040 use Treex::PML::Schema::Seq;
  9         32  
  9         320  
41 9     9   3854 use Treex::PML::Schema::List;
  9         27  
  9         232  
42 9     9   3547 use Treex::PML::Schema::Alt;
  9         24  
  9         275  
43 9     9   3449 use Treex::PML::Schema::Choice;
  9         26  
  9         280  
44 9     9   4196 use Treex::PML::Schema::CDATA;
  9         32  
  9         1005  
45 9     9   4252 use Treex::PML::Schema::Constant;
  9         32  
  9         341  
46 9     9   3940 use Treex::PML::Schema::Member;
  9         22  
  9         270  
47 9     9   3649 use Treex::PML::Schema::Element;
  9         27  
  9         247  
48 9     9   3741 use Treex::PML::Schema::Attribute;
  9         24  
  9         278  
49 9     9   3765 use Treex::PML::Schema::Reader;
  9         32  
  9         321  
50 9     9   4604 use Treex::PML::IO;
  9         40  
  9         657  
51 9     9   4436 use XML::Writer;
  9         55958  
  9         318  
52              
53 9     9   60 use base qw(Treex::PML::Schema::Template);
  9         13  
  9         1070  
54              
55 9     9   47 use Scalar::Util qw(weaken isweak);
  9         15  
  9         5455  
56             require Treex::PML;
57              
58             =head1 NAME
59              
60             Treex::PML::Schema - Perl implements a PML schema.
61              
62             =head2 DESCRIPTION
63              
64             This class implements PML schemas. PML schema consists of a set of
65             type declarations of several kinds, represented by objects inheriting
66             from a common base class C.
67              
68             =head2 INHERITANCE
69              
70             This class inherits from L.
71              
72             =head3 Attribute Paths
73              
74             Some methods use so called 'attribute paths' to navigate through
75             nested and referenced type declarations. An attribute path is a
76             '/'-separated sequence of steps, where step can be one of the
77             following:
78              
79             =over 3
80              
81             =item CI
82              
83             '!' followed by name of a named type (this step can only occur
84             as the very first step
85              
86             =item I
87              
88             name (of a member of a structure, element of a sequence or attribute
89             of a container), specifying the type declaration of the specified
90             named component
91              
92             =item C<#content>
93              
94             the string '#content', specifying the content type declaration
95             of a container
96              
97             =item C
98              
99             specifying the type declaration of a list
100              
101             =item C
102              
103             specifying the type declaration of an alt
104              
105             =item C<[>IC<]>
106              
107             where I is a decimal number (ignored) are an equivalent of LM or AM
108              
109             =back
110              
111             Steps of the form LM, AM, and [NNN] (except when occuring at the end
112             of an attribute path) may be omitted.
113              
114             =head2 EXPORT
115              
116             This module exports constants for declaration types.
117              
118             =head2 EXPORT TAGS
119              
120             =over 3
121              
122             =item :constants
123              
124             Export constant symbols (exported by default, too).
125              
126             =back
127              
128             =head2 CONSTANTS
129              
130             See Treex::PML::Schema::Constants.
131              
132             =cut
133              
134             =head1 METHODS
135              
136             =over 3
137              
138             =item Treex::PML::Schema->new ({ option => value, ... })
139              
140             NOTE: Don't call this constructor directly, use Treex::PML::Factory->createPMLSchema() instead!
141              
142             Parses an XML representation of a PML Schema
143             from a string, filehandle, local file, or URL,
144             processing the modular instructions as described in
145              
146             L
147              
148             and returns the corresponding C object.
149              
150             One of the following options must be given:
151              
152             =over 5
153              
154             =item C
155              
156             a XML string to parse
157              
158             =item C
159              
160             a file name or URL
161              
162             =item C
163              
164             a file-handle (IO::File, IO::Pipe, etc.) open for reading
165              
166             =back
167              
168             The following options are optional:
169              
170             =over 5
171              
172             =item C
173              
174             base URL for referred schemas (usefull when parsing from a file-handle or a string)
175              
176             =item C
177              
178             if this option is used with a true value, the parser will attempt to
179             locate referred schemas also in L resource paths.
180              
181             =item C, C, C
182              
183             constraints to the revision number of the schema.
184              
185             =item C
186              
187             if this option is used with a true value, the parser will validate the
188             schema on the fly using a RelaxNG grammar given using the
189             C parameter; if C is not given, the
190             file 'pml_schema_inline.rng' searched for in L resource paths
191             is assumed.
192              
193             =item C
194              
195             a particular RelaxNG grammar to validate against. The value may be an
196             URL or filename for the grammar in the RelaxNG XML format, or a
197             XML::LibXML::RelaxNG object representation. The compact format is not
198             supported.
199              
200             =back
201              
202             =cut
203              
204 0         0 BEGIN{
205             my %parse_opts = (
206             KeyAttr => {
207             "member" => "name",
208             "attribute" => "name",
209             "element" => "name",
210             "type" => "name",
211             "template" => "name",
212             "derive" => "name",
213             "let" => "param",
214             "param" => "name",
215             },
216             TextOnly => {
217             description => 'content',
218             revision => 'content',
219             value => 'content',
220             delete => 'content',
221             constant => 'value',
222             },
223             Stringify => {
224             description => 'content',
225             revision => 'content',
226             value => 'content',
227             delete => 'content',
228             },
229             Solitary => {
230 9     9   191 map { $_ => 1 }
  99         44832  
231             qw(description revision root cdata structure container sequence constant list alt choice)
232             },
233             Bless => {
234             member => 'Treex::PML::Schema::Member',
235             attribute => 'Treex::PML::Schema::Attribute',
236             element => 'Treex::PML::Schema::Element',
237             type => 'Treex::PML::Schema::Type',
238             root => 'Treex::PML::Schema::Root',
239             structure => 'Treex::PML::Schema::Struct',
240             container => 'Treex::PML::Schema::Container',
241             sequence => 'Treex::PML::Schema::Seq',
242             list => 'Treex::PML::Schema::List',
243             alt => 'Treex::PML::Schema::Alt',
244             cdata => 'Treex::PML::Schema::CDATA',
245             constant => 'Treex::PML::Schema::Constant',
246             choice => 'Treex::PML::Schema::Choice',
247             template => 'Treex::PML::Schema::Template',
248             copy => 'Treex::PML::Schema::Copy',
249             import => 'Treex::PML::Schema::Import',
250             derive => 'Treex::PML::Schema::Derive',
251             '*' => 'Treex::PML::Schema::XMLNode',
252             },
253             DefaultNs => PML_SCHEMA_NS,
254             );
255              
256             sub new {
257 67     67 1 315 my ($class,$opts, $more_opts)=@_;
258 67 50       231 if (!ref $opts) {
259             # compatibility with older API
260 0   0     0 $more_opts ||= {};
261 0         0 $opts = { %$more_opts, string => $opts };
262             }
263              
264 67         197 my $file = $opts->{filename};
265              
266 67         166 my $base = $opts->{base_url};
267 67 100 66     387 if (defined $base and length $base) {
    100          
268 16         136 $file = Treex::PML::ResolvePath($base,$file,$opts->{use_resources});
269             } elsif ($opts->{use_resources}) {
270 28         100 $file = Treex::PML::FindInResources($file);
271             }
272 67         265 my $schema;
273             my $revision_opts = {
274 67         157 map { $_ => delete($opts->{$_}) }
  268         793  
275             qw(revision_error revision minimal_revision maximal_revision)
276             };
277 67 50 33     440 if (defined($file) and ref($schema = $opts->{schemas}{$file})) {
278 0 0       0 print STDERR "schema $file already hashed\n" if $Treex::PML::Debug;
279 0         0 $schema->check_revision($revision_opts);
280 0         0 return $schema;
281             }
282 67         824 my $parse_opts = {%parse_opts,%$opts};
283 67         244 $parse_opts->{Bless}{pml_schema}=$class;
284 67 100 100     506 $parse_opts->{URL} = (ref $file && $file->isa('Treex::PML::Resource::URI')) ? $file->file : $file;
285              
286 67         1982 my $pml_reader = Treex::PML::Schema::Reader->new($parse_opts);
287 67         287 my $reader = $pml_reader->reader;
288 67         115 my $version;
289 67         148 eval {
290 67 50       3786 unless ( $reader->nextElement('pml_schema', PML_SCHEMA_NS)==1 ) {
291 0         0 die "Not a PML schema: $file!\n";
292             }
293 67         491 $version = $reader->getAttribute('version');
294 67         222 $reader->moveToElement;
295 67         247 $schema = $pml_reader->parse_element();
296             };
297 67 50       270 if ($@) {
298 0         0 die "Treex::PML::Schema::Reader error while parsing: $file near line ".$reader->lineNumber."\n$@\n";
299 0         0 return;
300             }
301 67 50 33     502 if (defined $version and length $version) {
302 67 50       250 unless (cmp_revisions($version,PML_VERSION_SUPPORTED)<=0) {
303 0         0 die "Unsupported version of PML schema '$file': this module supports versions up to ".PML_VERSION_SUPPORTED."\n";
304             }
305             } else {
306 0         0 warn "WARNING: PML schema '$file' does not specify version! Assuming ".PML_VERSION_SUPPORTED."\n";
307             }
308 67         312 $schema->check_revision($revision_opts);
309 67         193 $schema->{-VERSION}=$Treex::PML::Schema::VERSION;
310 67         331 return $schema;
311             }
312             } # BEGIN
313              
314              
315             =item Treex::PML::Schema->readFrom (filename,opts)
316              
317             An obsolete alias for Treex::PML::Schema->new({%$opts, filename=>$filename}).
318              
319             =cut
320              
321             sub readFrom {
322 0     0 1 0 my ($self,$file,$opts)=@_;
323 0         0 return $self->new({%$opts, filename=>$file});
324             }
325              
326             =item $schema->write ({option => value})
327              
328             This method serializes the Treex::PML::Schema object to XML. See Treex::PML::Schema::XMLNode->write for implementation.
329              
330             IMPORTANT: The resulting schema is simplified, that is all modular instructions
331             are processed and removed from it, see L
332              
333             One of the following options must be given:
334              
335             =over 5
336              
337             =item C
338              
339             a scalar reference to which the XML is to be stored as a string
340              
341             =item C
342              
343             a file name
344              
345             =item C
346              
347             a file-handle (IO::File, IO::Pipe, etc.) open for writing
348              
349             =back
350              
351             One of the following options are optional:
352              
353             =over 5
354              
355             =item C
356              
357             if this option is used with a true value, the writer will not attempt
358             to create backup (tilda) files when overwriting an existing file.
359              
360             =item C
361              
362             if this option is used with a true value, the writer will not add
363             additional newlines and indentatin white-space to the result XML.
364              
365             =back
366              
367             =cut
368              
369             # for implementation see XMLNode.pm
370              
371              
372             =item $schema->get_url ()
373              
374             Return location of the PML schema file.
375              
376             =cut
377              
378 1     1 1 4 sub get_url { return $_[0]->{URL}; }
379              
380             =item $schema->set_url ($URI)
381              
382             Set location of the PML schema file.
383              
384             =cut
385              
386 22     22 1 4158 sub set_url { return $_[0]->{URL} = Treex::PML::IO::make_URI($_[1]) }
387              
388              
389             =item $schema->get_pml_version ()
390              
391             Return PML version the schema conforms to.
392              
393             =cut
394              
395 0     0 1 0 sub get_pml_version { return $_[0]->{version}; }
396              
397              
398             =item $schema->get_revision ()
399              
400             Return PML schema revision.
401              
402             =cut
403              
404 0     0 1 0 sub get_revision { return $_[0]->{revision}; }
405              
406             =item $schema->get_description ()
407              
408             Return PML schema description.
409              
410             =cut
411              
412 0     0 1 0 sub get_description { return $_[0]->{description}; }
413              
414             =item $schema->get_root_decl ()
415              
416             Return the root type declaration (see C).
417              
418             =cut
419              
420 151     151 1 6429 sub get_root_decl { return $_[0]->{root}; }
421              
422             =item $schema->get_root_type ()
423              
424             Like $schema->get_root_decl->get_content_decl.
425              
426             =cut
427              
428             sub get_root_type {
429 0     0 1 0 my ($self,$name) = @_;
430 0         0 return $self->{root}->get_content_decl;
431             }
432             *get_root_type_obj = \&get_root_type;
433              
434              
435 0     0   0 sub _internal_api_version { return $_[0]->{'-api_version'} }
436              
437             =item $decl->get_decl_type ()
438              
439             Return the constant PML_SCHEMA_DECL (for compatibility with the Treex::PML::Schema::Decl interface).
440              
441             =item $decl->get_decl_type_str ()
442              
443             Return the string 'schema' (for compatibility with the Treex::PML::Schema::Decl interface).
444              
445             =cut
446              
447 198     198 1 627 sub get_decl_type { return(PML_SCHEMA_DECL); }
448 0     0 1 0 sub get_decl_type_str { return('schema'); }
449              
450             =item $schema->get_root_name ()
451              
452             Return name of the root element for PML instance.
453              
454             =cut
455              
456             sub get_root_name {
457 1     1 1 3 my $root = $_[0]->{root};
458 1 50       9 return $root ? $root->{name} : undef;
459             }
460              
461             =item $schema->get_type_names ()
462              
463             Return names of all named type declarations.
464              
465             =cut
466              
467             sub get_type_names {
468 0     0 1 0 my $types = $_[0]->{type};
469 0 0       0 return $types ? keys(%$types) : ();
470             }
471              
472             =item $schema->get_named_references ()
473              
474             This method returns a list of HASHrefs containing
475             information about a named references to PML instances
476             (each hash will currently have the keys 'name' and 'readas').
477              
478             =cut
479              
480             sub get_named_references {
481 107     107 1 219 my ($self, $name) = @_;
482 107 100       290 if ($self->{reference}) {
483 56         80 return map { my $r=$_; my $h = { map { ($_=>$r->{$_}) } @{$r->{'-attributes'}} }; $h }
  56         83  
  112         1773  
  56         131  
  56         193  
484 52         111 @{$self->{reference}} ;
  52         146  
485             }
486 55         114 return;
487             }
488              
489             =item $schema->get_named_reference_info (name)
490              
491             This method retrieves information about a specific named instance
492             reference as a hash (currently with keys 'name' and 'readas').
493              
494             =cut
495              
496             sub get_named_reference_info {
497 0     0 1 0 my ($self, $name) = @_;
498 0 0       0 if ($self->{reference}) {
499 0         0 return { map { my $r=$_; map { $_=>$r->{$_} } @{$r->{'-attributes'}} }
  0         0  
  0         0  
  0         0  
500 0 0       0 grep { defined($_->{name}) and $_->{name} eq $name } @{$self->{reference}} };
  0         0  
  0         0  
501             }
502 0         0 return;
503             }
504              
505             =item Treex::PML::Schema::cmp_revisions($A, $B)
506              
507             This function compares two schema revision strings according to the
508             ruls described in the PML specification. Returns -1 if revision $A
509             precedes revision $B, 0 if the revisions are equal (equivalent), and 1
510             if revision $A follows revision $B.
511              
512             =cut
513              
514             # compare two revision numbers
515             sub cmp_revisions {
516 78     78 1 215 my ($my_revision,$revision)=@_;
517 78         388 my @my_revision = split(/\./,$my_revision);
518 78         231 my @revision = split(/\./,$revision);
519 78         148 my $cmp=0;
520 78   66     386 while ($cmp==0 and (@my_revision or @revision)) {
      66        
521 158         502 $cmp = (shift(@my_revision) <=> shift(@revision));
522             }
523 78         248 return $cmp;
524             }
525              
526             # compare schema revision number with a given revision number
527             sub _match_revision {
528 11     11   28 my ($self,$revision)=@_;
529 11   50     29 my $my_revision=$self->{revision} || 0;
530 11   50     38 return cmp_revisions($self->{revision} || 0, $revision);
531             }
532              
533             # for internal use only
534             sub _resolve_type {
535 9     9   14 my ($self,$type)=@_;
536 9 50       18 return $type unless ref($type);
537 9         19 my $ref = $type->{type};
538 9 50       33 if ($ref) {
539 9         17 my $rtype = $self->{type}{$ref};
540 9 50       17 if (ref($rtype)) {
541 9         17 return $rtype;
542             } else {
543             # couldn't resolve
544 0         0 warn "No declaration for type '$ref' in schema '".$self->get_url."'\n";
545 0         0 return $type->{type};
546             }
547             } else {
548 0         0 return $type;
549             }
550             }
551              
552             =item $schema->for_each_decl (sub{...})
553              
554             This method traverses all nested declarations and sub-declarations and
555             calls a given subroutine passing the sub-declaration object as a
556             parameter.
557              
558             =cut
559              
560             sub for_each_decl {
561 153     153 1 361 my ($self,$sub) = @_;
562 153 100       481 if (ref $self->{root}) {
563 152         751 $self->{root}->for_each_decl($sub);
564             }
565 153         263 for my $d (qw(template type)) {
566 306 100       699 if (ref $self->{$d}) {
567 139         185 foreach (values %{$self->{$d}}) {
  139         499  
568 972         2152 $_->for_each_decl($sub);
569             }
570             }
571             }
572             }
573              
574             # traverse type data structure and collect types referred via
575             # type="type-name" declarations in the refferred hash
576             sub _get_referred_types {
577 22     22   37 my ($self,$type,$referred) = @_;
578             $type->for_each_decl(
579             sub {
580 93     93   108 my ($type)=@_;
581 93 50       144 return unless ref($type);
582 93 50 66     201 if (defined($type->{type}) and length($type->{type}) and !exists($referred->{$type->{type}})) {
      66        
583             # this type declaration reffers to another type - get it
584 9         27 my $resolved = $self->_resolve_type($type);
585 9         22 $referred->{$type->{type}} = $resolved;
586 9 50       37 $self->_get_referred_types($resolved,$referred) if ref $resolved;
587             }
588 22         114 });
589             }
590              
591             # import given named type and all named types it requires
592             # from src_schema into the current schema (self)
593             sub _import_type {
594 0     0   0 my ($self,$src_schema, $name) = @_;
595 0 0       0 unless (exists $src_schema->{type}{$name}) {
596 0         0 croak "Cannot import type '$name' from '$src_schema->{URL}' to '$self->{URL}': type not declared in the source schema\n";
597             }
598 0         0 my $type = $src_schema->{type}{$name};
599 0         0 my %referred = ($name => $type);
600 0         0 $src_schema->_get_referred_types($type,\%referred);
601 0         0 foreach my $n (keys %referred) {
602 0 0       0 unless (exists $self->{type}{$n}) {
603 0         0 my $parent = $referred{$n}->{-parent};
604 0 0       0 if (defined $parent) {
605 0         0 $self->{type}{$n}=Treex::PML::CloneValue($referred{$n},[$parent], [$self]);
606             } else {
607 0         0 $self->{type}{$n}=Treex::PML::CloneValue($referred{$n});
608             }
609             } else {
610            
611             }
612             }
613             }
614              
615             sub __fmt {
616 0     0   0 my ($string,$fmt) =@_;
617 0         0 $string =~ s{%(.)}{ $1 eq "%" ? "%" :
618 0 0       0 exists($fmt->{$1}) ? $fmt->{$1} : "%$1" }eg;
    0          
619 0         0 return $string;
620             }
621              
622             =item $schema->check_revision({ option=>value })
623              
624             Check that schema revision satisfies given constraints. The following options are suported:
625              
626             C: exact revision number to match
627              
628             C: minimal revision number to match
629              
630             C: maximal revision number to match
631              
632             C: an optional error message format string with %f
633             mark for the schema filename or URL and %e for the error
634             string. Defaults to 'Error: wrong schema revision of %f: %e';
635              
636             =cut
637              
638             sub check_revision {
639 67     67 1 139 my ($self,$opts)=@_;
640              
641 67   100     321 my $error = $opts->{revision_error} || 'Error: wrong schema revision of %f: %e';
642 67 50 66     227 if ($opts->{revision} and
643             $self->_match_revision($opts->{revision})!=0) {
644             croak(__fmt($error, { 'e' => "required $opts->{revision}, got $self->{revision}",
645 0         0 'f' => $self->{URL}}));
646             } else {
647 67 50 66     233 if ($opts->{minimal_revision} and
648             $self->_match_revision($opts->{minimal_revision})<0) {
649             croak(__fmt($error, { 'e' => "required at least $opts->{minimal_revision}, got $self->{revision}",
650 0         0 'f' => $self->{URL}}));
651             }
652 67 50 66     272 if ($opts->{maximal_revision} and
653             $self->_match_revision($opts->{maximal_revision})>0) {
654             croak(__fmt($error, { 'e' => "required at most $opts->{maximal_revision}, got $self->{revision}",
655 0         0 'f' => $self->{URL}}));
656             }
657             }
658             }
659              
660             =item $schema->convert_from_hash
661              
662             Compatibility method building the schema object from a nested hash
663             structure created by XML::Simple which was used in older
664             implementations. This is useful for upgrading objects stored in old
665             binary dumps.
666              
667             =cut
668              
669             sub convert_from_hash {
670 2     2 1 10 my $class = shift;
671 2         5 my $schema_hash;
672 2 50       8 if (ref($class)) {
673 0         0 $schema_hash = $class;
674 0         0 $class = ref( $schema_hash );
675             } else {
676 2         4 $schema_hash = shift;
677 2         4 bless $schema_hash,$class;
678             }
679 2   50     24 $schema_hash->{-api_version} ||= '2.0';
680 2         8 $schema_hash->{'-xml_name'}='pml_schema';
681 2         8 $schema_hash->{-attributes}=[qw(xmlns version)];
682 2 50       8 if (ref $schema_hash->{reference}) {
683 0         0 for my $ref (@{$schema_hash->{reference}}) {
  0         0  
684 0         0 $ref->{'-xml_name'}='reference';
685 0         0 $ref->{'-attributes'}=[qw(name readas)];
686 0         0 bless $ref,'Treex::PML::Schema::XMLNode';
687 0         0 weaken($ref->{-parent}=$schema_hash);
688             }
689             }
690 2         5 my $root = $schema_hash->{root};
691 2 50       6 if (defined($root)) {
692 2         10 bless $root, 'Treex::PML::Schema::Root';
693 2         18 weaken($root->{-parent}=$schema_hash);
694 2         7 $root->{'-xml_name'}='root';
695 2         7 $root->{'-attributes'}=['name','type'];
696 2         19 Treex::PML::Schema::Decl->convert_from_hash($root,
697             $schema_hash,
698             undef # path = '' for root
699             );
700             }
701 2         6 my $types = $schema_hash->{type};
702 2 50       6 if ($types) {
703 2         4 my ($name, $decl);
704 2         7 while (($name, $decl) = each %$types) {
705 2         11 bless $decl, 'Treex::PML::Schema::Type';
706 2         9 $decl->{'-xml_name'}='type';
707 2         7 $decl->{'-attributes'}=['name'];
708 2         14 Treex::PML::Schema::Decl->convert_from_hash($decl,
709             $schema_hash,
710             '!'.$name
711             );
712             }
713             }
714 2         12 return $schema_hash;
715             }
716              
717              
718             =item $schema->find_type_by_path (attribute-path,noresolve,decl)
719              
720             Locate a declaration specified by C starting from
721             declaration C. If C is undefined the root type declaration
722             is used. (Note that attribute paths starting with '/' are always
723             evaluated startng from the root declaration and paths starting with
724             '!' followed by a name of a named type are evaluated starting from
725             that type.) All references to named types are transparently resolved
726             in each step.
727              
728             The caller should pass a true value in C to enforce Member,
729             Attribute, Element, Type, or Root declaration objects to be returned
730             rather than declarations of their content.
731              
732             Attribute path is a '/'-separated sequence of steps (member,
733             attribute, element names or strings matching [\d*]) which identifying
734             a certain nested type declaration. A step of the aforementioned form
735             [\d*] is match the content declaration of a List or Alt. Note however, that
736             named stepsdive into List or Alt declarations automatically, too.
737              
738             =cut
739              
740             sub find_type_by_path {
741 99     99 1 874 my ($schema, $path, $noresolve, $decl) = @_;
742 99 50 33     274 if (defined($path) and length($path)) {
    0          
743 99 100 33     368 if ($path=~s{^!([^/]+)/?}{}) {
    50          
744 2         11 $decl = $schema->get_type_by_name($1);
745 2 50       8 if (defined $decl) {
746 2         12 $decl = $decl->get_content_decl;
747             } else {
748 0         0 return;
749             }
750             } elsif ($path=~s{^/}{} or !$decl) {
751 97         186 $decl = $schema->get_root_decl->get_content_decl;
752             }
753 99         301 for my $step (split /\//, $path,-1) {
754 495 50       622 next if $step eq '.';
755 495 50       577 if (ref($decl)) {
756 495         722 my $decl_is = $decl->get_decl_type;
757 495 100 66     1503 if ($decl_is == PML_ATTRIBUTE_DECL ||
      100        
      66        
758             $decl_is == PML_MEMBER_DECL ||
759             $decl_is == PML_ELEMENT_DECL ||
760             $decl_is == PML_TYPE_DECL ) {
761 139         209 $decl = $decl->get_knit_content_decl;
762 139 50 33     286 next unless defined($step) and length($step);
763 139         153 redo;
764             }
765 356 100 66     646 if ($decl_is == PML_LIST_DECL ||
766             $decl_is == PML_ALT_DECL ) {
767 54         79 $decl = $decl->get_knit_content_decl;
768 54 50 33     197 next if ($step =~ /^\[[-+]?\d+\]$/ or
    50 0        
      33        
769             (($decl_is == PML_LIST_DECL) ?
770             ($step eq 'LM' or $step eq '[LIST]')
771             :($step eq 'AM' or $step eq '[ALT]')));
772 0         0 redo;
773             }
774 302 100       463 if ($decl_is == PML_STRUCTURE_DECL) {
    100          
    50          
    0          
775 42         82 my $member = $decl->get_member_by_name($step);
776 42 50       67 if ($member) {
777 42         60 $decl = $member;
778             } else {
779 0         0 $member = $decl->get_member_by_name($step.'.rf');
780 0 0       0 return unless $member;
781 0 0       0 if ($member->get_knit_name eq $step) {
782 0         0 $decl = $member;
783             } else {
784 0         0 return;
785             }
786             }
787             } elsif ($decl_is == PML_CONTAINER_DECL) {
788 97 50       128 if ($step eq '#content') {
789 97         118 $decl = $decl->get_content_decl;
790 97         108 next;
791             }
792 0         0 my $attr = $decl->get_attribute_by_name($step);
793 0         0 $decl = $attr;
794             } elsif ($decl_is == PML_SEQUENCE_DECL) {
795 163         168 $step =~ s/^\[\d+\]//; # name must follow
796 163         256 $decl = $decl->get_element_by_name($step);
797             } elsif ($decl_is == PML_ROOT_DECL) {
798 0 0 0     0 if (!(defined($step) and length($step)) or ($step eq $decl->get_name)) {
      0        
799 0         0 $decl = $decl->get_content_decl;
800             } else {
801 0         0 return;
802             }
803             } else {
804 0         0 return;
805             }
806             } else {
807             # warn "Can't follow type path '$path' (step '$step')\n";
808 0         0 return(undef); # ERROR
809             }
810             }
811             } elsif (!$decl) {
812 0   0     0 $decl ||= $schema->get_root_decl->get_content_decl;
813             }
814 99   33     269 my $decl_is = $decl && $decl->get_decl_type;
815 99 100 66     561 return $noresolve ? $decl :
    50          
816             $decl && (
817             $decl_is == PML_ATTRIBUTE_DECL ||
818             $decl_is == PML_MEMBER_DECL ||
819             $decl_is == PML_ELEMENT_DECL ||
820             $decl_is == PML_TYPE_DECL ||
821             $decl_is == PML_ROOT_DECL
822             )
823             ? ($decl->get_knit_content_decl) : $decl;
824             }
825              
826              
827             =item $schema->find_types_by_role (role,start_decls)
828              
829             Return a list of declarations (objects derived from Treex::PML::Schema::Decl)
830             that have role equal to C.
831              
832             If C is specified, it must be an ARRAY reference of
833             declarations; in that case, only declarations nested below the listed
834             ones are considered.
835              
836             =cut
837              
838             sub find_types_by_role {
839 0     0 1 0 my ($self,$role,$start_decls)=@_;
840 0         0 my @decls;
841 0 0   0   0 my $sub = sub { push @decls, $_[0] if $_[0]->{role} eq $role };
  0         0  
842 0 0       0 if (defined($start_decls)) {
843 0         0 for (@$start_decls) {
844 0         0 $_->for_each_decl($sub);
845             }
846             } else {
847 0         0 $self->for_each_decl($sub);
848             }
849 0         0 return @decls;
850             }
851              
852             =item $schema->find_role (role,start_decl,opts)
853              
854             WARINING: this function can be very slow, esp. if the type
855             declarations are recursive.
856              
857             Return a list of attribute paths leading to nested type declarations
858             of C with role equal to C.
859              
860             This is equivalent to
861              
862             $schema->find_decl($decl,sub{ $_[0]->{role} eq $role },$opts);
863              
864             Please, see the documentation for C for more information.
865              
866             =cut
867              
868             sub find_role {
869 22     22 1 80 my ($self, $role, $decl, $opts)=@_;
870 22 50 33     97 if (!$decl and wantarray()) {
871 22   100     145 $self->{-ROLE_CACHE}{$role} ||= [ $self->_find_role($role,$decl,$opts) ];
872 22         72 return @{$self->{-ROLE_CACHE}{$role}};
  22         115  
873             }
874 0         0 return $self->_find_role($role,$decl,$opts);
875             }
876              
877             sub _find_role {
878 12     12   27 my ($self, $role, $decl, $opts)=@_;
879 12 100   1826   80 return $self->find_decl(sub{ defined($_[0]->{role}) and $_[0]->{role} eq $role },$decl,$opts);
  1826         3680  
880             }
881              
882             =item $schema->find_decl (callback,start_decl,opts)
883              
884             WARINING: this function can be very slow, esp. if the type
885             declarations are recursive.
886              
887             Return a list of attribute paths leading to nested type declarations
888             of C for which a given callback returns a true value. The tested
889             type declaration is passed to the callback as the first (and only)
890             argument.
891              
892             If C is specified, it must be an ARRAY reference of
893             declarations; in that case, only declarations nested or referred to
894             from the listed ones are considered.
895              
896             In array context return all matching nested declarations are
897             returned. In scalar context only the first one is returned (with early
898             stopping).
899              
900             The last argument C can be used to pass some flags to the
901             algorithm. Currently only the flag C is available. If
902             true, then the function never recurses into content declaration of
903             declarations with the role #CHILDNODES.
904              
905             =cut
906              
907             sub find_decl {
908 12     12 1 30 my ($self, $sub, $decl, $opts)=@_;
909 12   33     77 $decl ||= $self->{root};
910 12         19 my $first = not(wantarray);
911 12         52 my @res = grep { defined } $self->_find($decl,$sub,$first,{},$opts);
  49         69  
912 12 50       135 return $first ? $res[0] : @res;
913             }
914              
915              
916             sub _find {
917 1859     1859   2554 my ($self, $decl, $test, $first, $cache, $opts)=@_;
918              
919 1859         1828 my @result = ();
920              
921 1859 50       2510 return () unless ref $decl;
922              
923              
924 1859 100       3018 if ($cache->{'#RECURSE'}{ $decl }) {
925             return ()
926 33         50 }
927 1826         3473 local $cache->{'#RECURSE'}{ $decl } = 1;
928              
929 1826 0 66     3983 if ( ref $opts and $opts->{no_childnodes} and defined($decl->{role}) and $decl->{role} eq '#CHILDNODES') {
      33        
      33        
930 0         0 return ();
931             }
932              
933 1826 100       2310 if ( $test->($decl) ) {
934 76 50       110 if ($first) {
935 0         0 return '';
936             } else {
937 76         112 push @result, '';
938             }
939             }
940 1826         3438 my $type_ref = $decl->get_type_ref;
941 1826         3232 my $decl_is = $decl->get_decl_type;
942 1826 50       2388 my $seq_bracket = $opts->{with_Seq_brackets} ? '[0]' : '';
943              
944 1826 100       2362 if ($type_ref) {
945 367         528 my $cached = $cache->{ $type_ref };
946 367 100       514 unless ($cached) {
947 109         189 $cached = $cache->{ $type_ref } = [ $self->_find( $self->get_type_by_name($type_ref),
948             $test, $first, $cache, $opts ) ];
949             }
950 367 50       685 if ($decl_is == PML_CONTAINER_DECL) {
    100          
    100          
951 0 0 0     0 push @result, map { (defined($_) and length($_)) ? '#content/'.$_ : '#content' } @$cached;
  0         0  
952             } elsif ($decl_is == PML_LIST_DECL) {
953 81 100 66     132 push @result, map { (defined($_) and length($_)) ? 'LM/'.$_ : 'LM' } @$cached;
  34         104  
954             } elsif ($decl_is == PML_ALT_DECL) {
955 24 0 0     41 push @result, map { (defined($_) and length($_)) ? 'AM/'.$_ : 'AM' } @$cached;
  0         0  
956             } else {
957 262         296 push @result, @$cached;
958             }
959 367 50 33     600 return $result[0] if ($first and @result);
960             }
961 1826 100 100     7389 if ($decl_is == PML_STRUCTURE_DECL) {
    100 100        
    100 100        
    100 100        
    100          
    100          
962 132         230 foreach my $member ($decl->get_members) {
963 624 50 33     967 my @res = map { (defined($_) and length($_)) ? $member->get_name.'/'.$_ : $member->get_name }
  37         98  
964             $self->_find($member, $test, $first, $cache, $opts);
965 624 50 33     967 return $res[0] if ($first and @res);
966 624         742 push @result,@res;
967             }
968             } elsif ($decl_is == PML_CONTAINER_DECL) {
969 48         89 my $cdecl = $decl->get_content_decl;
970 48         113 foreach my $attr ($decl->get_attributes) {
971 28 0 0     70 my @res = map { (defined($_) and length($_)) ? $attr->get_name.'/'.$_ : $attr->get_name }
  0         0  
972             $self->_find($attr, $test, $first, $cache, $opts);
973 28 50 33     65 return $res[0] if ($first and @res);
974 28         46 push @result,@res;
975             }
976 48 50       110 if ($cdecl) {
977 48 50 33     92 push @result, map { (defined($_) and length($_)) ? '#content/'.$_ : '#content' }
  51         164  
978             $self->_find($cdecl, $test, $first, $cache, $opts);
979 48 50 33     108 return $result[0] if ($first and @result);
980             }
981             } elsif ($decl_is == PML_SEQUENCE_DECL) {
982 34         83 foreach my $element ($decl->get_elements) {
983 55 100 66     107 my @res = map { (defined($_) and length($_)) ? $element->get_name.$seq_bracket.'/'.$_ : $element->get_name.$seq_bracket }
  97         542  
984             $self->_find($element, $test, $first, $cache, $opts);
985 55 50 33     116 return $res[0] if ($first and @res);
986 55         103 push @result,@res;
987             }
988             } elsif ($decl_is == PML_LIST_DECL) {
989 134 100 66     250 push @result, map { (defined($_) and length($_)) ? 'LM/'.$_ : 'LM' }
  34         96  
990             $self->_find($decl->get_content_decl, $test, $first, $cache, $opts);
991             } elsif ($decl_is == PML_ALT_DECL) {
992 28 0 0     83 push @result, map { (defined($_) and length($_)) ? 'AM/'.$_ : 'AM' }
  0         0  
993             $self->_find($decl->get_content_decl, $test, $first, $cache, $opts);
994             } elsif ($decl_is == PML_TYPE_DECL ||
995             $decl_is == PML_ROOT_DECL ||
996             $decl_is == PML_ATTRIBUTE_DECL ||
997             $decl_is == PML_MEMBER_DECL ||
998             $decl_is == PML_ELEMENT_DECL ) {
999 821         1342 push @result, $self->_find($decl->get_content_decl, $test, $first, $cache, $opts);
1000             }
1001 1826         2058 my %uniq;
1002             return $first ? (@result ? $result[0] : ())
1003 1826 0       3780 : grep { !$uniq{$_} && ($uniq{$_}=1) } @result;
  633 100       1802  
    50          
1004             }
1005              
1006             =item $schema->node_types ()
1007              
1008             Return a list of all type declarations with the role C<#NODE>.
1009              
1010             =cut
1011              
1012             sub node_types {
1013 0     0 1 0 my ($self) = @_;
1014 0         0 my @result;
1015 0         0 return $self->find_types_by_role('#NODE');
1016             }
1017              
1018              
1019              
1020             =item $schema->get_type_by_name (name)
1021              
1022             Return the declaration of the named type with a given name (see
1023             C).
1024              
1025             =cut
1026              
1027             sub get_type_by_name {
1028 111     111 1 165 my ($self,$name) = @_;
1029 111         343 return $self->{type}{$name};
1030             }
1031             *get_type_by_name_obj = \&get_type_by_name;
1032              
1033              
1034             # OBSOLETE: for backward compatibility only
1035             sub type {
1036 0     0 1 0 my ($self,$decl)=@_;
1037 0 0       0 if (UNIVERSAL::DOES::does($decl,'Treex::PML::Schema::Decl')) {
1038 0         0 return $decl
1039             } else {
1040 0         0 return Treex::PML::Type->new($self,$decl);
1041             }
1042             }
1043              
1044             =item $schema->validate_object (object, type_decl, log, flags)
1045              
1046             Validates the data content of the given object against a specified
1047             type declaration. The type_decl argument must either be an object
1048             derived from the C class or the name of a named
1049             type.
1050              
1051             An array reference may be passed as the optional 3rd argument C
1052             to obtain a detailed report of all validation errors.
1053              
1054             The C argument can specify flags that influance the
1055             validation. The following constants can binary-OR'ed to obtain the
1056             fags:
1057              
1058             PML_VALIDATE_NO_TREES - do not validate nested data with roles
1059             #CHIDLNODES or #TREES and do not require that objects with the role
1060             #NODE implement the Treex::PML::Node role.
1061              
1062             PML_VALIDATE_NO_CHILDNODES - do not validate nested data with the
1063             role #CHIDLNODES.
1064              
1065             Returns: 1 if the content conforms, 0 otherwise.
1066              
1067             =cut
1068              
1069             sub validate_object { # (path, base_type)
1070 0     0 1 0 my ($schema, $object, $type,$log)=@_;
1071 0 0 0     0 if (defined $log and UNIVERSAL::isa($log,'ARRAY')) {
1072 0         0 croak "Treex::PML::Schema::validate_object: log must be an ARRAY reference";
1073             }
1074 0   0     0 $type ||= $schema->get_type_by_name($type);
1075 0 0       0 if (!ref($type)) {
1076 0         0 croak "Treex::PML::Schema::validate_object: Cannot determine data type";
1077             }
1078 0         0 return $type->validate_object($object,{log => $log});
1079             }
1080              
1081              
1082             =item $schema->validate_field (object, attr-path, type, log)
1083              
1084             This method is similar to C, but in this case the
1085             validation is restricted to the data substructure of C
1086             specified by the C argument.
1087              
1088             C is the type of C specified either by the name of a
1089             named type, or as a Treex::PML::Type, or a type declaration.
1090              
1091             An array reference may be passed as the optional 3rd argument C
1092             to obtain a detailed report of all validation errors.
1093              
1094             Returns: 1 if the content conforms, 0 otherwise.
1095              
1096             =cut
1097              
1098             sub validate_field {
1099 0     0 1 0 my ($schema, $object, $path, $type, $log) = @_;
1100 0 0 0     0 if (defined $log and UNIVERSAL::isa($log,'ARRAY')) {
1101 0         0 croak "Treex::PML::Schema::validate_field: log must be an ARRAY reference";
1102             }
1103 0 0       0 if (!ref($type)) {
1104 0         0 my $named_type = $schema->get_type_by_name($type);
1105 0 0       0 croak "Treex::PML::Schema::validate_field: Cannot find type '$type'"
1106             unless $named_type;
1107 0         0 $type = $named_type;
1108             }
1109 0 0 0     0 if (!(defined($path) and length($path))) {
1110 0         0 return $type->validate_object($object, { log => $log });
1111             }
1112 0         0 $type = $type->find($path);
1113 0 0       0 croak "Treex::PML::Schema::validate_field: Cannot determine data type for attribute-path '$path'" unless $type;
1114             return
1115 0         0 $type->validate_object(Treex::PML::Instance::get_data($object,$path),{ path => $path,
1116             log => $log
1117             });
1118             }
1119              
1120              
1121             =item $schema->get_paths_to_atoms (\@decls, \%opts)
1122              
1123             This method returns a list of all non-periodic canonical paths leading
1124             from given types to atomic values. Currently only the following options
1125             are supported:
1126              
1127             no_childnodes => $bool
1128              
1129             If true, the method does not descent to member types with the role
1130             #CHILDNODES.
1131              
1132             no_nodes => $bool
1133              
1134             If true, the method does not descent to member types with the role
1135             #NODE (except for the starting types).
1136              
1137             with_LM => $bool
1138              
1139             If true, the paths will include a LM step for each List type on the path.
1140              
1141             with_AM => $bool
1142              
1143             If true, the paths will include a AM step for each Alt type on the path.
1144              
1145             with_Seq_brackets => $bool
1146              
1147             If true, the paths will append a [0] after each step representing a sequence element
1148              
1149             =cut
1150              
1151             sub get_paths_to_atoms {
1152 0     0 1 0 my ($self,$types,$opts) = @_;
1153             # find node type
1154              
1155 0 0       0 unless (defined $types) {
1156 0         0 $types = [ $self->node_types ];
1157             }
1158 0   0     0 $opts||={};
1159 0         0 return $self->_get_paths_to_atoms($types,{},$opts);
1160             }
1161              
1162             sub _get_paths_to_atoms {
1163 0     0   0 my ($self,$types,$seen,$opts)=@_;
1164 0         0 my @result;
1165 0         0 my $no_children = $opts->{no_childnodes};
1166 0         0 my $no_nodes = $opts->{no_nodes};
1167 0         0 my $with_LM = $opts->{with_LM};
1168 0         0 my $with_AM = $opts->{with_AM};
1169 0         0 my $with_Seq_brackets = $opts->{with_Seq_brackets};
1170 0         0 foreach my $type (@$types) {
1171 0 0       0 next if $seen->{$type};
1172 0         0 my $decl_is = $type->get_decl_type;
1173 0 0 0     0 next if $no_children and $type->get_role eq '#CHILDNODES';
1174 0 0 0     0 if ($decl_is == PML_TYPE_DECL ||
      0        
      0        
      0        
      0        
      0        
      0        
      0        
1175             $decl_is == PML_ROOT_DECL ||
1176             $decl_is == PML_ATTRIBUTE_DECL ||
1177             $decl_is == PML_MEMBER_DECL ||
1178             $decl_is == PML_ELEMENT_DECL ||
1179             (!$with_LM && $decl_is == PML_LIST_DECL) ||
1180             (!$with_AM && $decl_is == PML_ALT_DECL)) {
1181 0         0 $type = $type->get_knit_content_decl;
1182 0 0 0     0 next if $no_nodes and $type->get_role eq '#NODE';
1183 0         0 redo;
1184             }
1185 0 0       0 next unless ref($type);
1186 0         0 my @members;
1187 0 0       0 if ($decl_is == PML_STRUCTURE_DECL) {
    0          
    0          
    0          
    0          
1188 0         0 @members = map { [$_,$_->get_knit_name] } $type->get_members;
  0         0  
1189             } elsif ($decl_is == PML_CONTAINER_DECL) {
1190 0         0 my $cdecl = $type->get_knit_content_decl;
1191 0 0       0 @members = ((map { [ $_, $_->get_name ] } $type->get_attributes),
  0         0  
1192             ($cdecl ? [$cdecl, '#content'] : ()));
1193             } elsif ($decl_is == PML_SEQUENCE_DECL) {
1194 0 0       0 if ($with_Seq_brackets) {
1195 0         0 @members = map { [ $_, $_->get_name.'[0]' ] } $type->get_elements;
  0         0  
1196             } else {
1197 0         0 @members = map { [ $_, $_->get_name ] } $type->get_elements;
  0         0  
1198             }
1199             } elsif ($decl_is == PML_LIST_DECL) {
1200 0         0 @members = [$type->get_knit_content_decl,'LM'];
1201             } elsif ($decl_is == PML_ALT_DECL) {
1202 0         0 @members = [$type->get_knit_content_decl,'AM'];
1203             } else {
1204 0         0 push @result, qq{};
1205             }
1206 0 0       0 if (@members) {
1207 0         0 for my $m (@members) {
1208 0         0 my ($mdecl,$name) = @$m;
1209 0         0 local $seen->{$type}=1;
1210 0 0 0     0 push @result, map { (defined($_) and length($_)) ? $name."/".$_ : $name }
  0         0  
1211             $self->_get_paths_to_atoms([$mdecl],$seen,$opts);
1212             }
1213             }
1214             }
1215 0         0 my %uniq;
1216 0 0       0 return grep { !$uniq{$_} && ($uniq{$_}=1) } @result;
  0         0  
1217             }
1218              
1219              
1220             =item $schema->attributes (decl...)
1221              
1222             This function tries to emulate the behavior of
1223             C<<< Treex::PML::FSFormat->attributes >>> to some extent.
1224              
1225             Return attribute paths to all atomic subtypes of given type
1226             declarations. If no type declaration objects are given, then types
1227             with role C<#NODE> are assumed. This function never descends to
1228             subtypes with role C<#CHILDNODES>.
1229              
1230             =cut
1231              
1232             sub attributes {
1233 0     0 1 0 my ($self,@types) = @_;
1234             # find node type
1235 0 0       0 return $self->get_paths_to_atoms(@types ? \@types : undef, { no_childnodes => 1 });
1236             }
1237              
1238              
1239              
1240             sub init {
1241 67     67 0 216 my ($schema,$opts)=@_;
1242 67         283 $schema->{URL} = $opts->{URL};
1243 67         244 $schema->{-api_version} = '2.0';
1244             }
1245              
1246              
1247             # these functions are used internally by the serializer
1248             sub serialize_exclude_keys {
1249 11     11 0 79 return qw(URL revision description);
1250             }
1251             sub serialize_get_children {
1252 11     11 0 22 my ($self,$opts)=@_;
1253 11         39 my @children = $self->SUPER::serialize_get_children($opts);
1254             return (
1255 22 100       100 (grep { defined($_->[1]) and length($_->[1]) } (
1256             ['revision',$self->{revision}],
1257             ['description',$self->{description}]
1258             )
1259             ),
1260 62         85 (grep { $_->[0] eq 'reference' } @children),
1261 62         91 (grep { $_->[0] eq 'root' } @children),
1262 11         47 (grep { $_->[0] !~ /^(?:root|reference)$/ } @children)
  62         156  
1263             );
1264             }
1265              
1266             =item $schema->post_process($options)
1267              
1268             Auxiliary method used internally by the PML Schema parser. It
1269             simplifies the schema and for each declaration object creates back
1270             references to its parent declaration and schema and pre-computes the
1271             type attribute path returned by $decl->get_decl_path().
1272              
1273             =cut
1274              
1275             sub post_process {
1276 67     67 1 216 my ($schema,$opts)=@_;
1277 67         467 $schema->simplify($opts);
1278             $schema->for_each_decl(sub{
1279 2384     2384   2618 my ($decl)=@_;
1280 2384         4069 weaken( $decl->{-schema} = $schema );
1281 2384         2697 my $parent = $decl->{-parent};
1282 2384         4661 my $decl_is = $decl->get_decl_type;
1283 2384 100 100     14040 if (
    100 100        
      100        
      100        
      100        
      100        
      100        
1284             $decl_is == PML_STRUCTURE_DECL ||
1285             $decl_is == PML_CONTAINER_DECL ||
1286             $decl_is == PML_SEQUENCE_DECL ||
1287             $decl_is == PML_LIST_DECL ||
1288             $decl_is == PML_ALT_DECL ||
1289             $decl_is == PML_CHOICE_DECL ||
1290             $decl_is == PML_CONSTANT_DECL ||
1291             $decl_is == PML_CDATA_DECL
1292             ) {
1293 1053         1656 my $parent_is = $parent->get_decl_type;
1294 1053 100 100     3003 if ($parent_is == PML_TYPE_DECL) {
    100 100        
    100 66        
    100          
    100          
    50          
1295 400         669 $decl->{-path} = '!'.$parent->get_name;
1296             } elsif ($parent_is == PML_ROOT_DECL) {
1297 28         73 $decl->{-path} = '';
1298             } elsif ($parent_is == PML_ATTRIBUTE_DECL ||
1299             $parent_is == PML_MEMBER_DECL ||
1300             $parent_is == PML_ELEMENT_DECL) {
1301 479         998 $decl->{-path} = $parent->{-parent}{-path}.'/'.$parent->get_name;
1302             } elsif ($parent_is == PML_CONTAINER_DECL and $decl_is != PML_ATTRIBUTE_DECL) {
1303 98         218 $decl->{-path} = $parent->{-path}.'/#content';
1304             } elsif ($parent_is == PML_LIST_DECL) {
1305 47         111 $decl->{-path} = $parent->{-path}.'/LM';
1306             } elsif ($parent_is == PML_ALT_DECL) {
1307 1         4 $decl->{-path} = $parent->{-path}.'/AM';
1308             }
1309 1053 50 100     2534 if ($decl_is == PML_LIST_DECL and !$decl->{-decl} and $decl->{role} eq '#KNIT') {
      66        
1310             # warn ("List $decl->{-path} with role=\"#KNIT\" must have a content type declaration: assuming !\n");
1311 0         0 __fix_knit_type($schema,$decl,$decl->{-path}.'/LM');
1312             }
1313             } elsif ($decl_is == PML_MEMBER_DECL) {
1314 607 50 66     1518 if (!$decl->{-decl} and $decl->{role} eq '#KNIT') {
1315             # warn ("Member $decl->{-parent}{-path}/$decl->{-name} with role=\"#KNIT\" must have a content type declaration: assuming !\n");
1316 0         0 __fix_knit_type($schema,$decl);
1317             }
1318             }
1319 67         722 });
1320             }
1321              
1322             sub __fix_knit_type {
1323 0     0   0 my ($schema,$decl,$path)=@_;
1324 0         0 $decl->{-decl}='cdata';
1325 0         0 my $cdata = $decl->{cdata}= bless {
1326             format => 'PMLREF',
1327             -xml_name => 'cdata',
1328             -attributes => [ 'format' ],
1329             }, 'Treex::PML::Schema::CDATA';
1330 0         0 weaken( $cdata->{-schema} = $schema );
1331 0         0 weaken( $cdata->{-parent} = $decl );
1332 0 0 0     0 if (defined $path) {
    0          
1333 0         0 $cdata->{-path} = $path;
1334             } elsif ($decl->{-parent} and $decl->{-name}) {
1335 0         0 $cdata->{-path} = "$decl->{-parent}{-path}/$decl->{-name}";
1336             }
1337             }
1338              
1339             sub _traverse_data {
1340 707     707   1040 my ($data,$sub,$seen,$hashes_only)=@_;
1341 707         1413 $seen->{$data}=1;
1342 707 100       2005 if (UNIVERSAL::isa($data,'ARRAY')) {
    50          
1343 337 50       482 $sub->($data,0) unless $hashes_only;
1344 337         464 foreach my $val (@$data) {
1345 499 50 33     1123 if (ref($val) and !exists $seen->{$val}) {
1346 0         0 _traverse_data($val,$sub,$seen,$hashes_only);
1347             }
1348             }
1349             } elsif (UNIVERSAL::isa($data,'HASH')) {
1350 370         706 $sub->($data,1);
1351 370         842 foreach my $val (values %$data) {
1352 2948 100 100     5996 if (ref($val) and !exists $seen->{$val}) {
1353 625         924 _traverse_data($val,$sub,$seen,$hashes_only);
1354             }
1355             }
1356             }
1357             }
1358              
1359              
1360              
1361             =back
1362              
1363             =head1 CLASSES FOR TYPE DECLARATIONS
1364              
1365             =over 3
1366              
1367             =item L
1368              
1369             =item L
1370              
1371             =item L
1372              
1373             =item L
1374              
1375             =item L
1376              
1377             =item L
1378              
1379             =item L
1380              
1381             =item L
1382              
1383             =item L
1384              
1385             =item L
1386              
1387             =item L
1388              
1389             =item L
1390              
1391             =item L
1392              
1393             =item L
1394              
1395             =back
1396              
1397             =cut
1398              
1399             1;
1400              
1401             __END__