File Coverage

blib/lib/XML/Rules.pm
Criterion Covered Total %
statement 19 21 90.4
branch n/a
condition n/a
subroutine 7 7 100.0
pod n/a
total 26 28 92.8


line stmt bran cond sub pod time code
1             package XML::Rules;
2            
3 14     14   703797 use warnings;
  14         35  
  14         563  
4 14     14   80 no warnings qw(uninitialized);
  14         27  
  14         519  
5 14     14   75 use strict;
  14         58  
  14         480  
6 14     14   80 use Carp;
  14         29  
  14         1103  
7 14     14   380 use 5.008;
  14         49  
  14         642  
8 14     14   80 use Scalar::Util qw(weaken);
  14         55  
  14         1910  
9            
10             require Exporter;
11             our @ISA = qw(Exporter);
12             our @EXPORT_OK = qw(paths2rules);
13            
14 14     14   30218 use XML::Parser::Expat;
  0            
  0            
15            
16             use constant STRIP => "0000";
17             use constant STRIP_RULE => 'pass';
18            
19             #use Data::Dumper;
20             #$Data::Dumper::Indent = 1;
21             #$Data::Dumper::Terse = 1;
22             #$Data::Dumper::Quotekeys = 0;
23             #$Data::Dumper::Sortkeys = 1;
24            
25            
26             =head1 NAME
27            
28             XML::Rules - parse XML and specify what and how to keep/process for individual tags
29            
30             =head1 VERSION
31            
32             Version 1.16
33            
34             =cut
35            
36             our $VERSION = '1.16';
37            
38             =head1 SYNOPSIS
39            
40             use XML::Rules;
41            
42             $xml = <<'*END*';
43            
44            
45             ...
46             ...
47             ...
48            
49             ...
50             ...
51             ...
52             ...
53            
54            
55             123-456-7890
56             663-486-7890
57             663-486-7000
58            
59            
60            
61             ...
62             ...
63             ...
64            
65             ...
66             ...
67             ...
68             ...
69            
70            
71             663-486-7891
72            
73            
74            
75             *END*
76            
77             @rules = (
78             _default => sub {$_[0] => $_[1]->{_content}},
79             # by default I'm only interested in the content of the tag, not the attributes
80             bogus => undef,
81             # let's ignore this tag and all inner ones as well
82             address => sub {address => "$_[1]->{street}, $_[1]->{city} ($_[1]->{country})"},
83             # merge the address into a single string
84             phone => sub {$_[1]->{type} => $_[1]->{_content}},
85             # let's use the "type" attribute as the key and the content as the value
86             phones => sub {delete $_[1]->{_content}; %{$_[1]}},
87             # remove the text content and pass along the type => content from the child nodes
88             person => sub { # lets print the values, all the data is readily available in the attributes
89             print "$_[1]->{lname}, $_[1]->{fname} <$_[1]->{email}>\n";
90             print "Home phone: $_[1]->{home}\n" if $_[1]->{home};
91             print "Office phone: $_[1]->{office}\n" if $_[1]->{office};
92             print "Fax: $_[1]->{fax}\n" if $_[1]->{fax};
93             print "$_[1]->{address}\n\n";
94             return; # the tag is processed, no need to remember what it contained
95             },
96             );
97             $parser = XML::Rules->new(rules => \@rules);
98             $parser->parse( $xml);
99            
100             =head1 INTRODUCTION
101            
102             There are several ways to extract data from XML. One that's often used is to read the whole file and transform it into a huge maze of objects and then write code like
103            
104             foreach my $obj ($XML->forTheLifeOfMyMotherGiveMeTheFirstChildNamed("Peter")->pleaseBeSoKindAndGiveMeAllChildrenNamedSomethingLike("Jane")) {
105             my $obj2 = $obj->sorryToKeepBotheringButINeedTheChildNamed("Theophile");
106             my $birth = $obj2->whatsTheValueOfAttribute("BirthDate");
107             print "Theophile was born at $birth\n";
108             }
109            
110             I'm exagerating of course, but you probably know what I mean. You can of course shorten the path and call just one method ... that is if you spend the time to learn one more "cool" thing starting with X. XPath.
111            
112             You can also use XML::Simple and generate an almost equaly huge maze of hashes and arrays ... which may make the code more or less complex. In either case you need to have enough memory
113             to store all that data, even if you only need a piece here and there.
114            
115             Another way to parse the XML is to create some subroutines that handle the start and end tags and the text and whatever else may appear in the XML. Some modules will let you specify just one for start tag, one for text and one for end tag, others will let you install different handlers for different tags. The catch is that you have to build your data structures yourself, you have to know where you are, what tag is just open and what is the parent and its parent etc. so that you could add the attributes and especially the text to the right place. And the handlers have to do everything as their side effect. Does anyone remember what do they say about side efects? They make the code hard to debug, they tend to change the code into a maze of interdependent snippets of code.
116            
117             So what's the difference in the way XML::Rules works? At the first glance, not much. You can also specify subroutines to be called for the tags encountered while parsing the XML, just like the other even based XML parsers. The difference is that you do not have to rely on side-effects if all you want is to store the value of a tag. You simply return whatever you need from the current tag and the module will add it at the right place in the data structure it builds and will provide it to the handlers for the parent tag. And if the parent tag does return that data again it will be passed to its parent and so forth. Until we get to the level at which it's convenient to handle all the data we accumulated from the twig.
118            
119             Do we want to keep just the content and access it in the parent tag handler under a specific name?
120            
121             foo => sub {return 'foo' => $_[1]->{_content}}
122            
123             Do we want to ornament the content a bit and add it to the parent tag's content?
124            
125             u => sub {return '_' . $_[1]->{_content} . '_'}
126             strong => sub {return '*' . $_[1]->{_content} . '*'}
127             uc => sub {return uc($_[1]->{_content})}
128            
129             Do we want to merge the attributes into a string and access the string from the parent tag under a specified name?
130            
131             address => sub {return 'Address' => "Street: $_[1]->{street} $_[1]->{bldngNo}\nCity: $_[1]->{city}\nCountry: $_[1]->{country}\nPostal code: $_[1]->{zip}"}
132            
133             and in this case the $_[1]->{street} may either be an attribute of the
tag or it may be ther result of the handler (rule)
134            
135             street => sub {return 'street' => $_[1]->{_content}}
136            
137             and thus come from a child tag . You may also use the rules to convert codes to values
138            
139             our %states = (
140             AL => 'Alabama',
141             AK => 'Alaska',
142             ...
143             );
144             ...
145             state => sub {return 'state' => $states{$_[1]->{_content}}; }
146            
147             or
148            
149             address => sub {
150             if (exists $_[1]->{id}) {
151             $sthFetchAddress->execute($_[1]->{id});
152             my $addr = $sthFetchAddress->fetchrow_hashref();
153             $sthFetchAddress->finish();
154             return 'address' => $addr;
155             } else {
156             return 'address' => $_[1];
157             }
158             }
159            
160             so that you do not have to care whether there was
161            
162            
163            
164             or
165            
166            
Larry Wall's St.478CoreThe Programming Republic of Perl
167            
168             And if you do not like to end up with a datastructure of plain old arrays and hashes, you can create
169             application specific objects in the rules
170            
171             address => sub {
172             my $type = lc(delete $_[1]->{type});
173             $type.'Address' => MyApp::Address->new(%{$_[1]})
174             },
175             person => sub {
176             '@person' => MyApp::Person->new(
177             firstname => $_[1]->{fname},
178             lastname => $_[1]->{lname},
179             deliveryAddress => $_[1]->{deliveryAddress},
180             billingAddress => $_[1]->{billingAddress},
181             phone => $_[1]->{phone},
182             )
183             }
184            
185            
186             At each level in the tree structure serialized as XML you can decide what to keep, what to throw away, what to transform and
187             then just return the stuff you care about and it will be available to the handler at the next level.
188            
189             =head1 CONSTRUCTOR
190            
191             my $parser = XML::Rules->new(
192             rules => \@rules,
193             [ start_rules => \@start_rules, ]
194             [ stripspaces => 0 / 1 / 2 / 3 + 0 / 4 + 0 / 8, ]
195             [ normalisespaces => 0 / 1, ]
196             [ style => 'parser' / 'filter', ]
197             [ ident => ' ', [reformat_all => 0 / 1] ],
198             [ encode => 'encoding specification', ]
199             [ output_encoding => 'encoding specification', ]
200             [ namespaces => \%namespace2alias_mapping, ]
201             [ handlers => \%additional_expat_handlers, ]
202             # and optionaly parameters passed to XML::Parser::Expat
203             );
204            
205             Options passed to XML::Parser::Expat : ProtocolEncoding Namespaces NoExpand Stream_Delimiter ErrorContext ParseParamEnt Base
206            
207             The "stripspaces" controls the handling of whitespace. Please see the C bellow.
208            
209             The "style" specifies whether you want to build a parser used to extract stuff from the XML or filter/modify the XML. If you specify
210             style => 'filter' then all tags for which you do not specify a subroutine rule or that occure inside such a tag are copied to the output filehandle
211             passed to the ->filter() or ->filterfile() methods.
212            
213             The "ident" specifies what character(s) to use to ident the tags when filtering, by default the tags are not formatted in any way. If the
214             "reformat_all" is not set then this affects only the tags that have a rule and their subtags. And in case of subtags only those that were
215             added into the attribute hash by their rules, not those left in the _content array!
216            
217             The "warnoverwrite" instructs XML::Rules to issue a warning whenever the rule cause a key in a tag's hash to be overwritten by new
218             data produced by the rule of a subtag. This happens eg. if a tag is repeated and its rule doesn't expect it.
219            
220             The "encode" allows you to ask the module to run all data through Encode::encode( 'encoding_specification', ...)
221             before being passed to the rules. Otherwise all data comes as UTF8.
222            
223             The "output_encoding" on the other hand specifies in what encoding is the resulting data going to be, the default is again UTF8.
224             This means that if you specify
225            
226             encode => 'windows-1250',
227             output_encoding => 'utf8',
228            
229             and the XML is in ISO-8859-2 (Latin2) then the filter will 1) convert the content and attributes of the tags you are not interested in from Latin2
230             directly to utf8 and output and 2) convert the content and attributes of the tags you want to process from Latin2 to Windows-1250, let you mangle
231             the data and then convert the results to utf8 for the output.
232            
233             The C and C affects also the C<$parser->toXML(...)>, if they are different then the data are converted from
234             one encoding to the other.
235            
236             The C allow you to set additional handlers for XML::Parser::Expat->setHandlers.
237             Your Start, End, Char and XMLDecl handlers are evaluated before the ones installed by XML::Rules and may
238             modify the values in @_, but you should be very carefull with that. Consider that experimental and if you do make
239             that work the way you needed, please let me know so that I know what was it good for and can make sure
240             it doesn't break in a new version.
241            
242             =head2 The Rules
243            
244             The rules option may be either an arrayref or a hashref, the module doesn't care, but if you want to use regexps to specify the groups of tags to be handled
245             by the same rule you should use the array ref. The rules array/hash is made of pairs in form
246            
247             tagspecification => action
248            
249             where the tagspecification may be either a name of a tag, a string containing comma or pipe ( "|" ) delimited list of tag names
250             or a string containing a regexp enclosed in // optionaly followed by the regular expression modifiers or a qr// compiled regular expressions.
251             The tag names and tag name lists take precedence to the regexps, the regexps are (in case of arrayrefs only!!!) tested in the order in which
252             they are specified.
253            
254             These rules are evaluated/executed whenever a tag if fully parsed including all the content and child tags and they may access the content and attributes of the
255             specified tag plus the stuff produced by the rules evaluated for the child tags.
256            
257             The action may be either
258            
259             - an undef or empty string = ignore the tag and all its children
260             - a subroutine reference = the subroutine will be called to handle the tag data&contents
261             sub { my ($tagname, $attrHash, $contexArray, $parentDataArray, $parser) = @_; ...}
262             - one of the built in rules below
263            
264             =head3 Custom rules
265            
266             The subroutines in the rules specification receive five parameters:
267            
268             $rule->( $tag_name, \%attrs, \@context, \@parent_data, $parser)
269            
270             It's OK to destroy the first two parameters, but you should treat the other three as read only
271             or at least treat them with care!
272            
273             $tag_name = string containing the tag name
274             \%attrs = hash containing the attributes of the tag plus the _content key
275             containing the text content of the tag. If it's not a leaf tag it may
276             also contain the data returned by the rules invoked for the child tags.
277             \@context = an array containing the names of the tags enclosing the current
278             one. The parent tag name is the last element of the array. (READONLY!)
279             \@parent_data = an array containing the hashes with the attributes
280             and content read&produced for the enclosing tags so far.
281             You may need to access this for example to find out the version
282             of the format specified as an attribute of the root tag. You may
283             safely add, change or delete attributes in the hashes, but all bets
284             are off if you change the number or type of elements of this array!
285             $parser = the parser object
286             you may use $parser->{pad} or $parser->{parameters} to store any data
287             you need. The first is never touched by XML::Rules, the second is set to
288             the last argument of parse() or filter() methods and reset to undef
289             before those methods exit.
290            
291             The subroutine may decide to handle the data and return nothing or
292             tweak the data as necessary and return just the relevant bits. It may also
293             load more information from elsewhere based on the ids found in the XML
294             and provide it to the rules of the ancestor tags as if it was part of the XML.
295            
296             The possible return values of the subroutines are:
297            
298             1) nothing or undef or "" - nothing gets added to the parent tag's hash
299            
300             2) a single string - if the parent's _content is a string then the one produced by this rule is appended to the parent's _content.
301             If the parent's _content is an array, then the string is push()ed to the array.
302            
303             3) a single reference - if the parent's _content is a string then it's changed to an array containing the original string and this reference.
304             If the parent's _content is an array, then the string is push()ed to the array.
305            
306             4) an even numbered list - it's a list of key & value pairs to be added to the parent's hash.
307            
308             The handling of the attributes may be changed by adding '@', '%', '+', '*' or '.' before the attribute name.
309            
310             Without any "sigil" the key & value is added to the hash overwriting any previous values.
311            
312             The values for the keys starting with '@' are push()ed to the arrays referenced by the key name
313             without the @. If there already is an attribute of the same name then the value will be preserved and will become
314             the first element in the array.
315            
316             The values for the keys starting with '%' have to be either hash or array references. The key&value pairs
317             in the referenced hash or array will be added to the hash referenced by the key. This is nice for rows of tags like this:
318            
319            
320            
321            
322             if you specify the rule as
323            
324             field => sub { '%fields' => [$_[1]->{name} => $_[1]->{value}]}
325            
326             then the parent tag's has will contain
327            
328             fields => {
329             foo => 12,
330             bar => 24,
331             }
332            
333             The values for the keys starting with '+' are added to the current value, the ones starting with '.' are
334             appended to the current value and the ones starting with '*' multiply the current value.
335            
336             5) an odd numbered list - the last element is appended or push()ed to the parent's _content, the rest is handled as in the previous case.
337            
338             =head3 Builtin rules
339            
340             'content' = only the content of the tag is preserved and added to
341             the parent tag's hash as an attribute named after the tag. Equivalent to:
342             sub { $_[0] => $_[1]->{_content}}
343             'content trim' = only the content of the tag is preserved, trimmed and added to
344             the parent tag's hash as an attribute named after the tag
345             sub { s/^\s+//,s/\s+$// for ($_[1]->{_content}); $_[0] => $_[1]->{_content}}
346             'content array' = only the content of the tag is preserved and pushed
347             to the array pointed to by the attribute
348             sub { '@' . $_[0] => $_[1]->{_content}}
349             'as is' = the tag's hash is added to the parent tag's hash
350             as an attribute named after the tag
351             sub { $_[0] => $_[1]}
352             'as is trim' = the tag's hash is added to the parent tag's hash
353             as an attribute named after the tag, the content is trimmed
354             sub { $_[0] => $_[1]}
355             'as array' = the tag's hash is pushed to the attribute named after the tag
356             in the parent tag's hash
357             sub { '@'.$_[0] => $_[1]}
358             'as array trim' = the tag's hash is pushed to the attribute named after the tag
359             in the parent tag's hash, the content is trimmed
360             sub { '@'.$_[0] => $_[1]}
361             'no content' = the _content is removed from the tag's hash and the hash
362             is added to the parent's hash into the attribute named after the tag
363             sub { delete $_[1]->{_content}; $_[0] => $_[1]}
364             'no content array' = similar to 'no content' except the hash is pushed
365             into the array referenced by the attribute
366             'as array no content' = same as 'no content array'
367             'pass' = the tag's hash is dissolved into the parent's hash,
368             that is all tag's attributes become the parent's attributes.
369             The _content is appended to the parent's _content.
370             sub { %{$_[1]}}
371             'pass no content' = the _content is removed and the hash is dissolved
372             into the parent's hash.
373             sub { delete $_[1]->{_content}; %{$_[1]}}
374             'pass without content' = same as 'pass no content'
375             'raw' = the [tagname => attrs] is pushed to the parent tag's _content.
376             You would use this style if you wanted to be able to print
377             the parent tag as XML preserving the whitespace or other textual content
378             sub { [$_[0] => $_[1]]}
379             'raw extended' = the [tagname => attrs] is pushed to the parent tag's _content
380             and the attrs are added to the parent's attribute hash with ":$tagname" as the key
381             sub { (':'.$_[0] => $_[1], [$_[0] => $_[1]])};
382             'raw extended array' = the [tagname => attrs] is pushed to the parent tag's _content
383             and the attrs are pushed to the parent's attribute hash with ":$tagname" as the key
384             sub { ('@:'.$_[0] => $_[1], [$_[0] => $_[1]])};
385             'by ' = uses the value of the specified attribute as the key when adding the
386             attribute hash into the parent tag's hash. You can specify more names, in that case
387             the first found is used.
388             sub {delete($_[1]->{name}) => $_[1]}
389             'content by ' = uses the value of the specified attribute as the key when adding the
390             tags content into the parent tag's hash. You can specify more names, in that case
391             the first found is used.
392             sub {$_[1]->{name} => $_[1]->{_content}}
393             'no content by ' = uses the value of the specified attribute as the key when adding the
394             attribute hash into the parent tag's hash. The content is dropped. You can specify more names,
395             in that case the first found is used.
396             sub {delete($_[1]->{_content}); delete($_[1]->{name}) => $_[1]}
397             '==...' = replace the tag by the specified string. That is the string will be added to
398             the parent tag's _content
399             sub { return '...' }
400             '=...' = replace the tag contents by the specified string and forget the attributes.
401             sub { return $_[0] => '...' }
402             '' = forget the tag's contents (after processing the rules for subtags)
403             sub { return };
404            
405             I include the unnamed subroutines that would be equivalent to the builtin rule in case you need to add
406             some tests and then behave as if one of the builtins was used.
407            
408             =head3 Builtin rule modifiers
409            
410             You can add these modifiers to most rules, just add them to the string literal, at the end, separated from the base rule by a space.
411            
412             no xmlns = strip the namespace alias from the $_[0] (tag name)
413             remove(list,of,attributes) = remove all specified attributes (or keys produced by child tag rules) from the tag data
414             only(list,of,attributes) = filter the hash of attributes and keys+values produced by child tag rules in the tag data
415             to only include those specified here. In case you need to include the tag content do not forget to include
416             _content in the list!
417            
418             Not all modifiers make sense for all rules. For example if the rule is 'content', it's pointless to filter the attributes, because the only one
419             used will be the content anyway.
420            
421             The behaviour of the combination of the 'raw...' rules and the rule modifiers is UNDEFINED!
422            
423             =head3 Different rules for different paths to tags
424            
425             Since 0.19 it's possible to specify several actions for a tag if you need to do something different based on the path to the tag like this:
426            
427             tagname => [
428             'tag/path' => action,
429             '/root/tag/path' => action,
430             '/root/*/path' => action,
431             qr{^root/ns:[^/]+/par$} => action,
432             default_action
433             ],
434            
435             The path is matched against the list of parent tags joined by slashes.
436            
437             If you need to use more complex conditions to select the actions you have to use a single subroutine rule and implement
438             the conditions within that subroutine. You have access both to the list of enclosing tags and their attribute hashes (including
439             the data obtained from the rules of the already closed subtags of the enclosing tags.
440            
441            
442             =head2 The Start Rules
443            
444             Apart from the normal rules that get invoked once the tag is fully parsed, including the contents and child tags, you may want to
445             attach some code to the start tag to (optionaly) skip whole branches of XML or set up attributes and variables. You may set up
446             the start rules either in a separate parameter to the constructor or in the rules=> by prepending the tag name(s) by ^.
447            
448             These rules are in form
449            
450             tagspecification => undef / '' / 'skip' --> skip the element, including child tags
451             tagspecification => 1 / 'handle' --> handle the element, may be needed
452             if you specify the _default rule.
453             tagspecification => \&subroutine
454            
455             The subroutines receive the same parameters as for the "end tag" rules except of course the _content, but their return value is treated differently.
456             If the subroutine returns a false value then the whole branch enclosed by the current tag is skipped, no data are stored and no rules are
457             executed. You may modify the hash referenced by $attr.
458            
459             You may even tie() the hash referenced by $attr, for example in case you want to store the parsed data in a DBM::Deep.
460             In such case all the data returned by the immediate subtags of this tag will be stored in the DBM::Deep.
461             Make sure you do not overwrite the data by data from another occurance of the same tag if you return $_[1]/$attr from the rule!
462            
463             YourHugeTag => sub {
464             my %temp = %{$_[1]};
465             tie %{$_[1]}, 'DBM::Deep', $filename;
466             %{$_[1]} = %temp;
467             1;
468             }
469            
470             Both types of rules are free to store any data they want in $parser->{pad}. This property is NOT emptied
471             after the parsing!
472            
473             =head2 Whitespace handling
474            
475             There are two options that affect the whitespace handling: stripspaces and normalisespaces. The normalisespaces is a simple flag that controls
476             whether multiple spaces/tabs/newlines are collapsed into a single space or not. The stripspaces is more complex, it's a bit-mask,
477             an ORed combination of the following options:
478            
479             0 - don't remove whitespace around tags
480             (around tags means before the opening tag and after the closing tag, not in the tag's content!)
481             1 - remove whitespace before tags whose rules did not return any text content
482             (the rule specified for the tag caused the data of the tag to be ignored,
483             processed them already or added them as attributes to parent's \%attr)
484             2 - remove whitespace around tags whose rules did not return any text content
485             3 - remove whitespace around all tags
486            
487             0 - remove only whitespace-only content
488             (that is remove the whitespace around in this case " "
489             but not this one "blah blah")
490             4 - remove trailing/leading whitespace
491             (remove the whitespace in both cases above)
492            
493             0 - don't trim content
494             8 - do trim content
495             (That is for " blah " only pass to the rule {_content => 'blah'})
496            
497            
498             That is if you have a data oriented XML in which each tag contains either text content or subtags, but not both,
499             you want to use stripspaces => 3 or stripspaces => 3|4. This will not only make sure you don't need to bother
500             with the whitespace-only _content of the tags with subtags, but will also make sure you do not keep on wasting
501             memory while parsing a huge XML and processing the "twigs". Without that option the parent tag of
502             the repeated tag would keep on accumulating unneeded whitespace in its _content.
503            
504             =cut
505            
506             sub new {
507             my $class = shift;
508             my %params = @_;
509             croak "Please specify the rules=> for the parser!" unless $params{rules} and ref($params{rules});
510            
511             my $self = {rules => {}, start_rules => {}};
512             bless $self, $class;
513            
514             my @rules = (ref($params{rules}) eq 'HASH' ? %{$params{rules}} : @{$params{rules}}); # dereference and copy
515             delete $params{rules};
516            
517             my @start_rules;
518             if ($params{start_rules} and ref($params{start_rules})) {
519             @start_rules = ref($params{start_rules}) eq 'HASH' ? %{$params{start_rules}} : @{$params{start_rules}}; # dereference and copy
520             };
521             delete $params{start_rules};
522            
523             for (my $i=0; $i <= $#rules; $i+=2) {
524             next unless $rules[$i] =~ s/^\^//;
525             push @start_rules, splice( @rules, $i, 2);
526             $i-=2;
527             }
528            
529             $self->_split_rules( \@rules, 'rules', 'as is');
530             $self->_split_rules( \@start_rules, 'start_rules', 'handle');
531            
532             $self->{for_parser} = {};
533             { # extract the params for the XML::Parser::Expat constructor
534             my @for_parser = grep exists($params{$_}), qw(ProtocolEncoding Namespaces NoExpand Stream_Delimiter ErrorContext ParseParamEnt Base);
535             if (@for_parser) {
536             @{$self->{for_parser}}{@for_parser} = @params{@for_parser};
537             delete @params{@for_parser};
538             }
539             }
540            
541             $self->{namespaces} = delete($params{namespaces});
542             if (defined($self->{namespaces})) {
543             croak 'XML::Rules->new( ... , namespaces => ...HERE...) must be a hash reference!'
544             unless ref($self->{namespaces}) eq 'HASH';
545             $self->{xmlns_map} = {};
546             if (defined $self->{namespaces}{'*'}) {
547             if (! grep $_ eq $self->{namespaces}{'*'}, qw(warn die keep strip), '') {
548             # local $Carp::CarpLevel = 2;
549             croak qq{Unknown namespaces->{'*'} option '$self->{namespaces}{'*'}'!};
550             }
551             } else {
552             $self->{namespaces}{'*'} = 'warn';
553             }
554             }
555            
556             $self->{custom_escape} = delete($params{custom_escape}) if exists $params{custom_escape};
557             $self->{style} = delete($params{style}) || 'parser';
558            
559             my $handlers = delete $params{handlers}; # need to remove it so that it doesn't end up in opt
560            
561             $self->{opt}{lc $_} = $params{$_} for keys %params;
562            
563             delete $self->{opt}{encode} if $self->{opt}{encode} =~ /^utf-?8$/i;
564             delete $self->{opt}{output_encoding} if $self->{opt}{output_encoding} =~ /^utf-?8$/i;
565            
566             for (qw(normalisespace normalizespace normalizespaces)) {
567             last if defined($self->{opt}{normalisespaces});
568             $self->{opt}{normalisespaces} = $self->{opt}{$_};
569             delete $self->{opt}{$_};
570             }
571             $self->{opt}{normalisespaces} = 0 unless(defined($self->{opt}{normalisespaces}));
572             $self->{opt}{stripspaces} = 0 unless(defined($self->{opt}{stripspaces}));
573            
574             require 'Encode.pm' if ($self->{opt}{encode} or $self->{opt}{output_encoding});
575            
576             if ($handlers) {
577             croak qq{The 'handlers' option must be a hashref!} unless ref($handlers) eq 'HASH';
578             my %handlers = %{$handlers}; # shallow copy
579            
580             for (qw(Start End Char XMLDecl), ($self->{style} eq 'filter' ? qw(CdataStart CdataEnd) : ())) {
581             no strict 'refs';
582             if ($handlers{$_}) {
583             my $custom = $handlers{$_};
584             my $mine = "_$_"->($self);
585             # $handlers{$_} = sub {$custom->(@_); $mine->(@_)}
586             $handlers{$_} = sub {&$custom; &$mine}
587             } else {
588             $handlers{$_} = "_$_"->($self);
589             }
590             }
591            
592             for (qw(Start End Char XMLDecl)) {
593             $self->{basic_handlers}{$_} = delete $self->{other_handlers}{$_} if exists $self->{other_handlers}{$_};
594             }
595             $self->{normal_handlers} = [ %handlers ];
596             } else {
597             $self->{normal_handlers} = [
598             Start => _Start($self),
599             End => _End($self),
600             Char => _Char($self),
601             XMLDecl => _XMLDecl($self),
602             (
603             $self->{style} eq 'filter' ? (CdataStart => _CdataStart($self), CdataEnd => _CdataEnd ($self)) : ()
604             )
605             ];
606             }
607             $self->{ignore_handlers} = [
608             Start => _StartIgnore($self),
609             Char => undef,
610             End => _EndIgnore($self),
611             ];
612            
613             return $self;
614             }
615            
616             sub _split_rules {
617             my ($self, $rules, $type, $default) = @_;
618            
619             $self->{$type}{_default} = $default unless exists($self->{$type}{_default});
620            
621             while (@$rules) {
622             my ($tag, $code) = (shift(@$rules), shift(@$rules));
623            
624             if (ref($code) eq 'ARRAY') {
625             for( my $i = 0; $i < $#$code; $i+=2) {
626             $code->[$i] = _xpath2re($code->[$i]);
627             }
628             push @$code, $self->{$type}{_default} if @$code % 2 == 0; # add the default type if there's even number of items (path => code, path => code)
629             }
630            
631             if ($tag =~ m{^/([^/].*)/([imosx]*)$}) { # string with a '/regexp/'
632             if ($2) {
633             push @{$self->{$type.'_re'}}, qr/(?$2)$1/;
634             } else {
635             push @{$self->{$type.'_re'}}, qr/$1/;
636             }
637             push @{$self->{$type.'_re_code'}}, $code;
638             } elsif (ref($tag) eq 'Regexp') { # a qr// created regexp
639             push @{$self->{$type.'_re'}}, $tag;
640             push @{$self->{$type.'_re_code'}}, $code;
641             } elsif ($tag =~ /[,\|]/) { # a , or | separated list
642             if ($tag =~ s/^\^//) {
643             my @tags = split(/\s*[,\|]\s*/, $tag);
644             $self->{$type}{'^'.$_} = $code for (@tags);
645             } else {
646             my @tags = split(/\s*[,\|]\s*/, $tag);
647             $self->{$type}{$_} = $code for (@tags);
648             }
649             } else { # a single tag
650             $self->{$type}{$tag} = $code;
651             }
652             }
653             }
654            
655             sub _xpath2re {
656             my $s = shift;
657             return $s if ref($s);
658             for ($s) {
659             s/([\.\[\]+{}\-])/\\$1/g;
660             s{\*}{.+}g;
661             s{^//}{}s;
662             s{^/}{^}s;
663             }
664             return qr{$s$};
665             }
666            
667             sub _import_usage {
668             croak
669             "Usage: use XML::Rules subroutine_name => {method => '...', rules => {...}, ...};
670             or use XML::Rules inferRules => 'file/path.dtd';
671             or use XML::Rules inferRules => 'file/path.xml';
672             or use XML::Rules inferRules => ['file/path1.xml','file/path2.xml'];"
673             }
674            
675             sub import {
676             my $class = shift();
677             return unless @_;
678             _import_usage() unless scalar(@_) % 2 == 0;
679             my $caller_pack = caller;
680             while (@_) {
681             my $subname = shift;
682             my $params = shift;
683            
684             if (lc($subname) eq 'inferrules') {
685             require Data::Dumper;
686             local $Data::Dumper::Terse = 1;
687             local $Data::Dumper::Indent = 1;
688             if (ref $params) {
689             if (ref $params eq 'ARRAY') {
690             print Data::Dumper::Dumper(inferRulesFromExample(@$params))
691             } else {
692             _import_usage()
693             }
694             } elsif ($params =~ /\.dtd$/i) {
695             print Data::Dumper::Dumper(inferRulesFromDTD($params))
696             } else {
697             print Data::Dumper::Dumper(inferRulesFromExample($params))
698             }
699             } else {
700             _import_usage()
701             unless !ref($subname) and ref($params) eq 'HASH';
702            
703             my $method = delete $params->{method} || $subname;
704             if (!$params->{rules} && $method =~ /^[tT]oXML$/) {
705             $params->{rules} = {};
706             }
707             my $parser = XML::Rules->new(%$params);
708            
709             no strict 'refs';
710             *{$caller_pack . '::' . $subname} = sub {unshift @_, $parser; goto &$method; };
711             }
712             }
713             }
714            
715             sub skip_rest {
716             die "[XML::Rules] skip rest\n";
717             }
718            
719             sub return_nothing {
720             die "[XML::Rules] return nothing\n";
721             }
722            
723             sub return_this {
724             my $self = shift();
725             die bless({val => [@_]}, "XML::Rules::return_this");
726             }
727            
728             sub _run {
729             my $self = shift;
730             my $string = shift;
731            
732             croak "This parser is already busy parsing a document!" if exists $self->{parser};
733            
734             $self->{parameters} = shift;
735            
736             $self->{parser} = XML::Parser::Expat->new( %{$self->{for_parser}});
737            
738             $self->{parser}->setHandlers( @{$self->{normal_handlers}} );
739            
740             $self->{data} = [];
741             $self->{context} = [];
742             $self->{_ltrim} = [0];
743            
744             if (! eval {
745             $self->{parser}->parse($string) and 1;
746             }) {
747             my $err = $@;
748             undef $@;
749             if ($err =~ /^\[XML::Rules\] skip rest/) {
750             my (undef, $handler) = $self->{parser}->setHandlers(End => undef);
751             foreach my $tag (reverse @{$self->{context} = []}) {
752             $handler->( $self->{parser}, $tag);
753             }
754             } else {
755            
756             delete $self->{parameters};
757             $self->{parser}->release();
758            
759             $self->{data} = [];
760             $self->{context} = [];
761            
762             if ($err =~ /^\[XML::Rules\] return nothing/) {
763             return;
764             } elsif (ref $err eq 'XML::Rules::return_this') {
765             if (wantarray()) {
766             return @{$err->{val}}
767             } else {
768             return ${$err->{val}}[-1]
769             }
770             }
771            
772             $err =~ s/at \S+Rules\.pm line \d+$//
773             and croak $err or die $err;
774             }
775             };
776            
777             $self->{parser}->release();
778             delete $self->{parser};
779            
780             delete $self->{parameters};
781             my $data; # return the accumulated data, without keeping a copy inside the object
782             ($data, $self->{data}) = ($self->{data}[0], undef);
783             if (!defined(wantarray()) or ! keys(%$data)) {
784             return;
785            
786             } elsif (keys(%$data) == 1 and exists(${$data}{_content})) {
787             if (ref(${$data}{_content}) eq 'ARRAY' and @{${$data}{_content}} == 1) {
788             return ${${$data}{_content}}[0]
789             } else {
790             return ${$data}{_content}
791             }
792            
793             } else {
794             return $data;
795             }
796             }
797            
798            
799             sub parsestring;
800             *parsestring = \&parse;
801             sub parse_string;
802             *parse_string = \&parse;
803             sub parse {
804             if (!ref $_[0] and $_[0] eq 'XML::Rules') {
805             my $parser = &new; # get's the current @_
806             return sub {unshift @_, $parser; goto &parse;}
807             }
808             my $self = shift;
809             croak("This XML::Rules object may only be used as a filter!") if ($self->{style} eq 'filter');
810             $self->_run(@_);
811             }
812            
813             sub parse_file;
814             *parse_file = \&parsefile;
815             sub parsefile {
816             if (!ref $_[0] and $_[0] eq 'XML::Rules') {
817             my $parser = &new; # get's the current @_
818             return sub {unshift @_, $parser; goto &parsefile;}
819             }
820             my $self = shift;
821             croak("This XML::Rules object may only be used as a filter!") if ($self->{style} eq 'filter');
822             my $filename = shift;
823             open my $IN, '<', $filename or croak "Cannot open '$filename' for reading: $^E";
824             return $self->_run($IN, @_);
825             }
826            
827            
828             sub filterstring;
829             *filterstring = \&filter;
830             sub filter_string;
831             *filter_string = \&filter;
832             sub filter {
833             if (!ref $_[0] and $_[0] eq 'XML::Rules') {
834             my $parser = &new; # get's the current @_
835             return sub {unshift @_, $parser; goto &filter;}
836             }
837             my $self = shift;
838             croak("This XML::Rules object may only be used as a parser!") unless ($self->{style} eq 'filter');
839            
840             my $XML = shift;
841             $self->{FH} = shift || select(); # either passed or the selected filehandle
842             if (!ref($self->{FH})) {
843             if ($self->{FH} =~ /^main::(?:STDOUT|STDERR)$/) {
844             # yeah, select sometimes returns the name of the filehandle, not the filehandle itself. eg. "main::STDOUT"
845             no strict;
846             $self->{FH} = \*{$self->{FH}};
847             } else {
848             open my $FH, '>:utf8', $self->{FH} or croak(qq{Failed to open "$self->{FH}" for writing: $^E});
849             $self->{FH} = $FH;
850             }
851             } elsif (ref($self->{FH}) eq 'SCALAR') {
852             open my $FH, '>', $self->{FH};
853             $self->{FH} = $FH;
854             }
855             if (! $self->{opt}{skip_xml_version}) {
856             if ($self->{opt}{output_encoding}) {
857             print {$self->{FH}} qq{{opt}{output_encoding}"?>\n};
858             } else {
859             print {$self->{FH}} qq{\n};
860             }
861             }
862            
863             $self->_run($XML, @_);
864             print {$self->{FH}} "\n";
865             delete $self->{FH};
866             }
867            
868             sub filterfile {
869             if (!ref $_[0] and $_[0] eq 'XML::Rules') {
870             my $parser = &new; # get's the current @_
871             return sub {unshift @_, $parser; goto &filterfile;}
872             }
873             my $self = shift;
874             croak("This XML::Rules object may only be used as a parser!") unless ($self->{style} eq 'filter');
875            
876             my $filename = shift;
877             open my $IN, '<', $filename or croak "Cannot open '$filename' for reading: $^E";
878            
879             $self->{FH} = shift || select(); # either passed or the selected filehandle
880             if (!ref($self->{FH})) {
881             if ($self->{FH} =~ /^main::(?:STDOUT|STDERR)$/) {
882             # yeah, select sometimes returns the name of the filehandle, not the filehandle itself. eg. "main::STDOUT"
883             no strict;
884             $self->{FH} = \*{$self->{FH}};
885             } else {
886             open my $FH, '>:utf8', $self->{FH} or croak(qq{Failed to open "$self->{FH}" for writing: $^E});
887             $self->{FH} = $FH;
888             }
889             } elsif (ref($self->{FH}) eq 'SCALAR') {
890             open $self->{FH}, '>', $self->{FH};
891             }
892             if (! $self->{opt}{skip_xml_version}) {
893             if ($self->{opt}{output_encoding}) {
894             print {$self->{FH}} qq{{opt}{output_encoding}"?>\n};
895             } else {
896             print {$self->{FH}} qq{\n};
897             }
898             }
899             $self->_run($IN, @_);
900             print {$self->{FH}} "\n";
901             delete $self->{FH};
902             }
903            
904             ## chunk processing
905            
906             sub parse_chunk {
907             my $self = shift;
908             croak("This XML::Rules object may only be used as a filter!") if ($self->{style} eq 'filter');
909             $self->_parse_or_filter_chunk(@_);
910             }
911            
912             sub _parse_or_filter_chunk {
913             my $self = shift;
914             my $string = shift;
915            
916             if (exists $self->{parser}) {
917             if (ref($self->{parser}) ne 'XML::Parser::ExpatNB') {
918             croak "This parser is already busy parsing a full document!";
919             } else {
920             if (exists $self->{chunk_processing_result}) {
921             if (defined $self->{chunk_processing_result}) {
922             if (wantarray()) {
923             return @{$self->{chunk_processing_result}}
924             } else {
925             return ${$self->{chunk_processing_result}}[-1]
926             }
927             } else {
928             return;
929             }
930             }
931            
932             if (! eval {
933             $self->{parser}->parse_more($string) and 1;
934             }) {
935             my $err = $@;
936             undef $@;
937             if ($err =~ /^\[XML::Rules\] skip rest/) {
938             my (undef, $handler) = $self->{parser}->setHandlers(End => undef);
939             foreach my $tag (reverse @{$self->{context} = []}) {
940             $handler->( $self->{parser}, $tag);
941             }
942             } else {
943            
944             delete $self->{parameters};
945             $self->{parser}->release();
946            
947             $self->{data} = [];
948             $self->{context} = [];
949            
950             if ($err =~ /^\[XML::Rules\] return nothing/) {
951             $self->{chunk_processing_result} = undef;
952             return;
953             } elsif (ref $err eq 'XML::Rules::return_this') {
954             $self->{chunk_processing_result} = $err->{val};
955             if (wantarray()) {
956             return @{$err->{val}}
957             } else {
958             return ${$err->{val}}[-1]
959             }
960             }
961            
962             $err =~ s/at \S+Rules\.pm line \d+$//
963             and croak $err or die $err;
964             }
965             };
966             return 1;
967             }
968             }
969            
970             $self->{parameters} = shift;
971            
972             $self->{parser} = XML::Parser::ExpatNB->new( %{$self->{for_parser}});
973            
974             $self->{parser}->setHandlers( @{$self->{normal_handlers}} );
975            
976             $self->{data} = [];
977             $self->{context} = [];
978             $self->{_ltrim} = [0];
979            
980             return $self->_parse_or_filter_chunk($string);
981             }
982            
983             sub filter_chunk {
984             my $self = shift;
985             croak("This XML::Rules object may only be used as a parser!") unless ($self->{style} eq 'filter');
986            
987             my $XML = shift;
988            
989             if (!exists $self->{FH}) {
990             $self->{FH} = shift || select(); # either passed or the selected filehandle
991             if (!ref($self->{FH})) {
992             if ($self->{FH} =~ /^main::(?:STDOUT|STDERR)$/) {
993             # yeah, select sometimes returns the name of the filehandle, not the filehandle itself. eg. "main::STDOUT"
994             no strict;
995             $self->{FH} = \*{$self->{FH}};
996             } else {
997             open my $FH, '>:utf8', $self->{FH} or croak(qq{Failed to open "$self->{FH}" for writing: $^E});
998             $self->{FH} = $FH;
999             }
1000             } elsif (ref($self->{FH}) eq 'SCALAR') {
1001             open my $FH, '>', $self->{FH};
1002             $self->{FH} = $FH;
1003             }
1004             if (! $self->{opt}{skip_xml_version}) {
1005             if ($self->{opt}{output_encoding}) {
1006             print {$self->{FH}} qq{{opt}{output_encoding}"?>\n};
1007             } else {
1008             print {$self->{FH}} qq{\n};
1009             }
1010             }
1011             }
1012            
1013             $self->_parse_or_filter_chunk($XML, @_);
1014             }
1015            
1016             sub last_chunk {
1017             my $self = shift;
1018             my $string = shift;
1019             if (exists $self->{parser}) {
1020             if (ref($self->{parser}) ne 'XML::Parser::ExpatNB') {
1021             if (exists $self->{FH}) { # in case it was a filter ...
1022             print {$self->{FH}} "\n";
1023             delete $self->{FH};
1024             }
1025             croak "This parser is already busy parsing a full document!";
1026             } else {
1027             if (exists $self->{chunk_processing_result}) {
1028             if (exists $self->{FH}) { # in case it was a filter ...
1029             print {$self->{FH}} "\n";
1030             delete $self->{FH};
1031             }
1032             if (defined $self->{chunk_processing_result}) {
1033             if (wantarray()) {
1034             return @{$self->{chunk_processing_result}}
1035             } else {
1036             return ${$self->{chunk_processing_result}}[-1]
1037             }
1038             } else {
1039             return;
1040             }
1041             }
1042             }
1043             } elsif (defined $string) {
1044             return ($self->{style} eq 'filter') ? $self->filter($string,@_) : $self->parse($string); # no chunks in processing
1045             } else {
1046             return;
1047             }
1048            
1049             if (defined $string) {
1050             $self->_parse_or_filter_chunk($string);
1051             }
1052            
1053             $self->{parser}->parse_done();
1054             delete $self->{parser};
1055            
1056             if (exists $self->{FH}) {
1057             print {$self->{FH}} "\n";
1058             delete $self->{FH};
1059             }
1060            
1061             delete $self->{parameters};
1062             my $data; # return the accumulated data, without keeping a copy inside the object
1063             ($data, $self->{data}) = ($self->{data}[0], undef);
1064             if (!defined(wantarray()) or ! keys(%$data)) {
1065             return;
1066            
1067             } elsif (keys(%$data) == 1 and exists(${$data}{_content})) {
1068             if (ref(${$data}{_content}) eq 'ARRAY' and @{${$data}{_content}} == 1) {
1069             return ${${$data}{_content}}[0]
1070             } else {
1071             return ${$data}{_content}
1072             }
1073            
1074             } else {
1075             return $data;
1076             }
1077             }
1078            
1079             ##
1080            
1081             sub _XMLDecl {
1082             weaken( my $self = shift);
1083             return sub {
1084             my ( $Parser, $Version, $Encoding, $Standalone) = @_;
1085             $self->{opt}{original_encoding} = $Encoding
1086             }
1087             }
1088            
1089             =begin comment
1090            
1091             start tag
1092             & 3 = 3 -> rtrim parent's _content
1093             & 8 = 8 -> $ltrim = 1
1094            
1095             string content
1096             $ltrim -> ltrim the string, if not completely whitespace set $ltrim 0
1097            
1098             end tag
1099             & 8 = 8 -> rtrim own content
1100             & 3 = 3 -> $ltrim = 1
1101             empty_returned_content and & 3 in (1,2) -> rtrim parent content
1102             empty_returned_content and & 3 = 2 -> $ltrim
1103            
1104             =end comment
1105            
1106             =cut
1107            
1108             sub _rtrim {
1109             my ($self, $attr, $more) = @_;
1110            
1111             if ($more) {
1112             if (ref $attr->{_content}) {
1113             if (!ref($attr->{_content}[-1])) {
1114             $attr->{_content}[-1] =~ s/\s+$//s;
1115             pop @{$attr->{_content}} if $attr->{_content}[-1] eq '';
1116             delete $attr->{_content} unless @{$attr->{_content}};
1117             }
1118             } else {
1119             $attr->{_content} =~ s/\s+$//s;
1120             delete $attr->{_content} if $attr->{_content} eq '';
1121             }
1122             } else {
1123             if (ref $attr->{_content}) {
1124             if (!ref($attr->{_content}[-1]) and $attr->{_content}[-1] =~ /^\s*$/s) {
1125             pop @{$attr->{_content}} ;
1126             delete $attr->{_content} unless @{$attr->{_content}};
1127             }
1128             } else {
1129             delete $attr->{_content} if $attr->{_content} =~ /^\s*$/s;
1130             }
1131             }
1132             }
1133            
1134             sub _findUnusedNs {
1135             my ($self, $old_ns) = @_;
1136             my $new_ns = $old_ns;
1137             my %used;
1138             @used{values %{$self->{namespaces}}, values %{$self->{xmlns_map}}}= ();
1139             no warnings 'numeric';
1140             while (exists $used{$new_ns}) {
1141             $new_ns =~ s/(\d*)$/$1+1/e;
1142             }
1143             return $new_ns;
1144             }
1145            
1146             sub _Start {
1147             weaken( my $self = shift);
1148             my $encode = $self->{opt}{encode};
1149             my $output_encoding = $self->{opt}{output_encoding};
1150             return sub {
1151             my ( $Parser, $Element , %Attr) = @_;
1152            
1153             if (($self->{opt}{stripspaces} & 3) == 3) {
1154             #rtrim parent
1155             #print "rtrim parent content in _Start\n";
1156             if ($self->{data}[-1] and $self->{data}[-1]{_content}) {
1157             $self->_rtrim( $self->{data}[-1], ($self->{opt}{stripspaces} & 4));
1158             }
1159             }
1160             if ($self->{opt}{stripspaces} & 8) {
1161             #print "ltrim own content in _Start\n";
1162             push @{$self->{_ltrim}}, 2;
1163             } else {
1164             push @{$self->{_ltrim}}, 0;
1165             }
1166            
1167             if ($self->{namespaces}) {
1168             my %restore;
1169             foreach my $attr (keys %Attr) { # find the namespace aliases
1170             next unless $attr =~ /^xmlns:(.*)$/;
1171             my $orig_ns = $1;
1172             $restore{$orig_ns} = $self->{xmlns_map}{$orig_ns};
1173             if (! exists($self->{namespaces}{ $Attr{$attr} })) {
1174             if ($self->{namespaces}{'*'} eq 'die') {
1175             local $Carp::CarpLevel = 2;
1176             croak qq{Unexpected namespace "$Attr{$attr}" found in the XML!};
1177             } elsif ($self->{namespaces}{'*'} eq '') {
1178             delete $Attr{$attr};
1179             $self->{xmlns_map}{$orig_ns} = '';
1180             } elsif ($self->{namespaces}{'*'} eq 'strip') {
1181             delete $Attr{$attr};
1182             $self->{xmlns_map}{$orig_ns} = STRIP;
1183             } else {
1184             warn qq{Unexpected namespace "$Attr{$attr}" found in the XML!\n} if ($self->{namespaces}{'*'} eq 'warn');
1185             my $new_ns = $self->_findUnusedNs( $orig_ns);
1186             if ($orig_ns ne $new_ns) {
1187             $Attr{'xmlns:' . $new_ns} = delete $Attr{$attr};
1188             }
1189             $self->{xmlns_map}{$orig_ns} = $new_ns;
1190             }
1191             } else {
1192             $self->{xmlns_map}{$orig_ns} = $self->{namespaces}{ delete($Attr{$attr}) };
1193             }
1194             }
1195             if (exists $Attr{xmlns}) { # find the default namespace
1196             #print "Found a xmlns attribute in $Element!\n";
1197             $restore{''} = $self->{xmlns_map}{''};
1198             if (!exists($self->{namespaces}{ $Attr{xmlns} })) { # unknown namespace
1199             if ($self->{namespaces}{'*'} eq 'die') {
1200             local $Carp::CarpLevel = 2;
1201             croak qq{Unexpected namespace "$Attr{xmlns}" found in the XML!};
1202             } elsif ($self->{namespaces}{'*'} eq '') {
1203             delete $Attr{xmlns};
1204             } elsif ($self->{namespaces}{'*'} eq 'strip') {
1205             delete $Attr{xmlns};
1206             $self->{xmlns_map}{''} = STRIP;
1207             } else { # warn or keep
1208             warn qq{Unexpected namespace "$Attr{xmlns}" found in the XML!\n} if ($self->{namespaces}{'*'} eq 'warn');
1209             my $new_ns = $self->_findUnusedNs( 'ns1');
1210             $Attr{'xmlns:'.$new_ns} = delete $Attr{xmlns};
1211             $self->{xmlns_map}{''} = $new_ns;
1212             }
1213             } else {
1214             $self->{xmlns_map}{''} = $self->{namespaces}{ delete($Attr{xmlns}) };
1215             }
1216             }
1217             if (%restore) {
1218             push @{$self->{xmlns_restore}}, \%restore;
1219             } else {
1220             push @{$self->{xmlns_restore}}, undef;
1221             }
1222            
1223             if (%{$self->{xmlns_map}}) {
1224             #print "About to map aliases for $Element\n";
1225             # add or map the alias for the tag
1226             if ($Element =~ /^([^:]+):(.*)$/) {
1227             #print "Mapping an alias $1 for tag $Element\n";
1228             if (exists($self->{xmlns_map}{$1})) {
1229             if ($self->{xmlns_map}{$1} eq '') {
1230             $Element = $2 ;
1231             } else {
1232             $Element = $self->{xmlns_map}{$1} . ':' . $2 ;
1233             }
1234             }
1235             #print " -> $Element\n";
1236             } elsif (defined($self->{xmlns_map}{''}) and $self->{xmlns_map}{''} ne '') { # no namespace alias in the tag and there's a default
1237             #print "Adding default alias $self->{xmlns_map}{''}:\n";
1238             $Element = $self->{xmlns_map}{''} . ':' . $Element;
1239             #print " -> $Element\n";
1240             }
1241             if (substr( $Element, 0, length(STRIP)+1) eq STRIP.':') {%Attr = ()}
1242            
1243             # map the aliases for the attributes
1244             foreach my $attr (keys %Attr) {
1245             next unless $attr =~ /^([^:]+):(.*)$/; # there's an alias
1246             next unless exists($self->{xmlns_map}{$1}); # and there's a mapping
1247             if ($self->{xmlns_map}{$1} eq '') {
1248             $Attr{$2} = delete($Attr{$attr}); # rename the attribute
1249             } elsif ($self->{xmlns_map}{$1} eq STRIP) {
1250             delete($Attr{$attr}); # remove the attribute
1251             } else {
1252             $Attr{$self->{xmlns_map}{$1} . ':' . $2} = delete($Attr{$attr}); # rename the attribute
1253             }
1254             }
1255             }
1256             } # /of namespace handling
1257            
1258            
1259             my ( $start_rule, $end_rule) = map {
1260             if ($self->{$_}{$Element} and ref($self->{$_}{$Element}) ne 'ARRAY') {
1261             $self->{$_}{$Element}
1262             } else {
1263             $self->_find_rule( $_, $Element, $self->{context})
1264             }
1265             } ( 'start_rules', 'rules');
1266            
1267             if ($start_rule ne 'handle'
1268             and (
1269             !$start_rule
1270             or $start_rule eq 'skip'
1271             or !$start_rule->($Element,\%Attr, $self->{context}, $self->{data}, $self)
1272             )
1273             ) {
1274             # ignore the tag and the ones below
1275             $Parser->setHandlers(@{$self->{ignore_handlers}});
1276             $self->{ignore_level}=1;
1277            
1278             } else {
1279             # process the tag and the ones below
1280             if ($encode) {
1281             foreach my $value (values %Attr) {
1282             $value = Encode::encode( $encode, $value);
1283             }
1284             }
1285            
1286             push @{$self->{context}}, $Element;
1287             push @{$self->{data}}, \%Attr;
1288             $self->{lastempty} = 0;
1289            
1290             if ($self->{style} eq 'filter') {
1291             $self->{in_interesting}++ if ref($end_rule) or $end_rule =~ /^=/s; # is this tag interesting?
1292            
1293             if (! $self->{in_interesting}) { # it neither this tag not an acestor is interesting, just copy the tag
1294             #print "Start:R ".$Parser->recognized_string()."\n";
1295             #print "Start:O ".$Parser->original_string()."\n";
1296             #print "Start:R ".$Parser->recognized_string()."\n";
1297             #print "Start:O ".$Parser->original_string()."\n";
1298             if (! $output_encoding) {
1299             print {$self->{FH}} $Parser->recognized_string();
1300             } elsif ($output_encoding eq $self->{opt}{original_encoding}) {
1301             print {$self->{FH}} $Parser->original_string();
1302             } else {
1303             print {$self->{FH}} $self->toXML($Element, \%Attr, "don't close");
1304             }
1305             }
1306             }
1307            
1308             }
1309             }
1310             }
1311            
1312             sub _find_rule {
1313             my ($self, $type, $Element, $path) = @_;
1314            
1315             if (substr( $Element, 0, length(STRIP)+1) eq STRIP.':') {
1316             return ($type eq 'rules' ? STRIP_RULE : 'handle');
1317             }
1318            
1319             if (exists($self->{$type.'_re'})) {
1320             for(my $i = 0; $i < @{$self->{$type.'_re'}}; $i++) {
1321             if ($Element =~ $self->{$type.'_re'}[$i]) {
1322             $self->{$type}{$Element} = $self->{$type.'_re_code'}[$i];
1323             last;
1324             }
1325             }
1326             }
1327             if (! exists $self->{$type}{$Element}) {
1328             $self->{$type}{$Element} = $self->{$type}{_default};
1329             }
1330            
1331             if (ref $self->{$type}{$Element} eq 'ARRAY') {
1332             $path = join( '/', @$path);
1333             for(my $i=0; $i < $#{$self->{$type}{$Element}}; $i+=2) {
1334             if ($path =~ $self->{$type}{$Element}[$i]) {
1335             return $self->{$type}{$Element}[$i+1];
1336             }
1337             }
1338             return $self->{$type}{$Element}[-1];
1339             } else {
1340             return $self->{$type}{$Element};
1341             }
1342             }
1343            
1344             sub _CdataStart {
1345             my $self = shift;
1346             my $encode = $self->{opt}{encode};
1347             return $self->{style} eq 'filter'
1348             ? sub {
1349             my ( $Parser, $String) = @_;
1350            
1351             return if (substr( $self->{context}[-1], 0, length(STRIP)+1) eq STRIP.':');
1352            
1353             if (! $self->{in_interesting}) {
1354             print {$self->{FH}} '
1355             }
1356             }
1357             : undef;
1358             }
1359            
1360             sub _CdataEnd {
1361             my $self = shift;
1362             my $encode = $self->{opt}{encode};
1363             return $self->{style} eq 'filter'
1364             ? sub {
1365             my ( $Parser, $String) = @_;
1366            
1367             return if (substr( $self->{context}[-1], 0, length(STRIP)+1) eq STRIP.':');
1368            
1369             if (! $self->{in_interesting}) {
1370             print {$self->{FH}} ']]>';
1371             }
1372             }
1373             : undef;
1374             }
1375            
1376             sub _Char {
1377             weaken( my $self = shift);
1378             my $encode = $self->{opt}{encode};
1379             return sub {
1380             my ( $Parser, $String) = @_;
1381            
1382             return if (substr( $self->{context}[-1], 0, length(STRIP)+1) eq STRIP.':');
1383            
1384             if ($self->{style} eq 'filter' and ! $self->{in_interesting}) {
1385             if (! $self->{opt}{output_encoding}) {
1386             print {$self->{FH}} $Parser->recognized_string();
1387             } elsif ($self->{opt}{output_encoding} eq $self->{opt}{original_encoding}) {
1388             print {$self->{FH}} $Parser->original_string();
1389             } else {
1390             print {$self->{FH}} encode($self->{opt}{output_encoding}, $Parser->recognized_string());
1391             }
1392             return;
1393             }
1394            
1395             if ($encode) {
1396             $String = Encode::encode( $encode, $String);
1397             }
1398            
1399             if ($self->{_ltrim}[-1]) {
1400             #print "ltrim in $self->{context}[-1] ($String)\n";
1401             if ($self->{_ltrim}[-1] == 2) {
1402             $String =~ s/^\s+//s;
1403             return if $String eq '';
1404             } else {
1405             return if $String =~ /^\s*$/s;
1406             }
1407             $self->{_ltrim}[-1] = 0;
1408             #print " ($String)\n";
1409             }
1410             $String =~ s/\s+/ /gs if ($self->{opt}{normalisespaces});
1411            
1412             if (!exists $self->{data}[-1]{_content}) {
1413             $self->{data}[-1]{_content} = $String;
1414             } elsif (!ref $self->{data}[-1]{_content}) {
1415             if ($self->{opt}{normalisespaces} and $self->{data}[-1]{_content} =~ /\s$/ and $String =~ /^\s/) {
1416             $String =~ s/^\s+//s;
1417             }
1418             $self->{data}[-1]{_content} .= $String;
1419             } else {
1420             if (ref $self->{data}[-1]{_content}[-1]) {
1421             push @{$self->{data}[-1]{_content}}, $String;
1422             } else {
1423             if ($self->{opt}{normalisespaces} and $self->{data}[-1]{_content}[-1] =~ /\s$/ and $String =~ /^\s/) {
1424             $String =~ s/^\s+//s;
1425             }
1426             $self->{data}[-1]{_content}[-1] .= $String;
1427             }
1428             }
1429             }
1430             }
1431            
1432             sub _End {
1433             weaken( my $self = shift);
1434             return sub {
1435             my ( $Parser, $Element) = @_;
1436             $Element = pop @{$self->{context}}; # the element name may have been mangled by XMLNS aliasing
1437            
1438             if ($self->{opt}{stripspaces} & 8) {
1439             #print "rtrim own content\n";
1440             if ($self->{data}[-1] and $self->{data}[-1]{_content}) {
1441             $self->_rtrim( $self->{data}[-1], 1);
1442             }
1443             }
1444             pop(@{$self->{_ltrim}});
1445            
1446             if ($self->{namespaces}) {
1447             if (my $restore = pop @{$self->{xmlns_restore}}) { # restore the old default namespace and/or alias mapping
1448             while (my ($their, $our) = each %$restore) {
1449             if (defined($our)) {
1450             $self->{xmlns_map}{$their} = $our;
1451             } else {
1452             delete $self->{xmlns_map}{$their};
1453             }
1454             }
1455             }
1456             }
1457            
1458             my ($rule) = map {
1459             if ($self->{$_}{$Element} and ref($self->{$_}{$Element}) ne 'ARRAY') {
1460             $self->{$_}{$Element}
1461             } else {
1462             $self->_find_rule( $_, $Element, $self->{context})
1463             }
1464             } ('rules');
1465            
1466             my $data = pop @{$self->{data}};
1467            
1468             my @results;
1469             if (ref $rule or $rule =~ /^=/s) {
1470             if ($rule =~ /^==(.*)$/s) { # change the whole tag to a string
1471             @results = ($1);
1472             } elsif ($rule =~ /^=(.*)$/s) { # change the contents to a string
1473             @results = ($Element => $1);
1474             } else {
1475             @results = $rule->($Element, $data, $self->{context}, $self->{data}, $self);
1476             }
1477            
1478             if ($self->{style} eq 'filter') {
1479            
1480             $self->{in_interesting}--;
1481             if (!$self->{in_interesting}) {
1482             if (@{$self->{data}}) {
1483             print {$self->{FH}} $self->escape_value($self->{data}[-1]{_content});
1484             delete $self->{data}[-1]{_content};
1485             }
1486             my $base;
1487             if ($self->{opt}{ident} ne '') {
1488             $base = $self->{opt}{ident} x scalar(@{$self->{context}});
1489             }
1490             @results and $results[0] =~ s/^[\@%\+\*\.]//;
1491             while (@results) {
1492             #use Data::Dumper;
1493             #print "\@results=".Dumper(\@results)."\n";
1494             if (ref($results[0])) {
1495             croak(ref($results[0]) . " not supported as the return value of a filter") unless ref($results[0]) eq 'ARRAY';
1496             if (@{$results[0]} ==2 and ref($results[0][1]) eq 'HASH') {
1497             print {$self->{FH}} $self->toXML(@{$results[0]}[0,1], 0, $self->{opt}{ident}, $base);
1498             } else {
1499             foreach my $item (@{$results[0]}) {
1500             if (ref($item)) {
1501             croak(ref($item) . " not supported in the return value of a filter") unless ref($item) eq 'ARRAY';
1502             croak("Empty array not supported in the return value of a filter") unless @$item;
1503             if (@$item <= 2) {
1504             print {$self->{FH}} $self->toXML(@{$item}[0,1], 0, $self->{opt}{ident}, $base);
1505             } else { # we suppose the 3rd and following elements are parameters to ->toXML()
1506             print {$self->{FH}} $self->toXML(@$item);
1507             }
1508             } else {
1509             print {$self->{FH}} $self->escape_value($item);
1510             }
1511             }
1512             }
1513             shift(@results);
1514             } else {
1515             if (@results == 1) {
1516             print {$self->{FH}} $self->escape_value($results[0]);
1517             @results = ();last;
1518             } else {
1519             print {$self->{FH}} $self->toXML(shift(@results), shift(@results), 0, $self->{opt}{ident}, $base);
1520             }
1521             }
1522             }
1523             }
1524             }
1525             } elsif ($self->{style} eq 'filter' and ! $self->{in_interesting}) {
1526             #print "End: \$Element=$Element; \$Parser->recognized_string()=".$Parser->recognized_string()."; \$Parser->original_string()=".$Parser->original_string()."\n";
1527             die "Unexpected \$data->{_content}={$data->{_content}} in filter outside interesting nodes!\n" if $data->{_content} ne '';
1528             if (! $self->{opt}{output_encoding}) {
1529             print {$self->{FH}} $Parser->recognized_string();
1530             } elsif ($self->{opt}{output_encoding} eq $self->{opt}{original_encoding}) {
1531             print {$self->{FH}} $Parser->original_string();
1532             } else {
1533             print {$self->{FH}} encode($self->{opt}{output_encoding}, $Parser->recognized_string());
1534             }
1535             # print {$self->{FH}} $self->escape_value($data->{_content})."";
1536            
1537             } else { # a predefined rule
1538            
1539             if ($rule =~ s/(?:^| )no\s+xmlns$//) {
1540             $Element =~ s/^\w+://;
1541             $rule = 'as is' if $rule eq '';
1542             }
1543             if ($rule =~ s/^((?:(?:no\s+)?content\s+)?by\s+(\S+))\s+remove\(([^\)]+)\)$/$1/) {
1544             my $keep = $2;
1545             my @remove = split /\s*,\s*/, $3;
1546             foreach (@remove) {
1547             next if $_ eq $keep;
1548             delete $data->{$_};
1549             }
1550             $rule = 'as is' if $rule eq '';
1551             } elsif ($rule =~ s/\s*\bremove\(([^\)]+)\)//) {
1552             my @remove = split /\s*,\s*/, $1;
1553             foreach (@remove) {
1554             delete $data->{$_};
1555             }
1556             $rule = 'as is' if $rule eq '';
1557             }
1558             if ($rule =~ s/^((?:(?:no\s+)?content\s+)?by\s+(\S+))\s+only\(([^\)]+)\)$/$1/) {
1559             my %only;
1560             $only{$2} = undef;
1561             @only{split /\s*,\s*/, $3} = ();
1562             foreach (keys %$data) {
1563             delete $data->{$_} unless exists $only{$_};
1564             }
1565             $rule = 'as is' if $rule eq '';
1566             } elsif ($rule =~ s/\s*\bonly\(([^\)]+)\)//) {
1567             my %only;
1568             @only{split /\s*,\s*/, $1} = ();
1569             foreach (keys %$data) {
1570             delete $data->{$_} unless exists $only{$_};
1571             }
1572             $rule = 'as is' if $rule eq '';
1573             }
1574            
1575             if ($rule eq '') {
1576             @results = ();
1577             } elsif ($rule eq 'content') {
1578             @results = ($Element => $data->{_content});
1579             } elsif ($rule eq 'content trim') {
1580             s/^\s+//,s/\s+$// for ($data->{_content});
1581             @results = ($Element => $data->{_content});
1582             } elsif ($rule eq 'content array') {
1583             @results = ('@'.$Element => $data->{_content});
1584             } elsif ($rule eq 'as is') {
1585             @results = ($Element => $data);
1586             } elsif ($rule eq 'as is trim') {
1587             s/^\s+//,s/\s+$// for ($data->{_content});
1588             @results = ($Element => $data);
1589             } elsif ($rule eq 'as array') {
1590             @results = ('@'.$Element => $data);
1591             } elsif ($rule eq 'as array trim') {
1592             s/^\s+//,s/\s+$// for ($data->{_content});
1593             @results = ('@'.$Element => $data);
1594             } elsif ($rule eq 'no content') {
1595             delete ${$data}{_content}; @results = ($Element => $data);
1596             } elsif ($rule eq 'no content array' or $rule eq 'as array no content') {
1597             delete ${$data}{_content}; @results = ('@' . $Element => $data);
1598            
1599             } elsif ($rule eq 'pass') {
1600             @results = (%$data);
1601             } elsif ($rule eq 'pass trim') {
1602             s/^\s+//,s/\s+$// for ($data->{_content});
1603             @results = (%$data);
1604             } elsif ($rule eq 'pass no content' or $rule eq 'pass without content') {
1605             delete ${$data}{_content}; @results = (%$data);
1606             } elsif ($rule =~ /^pass\s+(\S+)$/) {
1607             my %allowed = map {$_ => 1} split( /\s*,\s*/, $1);
1608             @results = map { $_ => $data->{$_} } grep {$allowed{$_}} keys %allowed;
1609            
1610             } elsif ($rule eq 'raw') {
1611             @results = [$Element => $data];
1612            
1613             } elsif ($rule eq 'raw extended') {
1614             @results = (':'.$Element => $data, [$Element => $data]);
1615            
1616             } elsif ($rule eq 'raw extended array') {
1617             @results = ('@:'.$Element => $data, [$Element => $data]);
1618            
1619             } elsif ($rule =~ /^((?:no )?content )?by\s+(\S+)$/) {
1620             my ($cnt,$attr) = ($1,$2);
1621             if ($cnt eq 'no content ') {
1622             delete $data->{_content};
1623             }
1624             if ($attr =~ /,/) {
1625             my @attr = split /,/, $attr;
1626             foreach (@attr) {
1627             next unless exists ($data->{$_});
1628             if ($cnt eq 'content ') {
1629             @results = ($data->{$_} => $data->{_content})
1630             } else {
1631             @results = (delete $data->{$_} => $data)
1632             }
1633             last;
1634             }
1635             } else {
1636             if ($cnt eq 'content ') {
1637             @results = ($data->{$attr} => $data->{_content})
1638             } else {
1639             @results = (delete $data->{$attr} => $data);
1640             }
1641             }
1642            
1643             } else {
1644             croak "Unknown predefined rule '$rule'!";
1645             }
1646             }
1647            
1648             if (! @results or (@results % 2 == 0) or $results[-1] eq '') {
1649             if ($self->{opt}{stripspaces} & 3 and @{$self->{data}} and $self->{data}[-1]{_content}) { # stripping some spaces, it's not root and it did not return content
1650             #print "maybe stripping some spaces in $Element, it's not root and it did not return content\n";
1651             if (($self->{opt}{stripspaces} & 3) < 3 and $self->{data}[-1]{_content}) {
1652             # rtrim parent content
1653             #print " yes, rtrim parent '$self->{data}[-1]{_content}'\n";
1654             $self->_rtrim( $self->{data}[-1], ($self->{opt}{stripspaces} & 4));
1655             #print " result '$self->{data}[-1]{_content}'\n";
1656             }
1657            
1658             $self->{_ltrim}[-1] = (($self->{opt}{stripspaces} & 4) ? 2 : 1)
1659             if ($self->{opt}{stripspaces} & 3) == 2;
1660             }
1661             } else {
1662             $self->{_ltrim}[-1] = 0;
1663             }
1664             if (($self->{opt}{stripspaces} & 3) == 3) {
1665             $self->{_ltrim}[-1] = (($self->{opt}{stripspaces} & 4) ? 2 : 1);
1666             }
1667            
1668            
1669             return unless scalar(@results) or scalar(@results) == 1 and ($results[0] eq '' or !defined($results[0]));
1670            
1671             @{$self->{data}} = ({}) unless @{$self->{data}}; # oops we are already closing the root tag! We do need there to be at least one hashref in $self->{data}
1672            
1673             if (scalar(@results) % 2) {
1674             # odd number of items, last is content
1675             my $value = pop(@results);
1676             _add_content( $self->{data}[-1], $value);
1677             }
1678            
1679             while (@results) {
1680             my ($key, $value) = ( shift(@results), shift(@results));
1681             if ($key eq '_content') {
1682             _add_content( $self->{data}[-1], $value);
1683             } elsif ($key =~ s/^\@//) {
1684             if (exists($self->{data}[-1]{$key}) and ref($self->{data}[-1]{$key}) ne 'ARRAY') {
1685             $self->{data}[-1]{$key} = [$self->{data}[-1]{$key}, $value];
1686             } else {
1687             push @{$self->{data}[-1]{$key}}, $value;
1688             }
1689             } elsif ($key =~ s/^\+//) {
1690             if (exists($self->{data}[-1]{$key})) {
1691             $self->{data}[-1]{$key} += $value;
1692             } else {
1693             $self->{data}[-1]{$key} = $value;
1694             }
1695             } elsif ($key =~ s/^\*//) {
1696             if (exists($self->{data}[-1]{$key})) {
1697             $self->{data}[-1]{$key} *= $value;
1698             } else {
1699             $self->{data}[-1]{$key} = $value;
1700             }
1701             } elsif ($key =~ s/^\.//) {
1702             if (exists($self->{data}[-1]{$key})) {
1703             $self->{data}[-1]{$key} .= $value;
1704             } else {
1705             $self->{data}[-1]{$key} = $value;
1706             }
1707             # } elsif ($key =~ s/^\%//) {
1708             # if (exists($self->{data}[-1]{$key})) {
1709             # if (ref($value) eq 'HASH') {
1710             # %{$self->{data}[-1]{$key}} = (%{$self->{data}[-1]{$key}}, %$value);
1711             # } elsif (ref($value) eq 'ARRAY') {
1712             # %{$self->{data}[-1]{$key}} = (%{$self->{data}[-1]{$key}}, @$value);
1713             # } else {
1714             # croak "The value of the rule return \%$key must be a hash or array ref!";
1715             # }
1716             } elsif ($key =~ s/^\%//) {
1717             if (exists($self->{data}[-1]{$key})) {
1718             if (ref($value) eq 'HASH') {
1719             if ($self->{opt}{warnoverwrite}) {
1720             foreach my $subkey (%$value) {
1721             warn "The key '$subkey' already exists in attribute $key for tag $self->{context}[-1].\n old value: $self->{data}[-1]{$key}{$subkey}\n new value: $value->{$subkey}\n"
1722             if (exists $self->{data}[-1]{$key}{$subkey} and $self->{data}[-1]{$key}{$subkey} ne $value->{$subkey});
1723             $self->{data}[-1]{$key}{$subkey} = $value->{$subkey};
1724             }
1725             } else {
1726             %{$self->{data}[-1]{$key}} = (%{$self->{data}[-1]{$key}}, %$value);
1727             }
1728             } elsif (ref($value) eq 'ARRAY') {
1729             if ($self->{opt}{warnoverwrite}) {
1730             $value = {@$value}; # convert to hash
1731             foreach my $subkey (%$value) {
1732             warn "The key '$subkey' already exists in attribute $key for tag $self->{context}[-1].\n old value: $self->{data}[-1]{$key}{$subkey}\n new value: $value->{$subkey}\n"
1733             if (exists $self->{data}[-1]{$key}{$subkey} and $self->{data}[-1]{$key}{$subkey} ne $value->{$subkey});
1734             $self->{data}[-1]{$key}{$subkey} = $value->{$subkey};
1735             }
1736             } else {
1737             %{$self->{data}[-1]{$key}} = (%{$self->{data}[-1]{$key}}, @$value);
1738             }
1739             } else {
1740             croak "The value of the rule return \%$key must be a hash or array ref!";
1741             }
1742             } else {
1743             if (ref($value) eq 'HASH') {
1744             $self->{data}[-1]{$key} = $value;
1745             } elsif (ref($value) eq 'ARRAY') {
1746             $self->{data}[-1]{$key} = {@$value};
1747             } else {
1748             croak "The value of the rule return \%$key must be a hash or array ref!";
1749             }
1750             }
1751             } else {
1752             warn "The attribute '$key' already exists for tag $self->{context}[-1].\n old value: $self->{data}[-1]{$key}\n new value: $value\n"
1753             if ($self->{opt}{warnoverwrite} and exists $self->{data}[-1]{$key} and $self->{data}[-1]{$key} ne $value);
1754             $self->{data}[-1]{$key} = $value;
1755             }
1756             }
1757             }
1758             }
1759            
1760             sub _StartIgnore {
1761             weaken( my $self = shift);
1762             return sub {
1763             $self->{ignore_level}++
1764             }
1765             }
1766            
1767             sub _EndIgnore {
1768             weaken( my $self = shift);
1769             return sub {
1770             return if --$self->{ignore_level};
1771            
1772             $self->{parser}->setHandlers(@{$self->{normal_handlers}})
1773             }
1774             }
1775            
1776             sub _add_content {
1777             my ($hash, $value) = @_;
1778             if (ref($value)) {
1779             if (ref($hash->{_content})) {
1780             # both are refs, push to @_content
1781             push @{$hash->{_content}}, $value;
1782             } elsif (exists($hash->{_content})) {
1783             # result is ref, _content is not -> convert to an arrayref containing old _content and result
1784             $hash->{_content} = [ $hash->{_content}, $value]
1785             } else {
1786             # result is ref, _content is not present
1787             $hash->{_content} = [ $value]
1788             }
1789             } else {
1790             if (ref($hash->{_content})) {
1791             # _content is an arrayref, value is a string
1792             if (ref $hash->{_content}[-1]) {
1793             # the last element is a ref -> push
1794             push @{$hash->{_content}}, $value;
1795             } else {
1796             # the last element is a string -> concatenate
1797             $hash->{_content}[-1] .= $value;
1798             }
1799             } else {
1800             # neither is ref, concatenate
1801             $hash->{_content} .= $value;
1802             }
1803             }
1804             }
1805            
1806             =head1 INSTANCE METHODS
1807            
1808             =head2 parse
1809            
1810             $parser->parse( $string [, $parameters]);
1811             $parser->parse( $IOhandle [, $parameters]);
1812            
1813             Parses the XML in the string or reads and parses the XML from the opened IO handle,
1814             executes the rules as it encounters the closing tags and returns the resulting structure.
1815            
1816             The scalar or reference passed as the second parameter to the parse() method is assigned to
1817             $parser->{parameters} for the parsing of the file or string. Once the XML is parsed the key is
1818             deleted. This means that the $parser does not retain a reference to the $parameters after the parsing.
1819            
1820             =head2 parsestring
1821            
1822             $parser->parsestring( $string [, $parameters]);
1823            
1824             Just an alias to ->parse().
1825            
1826             =head2 parse_string
1827            
1828             $parser->parse_string( $string [, $parameters]);
1829            
1830             Just an alias to ->parse().
1831            
1832             =head2 parsefile
1833            
1834             $parser->parsefile( $filename [, $parameters]);
1835            
1836             Opens the specified file and parses the XML and executes the rules as it encounters
1837             the closing tags and returns the resulting structure.
1838            
1839             =head2 parse_file
1840            
1841             $parser->parse_file( $filename [, $parameters]);
1842            
1843             Just an alias to ->parsefile().
1844            
1845             =head2 parse_chunk
1846            
1847             while (my $chunk = read_chunk_of_data()) {
1848             $parser->parse_chunk($chunk);
1849             }
1850             my $data = $parser->last_chunk();
1851            
1852             This method allows you to process the XML in chunks as you receive them. The chunks do not need to be in any
1853             way valid ... it's fine if the chunk ends in the middle of a tag or attribute.
1854            
1855             If you need to set the $parser->{parameters}, pass it to the first call to parse_chunk() the same way you would to parse().
1856             The first chunk may be empty so if you need to set up the parameters, but read the chunks in a loop or in a callback, you can do this:
1857            
1858             $parser->parse_chunk('', {foo => 15, bar => "Hello World!"});
1859             while (my $chunk = read_chunk_of_data()) {
1860             $parser->parse_chunk($chunk);
1861             }
1862             my $data = $parser->last_chunk();
1863            
1864             or
1865            
1866             $parser->parse_chunk('', {foo => 15, bar => "Hello World!"});
1867             $ua->get($url, ':content_cb' => sub { my($data, $response, $protocol) = @_; $parser->parse_chunk($data); return 1 });
1868             my $data = $parser->last_chunk();
1869            
1870             The parse_chunk() returns 1 or dies, to get the accumulated data, you need to call last_chunk(). You will want to either agressively trim the data remembered
1871             or handle parts of the file using custom rules as the XML is being parsed.
1872            
1873             =head2 filter
1874            
1875             $parser->filter( $string);
1876             $parser->filter( $string, $LexicalOutputIOhandle [, $parameters]);
1877             $parser->filter( $LexicalInputIOhandle, $LexicalOutputIOhandle [, $parameters]);
1878             $parser->filter( $string, \*OutputIOhandle [, $parameters]);
1879             $parser->filter( $LexicalInputIOhandle, \*OutputIOhandle [, $parameters]);
1880             $parser->filter( $string, $OutputFilename [, $parameters]);
1881             $parser->filter( $LexicalInputIOhandle, $OutputFilename [, $parameters]);
1882             $parser->filter( $string, $StringReference [, $parameters]);
1883             $parser->filter( $LexicalInputIOhandle, $StringReference [, $parameters]);
1884            
1885             Parses the XML in the string or reads and parses the XML from the opened IO handle,
1886             copies the tags that do not have a subroutine rule specified and do not occure under such a tag,
1887             executes the specified rules and prints the results to select()ed filehandle, $OutputFilename or
1888             $OutputIOhandle or stores them in the scalar referenced by $StringReference using the ->ToXML() method.
1889            
1890             The scalar or reference passed as the third parameter to the filter() method is assigned to
1891             $parser->{parameters} for the parsing of the file or string. Once the XML is parsed the key is
1892             deleted. This means that the $parser does not retain a reference to the $parameters after the parsing.
1893            
1894             =head2 filterstring
1895            
1896             $parser->filterstring( ...);
1897            
1898             Just an alias to ->filter().
1899            
1900             =head2 filter_string
1901            
1902             $parser->filter_string( ...);
1903            
1904             Just an alias to ->filter().
1905            
1906             =head2 filterfile
1907            
1908             $parser->filterfile( $filename);
1909             $parser->filterfile( $filename, $LexicalOutputIOhandle [, $parameters]);
1910             $parser->filterfile( $filename, \*OutputIOhandle [, $parameters]);
1911             $parser->filterfile( $filename, $OutputFilename [, $parameters]);
1912            
1913             Parses the XML in the specified file, copies the tags that do not have a subroutine rule specified
1914             and do not occure under such a tag, executes the specified rules and prints the results to select()ed
1915             filehandle, $OutputFilename or $OutputIOhandle or stores them in the scalar
1916             referenced by $StringReference.
1917            
1918             The scalar or reference passed as the third parameter to the filter() method is assigned to
1919             $parser->{parameters} for the parsing of the file or string. Once the XML is parsed the key is
1920             deleted. This means that the $parser does not retain a reference to the $parameters after the parsing.
1921            
1922             =head2 filter_file
1923            
1924             Just an alias to ->filterfile().
1925            
1926             =head2 filter_chunk
1927            
1928             while (my $chunk = read_chunk_of_data()) {
1929             $parser->filter_chunk($chunk);
1930             }
1931             $parser->last_chunk();
1932            
1933             This method allows you to process the XML in chunks as you receive them. The chunks do not need to be in any
1934             way valid ... it's fine if the chunk ends in the middle of a tag or attribute.
1935            
1936             If you need to set the file to store the result to (default is the select()ed filehandle) or set the $parser->{parameters}, pass it to the first call to filter_chunk() the same way you would to filter().
1937             The first chunk may be empty so if you need to set up the parameters, but read the chunks in a loop or in a callback, you can do this:
1938            
1939             $parser->filter_chunk('', "the-filtered.xml", {foo => 15, bar => "Hello World!"});
1940             while (my $chunk = read_chunk_of_data()) {
1941             $parser->filter_chunk($chunk);
1942             }
1943             $parser->last_chunk();
1944            
1945             or
1946            
1947             $parser->filter_chunk('', "the_filtered.xml", {foo => 15, bar => "Hello World!"});
1948             $ua->get($url, ':content_cb' => sub { my($data, $response, $protocol) = @_; $parser->filter_chunk($data); return 1 });
1949             filter_chunk$parser->last_chunk();
1950            
1951             The filter_chunk() returns 1 or dies, you need to call last_chunk() to sign the end of the data and close the filehandles and clean the parser status.
1952             Make sure you do not set a rule for the root tag or other tag containing way too much data. Keep in mind that even if the parser works as a filter,
1953             the data for a custom rule must be kept in memory for the rule to execute!
1954            
1955             =head2 last_chunk
1956            
1957             my $data = $parser->last_chunk();
1958             my $data = $parser->last_chunk($the_last_chunk_contents);
1959            
1960             Finishes the processing of a XML fed to the parser in chunks. In case of the parser style, returns the accumulated data. In case of the filter style,
1961             flushes and closes the output file. You can pass the last piece of the XML to this method or call it without parameters if all the data was passed to parse_chunk()/filter_chunk().
1962            
1963             You HAVE to execute this method after call(s) to parse_chunk() or filter_chunk()! Until you do, the parser will refuse to process full documents and
1964             expect another call to parse_chunk()/filter_chunk()!
1965            
1966             =cut
1967            
1968             sub escape_value {
1969             my($self, $data, $level) = @_;
1970            
1971             if (exists $self->{custom_escape}) {
1972             if (ref $self->{custom_escape}) {
1973             return $self->{custom_escape}->($data,$level);
1974             } else {
1975             return $data;
1976             }
1977             }
1978            
1979             return '' unless(defined($data) and $data ne '');
1980            
1981             if ($self->{opt}{output_encoding} ne $self->{opt}{encode}) {
1982             $data = Encode::decode( $self->{opt}{encode}, $data) if $self->{opt}{encode};
1983             $data = Encode::encode( $self->{opt}{output_encoding}, $data) if $self->{opt}{output_encoding};
1984             }
1985            
1986             $data =~ s/&/&/sg;
1987             $data =~ s/
1988             $data =~ s/>/>/sg;
1989             $data =~ s/"/"/sg;
1990            
1991             $level = $self->{opt}->{numericescape} unless defined $level;
1992             return $data unless $level;
1993            
1994             if($self->{opt}->{numericescape} eq '2') {
1995             $data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse;
1996             } else {
1997             $data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse;
1998             }
1999             return $data;
2000             }
2001            
2002             =head2 escape_value
2003            
2004             $parser->escape_value( $data [, $numericescape])
2005            
2006             This method escapes the $data for inclusion in XML, the $numericescape may be 0, 1 or 2
2007             and controls whether to convert 'high' (non ASCII) characters to XML entities.
2008            
2009             0 - default: no numeric escaping (OK if you're writing out UTF8)
2010            
2011             1 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output
2012            
2013             2 - all characters above 0x7F are escaped (good for plain ASCII output)
2014            
2015             You can also specify the default value in the constructor
2016            
2017             my $parser = XML::Rules->new(
2018             ...
2019             NumericEscape => 2,
2020             );
2021            
2022             =cut
2023            
2024             sub ToXML;*ToXML=\&toXML;
2025             sub toXML {
2026             my $self = shift;
2027             if (!ref($self) and $self eq 'XML::Rules') {
2028             $self = XML::Rules->new(rules=>{}, ident => ' ');
2029             }
2030             my ($tag, $attrs, $no_close, $ident, $base);
2031             if (ref $_[0]) {
2032             ($tag, $no_close, $ident, $base) = @_;
2033             } else {
2034             ($tag, $attrs, $no_close, $ident, $base) = @_;
2035             }
2036             $ident = $self->{opt}{ident} unless defined $ident;
2037            
2038             if ($ident eq '') {
2039             $self->_toXMLnoformat(@_)
2040             } else {
2041             $base = '' unless defined $base;
2042             $base = "\n" . $base unless $base =~ /^\n/s;
2043             if (ref $tag) {
2044             $self->_toXMLformat($tag, $no_close, $ident, $base)
2045             } else {
2046             $self->_toXMLformat($tag, $attrs, $no_close, $ident, $base)
2047             }
2048             }
2049             }
2050            
2051             sub _toXMLnoformat {
2052             my ($self, $tag, $attrs, @body, $no_close);
2053             if (ref $_[1]) {
2054             if (ref $_[1] eq 'ARRAY') {
2055             ($self, $tag, $no_close) = @_;
2056             ($tag, $attrs, @body) = @$tag;
2057             if (defined $attrs and ref $attrs ne 'HASH') {
2058             unshift( @body, $attrs);
2059             $attrs = undef;
2060             }
2061             } else {
2062             croak("The first parameter to ->ToXML() must be the tag name or the arrayref containing [tagname, {attributes}, content]")
2063             }
2064             } else {
2065             ($self, $tag, $attrs, $no_close) = @_;
2066             if (ref $attrs ne 'HASH') {
2067             if (defined $attrs and ref $attrs eq 'ARRAY') {
2068             return '' unless @$attrs;
2069             ($attrs,@body) = (undef,@$attrs);
2070             } else {
2071             ($attrs,@body) = (undef,$attrs);
2072             }
2073             }
2074             }
2075            
2076             push @body, $attrs->{_content} if $attrs and defined $attrs->{_content};
2077             $attrs = undef if (ref $attrs eq 'HASH' and (keys(%{$attrs}) == 0 or keys(%{$attrs}) == 1 and exists $attrs->{_content})); # ->toXML( $tagname, {}, ...)
2078            
2079             #use Data::Dumper;
2080             #print Dumper( [$tag, $attrs, \@body]);
2081             #sleep(1);
2082            
2083             if ($tag eq '') {
2084             # \%attrs is ignored
2085             if (@body) {
2086             return join( '', map {
2087             if (!ref($_)) {
2088             $self->escape_value($_)
2089             } elsif (ref($_) eq 'ARRAY') {
2090             $self->_toXMLnoformat($_, 0)
2091             } else {
2092             croak "The content in XML::Rules->ToXML( '', here) must be a string or an arrayref containing strings and arrayrefs!";
2093             }
2094             } @body);
2095             } else {
2096             return '';
2097             }
2098             }
2099            
2100             if (@body > 1) {
2101             if (! $attrs) {
2102             my $result = '';
2103             while (@body) {
2104             my $content = shift(@body);
2105             if (ref $content eq 'HASH') {
2106             if (@body and ref($body[0]) ne 'HASH') {
2107             $result .= $self->_toXMLnoformat([$tag, $content, shift(@body)], 0)
2108             } else {
2109             $result .= $self->_toXMLnoformat([$tag, $content], 0)
2110             }
2111             } else {
2112             $result .= $self->_toXMLnoformat([$tag, undef, $content], 0)
2113             }
2114             }
2115             return $result;
2116             } else {
2117             my $result = '';
2118             while (@body) {
2119             my $content = shift(@body);
2120             if (ref $content eq 'HASH') {
2121             my %h = (%$attrs, %$content);
2122             if (@body and ref($body[0]) ne 'HASH') {
2123             $result .= $self->_toXMLnoformat([$tag, \%h, shift(@body)], 0)
2124             } else {
2125             $result .= $self->_toXMLnoformat([$tag, \%h], 0)
2126             }
2127             } else {
2128             $result .= $self->_toXMLnoformat([$tag, $attrs, $content])
2129             }
2130             }
2131             return $result;
2132             }
2133             }
2134            
2135             if (! $attrs and !ref($body[0])) { # ->toXML( $tagname, $string_content, ...)
2136             if ($no_close) {
2137             return "<$tag>" . $self->escape_value($body[0]);
2138             } elsif (! defined $body[0]) {
2139             return "<$tag/>";
2140             } else {
2141             return "<$tag>" . $self->escape_value($body[0]) . "";
2142             }
2143             }
2144            
2145             my $content = $body[0];
2146             my $result = "<$tag";
2147             my $subtags = '';
2148             foreach my $key (sort keys %$attrs) {
2149             next if $key =~ /^:/ or $key eq '_content';
2150             if (ref $attrs->{$key}) {
2151             if (ref $attrs->{$key} eq 'ARRAY') {
2152             if (@{$attrs->{$key}}) {
2153             foreach my $subtag (@{$attrs->{$key}}) {
2154             $subtags .= $self->_toXMLnoformat($key, $subtag, 0);
2155             }
2156             } else {
2157             $subtags .= "<$key/>";
2158             }
2159             } elsif (ref $attrs->{$key} eq 'HASH') {
2160             $subtags .= $self->_toXMLnoformat($key, $attrs->{$key}, 0)
2161             } else {
2162             croak(ref($attrs->{$key}) . " attributes not supported in XML::Rules->toXML()!");
2163             }
2164             } else {
2165             $result .= qq{ $key="} . $self->escape_value($attrs->{$key}) . qq{"};
2166             }
2167             }
2168             if (! defined $content and $subtags eq '') {
2169             if ($no_close) {
2170             return $result.">";
2171             } else {
2172             return $result."/>";
2173             }
2174            
2175             } elsif (!ref($content)) { # content is a string, not an array of strings and subtags
2176             if ($no_close) {
2177             return "$result>$subtags" . $self->escape_value($content);
2178             } elsif ($content eq '' and $subtags ne '') {
2179             return "$result>$subtags";
2180             } else {
2181             return "$result>$subtags" . $self->escape_value($content) ."";
2182             }
2183            
2184             } elsif (ref($content) eq 'ARRAY') {
2185             $result .= ">$subtags";
2186             foreach my $snippet (@$content) {
2187             if (!ref($snippet)) {
2188             $result .= $self->escape_value($snippet);
2189             } elsif (ref($snippet) eq 'ARRAY') {
2190             $result .= $self->_toXMLnoformat($snippet, 0);
2191             } else {
2192             croak(ref($snippet) . " not supported in _content in XML::Rules->toXML()!");
2193             }
2194             }
2195             if ($no_close) {
2196             return $result;
2197             } else {
2198             return $result."";
2199             }
2200             } else {
2201             croak(ref($content) . " _content not supported in XML::Rules->toXML()!");
2202             }
2203             }
2204            
2205             sub _toXMLformat {
2206             my ($self, $tag, $attrs, @body, $no_close, $ident, $base);
2207             if (ref $_[1]) {
2208             if (ref $_[1] eq 'ARRAY') {
2209             ($self, $tag, $no_close, $ident, $base) = @_;
2210             ($tag, $attrs, @body) = @$tag;
2211             if (defined $attrs and ref $attrs ne 'HASH') {
2212             unshift( @body, $attrs);
2213             $attrs = undef;
2214             }
2215             } else {
2216             croak("The first parameter to ->ToXML() must be the tag name or the arrayref containing [tagname, {attributes}, content]")
2217             }
2218             } else {
2219             ($self, $tag, $attrs, $no_close, $ident, $base) = @_;
2220             if (ref $attrs ne 'HASH') {
2221             if (defined $attrs and ref $attrs eq 'ARRAY') {
2222             return '' unless @$attrs;
2223             ($attrs,@body) = (undef,@$attrs);
2224             } else {
2225             ($attrs,@body) = (undef,$attrs);
2226             }
2227             }
2228             }
2229            
2230             push @body, $attrs->{_content} if $attrs and defined $attrs->{_content};
2231             $attrs = undef if (ref $attrs eq 'HASH' and (keys(%{$attrs}) == 0 or keys(%{$attrs}) == 1 and exists $attrs->{_content})); # ->toXML( $tagname, {}, ...)
2232            
2233             #use Data::Dumper;
2234             #print Dumper( [$tag, $attrs, \@body]);
2235             #sleep(1);
2236            
2237             if ($tag eq '') {
2238             # \%attrs is ignored
2239             if (@body) {
2240             return join( '', map {
2241             if (!ref($_)) {
2242             $self->escape_value($_)
2243             } elsif (ref($_) eq 'ARRAY') {
2244             $self->_toXMLformat($_, 0, $ident, $base)
2245             } else {
2246             croak "The content in XML::Rules->ToXML( '', here) must be a string or an arrayref containing strings and arrayrefs!";
2247             }
2248             } @body);
2249             } else {
2250             return '';
2251             }
2252             }
2253            
2254             if (@body > 1) {
2255             if (! $attrs) {
2256             my $result = '';
2257             while (@body) {
2258             $result .= $base if $result ne '';
2259             my $content = shift(@body);
2260             if (ref $content eq 'HASH') {
2261             if (@body and ref($body[0]) ne 'HASH') {
2262             $result .= $self->_toXMLformat([$tag, $content, shift(@body)], 0, $ident, $base)
2263             } else {
2264             $result .= $self->_toXMLformat([$tag, $content], 0, $ident, $base)
2265             }
2266             } else {
2267             $result .= $self->_toXMLformat([$tag, undef, $content], 0, $ident, $base)
2268             }
2269             }
2270             return $result;
2271             } else {
2272             my $result = '';
2273             while (@body) {
2274             $result .= $base if $result ne '';
2275             my $content = shift(@body);
2276             if (ref $content eq 'HASH') {
2277             my %h = (%$attrs, %$content);
2278             if (@body and ref($body[0]) ne 'HASH') {
2279             $result .= $self->_toXMLformat([$tag, \%h, shift(@body)], 0, $ident, $base)
2280             } else {
2281             $result .= $self->_toXMLformat([$tag, \%h], 0, $ident, $base)
2282             }
2283             } else {
2284             $result .= $self->_toXMLformat([$tag, $attrs, $content], 0, $ident, $base)
2285             }
2286             }
2287             return $result;
2288             }
2289             }
2290            
2291             if (! $attrs and !ref($body[0])) { # ->toXML( $tagname, $string_content, ...)
2292             if ($no_close) {
2293             return "<$tag>" . $self->escape_value($body[0]);
2294             } elsif (! defined $body[0]) {
2295             return "<$tag/>";
2296             } else {
2297             return "<$tag>" . $self->escape_value($body[0]) . "";
2298             }
2299             }
2300            
2301             my $content = $body[0];
2302             my $result = "<$tag";
2303             my $subtags = '';
2304             my $had_child = 0;
2305             foreach my $key (sort keys %$attrs) {
2306             next if $key =~ /^:/ or $key eq '_content';
2307             if (ref $attrs->{$key}) {
2308             if (ref $attrs->{$key} eq 'ARRAY') {
2309             if (@{$attrs->{$key}}) {
2310             foreach my $subtag (@{$attrs->{$key}}) {
2311             $subtags .= $base . $ident . $self->_toXMLformat($key, $subtag, 0, $ident, $base.$ident);
2312             $had_child = 1;
2313             }
2314             } else {
2315             $subtags .= $base . $ident . "<$key/>";
2316             }
2317             } elsif (ref $attrs->{$key} eq 'HASH') {
2318             $subtags .= $base . $ident . $self->_toXMLformat($key, $attrs->{$key}, 0, $ident, $base.$ident);
2319             $had_child = 1;
2320             } else {
2321             croak(ref($attrs->{$key}) . " attributes not supported in XML::Rules->toXML()!");
2322             }
2323             } else {
2324             $result .= qq{ $key="} . $self->escape_value($attrs->{$key}) . qq{"};
2325             }
2326             }
2327             if (! defined $content and $subtags eq '') {
2328             if ($no_close) {
2329             return $result.">";
2330             } else {
2331             return $result."/>";
2332             }
2333            
2334             } elsif (!ref($content)) { # content is a string, not an array of strings and subtags
2335             if ($no_close) {
2336             return "$result>$subtags" . $self->escape_value($content);
2337             } elsif ($content eq '' and $subtags ne '') {
2338             return "$result>$subtags".($had_child ? $base : '')."";
2339             } else {
2340             return "$result>$subtags" . $self->escape_value($content) . ($had_child ? $base : '') ."";
2341             }
2342            
2343             } elsif (ref($content) eq 'ARRAY') {
2344             $result .= ">$subtags";
2345             foreach my $snippet (@$content) {
2346             if (!ref($snippet)) {
2347             $result .= $self->escape_value($snippet);
2348             } elsif (ref($snippet) eq 'ARRAY') {
2349             $result .= $base.$ident . $self->_toXMLformat($snippet, 0, $ident, $base.$ident);
2350             $had_child = 1;
2351             } else {
2352             croak(ref($snippet) . " not supported in _content in XML::Rules->toXML()!");
2353             }
2354             }
2355             if ($no_close) {
2356             return $result;
2357             } else {
2358             if ($had_child) {
2359             return $result.$base."";
2360             } else {
2361             return $result."";
2362             }
2363             }
2364             } else {
2365             croak(ref($content) . " _content not supported in XML::Rules->toXML()!");
2366             }
2367             }
2368            
2369            
2370             sub parentsToXML {
2371             my ($self, $level) = @_;
2372             my $tag_names = $self->{context};
2373             my $tag_attrs = $self->{data};
2374            
2375             $level = scalar(@$tag_names) unless $level;
2376            
2377             my $result = '';
2378             for (my $i = -1; -$i <= $level; $i--) {
2379             $result = $self->toXML( ${$tag_names}[$i], ${$tag_attrs}[$i], 1) . $result;
2380             }
2381             return $result;
2382             }
2383            
2384             sub closeParentsToXML {
2385             my ($self, $level) = @_;
2386             my $tag_names = $self->{context};
2387            
2388             if ($level) {
2389             return '';
2390             } else {
2391             return '';
2392             }
2393             }
2394            
2395             =head2 toXML / ToXML
2396            
2397             $xml = $parser->toXML( $tagname, \%attrs[, $do_not_close, $ident, $base])
2398            
2399             You may use this method to convert the datastructures created by parsing the XML into the XML format.
2400             Not all data structures may be printed! I'll add more docs later, for now please do experiment.
2401            
2402             The $ident and $base, if defined, turn on and control the pretty-printing. The $ident specifies the character(s)
2403             used for one level of identation, the base contains the identation of the current tag. That is if you want to include the data inside of
2404            
2405            
2406            
2407             $here
2408            
2409            
2410            
2411             you will call
2412            
2413             $parser->toXML( $tagname, \%attrs, 0, "\t", "\t\t\t");
2414            
2415             The method does NOT validate that the $ident and $base are whitespace only, but of course if it's not you end up with invalid
2416             XML. Newlines are added only before the start tag and (if the tag has only child tags and no content) before the closing tag,
2417             but not after the closing tag! Newlines are added even if the $ident is an empty string.
2418            
2419             =head2 parentsToXML
2420            
2421             $xml = $parser->parentsToXML( [$level])
2422            
2423             Prints all or only the topmost $level ancestor tags, including the attributes and content (parsed so far),
2424             but without the closing tags. You may use this to print the header of the file you are parsing,
2425             followed by calling toXML() on a structure you build and then by closeParentsToXML() to close
2426             the tags left opened by parentsToXML(). You most likely want to use the style => 'filter' option
2427             for the constructor instead.
2428            
2429             =head2 closeParentsToXML
2430            
2431             $xml = $parser->closeParentsToXML( [$level])
2432            
2433             Prints the closing tags for all or the topmost $level ancestor tags of the one currently processed.
2434            
2435             =head2 paths2rules
2436            
2437             my $parser = XML::Rules->new(
2438             rules => paths2rules {
2439             '/root/subtag/tag' => sub { ...},
2440             '/root/othertag/tag' => sub {...},
2441             'tag' => sub{ ... the default code for this tag ...},
2442             ...
2443             }
2444             );
2445            
2446             This helper function converts a hash of "somewhat xpath-like" paths and subs/rules into the format required by the module.
2447             Due to backwards compatibility and efficiency I can't directly support paths in the rules and the direct syntax for their
2448             specification is a bit awkward. So if you need the paths and not the regexps, you may use this helper instead of:
2449            
2450             my $parser = XML::Rules->new(
2451             rules => {
2452             'tag' => [
2453             '/root/subtag' => sub { ...},
2454             '/root/othertag' => sub {...},
2455             sub{ ... the default code for this tag ...},
2456             ],
2457             ...
2458             }
2459             );
2460            
2461             =cut
2462            
2463             sub paths2rules {
2464             my ($paths) = @_;
2465            
2466             my %rules;
2467             while ( my ($tag, $val) = each %$paths) {
2468            
2469             if ($tag =~ m{^(.*)/(.*)$}) {
2470             my ($path, $tagname) = ($1, $2);
2471            
2472             if (exists $rules{$tagname} and ref($rules{$tagname}) eq 'ARRAY') {
2473             if (@{$rules{$tagname}} % 2) {
2474             push @{$rules{$tagname}}, $path, $val;
2475             } else {
2476             splice @{$rules{$tagname}}, -1, 0, $path, $val;
2477             }
2478             } else {
2479             $rules{$tagname} = [ $path => $val]
2480             }
2481            
2482             } elsif (exists $rules{$tag} and ref($rules{$tag}) eq 'ARRAY') {
2483             push @{$rules{$tag}}, $val;
2484             } else {
2485             $rules{$tag} = $val
2486             }
2487             }
2488            
2489             return \%rules;
2490             }
2491            
2492             =head2 return_nothing
2493            
2494             Stop parsing the XML, forget any data we already have and return from the $parser->parse().
2495             This is only supposed to be used within rules and may be called both as a method and as
2496             an ordinary function (it's not exported).
2497            
2498             =head2 return_this
2499            
2500             Stop parsing the XML, forget any data we already have and return the attributes passed to this subroutine
2501             from the $parser->parse(). This is only supposed to be used within rules and may be called both as a method
2502             and as an ordinary function (it's not exported).
2503            
2504             =head2 skip_rest
2505            
2506             Stop parsing the XML and return whatever data we already have from the $parser->parse().
2507             The rules for the currently opened tags are evaluated as if the XML contained all
2508             the closing tags in the right order.
2509            
2510             These three work via raising an exception, the exception is caught within the $parser->parse() and does not propagate outside.
2511             It's also safe to raise any other exception within the rules, the exception will be caught as well, the internal state of the $parser object
2512             will be cleaned and the exception rethrown.
2513            
2514             =head1 CLASS METHODS
2515            
2516             =head2 parse
2517            
2518             When called as a class method, parse() accepts the same parameters as new(), instantiates a new parser object
2519             and returns a subroutine reference that calls the parse() method on that instance.
2520            
2521             my $parser = XML::Rules->new(rules => \%rules);
2522             my $data = $parser->parse($xml);
2523            
2524             becomes
2525            
2526             my $read_data = XML::Rules->parse(rules => \%rules);
2527             my $data = $read_data->($xml);
2528            
2529             or
2530            
2531             sub read_data;
2532             *read_data = XML::Rules->parse(rules => \%rules);
2533             my $data = read_data($xml);
2534            
2535             =head2 parsestring, parsefile, parse_file, filter, filterstring, filter_string, filterfile, filter_file
2536            
2537             All these methods work the same way as parse() when used as a class method. They accept the same parameters as new(),
2538             instantiate a new object and return a subroutine reference that calls the respective method.
2539            
2540             =head2 inferRulesFromExample
2541            
2542             Dumper(XML::Rules::inferRulesFromExample( $fileOrXML, $fileOrXML, $fileOrXML, ...)
2543             Dumper(XML::Rules->inferRulesFromExample( $fileOrXML, $fileOrXML, $fileOrXML, ...)
2544             Dumper($parser->inferRulesFromExample( $fileOrXML, $fileOrXML, $fileOrXML, ...)
2545            
2546             The subroutine parses the listed files and infers the rules that would produce the minimal, but complete datastructure.
2547             It finds out what tags may be repeated, whether they contain text content, attributes etc. You may want to give
2548             the subroutine several examples to make sure it knows about all possibilities. You should use this once and store
2549             the generated rules in your script or even take this as the basis of a more specific set of rules.
2550            
2551             =cut
2552            
2553             sub inferRulesFromExample {
2554             shift(@_) if $_[0] eq 'XML::Rules' or ref($_[0]);
2555             my @files = @_;
2556            
2557             my %rules;
2558            
2559             my $parser = XML::Rules->new(
2560             namespaces => { '*' => ''},
2561             rules => {
2562             _default => sub {
2563             my ($tag, $attrs, $context, $parent_data, $parser) = @_;
2564             my $repeated = (exists $parent_data->[-1] and exists $parent_data->[-1]{$tag});
2565             my $has_content = (exists $attrs->{_content});
2566             my $has_children = grep ref($_) eq 'HASH', values %$attrs;
2567             my $has_attr = grep {$_ ne '_content' and !ref($attrs->{$_})} keys %$attrs;
2568            
2569             my $rule = do {
2570             if ($repeated) {
2571             if ($has_content) {
2572             if ($has_attr or $has_children) {
2573             'as array'
2574             } else {
2575             'content array'
2576             }
2577             } else {
2578             if ($has_attr or $has_children) {
2579             'as array no content'
2580             } else {
2581             'content array'
2582             }
2583             }
2584             } else {
2585             if ($has_content) {
2586             if ($has_attr or $has_children) {
2587             'as is'
2588             } else {
2589             'content'
2590             }
2591             } else {
2592             if ($has_attr or $has_children) {
2593             'no content'
2594             } else {
2595             'content'
2596             }
2597             }
2598             }
2599             };
2600            
2601             if (not exists $rules{$tag}) {
2602             $rules{$tag} = $rule
2603             } elsif($rules{$tag} ne $rule) {
2604             # we've already seen the tag and it had different type
2605             if ($rules{$tag} eq 'raw extended array') {
2606             } elsif ($rule eq 'raw extended array') {
2607             $rules{$tag} = 'raw extended array';
2608             } elsif ($rules{$tag} eq 'raw extended' and $rule =~ /array/
2609             or $rule eq 'raw extended' and $rules{$tag} =~ /array/) {
2610             $rules{$tag} = 'raw extended array'
2611             } elsif ($rules{$tag} eq 'as array' or $rule eq 'as array') {
2612             $rules{$tag} = 'as array'
2613             } elsif ($rules{$tag} eq 'content array' and $rule eq 'content'
2614             or $rule eq 'content array' and $rules{$tag} eq 'content') {
2615             $rules{$tag} = 'content array'
2616             } elsif ($rules{$tag} eq 'content array' and $rule eq 'as array no content'
2617             or $rule eq 'content array' and $rules{$tag} eq 'as array no content') {
2618             $rules{$tag} = 'as array'
2619             } elsif ($rules{$tag} eq 'content array' and $rule eq 'as is'
2620             or $rule eq 'content array' and $rules{$tag} eq 'as is') {
2621             $rules{$tag} = 'as array'
2622             } elsif ($rules{$tag} eq 'content array' and $rule eq 'no content'
2623             or $rule eq 'content array' and $rules{$tag} eq 'no content') {
2624             $rules{$tag} = 'as array'
2625             } elsif ($rules{$tag} eq 'as array no content' and $rule eq 'as is'
2626             or $rule eq 'as array no content' and $rules{$tag} eq 'as is') {
2627             $rules{$tag} = 'as array'
2628             } elsif ($rules{$tag} eq 'as array no content' and $rule eq 'content'
2629             or $rule eq 'as array no content' and $rules{$tag} eq 'content') {
2630             $rules{$tag} = 'as array'
2631             } elsif ($rules{$tag} eq 'as array no content' and $rule eq 'no content'
2632             or $rule eq 'as array no content' and $rules{$tag} eq 'no content') {
2633             $rules{$tag} = 'as array no content'
2634             } elsif ($rules{$tag} eq 'as is' and ($rule eq 'no content' or $rule eq 'content')
2635             or $rule eq 'as is' and ($rules{$tag} eq 'no content' or $rules{$tag} eq 'content')) {
2636             $rules{$tag} = 'as is'
2637             } elsif ($rules{$tag} eq 'content' and $rule eq 'no content'
2638             or $rule eq 'content' and $rules{$tag} eq 'no content') {
2639             $rules{$tag} = 'as is'
2640             } else {
2641             die "Unexpected combination of rules: old=$rules{$tag}, new=$rule for tag $tag\n";
2642             }
2643             }
2644            
2645             if ($has_content and $has_children) { # the tag contains both text content and subtags!, need to use the raw extended rules
2646             foreach my $child (grep ref($attrs->{$_}) eq 'HASH', keys %$attrs) {
2647             next if $rules{$child} =~ /^raw extended/;
2648             if ($rules{$child} =~ /array/) {
2649             $rules{$child} = 'raw extended array'
2650             } else {
2651             $rules{$child} = 'raw extended'
2652             }
2653             }
2654             }
2655             return $tag => {};
2656             }
2657             },
2658             stripspaces => 7,
2659             );
2660            
2661             for (@files) {
2662             eval {
2663             if (! ref($_) and $_ !~ /\n/ and $_ !~ /^\s*
2664             $parser->parsefile($_);
2665             } else {
2666             $parser->parse($_);
2667             }
2668             } or croak "Error parsing $_: $@\n";
2669             }
2670            
2671             my %short_rules;
2672             foreach my $tag (sort keys %rules) {
2673             push @{$short_rules{$rules{$tag}}}, $tag
2674             }
2675            
2676             foreach my $tags (values %short_rules) {
2677             $tags = join ',', sort @$tags;
2678             }
2679             %short_rules = reverse %short_rules;
2680            
2681             return \%short_rules;
2682             }
2683            
2684             =head2 inferRulesFromDTD
2685            
2686             Dumper(XML::Rules::inferRulesFromDTD( $DTDorDTDfile, [$enableExtended]))
2687             Dumper(XML::Rules->inferRulesFromDTD( $DTDorDTDfile, [$enableExtended]))
2688             Dumper($parser->inferRulesFromDTD( $DTDorDTDfile, [$enableExtended]))
2689            
2690             The subroutine parses the DTD and infers the rules that would produce the minimal, but complete datastructure.
2691             It finds out what tags may be repeated, whether they contain text content, attributes etc. You may use this
2692             each time you are about to parse the XML or once and store the generated rules in your script or even take this
2693             as the basis of a more specific set of rules.
2694            
2695             With the second parameter set to a true value, the tags included in a mixed content will use the "raw extended"
2696             or "raw extended array" types instead of just "raw". This makes sure the tag data both stay at the right place in
2697             the content and are accessible easily from the parent tag's atrribute hash.
2698            
2699             This subroutine requires the XML::DTDParser module!
2700            
2701             =cut
2702            
2703             sub inferRulesFromDTD {
2704             shift(@_) if $_[0] eq 'XML::Rules' or ref($_[0]);
2705             require XML::DTDParser;
2706            
2707             my ($DTDfile, $enable_extended) = @_;
2708            
2709             my $DTD = ( ($DTDfile=~ /\n/) ? XML::DTDParser::ParseDTD($DTDfile) : XML::DTDParser::ParseDTDFile($DTDfile));
2710            
2711             my $has_mixed = 0;
2712             foreach my $tag (values %$DTD) {
2713             $tag->{is_mixed} = (($tag->{content} and $tag->{children}) ? 1 : 0)
2714             and $has_mixed = 1;
2715             }
2716            
2717             my %settings;
2718             foreach my $tagname (keys %$DTD) {
2719             my $tag = $DTD->{$tagname};
2720            
2721             my $repeated = ($tag->{option} =~ /^[+*]$/ ? 1 : 0);
2722             my $has_content = $tag->{content};
2723            
2724             my $in_mixed = grep {$DTD->{$_}{is_mixed}} @{$tag->{parent}};
2725            
2726             if ($in_mixed) {
2727             if ($enable_extended) {
2728             if ($repeated) {
2729             $settings{$tagname} = "raw extended array"
2730             } else {
2731             $settings{$tagname} = "raw extended"
2732             }
2733             } else {
2734             $settings{$tagname} = "raw"
2735             }
2736             } else {
2737             if (exists $tag->{attributes} or exists $tag->{children}) {
2738             my @ids ;
2739             if (exists $tag->{attributes}) {
2740             @ids = grep {$tag->{attributes}{$_}[0] eq 'ID' and $tag->{attributes}{$_}[1] eq '#REQUIRED'} keys %{$tag->{attributes}};
2741             }
2742             if (scalar(@ids) == 1) {
2743             if ($has_content) {
2744             $settings{$tagname} = "by $ids[0]"
2745             } else {
2746             $settings{$tagname} = "no content by $ids[0]"
2747             }
2748             } else {
2749             if ($has_content) {
2750             if ($repeated) {
2751             $settings{$tagname} = "as array"
2752             } else {
2753             $settings{$tagname} = "as is"
2754             }
2755             } else {
2756             if ($repeated) {
2757             $settings{$tagname} = "as array no content"
2758             } else {
2759             $settings{$tagname} = "no content"
2760             }
2761             }
2762             }
2763             } elsif ($repeated) {
2764             $settings{$tagname} = "content array"
2765             } else {
2766             $settings{$tagname} = "content"
2767             }
2768             }
2769             }
2770            
2771             # use Data::Dumper;
2772             # print Dumper(\%settings);
2773            
2774             my %compressed;
2775             {
2776             my %tmp;
2777             while (my ($tag, $option) = each %settings) {
2778             push @{$tmp{$option}}, $tag;
2779             }
2780            
2781             while (my ($option, $tags) = each %tmp) {
2782             $compressed{join ',', sort @$tags} = $option
2783             }
2784             }
2785            
2786             if ($has_mixed) {
2787             $compressed{"#stripspaces"} = 0;
2788             } else {
2789             $compressed{"#stripspaces"} = 7;
2790             }
2791            
2792             return \%compressed;
2793             }
2794            
2795             =head2 toXML / ToXML
2796            
2797             The ToXML() method may be called as a class/static method as well. In that case the default identation is two spaces and the output encoding is utf8.
2798            
2799             =head1 PROPERTIES
2800            
2801             =head2 parameters
2802            
2803             You can pass a parameter (scalar or reference) to the parse...() or filter...() methods, this parameter
2804             is later available to the rules as $parser->{parameters}. The module will never use this parameter for
2805             any other purpose so you are free to use it for any purposes provided that you expect it to be reset by
2806             each call to parse...() or filter...() first to the passed value and then, after the parsing is complete, to undef.
2807            
2808             =head2 pad
2809            
2810             The $parser->{pad} key is specificaly reserved by the module as a place where the module users can
2811             store their data. The module doesn't and will not use this key in any way, doesn't set or reset it under any
2812             circumstances. If you need to share some data between the rules and do not want to use the structure built
2813             by applying the rules you are free to use this key.
2814            
2815             You should refrain from modifying or accessing other properties of the XML::Rules object!
2816            
2817             =head1 IMPORTS
2818            
2819             When used without parameters, the module does not export anything into the caller's namespace. When used with parameters
2820             it either infers and prints a set of rules from a DTD or example(s) or instantiates a parser
2821             and exports a subroutine calling the specified method similar to the parse() and other methods when called as class methods:
2822            
2823             use XML::Rules inferRules => 'c:\temp\example.xml';
2824             use XML::Rules inferRules => 'c:\temp\ourOwn.dtd';
2825             use XML::Rules inferRules => ['c:\temp\example.xml', c:\temp\other.xml'];
2826             use XML::Rules
2827             read_data => {
2828             method => 'parse',
2829             rules => { ... },
2830             ...
2831             };
2832             use XML::Rules ToXML => {
2833             method => 'ToXML',
2834             rules => {}, # the option is required, but may be empty
2835             ident => ' '
2836             };
2837             ...
2838             my $data => read_data($xml);
2839             print ToXML(
2840             rootTag => {
2841             thing => [
2842             {Name => "english", child => [7480], otherChild => ['Hello world']},
2843             {Name => "espanol", child => [7440], otherChild => ['Hola mundo']},
2844             ]
2845             });
2846            
2847            
2848             Please keep in mind that the use statement is executed at "compile time", which means that the variables declared and assigned above the statement
2849             do not have the value yet! This is wrong!
2850            
2851             my %rules = ( _default => 'content', foo => 'as is', ...};
2852             use XML::Rules
2853             read_data => {
2854             method => 'parse',
2855             rules => \%rules,
2856             ...
2857             };
2858            
2859             If you do not specify the method, then the method named the same as the import is assumed. You also do not have to specify the rules option for
2860             the ToXML method as it is not used anyway:
2861            
2862             use XML::Rules ToXML => { ident => ' ' };
2863             use XML::Rules parse => {stripspaces => 7, rules => { ... }};
2864            
2865             You can use the inferRules form the command line like this:
2866            
2867             perl -e "use XML::Rules inferRules => 'c:\temp\example.xml'"
2868            
2869             or this:
2870            
2871             perl -MXML::Rules=inferRules,c:\temp\example.xml -e 1
2872            
2873             or use the included xml2XMLRules.pl and dtd2XMLRules.pl scripts.
2874            
2875             =head1 Namespace support
2876            
2877             By default the module doesn't handle namespaces in any way, it doesn't do anything special with
2878             xmlns or xmlns:alias attributes and it doesn't strip or mangle the namespace aliases
2879             in tag or attribute names. This means that if you know for sure what namespace
2880             aliases will be used you can set up rules for tags including the aliases and unless
2881             someone decides to use a different alias or makes use of the default namespace
2882             your script will work without turning the namespace support on.
2883            
2884             If you do specify any namespace to alias mapping in the constructor it does
2885             start processing the namespace stuff. The xmlns and xmlns:alias attributes
2886             for the known namespaces are stripped from the datastructures and
2887             the aliases are transformed from whatever the XML author decided to use
2888             to whatever your namespace mapping specifies. Aliases are also added to all
2889             tags that belong to a default namespace.
2890            
2891             Assuming the constructor parameters contain
2892            
2893             namespaces => {
2894             'http://my.namespaces.com/foo' => 'foo',
2895             'http://my.namespaces.com/bar' => 'bar',
2896             }
2897            
2898             and the XML looks like this:
2899            
2900            
2901            
2902             Hello world
2903            
2904            
2905            
2906             NaRuzku
2907             at any crossroads
2908             Fakt desnej pajzl.
2909            
2910            
2911            
2912            
2913             then the rules wil be called as if the XML looked like this
2914             while the namespace support is turned off:
2915            
2916            
2917            
2918             Hello world
2919            
2920            
2921            
2922             NaRuzku
2923             at any crossroads
2924             Fakt desnej pajzl.
2925            
2926            
2927            
2928            
2929            
2930             This means that the namespace handling will normalize the aliases used so that you can use
2931             them in the rules.
2932            
2933             It is possible to specify an empty alias, so eg. in case you are processing a SOAP XML
2934             and know the tags defined by SOAP do not colide with the tags in the enclosed XML
2935             you may simplify the parsing by removing all namespace aliases.
2936            
2937             You can control the behaviour with respect to the namespaces that you did not include
2938             in your mapping by setting the "alias" for the special pseudonamespace '*'. The possible values
2939             of the "alias"are: "warn" (default), "keep", "strip", "" and "die".
2940            
2941             warn: whenever an unknown namespace is encountered, XML::Rules prints a warning.
2942             The xmlns:XX attributes and the XX: aliases are retained for these namespaces.
2943             If the alias clashes with one specified by your mapping it will be changed in all places,
2944             the xmlns="..." referencing an unexpected namespace are changed to xmlns:nsN
2945             and the alias is added to the tag names included.
2946            
2947             keep: this works just like the "warn" except for the warning.
2948            
2949             strip: all attributes and tags in the unknown namespaces are stripped. If
2950             a tag in such a namespace contains a tag from a known namespace,
2951             then the child tag is retained.
2952            
2953             "": all the xmlns attributes and the aliases for the unexected namespaces are removed,
2954             the tags and normal attributes are retained without any alias.
2955            
2956             die: as soon as any unexpected namespace is encountered, XML::Rules croak()s.
2957            
2958            
2959             =head1 HOW TO USE
2960            
2961             You may view the module either as a XML::Simple on steriods and use it to build a data structure
2962             similar to the one produced by XML::Simple with the added benefit of being able
2963             to specify what tags or attributes to ignore, when to take just the content, what to store as an array etc.
2964            
2965             You could also view it as yet another event based XML parser that differs from all the others only in one thing.
2966             It stores the data for you so that you do not have to use globals or closures and wonder where to attach
2967             the snippet of data you just received onto the structure you are building.
2968            
2969             You can use it in a way similar to XML::Twig with simplify(): specify the rules to transform the lower
2970             level tags into a XML::Simple like (simplify()ed) structure and then handle the structure in the rule for
2971             the tag(s) you'd specify in XML::Twig's twig_roots.
2972            
2973             =head1 Unrelated tricks
2974            
2975             If you need to parse a XML file without the root tag (something that each and any sane person would allow,
2976             but the XML comitee did not), you can parse
2977            
2978             ]>&real_doc;
2979            
2980             instead.
2981            
2982             =head1 AUTHOR
2983            
2984             Jan Krynicky, C<< >>
2985            
2986             =head1 BUGS
2987            
2988             Please report any bugs or feature requests to
2989             C, or through the web interface at
2990             L.
2991             I will be notified, and then you'll automatically be notified of progress on
2992             your bug as I make changes.
2993            
2994             =head1 SUPPORT
2995            
2996             You can find documentation for this module with the perldoc command.
2997            
2998             perldoc XML::Rules
2999            
3000             You can also look for information at:
3001            
3002             =over 4
3003            
3004             =item * AnnoCPAN: Annotated CPAN documentation
3005            
3006             L
3007            
3008             =item * CPAN Ratings
3009            
3010             L
3011            
3012             =item * RT: CPAN's request tracker
3013            
3014             L
3015            
3016             =item * Search CPAN
3017            
3018             L
3019            
3020             =item * PerlMonks
3021            
3022             Please see L or
3023             L for discussion.
3024            
3025             =back
3026            
3027             =head1 SEE ALSO
3028            
3029             L, L, L
3030            
3031             =head1 ACKNOWLEDGEMENTS
3032            
3033             The escape_value() method is taken with minor changes from XML::Simple.
3034            
3035             =head1 COPYRIGHT & LICENSE
3036            
3037             Copyright 2006-2012 Jan Krynicky, all rights reserved.
3038            
3039             This program is free software; you can redistribute it and/or modify it
3040             under the same terms as Perl itself.
3041            
3042             =cut
3043            
3044             # if I ever attempt to switch to SAX I want to look at XML::Handler::Trees
3045            
3046             1; # End of XML::Rules