File Coverage

blib/lib/XML/Handler/YAWriter.pm
Criterion Covered Total %
statement 9 146 6.1
branch 0 98 0.0
condition 0 22 0.0
subroutine 3 17 17.6
pod 0 14 0.0
total 12 297 4.0


line stmt bran cond sub pod time code
1             #
2             # Copyright (c) 1999 Michael Koehne
3             #
4             # XML::Handler::YAWriter is free software. You can redistribute
5             # and/or modify this copy under terms of GNU General Public License.
6              
7             # Based on XML::Handler::XMLWriter Copyright (C) 1999 Ken MacLeod
8             # Portions derived from code in XML::Writer by David Megginson
9              
10             package XML::Handler::YAWriter;
11              
12 1     1   769 use strict;
  1         2  
  1         63  
13 1     1   5 use vars qw($VERSION);
  1         1  
  1         118  
14              
15             $VERSION="0.23";
16              
17             sub new {
18 0     0 0   my $type = shift;
19 0 0         my $self = ($#_ == 0) ? { %{ (shift) } } : { @_ };
  0            
20              
21 0           return bless $self, $type;
22             }
23              
24 1     1   5 use vars qw($escapes);
  1         11  
  1         2296  
25              
26             $escapes = { '&' => '&',
27             '<' => '<',
28             '>' => '>',
29             '"' => '"',
30             '--' => '--'
31             };
32              
33             sub start_document {
34 0     0 0   my ($self, $document) = @_;
35 0           my ($lc,$uc);
36              
37 0           $self->{'Strings'} = [];
38 0 0         $self->{'Escape'} = $escapes unless $self->{'Escape'};
39 0 0         $self->{'Encoding'} = "UTF-8" unless $self->{'Encoding'};
40              
41 0 0         if ($self->{'AsFile'}) {
    0          
42 0           require IO::File;
43 0   0       $self->{'Output'} = new IO::File(">".$self->{'AsFile'}) || die "$!";
44             } elsif ($self->{'AsPipe'}) {
45 0           require IO::File;
46 0   0       $self->{'Output'} = new IO::File("|".$self->{'AsPipe'}) || die "$!";
47             }
48              
49 0   0       $self->{'NoString'} = ($self->{'Output'} && ! $self->{'AsArray'});
50              
51 0 0         $self->{'Pretty'} = {} unless ref($self->{'Pretty'}) eq "HASH";
52              
53 0           $uc = $self->{'Pretty'};
54 0           foreach (keys %$uc) {
55 0           $lc = lc $_;
56 0 0         if ($lc ne $_) {
57 0           $self->{'Pretty'}{$lc} = $self->{'Pretty'}{$_};
58 0           delete $self->{'Pretty'}{$_};
59             }
60             }
61 0 0         $self->{'LeftSPC'} = $self->{'Pretty'}{'prettywhitenewline'} ? "\n" : "";
62 0 0         $self->{'Indent'} = $self->{'Pretty'}{'prettywhiteindent'} ? " " : "";
63 0 0         $self->{'AttrSPC'} = $self->{'Pretty'}{'addhiddenattrtab'} ? "\n\t" : " ";
64 0 0         $self->{'ElemSPC'} = $self->{'Pretty'}{'addhiddennewline'} ? "\n" : "";
65 0           $self->{'CompactAttr'} = $self->{'Pretty'}{'compactattrindent'};
66 0           $self->{'Counter'} = 0;
67 0           $self->{'Section'} = 0;
68 0           $self->{LastCount} = 0;
69 0           $self->{'InCDATA'} = 0;
70              
71 0           undef $self->{Sendleft};
72 0           undef $self->{Sendbuf};
73 0           undef $self->{Sendright};
74              
75 0           my $sub = 'sub { my ($str,$esc) = @_; $str =~ s/(' .
76 0           join("|", map { $_ = "\Q$_\E" } keys %{$self->{Escape}}).
  0            
77             ')/$esc->{$1}/oge; return $str; }';
78              
79 0           $self->{EscSub} = eval $sub;
80              
81 0 0         $self->print(
82             undef,
83             "{'Encoding'}."\"?>",
84             undef) unless $self->{'Pretty'}{'noprolog'};
85              
86             }
87              
88             sub end_document {
89 0     0 0   my ($self, $document) = @_;
90              
91 0 0         $self->print(undef,"\n",undef) unless $self->{'LeftSPC'};
92 0           $self->print(undef,undef,undef);
93              
94 0           my $string = undef;
95 0 0         $string = join('', @{$self->{Strings}}) if $self->{AsString};
  0            
96              
97 0 0         if ($self->{'AsFile'}) {
98 0           $self->{'Output'}->close();
99 0           undef $self->{'Output'};
100             }
101              
102 0           return($string);
103             }
104              
105             sub doctype_decl {
106 0     0 0   my ($self, $properties) = @_;
107              
108 0 0         return if $self->{'Pretty'}{'nodtd'};
109 0 0         return unless $properties->{'Name'};
110              
111 0           my $attspc = $self->{'AttrSPC'};
112 0           my $output = "DOCTYPE ".$properties->{'Name'};
113 0 0         $output .= $attspc.'SYSTEM "'.$properties->{'SystemId'}.'"' if $properties->{'SystemId'};
114 0 0         $output .= $attspc.'PUBLIC "'.$properties->{'PublicId'}.'"' if $properties->{'PublicId'};
115 0 0         $output .= $attspc.$properties->{'Internal'} if $properties->{'Internal'};
116              
117 0           $self->print("");
118             }
119              
120             sub processing_instruction {
121 0     0 0   my ($self, $pi) = @_;
122              
123 0 0         return if $self->{'Pretty'}{'nopi'};
124 0           my $output = undef;
125              
126 0 0         $output = $pi->{Target}." " if $pi->{Target};
127 0 0         $output .= $pi->{Data}." " if $pi->{Data};
128              
129 0 0         return unless $output;
130              
131 0           chop $output;
132              
133 0 0         if ($self->{'Pretty'}{issgml}) {
134 0           $self->print("")
135             } else {
136 0           $self->print("")
137             }
138             }
139              
140             sub start_element {
141 0     0 0   my ($self, $element) = @_;
142 0           my $name;
143             my $esc_value;
144 0           my $attr;
145              
146 0           my $output = $element->{Name};
147 0           my $attrspc= $self->{'AttrSPC'};
148              
149 0 0         $attrspc= "\n".$self->{'Indent'} x (2+$self->{'Counter'})
150             if $self->{'Indent'};
151 0 0         $attrspc= " " if $self->{'CompactAttr'};
152              
153 0 0         if ($element->{Attributes}) {
154 0           $attr = $element->{Attributes};
155 0           foreach $name (sort keys %$attr) {
156 0           $esc_value = $self->encode($attr->{$name});
157              
158 0           $output .= $attrspc . "$name=\"$esc_value\"";
159             }
160             }
161              
162 0           $self->print("<", $output, ">");
163 0           $self->{'Counter'}++;
164             }
165              
166             sub end_element {
167 0     0 0   my ($self, $element) = @_;
168 0           my $name = $element->{Name};
169              
170 0           $self->{'Counter'}--;
171 0 0 0       if ($self->{'Pretty'}{'catchemptyelement'} &&
      0        
      0        
172             ($self->{Sendbuf} =~ /^$name/ ) &&
173             ($self->{Sendleft} eq "<") &&
174             ($self->{Sendright} eq ">") ) {
175 0           $self->{Sendright} = "/>";
176             } else {
177 0           $self->print("");
178             }
179             }
180              
181             sub characters {
182 0     0 0   my ($self, $characters) = @_;
183              
184 0 0         return unless defined $characters->{Data};
185              
186 0 0         my $output = $self->{'InCDATA'} ?
187             $characters->{Data} :
188             $self->encode($characters->{Data});
189              
190 0 0 0       if ($self->{'Pretty'}{'catchwhitespace'} && !$self->{'InCDATA'}) {
    0 0        
191 0 0         $output =~ s/^([ \t\n\r]+)//; $self->print("") if $1;
  0            
192 0 0         return if $output eq "";
193 0 0         $output =~ s/([ \t\n\r]+)\$//; $self->print("") if $1;
  0            
194 0 0         return if $output eq "";
195             } elsif ($self->{'Pretty'}{'nowhitespace'} && !$self->{'InCDATA'}) {
196 0           $output =~ s/^([ \t\n\r]+)//;
197 0 0         return if $output eq "";
198 0           $output =~ s/([ \t\n\r]+)\$//;
199 0 0         return if $output eq "";
200             }
201            
202 0           $self->print(undef, $output, undef);
203             }
204              
205             sub ignorable_whitespace {
206 0     0 0   my ($self, $whitespace) = @_;
207              
208 0           my $output = $whitespace->{Data};
209              
210 0 0         return unless $output;
211              
212 0           $self->print("");
213             # $self->print($output, undef, undef);
214             }
215              
216             sub comment {
217 0     0 0   my ($self, $comment) = @_;
218              
219 0 0         return if $self->{'Pretty'}{'nocomments'};
220 0           my $output = $self->encode($comment->{Data});
221 0 0         return unless $output;
222              
223 0           $self->print("");
224             }
225              
226             sub encode {
227 0     0 0   my ($self, $string) = @_;
228              
229 0           return &{$self->{EscSub}}($string, $self->{'Escape'});
  0            
230             }
231              
232             sub start_cdata {
233 0     0 0   my ($self, $cdata) = @_;
234 0           $self->{'InCDATA'} = 1;
235 0           $self->print(undef, '
236             }
237              
238             sub end_cdata {
239 0     0 0   my ($self, $cdata) = @_;
240 0           $self->{'InCDATA'} = 0;
241 0           $self->print(undef, ']]>', undef);
242             }
243              
244             sub print {
245 0     0 0   my ($self, $left, $output, $right) = @_;
246 0           my $sendbuf = "";
247              
248 0 0         if ($self->{Sendleft}) {
249 0           $sendbuf .= $self->{'LeftSPC'};
250 0 0         $sendbuf .= $self->{'Indent'} x $self->{'LastCount'}
251             if $self->{'Indent'};
252 0           $sendbuf .= $self->{Sendleft};
253             }
254 0 0         $sendbuf .= $self->{Sendbuf} if defined $self->{Sendbuf};
255 0 0         $sendbuf .= $self->{'ElemSPC'}.$self->{Sendright} if $self->{Sendright};
256              
257 0 0         if ($sendbuf ne "") {
258 0 0         $self->{Output}->print( $sendbuf ) if $self->{Output};
259 0 0         push(@{$self->{Strings}}, $sendbuf) unless $self->{NoString};
  0            
260             }
261              
262 0           $self->{Sendleft} = $left;
263 0           $self->{Sendbuf} = $output;
264 0           $self->{Sendright} = $right;
265 0           $self->{LastCount} = $self->{'Counter'};
266             }
267              
268             1;
269              
270             =head1 NAME
271              
272             XML::Handler::YAWriter - Yet another Perl SAX XML Writer
273              
274             =head1 SYNOPSIS
275              
276             use XML::Handler::YAWriter;
277              
278             my $ya = new XML::Handler::YAWriter( %options );
279             my $perlsax = new XML::Parser::PerlSAX( 'Handler' => $ya );
280              
281             =head1 DESCRIPTION
282              
283             YAWriter implements Yet Another XML::Handler::Writer. The reasons for
284             this one are that I needed a flexible escaping technique, and want
285             some kind of pretty printing. If an instance of YAWriter is created
286             without any options, the default behavior is to produce an array of
287             strings containing the XML in :
288              
289             @{$ya->{Strings}}
290              
291             =head2 Options
292              
293             Options are given in the usual 'key' => 'value' idiom.
294              
295             =over
296              
297             =item Output IO::File
298              
299             This option tells YAWriter to use an already open file for output, instead
300             of using $ya->{Strings} to store the array of strings. It should be noted
301             that the only thing the object needs to implement is the print method. So
302             anything can be used to receive a stream of strings from YAWriter.
303              
304             =item AsFile string
305              
306             This option will cause start_document to open named file and end_document
307             to close it. Use the literal dash "-" if you want to print on standard
308             output.
309              
310             =item AsPipe string
311              
312             This option will cause start_document to open a pipe and end_document
313             to close it. The pipe is a normal shell command. Secure shell comes handy
314             but has a 2GB limit on most systems.
315              
316             =item AsArray boolean
317              
318             This option will force storage of the XML in $ya->{Strings}, even if the
319             Output option is given.
320              
321             =item AsString boolean
322              
323             This option will cause end_document to return the complete XML document
324             in a single string. Most SAX drivers return the value of end_document
325             as a result of their parse method. As this may not work with some
326             combinations of SAX drivers and filters, a join of $ya->{Strings} in
327             the controlling method is preferred.
328              
329             =item Encoding string
330              
331             This will change the default encoding from UTF-8 to anything you like.
332             You should ensure that given data are already in this encoding or provide
333             an Escape hash, to tell YAWriter about the recoding.
334              
335             =item Escape hash
336              
337             The Escape hash defines substitutions that have to be done to any
338             string, with the exception of the processing_instruction and doctype_decl
339             methods, where I think that escaping of target and data would cause more
340             trouble than necessary.
341              
342             The default value for Escape is
343              
344             $XML::Handler::YAWriter::escape = {
345             '&' => '&',
346             '<' => '<',
347             '>' => '>',
348             '"' => '"',
349             '--' => '--'
350             };
351              
352             YAWriter will use an evaluated sub to make the recoding based on a given
353             Escape hash reasonably fast. Future versions may use XS to improve this
354             performance bottleneck.
355              
356             =item Pretty hash
357              
358             Hash of string => boolean tuples, to define kind of
359             prettyprinting. Default to undef. Possible string values:
360              
361             =over
362              
363             =item AddHiddenNewline boolean
364              
365             Add hidden newline before ">"
366              
367             =item AddHiddenAttrTab boolean
368              
369             Add hidden tabulation for attributes
370              
371             =item CatchEmptyElement boolean
372              
373             Catch empty Elements, apply "/>" compression
374              
375             =item CatchWhiteSpace boolean
376              
377             Catch whitespace with comments
378              
379             =item CompactAttrIndent
380              
381             Places Attributes on the same line as the Element
382              
383             =item IsSGML boolean
384              
385             This option will cause start_document, processing_instruction and doctype_decl
386             to appear as SGML. The SGML is still well-formed of course, if your SAX events
387             are well-formed.
388              
389             =item NoComments boolean
390              
391             Supress Comments
392              
393             =item NoDTD boolean
394              
395             Supress DTD
396              
397             =item NoPI boolean
398              
399             Supress Processing Instructions
400              
401             =item NoProlog boolean
402              
403             Supress Prolog
404              
405             =item NoWhiteSpace boolean
406              
407             Supress WhiteSpace to clean documents from prior pretty printing.
408              
409             =item PrettyWhiteIndent boolean
410              
411             Add visible indent before any eventstring
412              
413             =item PrettyWhiteNewline boolean
414              
415             Add visible newlines before any eventstring
416              
417             =item SAX1 boolean (not yet implemented)
418              
419             Output only SAX1 compliant eventstrings
420              
421             =back
422              
423             =back
424              
425             =head2 Notes:
426              
427             Correct handling of start_document and end_document is required!
428              
429             The YAWriter Object initialises its structures during start_document
430             and does its cleanup during end_document. If you forget to call
431             start_document, any other method will break during the run. Most likely
432             place is the encode method, trying to eval undef as a subroutine. If
433             you forget to call end_document, you should not use a single instance
434             of YAWriter more than once.
435              
436             For small documents AsArray may be the fastest method and AsString
437             the easiest one to receive the output of YAWriter. But AsString and
438             AsArray may run out of memory with infinite SAX streams. The only
439             method XML::Handler::Writer calls on a given Output object is the print
440             method. So it's easy to use a self written Output object to improve
441             streaming.
442            
443             A single instance of XML::Handler::YAWriter is able to produce more
444             than one file in a single run. Be sure to provide a fresh IO::File
445             as Output before you call start_document and close this File after
446             calling end_document. Or provide a filename in AsFile, so start_document
447             and end_document can open and close its own filehandle.
448            
449             Automatic recoding between 8bit and 16bit does not work in any Perl correctly !
450              
451             I have Perl-5.00563 at home and here I can specify "use utf8;" in the right
452             places to make recoding work. But I dislike saying "use 5.00555;" because
453             many systems run 5.00503.
454            
455             If you use some 8bit character set internally and want use national characters,
456             either state your character as Encoding to be ISO-8859-1, or provide an Escape
457             hash similar to the following :
458              
459             $ya->{'Escape'} = {
460             '&' => '&',
461             '<' => '<',
462             '>' => '>',
463             '"' => '"',
464             '--' => '--'
465             'ö' => 'ö'
466             'ä' => 'ä'
467             'ü' => 'ü'
468             'Ö' => 'Ö'
469             'Ä' => 'Ä'
470             'Ü' => 'Ü'
471             'ß' => 'ß'
472             };
473              
474             You may abuse YAWriter to clean whitespace from XML documents. Take a look
475             at test.pl, doing just that with an XML::Edifact message, without querying
476             the DTD. This may work in 99% of the cases where you want to get rid of
477             ignorable whitespace caused by the various forms of pretty printing.
478            
479             my $ya = new XML::Handler::YAWriter(
480             'Output' => new IO::File ( ">-" );
481             'Pretty' => {
482             'NoWhiteSpace'=>1,
483             'NoComments'=>1,
484             'AddHiddenNewline'=>1,
485             'AddHiddenAttrTab'=>1,
486             } );
487            
488             XML::Handler::Writer implements any method XML::Parser::PerlSAX wants.
489             This extends the Java SAX1.0 specification. I have in mind using
490             Pretty=>SAX1=>1 to disable this feature, if abusing YAWriter for a
491             SAX proxy.
492            
493             =head1 AUTHOR
494              
495             Michael Koehne, Kraehe@Copyleft.De
496              
497             =head1 Thanks
498              
499             "Derksen, Eduard (Enno), CSCIO" helped me with the Escape
500             hash and gave quite a lot of useful comments.
501              
502             =head1 SEE ALSO
503              
504             L and L
505              
506             =cut