line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# Copyright (c) 1999 Michael Koehne |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# XML::Handler::YAWriter is free software. You can redistribute |
5
|
|
|
|
|
|
|
# and/or modify this copy under terms of GNU General Public License. |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
# Based on XML::Handler::XMLWriter Copyright (C) 1999 Ken MacLeod |
8
|
|
|
|
|
|
|
# Portions derived from code in XML::Writer by David Megginson |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
package XML::Handler::YAWriter; |
11
|
|
|
|
|
|
|
|
12
|
1
|
|
|
1
|
|
769
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
63
|
|
13
|
1
|
|
|
1
|
|
5
|
use vars qw($VERSION); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
118
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
$VERSION="0.23"; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
sub new { |
18
|
0
|
|
|
0
|
0
|
|
my $type = shift; |
19
|
0
|
0
|
|
|
|
|
my $self = ($#_ == 0) ? { %{ (shift) } } : { @_ }; |
|
0
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
|
21
|
0
|
|
|
|
|
|
return bless $self, $type; |
22
|
|
|
|
|
|
|
} |
23
|
|
|
|
|
|
|
|
24
|
1
|
|
|
1
|
|
5
|
use vars qw($escapes); |
|
1
|
|
|
|
|
11
|
|
|
1
|
|
|
|
|
2296
|
|
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
$escapes = { '&' => '&', |
27
|
|
|
|
|
|
|
'<' => '<', |
28
|
|
|
|
|
|
|
'>' => '>', |
29
|
|
|
|
|
|
|
'"' => '"', |
30
|
|
|
|
|
|
|
'--' => '--' |
31
|
|
|
|
|
|
|
}; |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
sub start_document { |
34
|
0
|
|
|
0
|
0
|
|
my ($self, $document) = @_; |
35
|
0
|
|
|
|
|
|
my ($lc,$uc); |
36
|
|
|
|
|
|
|
|
37
|
0
|
|
|
|
|
|
$self->{'Strings'} = []; |
38
|
0
|
0
|
|
|
|
|
$self->{'Escape'} = $escapes unless $self->{'Escape'}; |
39
|
0
|
0
|
|
|
|
|
$self->{'Encoding'} = "UTF-8" unless $self->{'Encoding'}; |
40
|
|
|
|
|
|
|
|
41
|
0
|
0
|
|
|
|
|
if ($self->{'AsFile'}) { |
|
|
0
|
|
|
|
|
|
42
|
0
|
|
|
|
|
|
require IO::File; |
43
|
0
|
|
0
|
|
|
|
$self->{'Output'} = new IO::File(">".$self->{'AsFile'}) || die "$!"; |
44
|
|
|
|
|
|
|
} elsif ($self->{'AsPipe'}) { |
45
|
0
|
|
|
|
|
|
require IO::File; |
46
|
0
|
|
0
|
|
|
|
$self->{'Output'} = new IO::File("|".$self->{'AsPipe'}) || die "$!"; |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
|
49
|
0
|
|
0
|
|
|
|
$self->{'NoString'} = ($self->{'Output'} && ! $self->{'AsArray'}); |
50
|
|
|
|
|
|
|
|
51
|
0
|
0
|
|
|
|
|
$self->{'Pretty'} = {} unless ref($self->{'Pretty'}) eq "HASH"; |
52
|
|
|
|
|
|
|
|
53
|
0
|
|
|
|
|
|
$uc = $self->{'Pretty'}; |
54
|
0
|
|
|
|
|
|
foreach (keys %$uc) { |
55
|
0
|
|
|
|
|
|
$lc = lc $_; |
56
|
0
|
0
|
|
|
|
|
if ($lc ne $_) { |
57
|
0
|
|
|
|
|
|
$self->{'Pretty'}{$lc} = $self->{'Pretty'}{$_}; |
58
|
0
|
|
|
|
|
|
delete $self->{'Pretty'}{$_}; |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
} |
61
|
0
|
0
|
|
|
|
|
$self->{'LeftSPC'} = $self->{'Pretty'}{'prettywhitenewline'} ? "\n" : ""; |
62
|
0
|
0
|
|
|
|
|
$self->{'Indent'} = $self->{'Pretty'}{'prettywhiteindent'} ? " " : ""; |
63
|
0
|
0
|
|
|
|
|
$self->{'AttrSPC'} = $self->{'Pretty'}{'addhiddenattrtab'} ? "\n\t" : " "; |
64
|
0
|
0
|
|
|
|
|
$self->{'ElemSPC'} = $self->{'Pretty'}{'addhiddennewline'} ? "\n" : ""; |
65
|
0
|
|
|
|
|
|
$self->{'CompactAttr'} = $self->{'Pretty'}{'compactattrindent'}; |
66
|
0
|
|
|
|
|
|
$self->{'Counter'} = 0; |
67
|
0
|
|
|
|
|
|
$self->{'Section'} = 0; |
68
|
0
|
|
|
|
|
|
$self->{LastCount} = 0; |
69
|
0
|
|
|
|
|
|
$self->{'InCDATA'} = 0; |
70
|
|
|
|
|
|
|
|
71
|
0
|
|
|
|
|
|
undef $self->{Sendleft}; |
72
|
0
|
|
|
|
|
|
undef $self->{Sendbuf}; |
73
|
0
|
|
|
|
|
|
undef $self->{Sendright}; |
74
|
|
|
|
|
|
|
|
75
|
0
|
|
|
|
|
|
my $sub = 'sub { my ($str,$esc) = @_; $str =~ s/(' . |
76
|
0
|
|
|
|
|
|
join("|", map { $_ = "\Q$_\E" } keys %{$self->{Escape}}). |
|
0
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
')/$esc->{$1}/oge; return $str; }'; |
78
|
|
|
|
|
|
|
|
79
|
0
|
|
|
|
|
|
$self->{EscSub} = eval $sub; |
80
|
|
|
|
|
|
|
|
81
|
0
|
0
|
|
|
|
|
$self->print( |
82
|
|
|
|
|
|
|
undef, |
83
|
|
|
|
|
|
|
"{'Encoding'}."\"?>", |
84
|
|
|
|
|
|
|
undef) unless $self->{'Pretty'}{'noprolog'}; |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
sub end_document { |
89
|
0
|
|
|
0
|
0
|
|
my ($self, $document) = @_; |
90
|
|
|
|
|
|
|
|
91
|
0
|
0
|
|
|
|
|
$self->print(undef,"\n",undef) unless $self->{'LeftSPC'}; |
92
|
0
|
|
|
|
|
|
$self->print(undef,undef,undef); |
93
|
|
|
|
|
|
|
|
94
|
0
|
|
|
|
|
|
my $string = undef; |
95
|
0
|
0
|
|
|
|
|
$string = join('', @{$self->{Strings}}) if $self->{AsString}; |
|
0
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
|
97
|
0
|
0
|
|
|
|
|
if ($self->{'AsFile'}) { |
98
|
0
|
|
|
|
|
|
$self->{'Output'}->close(); |
99
|
0
|
|
|
|
|
|
undef $self->{'Output'}; |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
|
102
|
0
|
|
|
|
|
|
return($string); |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
sub doctype_decl { |
106
|
0
|
|
|
0
|
0
|
|
my ($self, $properties) = @_; |
107
|
|
|
|
|
|
|
|
108
|
0
|
0
|
|
|
|
|
return if $self->{'Pretty'}{'nodtd'}; |
109
|
0
|
0
|
|
|
|
|
return unless $properties->{'Name'}; |
110
|
|
|
|
|
|
|
|
111
|
0
|
|
|
|
|
|
my $attspc = $self->{'AttrSPC'}; |
112
|
0
|
|
|
|
|
|
my $output = "DOCTYPE ".$properties->{'Name'}; |
113
|
0
|
0
|
|
|
|
|
$output .= $attspc.'SYSTEM "'.$properties->{'SystemId'}.'"' if $properties->{'SystemId'}; |
114
|
0
|
0
|
|
|
|
|
$output .= $attspc.'PUBLIC "'.$properties->{'PublicId'}.'"' if $properties->{'PublicId'}; |
115
|
0
|
0
|
|
|
|
|
$output .= $attspc.$properties->{'Internal'} if $properties->{'Internal'}; |
116
|
|
|
|
|
|
|
|
117
|
0
|
|
|
|
|
|
$self->print(""); |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
sub processing_instruction { |
121
|
0
|
|
|
0
|
0
|
|
my ($self, $pi) = @_; |
122
|
|
|
|
|
|
|
|
123
|
0
|
0
|
|
|
|
|
return if $self->{'Pretty'}{'nopi'}; |
124
|
0
|
|
|
|
|
|
my $output = undef; |
125
|
|
|
|
|
|
|
|
126
|
0
|
0
|
|
|
|
|
$output = $pi->{Target}." " if $pi->{Target}; |
127
|
0
|
0
|
|
|
|
|
$output .= $pi->{Data}." " if $pi->{Data}; |
128
|
|
|
|
|
|
|
|
129
|
0
|
0
|
|
|
|
|
return unless $output; |
130
|
|
|
|
|
|
|
|
131
|
0
|
|
|
|
|
|
chop $output; |
132
|
|
|
|
|
|
|
|
133
|
0
|
0
|
|
|
|
|
if ($self->{'Pretty'}{issgml}) { |
134
|
0
|
|
|
|
|
|
$self->print("", $output, ">") |
135
|
|
|
|
|
|
|
} else { |
136
|
0
|
|
|
|
|
|
$self->print("", $output, "?>") |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
sub start_element { |
141
|
0
|
|
|
0
|
0
|
|
my ($self, $element) = @_; |
142
|
0
|
|
|
|
|
|
my $name; |
143
|
|
|
|
|
|
|
my $esc_value; |
144
|
0
|
|
|
|
|
|
my $attr; |
145
|
|
|
|
|
|
|
|
146
|
0
|
|
|
|
|
|
my $output = $element->{Name}; |
147
|
0
|
|
|
|
|
|
my $attrspc= $self->{'AttrSPC'}; |
148
|
|
|
|
|
|
|
|
149
|
0
|
0
|
|
|
|
|
$attrspc= "\n".$self->{'Indent'} x (2+$self->{'Counter'}) |
150
|
|
|
|
|
|
|
if $self->{'Indent'}; |
151
|
0
|
0
|
|
|
|
|
$attrspc= " " if $self->{'CompactAttr'}; |
152
|
|
|
|
|
|
|
|
153
|
0
|
0
|
|
|
|
|
if ($element->{Attributes}) { |
154
|
0
|
|
|
|
|
|
$attr = $element->{Attributes}; |
155
|
0
|
|
|
|
|
|
foreach $name (sort keys %$attr) { |
156
|
0
|
|
|
|
|
|
$esc_value = $self->encode($attr->{$name}); |
157
|
|
|
|
|
|
|
|
158
|
0
|
|
|
|
|
|
$output .= $attrspc . "$name=\"$esc_value\""; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
0
|
|
|
|
|
|
$self->print("<", $output, ">"); |
163
|
0
|
|
|
|
|
|
$self->{'Counter'}++; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
sub end_element { |
167
|
0
|
|
|
0
|
0
|
|
my ($self, $element) = @_; |
168
|
0
|
|
|
|
|
|
my $name = $element->{Name}; |
169
|
|
|
|
|
|
|
|
170
|
0
|
|
|
|
|
|
$self->{'Counter'}--; |
171
|
0
|
0
|
0
|
|
|
|
if ($self->{'Pretty'}{'catchemptyelement'} && |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
172
|
|
|
|
|
|
|
($self->{Sendbuf} =~ /^$name/ ) && |
173
|
|
|
|
|
|
|
($self->{Sendleft} eq "<") && |
174
|
|
|
|
|
|
|
($self->{Sendright} eq ">") ) { |
175
|
0
|
|
|
|
|
|
$self->{Sendright} = "/>"; |
176
|
|
|
|
|
|
|
} else { |
177
|
0
|
|
|
|
|
|
$self->print("", $name, ">"); |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub characters { |
182
|
0
|
|
|
0
|
0
|
|
my ($self, $characters) = @_; |
183
|
|
|
|
|
|
|
|
184
|
0
|
0
|
|
|
|
|
return unless defined $characters->{Data}; |
185
|
|
|
|
|
|
|
|
186
|
0
|
0
|
|
|
|
|
my $output = $self->{'InCDATA'} ? |
187
|
|
|
|
|
|
|
$characters->{Data} : |
188
|
|
|
|
|
|
|
$self->encode($characters->{Data}); |
189
|
|
|
|
|
|
|
|
190
|
0
|
0
|
0
|
|
|
|
if ($self->{'Pretty'}{'catchwhitespace'} && !$self->{'InCDATA'}) { |
|
|
0
|
0
|
|
|
|
|
191
|
0
|
0
|
|
|
|
|
$output =~ s/^([ \t\n\r]+)//; $self->print("") if $1; |
|
0
|
|
|
|
|
|
|
192
|
0
|
0
|
|
|
|
|
return if $output eq ""; |
193
|
0
|
0
|
|
|
|
|
$output =~ s/([ \t\n\r]+)\$//; $self->print("") if $1; |
|
0
|
|
|
|
|
|
|
194
|
0
|
0
|
|
|
|
|
return if $output eq ""; |
195
|
|
|
|
|
|
|
} elsif ($self->{'Pretty'}{'nowhitespace'} && !$self->{'InCDATA'}) { |
196
|
0
|
|
|
|
|
|
$output =~ s/^([ \t\n\r]+)//; |
197
|
0
|
0
|
|
|
|
|
return if $output eq ""; |
198
|
0
|
|
|
|
|
|
$output =~ s/([ \t\n\r]+)\$//; |
199
|
0
|
0
|
|
|
|
|
return if $output eq ""; |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
0
|
|
|
|
|
|
$self->print(undef, $output, undef); |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
sub ignorable_whitespace { |
206
|
0
|
|
|
0
|
0
|
|
my ($self, $whitespace) = @_; |
207
|
|
|
|
|
|
|
|
208
|
0
|
|
|
|
|
|
my $output = $whitespace->{Data}; |
209
|
|
|
|
|
|
|
|
210
|
0
|
0
|
|
|
|
|
return unless $output; |
211
|
|
|
|
|
|
|
|
212
|
0
|
|
|
|
|
|
$self->print(""); |
213
|
|
|
|
|
|
|
# $self->print($output, undef, undef); |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
sub comment { |
217
|
0
|
|
|
0
|
0
|
|
my ($self, $comment) = @_; |
218
|
|
|
|
|
|
|
|
219
|
0
|
0
|
|
|
|
|
return if $self->{'Pretty'}{'nocomments'}; |
220
|
0
|
|
|
|
|
|
my $output = $self->encode($comment->{Data}); |
221
|
0
|
0
|
|
|
|
|
return unless $output; |
222
|
|
|
|
|
|
|
|
223
|
0
|
|
|
|
|
|
$self->print(""); |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
sub encode { |
227
|
0
|
|
|
0
|
0
|
|
my ($self, $string) = @_; |
228
|
|
|
|
|
|
|
|
229
|
0
|
|
|
|
|
|
return &{$self->{EscSub}}($string, $self->{'Escape'}); |
|
0
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
sub start_cdata { |
233
|
0
|
|
|
0
|
0
|
|
my ($self, $cdata) = @_; |
234
|
0
|
|
|
|
|
|
$self->{'InCDATA'} = 1; |
235
|
0
|
|
|
|
|
|
$self->print(undef, '
|
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
sub end_cdata { |
239
|
0
|
|
|
0
|
0
|
|
my ($self, $cdata) = @_; |
240
|
0
|
|
|
|
|
|
$self->{'InCDATA'} = 0; |
241
|
0
|
|
|
|
|
|
$self->print(undef, ']]>', undef); |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
sub print { |
245
|
0
|
|
|
0
|
0
|
|
my ($self, $left, $output, $right) = @_; |
246
|
0
|
|
|
|
|
|
my $sendbuf = ""; |
247
|
|
|
|
|
|
|
|
248
|
0
|
0
|
|
|
|
|
if ($self->{Sendleft}) { |
249
|
0
|
|
|
|
|
|
$sendbuf .= $self->{'LeftSPC'}; |
250
|
0
|
0
|
|
|
|
|
$sendbuf .= $self->{'Indent'} x $self->{'LastCount'} |
251
|
|
|
|
|
|
|
if $self->{'Indent'}; |
252
|
0
|
|
|
|
|
|
$sendbuf .= $self->{Sendleft}; |
253
|
|
|
|
|
|
|
} |
254
|
0
|
0
|
|
|
|
|
$sendbuf .= $self->{Sendbuf} if defined $self->{Sendbuf}; |
255
|
0
|
0
|
|
|
|
|
$sendbuf .= $self->{'ElemSPC'}.$self->{Sendright} if $self->{Sendright}; |
256
|
|
|
|
|
|
|
|
257
|
0
|
0
|
|
|
|
|
if ($sendbuf ne "") { |
258
|
0
|
0
|
|
|
|
|
$self->{Output}->print( $sendbuf ) if $self->{Output}; |
259
|
0
|
0
|
|
|
|
|
push(@{$self->{Strings}}, $sendbuf) unless $self->{NoString}; |
|
0
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
|
262
|
0
|
|
|
|
|
|
$self->{Sendleft} = $left; |
263
|
0
|
|
|
|
|
|
$self->{Sendbuf} = $output; |
264
|
0
|
|
|
|
|
|
$self->{Sendright} = $right; |
265
|
0
|
|
|
|
|
|
$self->{LastCount} = $self->{'Counter'}; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
1; |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head1 NAME |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
XML::Handler::YAWriter - Yet another Perl SAX XML Writer |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
=head1 SYNOPSIS |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
use XML::Handler::YAWriter; |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
my $ya = new XML::Handler::YAWriter( %options ); |
279
|
|
|
|
|
|
|
my $perlsax = new XML::Parser::PerlSAX( 'Handler' => $ya ); |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=head1 DESCRIPTION |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
YAWriter implements Yet Another XML::Handler::Writer. The reasons for |
284
|
|
|
|
|
|
|
this one are that I needed a flexible escaping technique, and want |
285
|
|
|
|
|
|
|
some kind of pretty printing. If an instance of YAWriter is created |
286
|
|
|
|
|
|
|
without any options, the default behavior is to produce an array of |
287
|
|
|
|
|
|
|
strings containing the XML in : |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
@{$ya->{Strings}} |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=head2 Options |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
Options are given in the usual 'key' => 'value' idiom. |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=over |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
=item Output IO::File |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
This option tells YAWriter to use an already open file for output, instead |
300
|
|
|
|
|
|
|
of using $ya->{Strings} to store the array of strings. It should be noted |
301
|
|
|
|
|
|
|
that the only thing the object needs to implement is the print method. So |
302
|
|
|
|
|
|
|
anything can be used to receive a stream of strings from YAWriter. |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=item AsFile string |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
This option will cause start_document to open named file and end_document |
307
|
|
|
|
|
|
|
to close it. Use the literal dash "-" if you want to print on standard |
308
|
|
|
|
|
|
|
output. |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=item AsPipe string |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
This option will cause start_document to open a pipe and end_document |
313
|
|
|
|
|
|
|
to close it. The pipe is a normal shell command. Secure shell comes handy |
314
|
|
|
|
|
|
|
but has a 2GB limit on most systems. |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=item AsArray boolean |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
This option will force storage of the XML in $ya->{Strings}, even if the |
319
|
|
|
|
|
|
|
Output option is given. |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
=item AsString boolean |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
This option will cause end_document to return the complete XML document |
324
|
|
|
|
|
|
|
in a single string. Most SAX drivers return the value of end_document |
325
|
|
|
|
|
|
|
as a result of their parse method. As this may not work with some |
326
|
|
|
|
|
|
|
combinations of SAX drivers and filters, a join of $ya->{Strings} in |
327
|
|
|
|
|
|
|
the controlling method is preferred. |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
=item Encoding string |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
This will change the default encoding from UTF-8 to anything you like. |
332
|
|
|
|
|
|
|
You should ensure that given data are already in this encoding or provide |
333
|
|
|
|
|
|
|
an Escape hash, to tell YAWriter about the recoding. |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=item Escape hash |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
The Escape hash defines substitutions that have to be done to any |
338
|
|
|
|
|
|
|
string, with the exception of the processing_instruction and doctype_decl |
339
|
|
|
|
|
|
|
methods, where I think that escaping of target and data would cause more |
340
|
|
|
|
|
|
|
trouble than necessary. |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
The default value for Escape is |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
$XML::Handler::YAWriter::escape = { |
345
|
|
|
|
|
|
|
'&' => '&', |
346
|
|
|
|
|
|
|
'<' => '<', |
347
|
|
|
|
|
|
|
'>' => '>', |
348
|
|
|
|
|
|
|
'"' => '"', |
349
|
|
|
|
|
|
|
'--' => '--' |
350
|
|
|
|
|
|
|
}; |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
YAWriter will use an evaluated sub to make the recoding based on a given |
353
|
|
|
|
|
|
|
Escape hash reasonably fast. Future versions may use XS to improve this |
354
|
|
|
|
|
|
|
performance bottleneck. |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=item Pretty hash |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
Hash of string => boolean tuples, to define kind of |
359
|
|
|
|
|
|
|
prettyprinting. Default to undef. Possible string values: |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=over |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=item AddHiddenNewline boolean |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
Add hidden newline before ">" |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
=item AddHiddenAttrTab boolean |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
Add hidden tabulation for attributes |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
=item CatchEmptyElement boolean |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
Catch empty Elements, apply "/>" compression |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
=item CatchWhiteSpace boolean |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
Catch whitespace with comments |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
=item CompactAttrIndent |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
Places Attributes on the same line as the Element |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=item IsSGML boolean |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
This option will cause start_document, processing_instruction and doctype_decl |
386
|
|
|
|
|
|
|
to appear as SGML. The SGML is still well-formed of course, if your SAX events |
387
|
|
|
|
|
|
|
are well-formed. |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=item NoComments boolean |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
Supress Comments |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=item NoDTD boolean |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Supress DTD |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=item NoPI boolean |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
Supress Processing Instructions |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=item NoProlog boolean |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
Supress Prolog |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=item NoWhiteSpace boolean |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
Supress WhiteSpace to clean documents from prior pretty printing. |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=item PrettyWhiteIndent boolean |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
Add visible indent before any eventstring |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=item PrettyWhiteNewline boolean |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
Add visible newlines before any eventstring |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=item SAX1 boolean (not yet implemented) |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Output only SAX1 compliant eventstrings |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=back |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
=back |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=head2 Notes: |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Correct handling of start_document and end_document is required! |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
The YAWriter Object initialises its structures during start_document |
430
|
|
|
|
|
|
|
and does its cleanup during end_document. If you forget to call |
431
|
|
|
|
|
|
|
start_document, any other method will break during the run. Most likely |
432
|
|
|
|
|
|
|
place is the encode method, trying to eval undef as a subroutine. If |
433
|
|
|
|
|
|
|
you forget to call end_document, you should not use a single instance |
434
|
|
|
|
|
|
|
of YAWriter more than once. |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
For small documents AsArray may be the fastest method and AsString |
437
|
|
|
|
|
|
|
the easiest one to receive the output of YAWriter. But AsString and |
438
|
|
|
|
|
|
|
AsArray may run out of memory with infinite SAX streams. The only |
439
|
|
|
|
|
|
|
method XML::Handler::Writer calls on a given Output object is the print |
440
|
|
|
|
|
|
|
method. So it's easy to use a self written Output object to improve |
441
|
|
|
|
|
|
|
streaming. |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
A single instance of XML::Handler::YAWriter is able to produce more |
444
|
|
|
|
|
|
|
than one file in a single run. Be sure to provide a fresh IO::File |
445
|
|
|
|
|
|
|
as Output before you call start_document and close this File after |
446
|
|
|
|
|
|
|
calling end_document. Or provide a filename in AsFile, so start_document |
447
|
|
|
|
|
|
|
and end_document can open and close its own filehandle. |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
Automatic recoding between 8bit and 16bit does not work in any Perl correctly ! |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
I have Perl-5.00563 at home and here I can specify "use utf8;" in the right |
452
|
|
|
|
|
|
|
places to make recoding work. But I dislike saying "use 5.00555;" because |
453
|
|
|
|
|
|
|
many systems run 5.00503. |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
If you use some 8bit character set internally and want use national characters, |
456
|
|
|
|
|
|
|
either state your character as Encoding to be ISO-8859-1, or provide an Escape |
457
|
|
|
|
|
|
|
hash similar to the following : |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
$ya->{'Escape'} = { |
460
|
|
|
|
|
|
|
'&' => '&', |
461
|
|
|
|
|
|
|
'<' => '<', |
462
|
|
|
|
|
|
|
'>' => '>', |
463
|
|
|
|
|
|
|
'"' => '"', |
464
|
|
|
|
|
|
|
'--' => '--' |
465
|
|
|
|
|
|
|
'ö' => 'ö' |
466
|
|
|
|
|
|
|
'ä' => 'ä' |
467
|
|
|
|
|
|
|
'ü' => 'ü' |
468
|
|
|
|
|
|
|
'Ö' => 'Ö' |
469
|
|
|
|
|
|
|
'Ä' => 'Ä' |
470
|
|
|
|
|
|
|
'Ü' => 'Ü' |
471
|
|
|
|
|
|
|
'ß' => 'ß' |
472
|
|
|
|
|
|
|
}; |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
You may abuse YAWriter to clean whitespace from XML documents. Take a look |
475
|
|
|
|
|
|
|
at test.pl, doing just that with an XML::Edifact message, without querying |
476
|
|
|
|
|
|
|
the DTD. This may work in 99% of the cases where you want to get rid of |
477
|
|
|
|
|
|
|
ignorable whitespace caused by the various forms of pretty printing. |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
my $ya = new XML::Handler::YAWriter( |
480
|
|
|
|
|
|
|
'Output' => new IO::File ( ">-" ); |
481
|
|
|
|
|
|
|
'Pretty' => { |
482
|
|
|
|
|
|
|
'NoWhiteSpace'=>1, |
483
|
|
|
|
|
|
|
'NoComments'=>1, |
484
|
|
|
|
|
|
|
'AddHiddenNewline'=>1, |
485
|
|
|
|
|
|
|
'AddHiddenAttrTab'=>1, |
486
|
|
|
|
|
|
|
} ); |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
XML::Handler::Writer implements any method XML::Parser::PerlSAX wants. |
489
|
|
|
|
|
|
|
This extends the Java SAX1.0 specification. I have in mind using |
490
|
|
|
|
|
|
|
Pretty=>SAX1=>1 to disable this feature, if abusing YAWriter for a |
491
|
|
|
|
|
|
|
SAX proxy. |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
=head1 AUTHOR |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
Michael Koehne, Kraehe@Copyleft.De |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
=head1 Thanks |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
"Derksen, Eduard (Enno), CSCIO" helped me with the Escape |
500
|
|
|
|
|
|
|
hash and gave quite a lot of useful comments. |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
=head1 SEE ALSO |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
L and L |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=cut |