| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
## -*- cperl -*- |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package XML::DT; |
|
4
|
6
|
|
|
6
|
|
85560
|
use 5.008006; |
|
|
6
|
|
|
|
|
19
|
|
|
|
6
|
|
|
|
|
229
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
6
|
|
|
6
|
|
28
|
use strict; |
|
|
6
|
|
|
|
|
7
|
|
|
|
6
|
|
|
|
|
203
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
6
|
|
|
6
|
|
3448
|
use Data::Dumper; |
|
|
6
|
|
|
|
|
39634
|
|
|
|
6
|
|
|
|
|
1635
|
|
|
9
|
6
|
|
|
6
|
|
3037
|
use LWP::Simple; |
|
|
6
|
|
|
|
|
351835
|
|
|
|
6
|
|
|
|
|
51
|
|
|
10
|
6
|
|
|
6
|
|
5153
|
use XML::DTDParser "ParseDTDFile"; |
|
|
6
|
|
|
|
|
84253
|
|
|
|
6
|
|
|
|
|
466
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
6
|
|
|
6
|
|
5537
|
use XML::LibXML ':libxml'; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our $PARSER = 'XML::LibXML'; |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
use parent 'Exporter'; |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use vars qw($c $u %v $q @dtcontext %dtcontextcount @dtatributes |
|
18
|
|
|
|
|
|
|
@dtattributes ); |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
our @EXPORT = qw(&dt &dtstring &dturl &inctxt &ctxt &mkdtskel &inpath |
|
21
|
|
|
|
|
|
|
&mkdtskel_fromDTD &mkdtdskel &tohtml &toxml &MMAPON $c %v $q $u |
|
22
|
|
|
|
|
|
|
&xmltree &pathdturl @dtcontext %dtcontextcount |
|
23
|
|
|
|
|
|
|
@dtatributes @dtattributes &pathdt &pathdtstring |
|
24
|
|
|
|
|
|
|
&father &gfather &ggfather &root); |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
our $VERSION = '0.67'; |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=encoding utf-8 |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 NAME |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
XML::DT - a package for down translation of XML files |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
use XML::DT; |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
%xml=( 'music' => sub{"Music from: $c\n"}, |
|
39
|
|
|
|
|
|
|
'lyrics' => sub{"Lyrics from: $v{name}\n"}, |
|
40
|
|
|
|
|
|
|
'title' => sub{ uc($c) }, |
|
41
|
|
|
|
|
|
|
'-userdata => { something => 'I like' }, |
|
42
|
|
|
|
|
|
|
'-default' => sub{"$q:$c"} ); |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
print dt($filename,%xml); |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head1 ABSTRACT |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
This module is a XML down processor. It maps tag (element) |
|
49
|
|
|
|
|
|
|
names to functions to process that element and respective |
|
50
|
|
|
|
|
|
|
contents. |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
This module processes XML files with an approach similar to |
|
55
|
|
|
|
|
|
|
OMNIMARK. As XML parser it uses XML::LibXML module in an independent |
|
56
|
|
|
|
|
|
|
way. |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
You can parse HTML files as if they were XML files. For this, you must |
|
59
|
|
|
|
|
|
|
supply an extra option to the hash: |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
%hander = ( -html => 1, |
|
62
|
|
|
|
|
|
|
... |
|
63
|
|
|
|
|
|
|
); |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
You can also ask the parser to recover from XML errors: |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
%hander = ( -recover => 1, |
|
68
|
|
|
|
|
|
|
... |
|
69
|
|
|
|
|
|
|
); |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head1 Functions |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=head2 dt |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
Down translation function C receives a filename and a set of |
|
76
|
|
|
|
|
|
|
expressions (functions) defining the processing and associated values |
|
77
|
|
|
|
|
|
|
for each element. |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head2 dtstring |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
C works in a similar way with C but takes input from a |
|
82
|
|
|
|
|
|
|
string instead of a file. |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=head2 dturl |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
C works in a similar way with C but takes input from an |
|
87
|
|
|
|
|
|
|
Internet url instead of a file. |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=head2 pathdt |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
The C function is a C function which can handle a subset |
|
92
|
|
|
|
|
|
|
of XPath on handler keys. Example: |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
%handler = ( |
|
95
|
|
|
|
|
|
|
"article/title" => sub{ toxml("h1",{},$c) }, |
|
96
|
|
|
|
|
|
|
"section/title" => sub{ toxml("h2",{},$c) }, |
|
97
|
|
|
|
|
|
|
"title" => sub{ $c }, |
|
98
|
|
|
|
|
|
|
"//image[@type='jpg']" => sub{ "JPEG: " }, |
|
99
|
|
|
|
|
|
|
"//image[@type='bmp']" => sub{ "BMP: sorry, no bitmaps on the web" }, |
|
100
|
|
|
|
|
|
|
) |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
pathdt($filename, %handler); |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Here are some examples of valid XPath expressions under XML::DT: |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
/aaa |
|
107
|
|
|
|
|
|
|
/aaa/bbb |
|
108
|
|
|
|
|
|
|
//ccc - ccc somewhere (same as "ccc") |
|
109
|
|
|
|
|
|
|
/*/aaa/* |
|
110
|
|
|
|
|
|
|
//* - same as "-default" |
|
111
|
|
|
|
|
|
|
/aaa[@id] - aaa with an attribute id |
|
112
|
|
|
|
|
|
|
/*[@*] - root with an attribute |
|
113
|
|
|
|
|
|
|
/aaa[not(@name)] - aaa with no attribute "name" |
|
114
|
|
|
|
|
|
|
//bbb[@name='foo'] - ... attribute "name" = "foo" |
|
115
|
|
|
|
|
|
|
/ccc[normalize-space(@name)='bbb'] |
|
116
|
|
|
|
|
|
|
//*[name()='bbb'] - complex way of saying "//bbb" |
|
117
|
|
|
|
|
|
|
//*[starts-with(name(),'aa')] - an element named "aa.*" |
|
118
|
|
|
|
|
|
|
//*[contains(name(),'c')] - an element ".*c.*" |
|
119
|
|
|
|
|
|
|
//aaa[string-length(name())=4] "...." |
|
120
|
|
|
|
|
|
|
//aaa[string-length(name())<4] ".{1,4}" |
|
121
|
|
|
|
|
|
|
//aaa[string-length(name())>5] ".{5,}" |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Note that not all XPath is currently handled by XML::DT. A lot of |
|
124
|
|
|
|
|
|
|
XPath will never be added to XML::DT because is not in accordance with |
|
125
|
|
|
|
|
|
|
the down translation model. For more documentation about XPath check |
|
126
|
|
|
|
|
|
|
the specification at http://www.w3c.org or some tutorials under |
|
127
|
|
|
|
|
|
|
http://www.zvon.org |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head2 pathdtstring |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
Like the C function but supporting XPath. |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=head2 pathdturl |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
Like the C function but supporting XPath. |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head2 ctxt |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Returns the context element of the currently being processed |
|
141
|
|
|
|
|
|
|
element. So, if you call C you will get your father element, |
|
142
|
|
|
|
|
|
|
and so on. |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=head2 inpath |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
C is true if the actual element path matches the |
|
147
|
|
|
|
|
|
|
provided pattern. This function is meant to be used in the element |
|
148
|
|
|
|
|
|
|
functions in order to achieve context dependent processing. |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=head2 inctxt |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
C is true if the actual element father matches the |
|
153
|
|
|
|
|
|
|
provided pattern. |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head2 toxml |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
This is the default "-default" function. It can be used to generate |
|
158
|
|
|
|
|
|
|
XML based on C<$c> C<$q> and C<%v> variables. Example: add a new |
|
159
|
|
|
|
|
|
|
attribute to element C without changing it: |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
%handler=( ... |
|
162
|
|
|
|
|
|
|
ele1 => sub { $v{at1} = "v1"; toxml(); }, |
|
163
|
|
|
|
|
|
|
) |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
C can also be used with 3 arguments: tag, attributes and contents |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
toxml("a",{href=> "http://local/f.html"}, "example") |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
returns: |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
example |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Empty tags are written as empty tags. If you want an empty tag with opening and |
|
174
|
|
|
|
|
|
|
closing tags, then use the C. |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head2 tohtml |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
See C. |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head2 xmltree |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
This simple function just makes a HASH reference: |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
{ -c => $c, -q => $q, all_the_other_attributes } |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
The function C understands this structure and makes XML with it. |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=head2 mkdtskel |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Used by the mkdtskel script to generate automatically a XML::DT perl |
|
191
|
|
|
|
|
|
|
script file based on an XML file. Check C manpage for |
|
192
|
|
|
|
|
|
|
details. |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=head2 mkdtskel_fromDTD |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
Used by the mkdtskel script to generate automatically a XML::DT perl |
|
197
|
|
|
|
|
|
|
script file based on an DTD file. Check C manpage for |
|
198
|
|
|
|
|
|
|
details. |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head2 mkdtdskel |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Used by the mkdtskel script to generate automatically a XML::DT perl |
|
203
|
|
|
|
|
|
|
script file based on a DTD file. Check C manpage for |
|
204
|
|
|
|
|
|
|
details. |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=head1 Accessing parents |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
With XML::DT you can access an element parent (or grand-parent) |
|
209
|
|
|
|
|
|
|
attributes, till the root of the XML document. |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
If you use c<$dtattributes[1]{foo} = 'bar'> on a processing function, |
|
212
|
|
|
|
|
|
|
you are defining the attribute C for that element parent. |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
In the same way, you can use C<$dtattributes[2]> to access the |
|
215
|
|
|
|
|
|
|
grand-parent. C<$dtattributes[-1]> is, as expected, the XML document |
|
216
|
|
|
|
|
|
|
root element. |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
There are some shortcuts: |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=over 4 |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=item C |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=item C |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=item C |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
You can use these functions to access to your C, grand-father |
|
229
|
|
|
|
|
|
|
(C) or great-grand-father (C): |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
father("x"); # returns value for attribute "x" on father element |
|
232
|
|
|
|
|
|
|
father("x", "value"); # sets value for attribute "x" on father |
|
233
|
|
|
|
|
|
|
# element |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
You can also use it directly as a reference to C<@dtattributes>: |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
father->{"x"}; # gets the attribute |
|
238
|
|
|
|
|
|
|
father->{"x"} = "value"; # sets the attribute |
|
239
|
|
|
|
|
|
|
$attributes = father; # gets all attributes reference |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=item C |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
You can use it as a function to access to your tree root element. |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
root("x"); # gets attribute C on root element |
|
247
|
|
|
|
|
|
|
root("x", "value"); # sets value for attribute C on root |
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
You can also use it directly as a reference to C<$dtattributes[-1]>: |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
root->{"x"}; # gets the attribute x |
|
252
|
|
|
|
|
|
|
root->{"x"} = "value"; # sets the attribute x |
|
253
|
|
|
|
|
|
|
$attributes = root; # gets all attributes reference |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=back |
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=head1 User provided element processing functions |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
The user must provide an HASH with a function for each element, that |
|
260
|
|
|
|
|
|
|
computes element output. Functions can use the element name C<$q>, the |
|
261
|
|
|
|
|
|
|
element content C<$c> and the attribute values hash C<%v>. |
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
All those global variables are defined in C<$CALLER::>. |
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
Each time an element is find the associated function is called. |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
Content is calculated by concatenation of element contents strings and |
|
268
|
|
|
|
|
|
|
interior elements return values. |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head2 C<-default> function |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
When a element has no associated function, the function associated |
|
273
|
|
|
|
|
|
|
with C<-default> called. If no C<-default> function is defined the |
|
274
|
|
|
|
|
|
|
default function returns a XML like string for the element. |
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
When you use C-type> definitions, you often need do set C<-default> |
|
277
|
|
|
|
|
|
|
function to return just the contents: C. |
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=head2 C<-outputenc> option |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
C<-outputenc> defines the output encoding (default is Unicode UTF8). |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=head2 C<-inputenc> option |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
C<-inputenc> forces a input encoding type. Whenever that is possible, |
|
286
|
|
|
|
|
|
|
define the input encoding in the XML file: |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head2 C<-pcdata> function |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
C<-pcdata> function is used to define transformation over the |
|
293
|
|
|
|
|
|
|
contents. Typically this function should look at context (see |
|
294
|
|
|
|
|
|
|
C function) |
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
The default C<-pcdata> function is the identity |
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=head2 C<-cdata> function |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
You can process C<> in a way different from pcdata. If you |
|
301
|
|
|
|
|
|
|
define a C<-cdata> method, it will be used. Otherwise, the C<-pcdata> |
|
302
|
|
|
|
|
|
|
method is called. |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=head2 C<-begin> function |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
Function to be executed before processing XML file. |
|
307
|
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
Example of use: initialization of side-effect variables |
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=head2 C<-end> function |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
Function to be executed after processing XML file. I can use C<$c> |
|
313
|
|
|
|
|
|
|
content value. The value returned by C<-end> will be the C return |
|
314
|
|
|
|
|
|
|
value. |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
Example of use: post-processing of returned contents |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=head2 C<-recover> option |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
If set, the parser will try to recover in XML errors. |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
=head2 C<-html> option |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
If set, the parser will try to recover in errors. Note that this |
|
325
|
|
|
|
|
|
|
differs from the previous one in the sense it uses some knowledge of |
|
326
|
|
|
|
|
|
|
the HTML structure for the recovery. |
|
327
|
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=head2 C<-userdata> option |
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
Use this to pass any information you like to your handlers. The data |
|
331
|
|
|
|
|
|
|
structure you pass in this option will be available as C<< $u >> in |
|
332
|
|
|
|
|
|
|
your code. -- New in 0.62. |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=head1 Elements with values other than strings (C<-type>) |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
By default all elements return strings, and contents (C<$c>) is the |
|
338
|
|
|
|
|
|
|
concatenation of the strings returned by the sub-elements. |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
In some situations the XML text contains values that are better |
|
341
|
|
|
|
|
|
|
processed as a structured type. |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
The following types (functors) are available: |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=over 4 |
|
346
|
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
=item THE_CHILD |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
Return the result of processing the only child of the element. |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=item LAST_CHILD |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
Returns the result of processing the last child of the element. |
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
=item STR |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
concatenates all the sub-elements returned values (DEFAULT) all the |
|
358
|
|
|
|
|
|
|
sub-element should return strings to be concatenated; |
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
=item SEQ |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
makes an ARRAY with all the sub elements contents; attributes are |
|
363
|
|
|
|
|
|
|
ignored (they should be processed in the sub-element). (returns a ref) |
|
364
|
|
|
|
|
|
|
If you have different types of sub-elements, you should use SEQH |
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=item SEQH |
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
makes an ARRAY of HASH with all the sub elements (returns a ref); for |
|
369
|
|
|
|
|
|
|
each sub-element: |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
-q => element name |
|
372
|
|
|
|
|
|
|
-c => contents |
|
373
|
|
|
|
|
|
|
at1 => at value1 for each attribute |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
=item MAP |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
makes an HASH with the sub elements; keys are the sub-element names, |
|
378
|
|
|
|
|
|
|
values are their contents. Attributes are ignored. (they should be |
|
379
|
|
|
|
|
|
|
processed in the sub-element) (returns a ref) |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=item MULTIMAP |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
makes an HASH of ARRAY; keys are the sub-element names; values are |
|
384
|
|
|
|
|
|
|
lists of contents; attributes are ignored (they should be processed in |
|
385
|
|
|
|
|
|
|
the sub-element); (returns a ref) |
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
=item MMAPON(element-list) |
|
388
|
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
makes an HASH with the sub-elements; keys are the sub-element names, |
|
390
|
|
|
|
|
|
|
values are their contents; attributes are ignored (they should be |
|
391
|
|
|
|
|
|
|
processed in the sub-element); for all the elements contained in the |
|
392
|
|
|
|
|
|
|
element-list, it is created an ARRAY with their contents. (returns a |
|
393
|
|
|
|
|
|
|
ref) |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
=item XML |
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
return a reference to an HASH with: |
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
-q => element name |
|
400
|
|
|
|
|
|
|
-c => contents |
|
401
|
|
|
|
|
|
|
at1 => at value1 for each attribute |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=item ZERO |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
don't process the sub-elements; return "" |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=back |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
When you use C-type> definitions, you often need do set C<-default> |
|
410
|
|
|
|
|
|
|
function returning just the contents C. |
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
=head2 An example: |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
use XML::DT; |
|
415
|
|
|
|
|
|
|
%handler = ( contacts => sub{ [ split(";",$c)] }, |
|
416
|
|
|
|
|
|
|
-default => sub{$c}, |
|
417
|
|
|
|
|
|
|
-type => { institution => 'MAP', |
|
418
|
|
|
|
|
|
|
degrees => MMAPON('name') |
|
419
|
|
|
|
|
|
|
tels => 'SEQ' } |
|
420
|
|
|
|
|
|
|
); |
|
421
|
|
|
|
|
|
|
$a = dt ("f.xml", %handler); |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
with the following f.xml |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
U.M. |
|
428
|
|
|
|
|
|
|
University of Minho |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
- 1111
|
|
431
|
|
|
|
|
|
|
- 1112
|
|
432
|
|
|
|
|
|
|
- 1113
|
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
Portugal |
|
435
|
|
|
|
|
|
|
J.Joao; J.Rocha; J.Ramalho |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
Computer science |
|
438
|
|
|
|
|
|
|
Informatica |
|
439
|
|
|
|
|
|
|
history |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
would make $a |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
{ 'name' => [ 'Computer science', |
|
445
|
|
|
|
|
|
|
'Informatica ', |
|
446
|
|
|
|
|
|
|
' history ' ], |
|
447
|
|
|
|
|
|
|
'institution' => { 'tels' => [ 1111, 1112, 1113 ], |
|
448
|
|
|
|
|
|
|
'name' => 'University of Minho', |
|
449
|
|
|
|
|
|
|
'where' => 'Portugal', |
|
450
|
|
|
|
|
|
|
'id' => 'U.M.', |
|
451
|
|
|
|
|
|
|
'contacts' => [ 'J.Joao', |
|
452
|
|
|
|
|
|
|
' J.Rocha', |
|
453
|
|
|
|
|
|
|
' J.Ramalho' ] } }; |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
=head1 DT Skeleton generation |
|
457
|
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
It is possible to build an initial processor program based on an example |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
To do this use the function C. |
|
461
|
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
Example: |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
perl -MXML::DT -e 'mkdtskel "f.xml"' > f.pl |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=head1 DTD skeleton generation |
|
467
|
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
It makes a naive DTD based on an example(s). |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
To do this use the function C. |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
Example: |
|
473
|
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
perl -MXML::DT -e 'mkdtdskel "f.xml"' > f.dtd |
|
475
|
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
mkdtskel(1) and mkdtdskel(1) |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=head1 AUTHORS |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
Home for XML::DT; |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
http://natura.di.uminho.pt/~jj/perl/XML/ |
|
485
|
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
Jose Joao Almeida, |
|
487
|
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
Alberto Manuel Simões, |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
Michel Rodriguez |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
José Carlos Ramalho |
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
Mark A. Hillebrand |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
Copyright 1999-2012 Project Natura. |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
This library is free software; you can redistribute it |
|
503
|
|
|
|
|
|
|
and/or modify it under the same terms as Perl itself. |
|
504
|
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=cut |
|
506
|
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
our %ty = (); |
|
510
|
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
sub dt { |
|
512
|
|
|
|
|
|
|
my ($file, %xml)=@_; |
|
513
|
|
|
|
|
|
|
my ($parser, $tree); |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
# Treat -decl option |
|
516
|
|
|
|
|
|
|
my $declr = ""; |
|
517
|
|
|
|
|
|
|
if ($xml{-declr}) { |
|
518
|
|
|
|
|
|
|
if ($xml{-outputenc}) { |
|
519
|
|
|
|
|
|
|
$declr = "\n"; |
|
520
|
|
|
|
|
|
|
} else { |
|
521
|
|
|
|
|
|
|
$declr = "\n"; |
|
522
|
|
|
|
|
|
|
} |
|
523
|
|
|
|
|
|
|
} |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
%ty = (); |
|
526
|
|
|
|
|
|
|
%ty = (%{$xml{'-type'}}) if defined($xml{'-type'}); |
|
527
|
|
|
|
|
|
|
$ty{-ROOT} = "NONE"; |
|
528
|
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
&{$xml{-begin}} if $xml{-begin}; |
|
530
|
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
# TODO --- how to force encoding with XML::LibXML? |
|
532
|
|
|
|
|
|
|
# $xml{-inputenc} |
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
# create a new LibXML parser |
|
535
|
|
|
|
|
|
|
$parser = XML::LibXML->new(); |
|
536
|
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
#### We don't wan't DT to load everytime the DTD (I Think!) |
|
538
|
|
|
|
|
|
|
$parser->validation(0); |
|
539
|
|
|
|
|
|
|
# $parser->expand_xinclude(0); # testing |
|
540
|
|
|
|
|
|
|
$parser->load_ext_dtd(0); |
|
541
|
|
|
|
|
|
|
$parser->expand_entities(0); |
|
542
|
|
|
|
|
|
|
$parser->expand_xincludes(1) if $xml{'-xinclude'}; |
|
543
|
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
# parse the file |
|
545
|
|
|
|
|
|
|
my $doc; |
|
546
|
|
|
|
|
|
|
if ( $xml{'-recover'}) { |
|
547
|
|
|
|
|
|
|
$parser->recover(1); |
|
548
|
|
|
|
|
|
|
eval { |
|
549
|
|
|
|
|
|
|
local $SIG{__WARN__} = sub{}; |
|
550
|
|
|
|
|
|
|
$doc = $parser->parse_file($file); |
|
551
|
|
|
|
|
|
|
}; |
|
552
|
|
|
|
|
|
|
return undef if !$doc; |
|
553
|
|
|
|
|
|
|
} |
|
554
|
|
|
|
|
|
|
elsif ( $xml{'-html'}) { |
|
555
|
|
|
|
|
|
|
$parser->recover(1); |
|
556
|
|
|
|
|
|
|
eval { |
|
557
|
|
|
|
|
|
|
local $SIG{__WARN__} = sub{}; |
|
558
|
|
|
|
|
|
|
$doc = $parser->parse_html_file($file); |
|
559
|
|
|
|
|
|
|
}; |
|
560
|
|
|
|
|
|
|
return undef if !$doc; |
|
561
|
|
|
|
|
|
|
} |
|
562
|
|
|
|
|
|
|
else { |
|
563
|
|
|
|
|
|
|
$doc = $parser->parse_file($file) |
|
564
|
|
|
|
|
|
|
} |
|
565
|
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
# get the document root element |
|
567
|
|
|
|
|
|
|
$tree = $doc->getDocumentElement(); |
|
568
|
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
my $return = ""; |
|
570
|
|
|
|
|
|
|
# execute End action if it exists |
|
571
|
|
|
|
|
|
|
if($xml{-end}) { |
|
572
|
|
|
|
|
|
|
$c = _omni("-ROOT", \%xml, $tree); |
|
573
|
|
|
|
|
|
|
$return = &{$xml{-end}} |
|
574
|
|
|
|
|
|
|
} else { |
|
575
|
|
|
|
|
|
|
$return = _omni("-ROOT",\%xml, $tree) |
|
576
|
|
|
|
|
|
|
} |
|
577
|
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
if ($declr) { |
|
579
|
|
|
|
|
|
|
return $declr.$return; |
|
580
|
|
|
|
|
|
|
} else { |
|
581
|
|
|
|
|
|
|
return $return; |
|
582
|
|
|
|
|
|
|
} |
|
583
|
|
|
|
|
|
|
} |
|
584
|
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
sub ctxt { |
|
587
|
|
|
|
|
|
|
my $level = $_[0]; |
|
588
|
|
|
|
|
|
|
$dtcontext[-$level-1]; |
|
589
|
|
|
|
|
|
|
} |
|
590
|
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
sub inpath { |
|
592
|
|
|
|
|
|
|
my $pattern = shift ; |
|
593
|
|
|
|
|
|
|
join ("/", @dtcontext) =~ m!\b$pattern\b!; |
|
594
|
|
|
|
|
|
|
} |
|
595
|
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
sub inctxt { |
|
598
|
|
|
|
|
|
|
my $pattern = shift ; |
|
599
|
|
|
|
|
|
|
# see if is in root context... |
|
600
|
|
|
|
|
|
|
return 1 if (($pattern eq "^" && @dtcontext==1) || $pattern eq ".*"); |
|
601
|
|
|
|
|
|
|
join("/", @dtcontext) =~ m!$pattern/[^/]*$! ; |
|
602
|
|
|
|
|
|
|
} |
|
603
|
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
sub father { |
|
605
|
|
|
|
|
|
|
my ($a,$b)=@_; |
|
606
|
|
|
|
|
|
|
if (defined($b)){$dtattributes[1]{$a} = $b} |
|
607
|
|
|
|
|
|
|
elsif(defined($a)){$dtattributes[1]{$a} } |
|
608
|
|
|
|
|
|
|
else {$dtattributes[1]} |
|
609
|
|
|
|
|
|
|
} |
|
610
|
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
sub gfather { |
|
612
|
|
|
|
|
|
|
my ($a,$b)=@_; |
|
613
|
|
|
|
|
|
|
if (defined($b)){$dtattributes[2]{$a} = $b} |
|
614
|
|
|
|
|
|
|
elsif(defined($a)){$dtattributes[2]{$a} } |
|
615
|
|
|
|
|
|
|
else {$dtattributes[2]} |
|
616
|
|
|
|
|
|
|
} |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
sub ggfather { |
|
620
|
|
|
|
|
|
|
my ($a,$b)=@_; |
|
621
|
|
|
|
|
|
|
if (defined($b)){$dtattributes[3]{$a} = $b} |
|
622
|
|
|
|
|
|
|
elsif(defined($a)){$dtattributes[3]{$a} } |
|
623
|
|
|
|
|
|
|
else {$dtattributes[3]} |
|
624
|
|
|
|
|
|
|
} |
|
625
|
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
sub root { ### the root |
|
628
|
|
|
|
|
|
|
my ($a,$b)=@_; |
|
629
|
|
|
|
|
|
|
if (defined($b)){$dtattributes[-1]{$a} = $b } |
|
630
|
|
|
|
|
|
|
elsif(defined($a)){$dtattributes[-1]{$a} } |
|
631
|
|
|
|
|
|
|
else {$dtattributes[-1] } |
|
632
|
|
|
|
|
|
|
} |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
sub pathdtstring{ |
|
635
|
|
|
|
|
|
|
my $string = shift; |
|
636
|
|
|
|
|
|
|
my %h = _pathtodt(@_); |
|
637
|
|
|
|
|
|
|
return dtstring($string,%h); |
|
638
|
|
|
|
|
|
|
} |
|
639
|
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
sub pathdturl{ |
|
643
|
|
|
|
|
|
|
my $url = shift; |
|
644
|
|
|
|
|
|
|
my %h = _pathtodt(@_); |
|
645
|
|
|
|
|
|
|
return dturl($url,%h); |
|
646
|
|
|
|
|
|
|
} |
|
647
|
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
sub dturl{ |
|
651
|
|
|
|
|
|
|
my $url = shift; |
|
652
|
|
|
|
|
|
|
my $contents = get($url); |
|
653
|
|
|
|
|
|
|
if ($contents) { |
|
654
|
|
|
|
|
|
|
return dtstring($contents, @_); |
|
655
|
|
|
|
|
|
|
} else { |
|
656
|
|
|
|
|
|
|
return undef; |
|
657
|
|
|
|
|
|
|
} |
|
658
|
|
|
|
|
|
|
} |
|
659
|
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
sub dtstring { |
|
663
|
|
|
|
|
|
|
my ($string, %xml)=@_; |
|
664
|
|
|
|
|
|
|
my ($parser, $tree); |
|
665
|
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
my $declr = ""; |
|
667
|
|
|
|
|
|
|
if ($xml{-declr}) { |
|
668
|
|
|
|
|
|
|
if ($xml{-outputenc}) { |
|
669
|
|
|
|
|
|
|
$declr = "\n"; |
|
670
|
|
|
|
|
|
|
} else { |
|
671
|
|
|
|
|
|
|
$declr = "\n"; |
|
672
|
|
|
|
|
|
|
} |
|
673
|
|
|
|
|
|
|
} |
|
674
|
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
$xml{'-type'} = {} unless defined $xml{'-type'}; |
|
676
|
|
|
|
|
|
|
%ty = (%{$xml{'-type'}}, -ROOT => "NONE"); |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
# execute Begin action if it exists |
|
679
|
|
|
|
|
|
|
if ($xml{-begin}) { |
|
680
|
|
|
|
|
|
|
&{$xml{-begin}} |
|
681
|
|
|
|
|
|
|
} |
|
682
|
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
if ($xml{-inputenc}) { |
|
684
|
|
|
|
|
|
|
$string = XML::LibXML::encodeToUTF8($xml{-inputenc}, $string); |
|
685
|
|
|
|
|
|
|
} |
|
686
|
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
# create a new LibXML parser |
|
688
|
|
|
|
|
|
|
$parser = XML::LibXML->new(); |
|
689
|
|
|
|
|
|
|
$parser->validation(0); |
|
690
|
|
|
|
|
|
|
$parser->load_ext_dtd(0); |
|
691
|
|
|
|
|
|
|
$parser->expand_entities(0); |
|
692
|
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
# parse the string |
|
694
|
|
|
|
|
|
|
my $doc; |
|
695
|
|
|
|
|
|
|
if ( $xml{'-recover'}) { |
|
696
|
|
|
|
|
|
|
$parser->recover(1); |
|
697
|
|
|
|
|
|
|
eval { |
|
698
|
|
|
|
|
|
|
local $SIG{__WARN__} = sub{}; |
|
699
|
|
|
|
|
|
|
$doc = $parser->parse_string($string); |
|
700
|
|
|
|
|
|
|
}; |
|
701
|
|
|
|
|
|
|
return undef if !$doc; |
|
702
|
|
|
|
|
|
|
} |
|
703
|
|
|
|
|
|
|
elsif ( $xml{'-html'}) { |
|
704
|
|
|
|
|
|
|
$parser->recover(1); |
|
705
|
|
|
|
|
|
|
eval{ |
|
706
|
|
|
|
|
|
|
local $SIG{__WARN__} = sub{}; |
|
707
|
|
|
|
|
|
|
$doc = $parser->parse_html_string($string); |
|
708
|
|
|
|
|
|
|
}; |
|
709
|
|
|
|
|
|
|
# if ($@) { return undef; } |
|
710
|
|
|
|
|
|
|
return undef unless defined $doc; |
|
711
|
|
|
|
|
|
|
} else { |
|
712
|
|
|
|
|
|
|
$doc = $parser->parse_string($string); |
|
713
|
|
|
|
|
|
|
} |
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
# get the document root element |
|
716
|
|
|
|
|
|
|
$tree = $doc->getDocumentElement(); |
|
717
|
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
my $return; |
|
719
|
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
# Check if we have an end function |
|
721
|
|
|
|
|
|
|
if ($xml{-end}) { |
|
722
|
|
|
|
|
|
|
$c = _omni("-ROOT", \%xml, $tree); |
|
723
|
|
|
|
|
|
|
$return = &{$xml{-end}} |
|
724
|
|
|
|
|
|
|
} else { |
|
725
|
|
|
|
|
|
|
$return = _omni("-ROOT", \%xml, $tree) |
|
726
|
|
|
|
|
|
|
} |
|
727
|
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
if ($declr) { |
|
729
|
|
|
|
|
|
|
return $declr.$return; |
|
730
|
|
|
|
|
|
|
} else { |
|
731
|
|
|
|
|
|
|
return $return; |
|
732
|
|
|
|
|
|
|
} |
|
733
|
|
|
|
|
|
|
} |
|
734
|
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
sub pathdt{ |
|
738
|
|
|
|
|
|
|
my $file = shift; |
|
739
|
|
|
|
|
|
|
my %h = _pathtodt(@_); |
|
740
|
|
|
|
|
|
|
return dt($file,%h); |
|
741
|
|
|
|
|
|
|
} |
|
742
|
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
# Parsing dos predicados do XPath |
|
746
|
|
|
|
|
|
|
sub _testAttr { |
|
747
|
|
|
|
|
|
|
my $atr = shift; |
|
748
|
|
|
|
|
|
|
for ($atr) { |
|
749
|
|
|
|
|
|
|
s/name\(\)/'$q'/g; |
|
750
|
|
|
|
|
|
|
# s/\@([A-Za-z_]+)/'$v{$1}'/g; |
|
751
|
|
|
|
|
|
|
s/\@([A-Za-z_]+)/defined $v{$1}?"'$v{$1}'":"''"/ge; |
|
752
|
|
|
|
|
|
|
s/\@\*/keys %v?"'1'":"''"/ge; |
|
753
|
|
|
|
|
|
|
if (/^not\((.*)\)$/) { |
|
754
|
|
|
|
|
|
|
return ! _testAttr($1); |
|
755
|
|
|
|
|
|
|
} elsif (/^('|")([^\1]*)(\1)\s*=\s*('|")([^\4]*)\4$/) { |
|
756
|
|
|
|
|
|
|
return ($2 eq $5); |
|
757
|
|
|
|
|
|
|
} elsif (/^(.*?)normalize-space\((['"])([^\2)]*)\2\)(.*)$/) { |
|
758
|
|
|
|
|
|
|
my ($back,$forward)=($1,$4); |
|
759
|
|
|
|
|
|
|
my $x = _normalize_space($3); |
|
760
|
|
|
|
|
|
|
return _testAttr("$back'$x'$forward"); |
|
761
|
|
|
|
|
|
|
} elsif (/starts-with\((['"])([^\1))]*)\1,(['"])([^\3))]*)\3\)/) { |
|
762
|
|
|
|
|
|
|
my $x = _starts_with($2,$4); |
|
763
|
|
|
|
|
|
|
return $x; |
|
764
|
|
|
|
|
|
|
} elsif (/contains\((['"])([^\1))]*)\1,(['"])([^\3))]*)\3\)/) { |
|
765
|
|
|
|
|
|
|
my $x = _contains($2,$4); |
|
766
|
|
|
|
|
|
|
return $x; |
|
767
|
|
|
|
|
|
|
} elsif (/^(.*?)string-length\((['"])([^\2]*)\2\)(.*)$/) { |
|
768
|
|
|
|
|
|
|
my ($back,$forward) = ($1,$4); |
|
769
|
|
|
|
|
|
|
my $x = length($3); |
|
770
|
|
|
|
|
|
|
return _testAttr("$back$x$forward"); |
|
771
|
|
|
|
|
|
|
} elsif (/^(\d+)\s*=(\d+)$/) { |
|
772
|
|
|
|
|
|
|
return ($1 == $2); |
|
773
|
|
|
|
|
|
|
} elsif (/^(\d+)\s*<(\d+)$/) { |
|
774
|
|
|
|
|
|
|
return ($1 < $2); |
|
775
|
|
|
|
|
|
|
} elsif (/^(\d+)\s*>(\d+)$/) { |
|
776
|
|
|
|
|
|
|
return ($1 > $2); |
|
777
|
|
|
|
|
|
|
} elsif (/^(['"])([^\1]*)\1$/) { |
|
778
|
|
|
|
|
|
|
return $2; |
|
779
|
|
|
|
|
|
|
} |
|
780
|
|
|
|
|
|
|
} |
|
781
|
|
|
|
|
|
|
return 0; #$atr; |
|
782
|
|
|
|
|
|
|
} |
|
783
|
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
# Funcao auxiliar de teste de predicados do XPath |
|
787
|
|
|
|
|
|
|
sub _starts_with { |
|
788
|
|
|
|
|
|
|
my ($string,$preffix) = @_; |
|
789
|
|
|
|
|
|
|
return 0 unless ($string && $preffix); |
|
790
|
|
|
|
|
|
|
return 1 if ($string =~ m!^$preffix!); |
|
791
|
|
|
|
|
|
|
return 0; |
|
792
|
|
|
|
|
|
|
} |
|
793
|
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
# Funcao auxiliar de teste de predicados do XPath |
|
796
|
|
|
|
|
|
|
sub _contains { |
|
797
|
|
|
|
|
|
|
my ($string,$s) = @_; |
|
798
|
|
|
|
|
|
|
return 0 unless ($string && $s); |
|
799
|
|
|
|
|
|
|
return 1 if ($string =~ m!$s!); |
|
800
|
|
|
|
|
|
|
return 0; |
|
801
|
|
|
|
|
|
|
} |
|
802
|
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
# Funcao auxiliar de teste de predicados do XPath |
|
805
|
|
|
|
|
|
|
sub _normalize_space { |
|
806
|
|
|
|
|
|
|
my $z = shift; |
|
807
|
|
|
|
|
|
|
$z =~ /^\s*(.*?)\s*$/; |
|
808
|
|
|
|
|
|
|
$z = $1; |
|
809
|
|
|
|
|
|
|
$z =~ s!\s+! !g; |
|
810
|
|
|
|
|
|
|
return $z; |
|
811
|
|
|
|
|
|
|
} |
|
812
|
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
sub _pathtodt { |
|
815
|
|
|
|
|
|
|
my %h = @_; |
|
816
|
|
|
|
|
|
|
my %aux=(); |
|
817
|
|
|
|
|
|
|
my %aux2=(); |
|
818
|
|
|
|
|
|
|
my %n = (); |
|
819
|
|
|
|
|
|
|
my $z; |
|
820
|
|
|
|
|
|
|
for $z (keys %h) { |
|
821
|
|
|
|
|
|
|
# TODO - Make it more generic |
|
822
|
|
|
|
|
|
|
if ( $z=~m{\w+(\|\w+)+}) { |
|
823
|
|
|
|
|
|
|
my @tags = split /\|/, $z; |
|
824
|
|
|
|
|
|
|
for(@tags) { |
|
825
|
|
|
|
|
|
|
$aux2{$_}=$h{$z} |
|
826
|
|
|
|
|
|
|
} |
|
827
|
|
|
|
|
|
|
} |
|
828
|
|
|
|
|
|
|
elsif ( $z=~m{(//|/|)(.*)/([^\[]*)(?:\[(.*)\])?} ) { |
|
829
|
|
|
|
|
|
|
my ($first,$second,$third,$fourth) = ($1,$2,$3,$4); |
|
830
|
|
|
|
|
|
|
if (($first eq "/") && (!$second)) { |
|
831
|
|
|
|
|
|
|
$first = ""; |
|
832
|
|
|
|
|
|
|
$second = '.*'; |
|
833
|
|
|
|
|
|
|
$third =~ s!\*!-default!; |
|
834
|
|
|
|
|
|
|
} else { |
|
835
|
|
|
|
|
|
|
$second =~ s!\*!\[^/\]\+!g; |
|
836
|
|
|
|
|
|
|
$second =~ s!/$!\(/\.\*\)\?!g; |
|
837
|
|
|
|
|
|
|
$second =~ s!//!\(/\.\*\)\?/!g; |
|
838
|
|
|
|
|
|
|
$third =~ s!\*!-default!g; |
|
839
|
|
|
|
|
|
|
} |
|
840
|
|
|
|
|
|
|
push( @{$aux{$third}} , [$first,$second,$h{$z},$fourth]); |
|
841
|
|
|
|
|
|
|
} |
|
842
|
|
|
|
|
|
|
else { $aux2{$z}=$h{$z};} |
|
843
|
|
|
|
|
|
|
} |
|
844
|
|
|
|
|
|
|
for $z (keys %aux){ |
|
845
|
|
|
|
|
|
|
my $code = sub { |
|
846
|
|
|
|
|
|
|
my $l; |
|
847
|
|
|
|
|
|
|
for $l (@{$aux{$z}}) { |
|
848
|
|
|
|
|
|
|
my $prefix = ""; |
|
849
|
|
|
|
|
|
|
$prefix = "^" unless (($l->[0]) or ($l->[1])); |
|
850
|
|
|
|
|
|
|
$prefix = "^" if (($l->[0] eq "/") && ($l->[1])); |
|
851
|
|
|
|
|
|
|
if ($l->[3]) { |
|
852
|
|
|
|
|
|
|
if(inctxt("$prefix$l->[1]") && _testAttr($l->[3])) |
|
853
|
|
|
|
|
|
|
{return &{$l->[2]}; } |
|
854
|
|
|
|
|
|
|
} else { |
|
855
|
|
|
|
|
|
|
if(inctxt("$prefix$l->[1]")) {return &{$l->[2]};} |
|
856
|
|
|
|
|
|
|
} |
|
857
|
|
|
|
|
|
|
} |
|
858
|
|
|
|
|
|
|
return &{ $aux2{$z}} if $aux2{$z} ; |
|
859
|
|
|
|
|
|
|
return &{ $h{-default}} if $h{-default}; |
|
860
|
|
|
|
|
|
|
&toxml(); |
|
861
|
|
|
|
|
|
|
}; |
|
862
|
|
|
|
|
|
|
$n{$z} = $code; |
|
863
|
|
|
|
|
|
|
} |
|
864
|
|
|
|
|
|
|
for $z (keys %aux2){ |
|
865
|
|
|
|
|
|
|
$n{$z} ||= $aux2{$z} ; |
|
866
|
|
|
|
|
|
|
} |
|
867
|
|
|
|
|
|
|
return %n; |
|
868
|
|
|
|
|
|
|
} |
|
869
|
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
sub _omni { |
|
873
|
|
|
|
|
|
|
my ($par, $xml, @l) = @_; |
|
874
|
|
|
|
|
|
|
my $defaulttype = |
|
875
|
|
|
|
|
|
|
(exists($xml->{-type}) && exists($xml->{-type}{-default})) |
|
876
|
|
|
|
|
|
|
? |
|
877
|
|
|
|
|
|
|
$xml->{-type}{-default} : "STR"; |
|
878
|
|
|
|
|
|
|
my $type = $ty{$par} || $defaulttype; |
|
879
|
|
|
|
|
|
|
my %typeargs = (); |
|
880
|
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
if (ref($type) eq "mmapon") { |
|
882
|
|
|
|
|
|
|
$typeargs{$_} = 1 for (@$type); |
|
883
|
|
|
|
|
|
|
$type = "MMAPON"; |
|
884
|
|
|
|
|
|
|
} |
|
885
|
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
my $r ; |
|
887
|
|
|
|
|
|
|
if( $type eq 'STR') { $r = "" } |
|
888
|
|
|
|
|
|
|
elsif( $type eq 'THE_CHILD' or $type eq 'LAST_CHILD') { $r = 0 } |
|
889
|
|
|
|
|
|
|
elsif( $type eq 'SEQ' or $type eq "ARRAY") { $r = [] } |
|
890
|
|
|
|
|
|
|
elsif( $type eq 'SEQH' or $type eq "ARRAYOFHASH") { $r = [] } |
|
891
|
|
|
|
|
|
|
elsif( $type eq 'MAP' or $type eq "HASH") { $r = {} } |
|
892
|
|
|
|
|
|
|
elsif( $type eq 'MULTIMAP') { $r = {} } |
|
893
|
|
|
|
|
|
|
elsif( $type eq 'MMAPON' or $type eq "HASHOFARRAY") { $r = {} } |
|
894
|
|
|
|
|
|
|
elsif( $type eq 'NONE') { $r = "" } |
|
895
|
|
|
|
|
|
|
elsif( $type eq 'ZERO') { return "" } |
|
896
|
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
my ($name, $val, @val, $atr, $aux); |
|
898
|
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
$u = $xml->{-userdata}; |
|
900
|
|
|
|
|
|
|
while(@l) { |
|
901
|
|
|
|
|
|
|
my $tree = shift @l; |
|
902
|
|
|
|
|
|
|
next unless $tree; |
|
903
|
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
$name = ref($tree) eq "XML::LibXML::CDATASection" ? "-pcdata" : $tree->getName(); |
|
905
|
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
if (ref($tree) eq "XML::LibXML::CDATASection") { |
|
907
|
|
|
|
|
|
|
$val = $tree->getData(); |
|
908
|
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
$name = "-cdata"; |
|
910
|
|
|
|
|
|
|
$aux = (defined($xml->{-outputenc}))?_fromUTF8($val,$xml->{-outputenc}):$val; |
|
911
|
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
if (defined($xml->{-cdata})) { |
|
913
|
|
|
|
|
|
|
push(@dtcontext,"-cdata"); |
|
914
|
|
|
|
|
|
|
$c = $aux; |
|
915
|
|
|
|
|
|
|
$aux = &{$xml->{-cdata}}; |
|
916
|
|
|
|
|
|
|
pop(@dtcontext); |
|
917
|
|
|
|
|
|
|
} elsif (defined($xml->{-pcdata})) { |
|
918
|
|
|
|
|
|
|
push(@dtcontext,"-pcdata"); |
|
919
|
|
|
|
|
|
|
$c = $aux; |
|
920
|
|
|
|
|
|
|
$aux = &{$xml->{-pcdata}}; |
|
921
|
|
|
|
|
|
|
pop(@dtcontext); |
|
922
|
|
|
|
|
|
|
} |
|
923
|
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
} elsif (ref($tree) eq "XML::LibXML::Comment") { |
|
925
|
|
|
|
|
|
|
### At the moment, treat as Text |
|
926
|
|
|
|
|
|
|
### We will need to change this, I hope! |
|
927
|
|
|
|
|
|
|
$val = ""; |
|
928
|
|
|
|
|
|
|
$name = "-pcdata"; |
|
929
|
|
|
|
|
|
|
$aux= (defined($xml->{-outputenc}))?_fromUTF8($val, $xml->{-outputenc}):$val; |
|
930
|
|
|
|
|
|
|
if (defined($xml->{-pcdata})) { |
|
931
|
|
|
|
|
|
|
push(@dtcontext,"-pcdata"); |
|
932
|
|
|
|
|
|
|
$c = $aux; |
|
933
|
|
|
|
|
|
|
$aux = &{$xml->{-pcdata}}; |
|
934
|
|
|
|
|
|
|
pop(@dtcontext); |
|
935
|
|
|
|
|
|
|
} |
|
936
|
|
|
|
|
|
|
} |
|
937
|
|
|
|
|
|
|
elsif (ref($tree) eq "XML::LibXML::Text") { |
|
938
|
|
|
|
|
|
|
$val = $tree->getData(); |
|
939
|
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
$name = "-pcdata"; |
|
941
|
|
|
|
|
|
|
$aux = (defined($xml->{-outputenc}))?_fromUTF8($val,$xml->{-outputenc}):$val; |
|
942
|
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
if (defined($xml->{-pcdata})) { |
|
944
|
|
|
|
|
|
|
push(@dtcontext,"-pcdata"); |
|
945
|
|
|
|
|
|
|
$c = $aux; |
|
946
|
|
|
|
|
|
|
$aux = &{$xml->{-pcdata}}; |
|
947
|
|
|
|
|
|
|
pop(@dtcontext); |
|
948
|
|
|
|
|
|
|
} |
|
949
|
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
} elsif (ref($tree) eq "XML::LibXML::Element") { |
|
951
|
|
|
|
|
|
|
my %atr = _nodeAttributes($tree); |
|
952
|
|
|
|
|
|
|
$atr = \%atr; |
|
953
|
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
if (exists($xml->{-ignorecase})) { |
|
955
|
|
|
|
|
|
|
$name = lc($name); |
|
956
|
|
|
|
|
|
|
for (keys %$atr) { |
|
957
|
|
|
|
|
|
|
my ($k,$v) = (lc($_),$atr->{$_}); |
|
958
|
|
|
|
|
|
|
delete($atr->{$_}); |
|
959
|
|
|
|
|
|
|
$atr->{$k} = $v; |
|
960
|
|
|
|
|
|
|
} |
|
961
|
|
|
|
|
|
|
} |
|
962
|
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
push(@dtcontext,$name); |
|
964
|
|
|
|
|
|
|
$dtcontextcount{$name}++; |
|
965
|
|
|
|
|
|
|
unshift(@dtatributes, $atr); |
|
966
|
|
|
|
|
|
|
unshift(@dtattributes, $atr); |
|
967
|
|
|
|
|
|
|
$aux = _omniele($xml, $name, _omni($name, $xml, ($tree->getChildnodes())), $atr); |
|
968
|
|
|
|
|
|
|
shift(@dtatributes); |
|
969
|
|
|
|
|
|
|
shift(@dtattributes); |
|
970
|
|
|
|
|
|
|
pop(@dtcontext); $dtcontextcount{$name}--; |
|
971
|
|
|
|
|
|
|
} elsif (ref($tree) eq "XML::LibXML::Node") { |
|
972
|
|
|
|
|
|
|
if ($tree->nodeType == XML_ENTITY_REF_NODE) { |
|
973
|
|
|
|
|
|
|
# if we get here, is because we are not expanding entities (I think) |
|
974
|
|
|
|
|
|
|
if ($tree->textContent) { |
|
975
|
|
|
|
|
|
|
$aux = $tree->textContent; |
|
976
|
|
|
|
|
|
|
} else { |
|
977
|
|
|
|
|
|
|
$aux = '&'.$tree->nodeName.';'; |
|
978
|
|
|
|
|
|
|
} |
|
979
|
|
|
|
|
|
|
} else { |
|
980
|
|
|
|
|
|
|
print STDERR "Not handled, generic node of type: [",$tree->nodeType,"]\n"; |
|
981
|
|
|
|
|
|
|
} |
|
982
|
|
|
|
|
|
|
} else { |
|
983
|
|
|
|
|
|
|
print STDERR "Not handled: [",ref($tree),"]\n"; |
|
984
|
|
|
|
|
|
|
} |
|
985
|
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
if ($type eq "STR"){ if (defined($aux)) {$r .= $aux} ;} |
|
987
|
|
|
|
|
|
|
elsif ($type eq "THE_CHILD" or $type eq "LAST_CHILD"){ |
|
988
|
|
|
|
|
|
|
$r = $aux unless _whitepc($aux, $name); } |
|
989
|
|
|
|
|
|
|
elsif ($type eq "SEQ" or $type eq "ARRAY"){ |
|
990
|
|
|
|
|
|
|
push(@$r, $aux) unless _whitepc($aux, $name);} |
|
991
|
|
|
|
|
|
|
elsif ($type eq "SEQH" or $type eq "ARRAYHASH"){ |
|
992
|
|
|
|
|
|
|
push(@$r,{"-c" => $aux, |
|
993
|
|
|
|
|
|
|
"-q" => $name, |
|
994
|
|
|
|
|
|
|
_nodeAttributes($tree) |
|
995
|
|
|
|
|
|
|
}) unless _whitepc($aux,$name); |
|
996
|
|
|
|
|
|
|
} |
|
997
|
|
|
|
|
|
|
elsif($type eq "MMAPON"){ |
|
998
|
|
|
|
|
|
|
if(not _whitepc($aux,$name)){ |
|
999
|
|
|
|
|
|
|
if(! $typeargs{$name}) { |
|
1000
|
|
|
|
|
|
|
warn "duplicated tag '$name'\n" if(defined($r->{$name})); |
|
1001
|
|
|
|
|
|
|
$r->{$name} = $aux } |
|
1002
|
|
|
|
|
|
|
else { push(@{$r->{$name}},$aux) unless _whitepc($aux,$name)}} |
|
1003
|
|
|
|
|
|
|
} |
|
1004
|
|
|
|
|
|
|
elsif($type eq "MAP" or $type eq "HASH"){ |
|
1005
|
|
|
|
|
|
|
if(not _whitepc($aux,$name)){ |
|
1006
|
|
|
|
|
|
|
warn "duplicated tag '$name'\n" if(defined($r->{$name})); |
|
1007
|
|
|
|
|
|
|
$r->{$name} = $aux }} |
|
1008
|
|
|
|
|
|
|
elsif($type eq "MULTIMAP"){ |
|
1009
|
|
|
|
|
|
|
push(@{$r->{$name}},$aux) unless _whitepc($aux,$name)} |
|
1010
|
|
|
|
|
|
|
elsif($type eq "NONE"){ $r = $aux;} |
|
1011
|
|
|
|
|
|
|
else { $r="undefined type !!!"} |
|
1012
|
|
|
|
|
|
|
} |
|
1013
|
|
|
|
|
|
|
$r; |
|
1014
|
|
|
|
|
|
|
} |
|
1015
|
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
sub _omniele { |
|
1019
|
|
|
|
|
|
|
my $xml = shift; |
|
1020
|
|
|
|
|
|
|
my $aux; |
|
1021
|
|
|
|
|
|
|
($q, $c, $aux) = @_; |
|
1022
|
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
%v = %$aux; |
|
1024
|
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
if (defined($xml->{-outputenc})) { |
|
1026
|
|
|
|
|
|
|
for (keys %v){ |
|
1027
|
|
|
|
|
|
|
$v{$_} = _fromUTF8($v{$_}, $xml->{-outputenc}) |
|
1028
|
|
|
|
|
|
|
} |
|
1029
|
|
|
|
|
|
|
} |
|
1030
|
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
if (defined $xml->{$q}) |
|
1032
|
|
|
|
|
|
|
{ &{$xml->{$q}} } |
|
1033
|
|
|
|
|
|
|
elsif (defined $xml->{'-default'}) |
|
1034
|
|
|
|
|
|
|
{ &{$xml->{'-default'}} } |
|
1035
|
|
|
|
|
|
|
elsif (defined $xml->{'-tohtml'}) |
|
1036
|
|
|
|
|
|
|
{ tohtml() } |
|
1037
|
|
|
|
|
|
|
else |
|
1038
|
|
|
|
|
|
|
{ toxml() } |
|
1039
|
|
|
|
|
|
|
} |
|
1040
|
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
sub xmltree { +{'-c' => $c, '-q' => $q, %v} } |
|
1044
|
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
sub tohtml { |
|
1046
|
|
|
|
|
|
|
my ($q,$v,$c); |
|
1047
|
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
if (not @_) { |
|
1049
|
|
|
|
|
|
|
($q,$v,$c) = ($XML::DT::q, \%XML::DT::v, $XML::DT::c); |
|
1050
|
|
|
|
|
|
|
} elsif (ref($_[0])) { |
|
1051
|
|
|
|
|
|
|
$c = shift; |
|
1052
|
|
|
|
|
|
|
} else { |
|
1053
|
|
|
|
|
|
|
($q,$v,$c) = @_; |
|
1054
|
|
|
|
|
|
|
} |
|
1055
|
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
if (not ref($c)) { |
|
1057
|
|
|
|
|
|
|
if ($q eq "-pcdata") { |
|
1058
|
|
|
|
|
|
|
return $c |
|
1059
|
|
|
|
|
|
|
} elsif ($q eq "link" || $q eq "br" || $q eq "hr" || $q eq "img") { |
|
1060
|
|
|
|
|
|
|
return _openTag($q,$v) |
|
1061
|
|
|
|
|
|
|
} else { |
|
1062
|
|
|
|
|
|
|
return _openTag($q,$v) . "$c$q>" |
|
1063
|
|
|
|
|
|
|
} |
|
1064
|
|
|
|
|
|
|
} |
|
1065
|
|
|
|
|
|
|
elsif (ref($c) eq "HASH" && $c->{'-q'} && $c->{'-c'}) { |
|
1066
|
|
|
|
|
|
|
my %a = %$c; |
|
1067
|
|
|
|
|
|
|
my ($q,$c) = delete @a{"-q","-c"}; |
|
1068
|
|
|
|
|
|
|
tohtml($q,\%a,(ref($c)?tohtml($c):$c)); |
|
1069
|
|
|
|
|
|
|
} |
|
1070
|
|
|
|
|
|
|
elsif (ref($c) eq "HASH") { |
|
1071
|
|
|
|
|
|
|
_openTag($q,$v). |
|
1072
|
|
|
|
|
|
|
join("",map {($_ ne "-pcdata") |
|
1073
|
|
|
|
|
|
|
? ( (ref($c->{$_}) eq "ARRAY") |
|
1074
|
|
|
|
|
|
|
? "<$_>". |
|
1075
|
|
|
|
|
|
|
join("$_>\n<$_>", @{$c->{$_}}). |
|
1076
|
|
|
|
|
|
|
"$_>\n" |
|
1077
|
|
|
|
|
|
|
: tohtml($_,{},$c->{$_})."\n" ) |
|
1078
|
|
|
|
|
|
|
: () } |
|
1079
|
|
|
|
|
|
|
keys %{$c} ) . |
|
1080
|
|
|
|
|
|
|
"$c->{-pcdata}$q>" } ######## "NOTYetREady" |
|
1081
|
|
|
|
|
|
|
elsif (ref($c) eq "ARRAY") { |
|
1082
|
|
|
|
|
|
|
if (defined($q) && exists($ty{$q}) && $ty{$q} eq "SEQH") { |
|
1083
|
|
|
|
|
|
|
tohtml($q,$v,join("\n",map {tohtml($_)} @$c)) |
|
1084
|
|
|
|
|
|
|
} elsif (defined $q) { |
|
1085
|
|
|
|
|
|
|
tohtml($q,$v,join("",@{$c})) |
|
1086
|
|
|
|
|
|
|
} else { |
|
1087
|
|
|
|
|
|
|
join("\n",map {(ref($_)?tohtml($_):$_)} @$c) |
|
1088
|
|
|
|
|
|
|
} |
|
1089
|
|
|
|
|
|
|
} |
|
1090
|
|
|
|
|
|
|
} |
|
1091
|
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
sub toxml { |
|
1093
|
|
|
|
|
|
|
my ($q,$v,$c); |
|
1094
|
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
if (not @_) { |
|
1096
|
|
|
|
|
|
|
($q, $v, $c) = ($XML::DT::q, \%XML::DT::v, $XML::DT::c); |
|
1097
|
|
|
|
|
|
|
} elsif (ref($_[0])) { |
|
1098
|
|
|
|
|
|
|
$c = shift; |
|
1099
|
|
|
|
|
|
|
} else { |
|
1100
|
|
|
|
|
|
|
($q, $v, $c) = @_; |
|
1101
|
|
|
|
|
|
|
} |
|
1102
|
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
if (not ref($c)) { |
|
1104
|
|
|
|
|
|
|
if ($q eq "-pcdata") { |
|
1105
|
|
|
|
|
|
|
return $c |
|
1106
|
|
|
|
|
|
|
} elsif ($c eq "") { |
|
1107
|
|
|
|
|
|
|
return _emptyTag($q,$v) |
|
1108
|
|
|
|
|
|
|
} else { |
|
1109
|
|
|
|
|
|
|
return _openTag($q,$v) . "$c$q>" |
|
1110
|
|
|
|
|
|
|
} |
|
1111
|
|
|
|
|
|
|
} |
|
1112
|
|
|
|
|
|
|
elsif (ref($c) eq "HASH" && $c->{'-q'} && $c->{'-c'}) { |
|
1113
|
|
|
|
|
|
|
my %a = %$c; |
|
1114
|
|
|
|
|
|
|
my ($q,$c) = delete @a{"-q","-c"}; |
|
1115
|
|
|
|
|
|
|
### _openTag($q,\%a).toxml($c).). |
|
1116
|
|
|
|
|
|
|
### toxml($q,\%a,join("\n",map {toxml($_)} @$c)) |
|
1117
|
|
|
|
|
|
|
toxml($q,\%a,(ref($c)?toxml($c):$c)); |
|
1118
|
|
|
|
|
|
|
} |
|
1119
|
|
|
|
|
|
|
elsif (ref($c) eq "HASH") { |
|
1120
|
|
|
|
|
|
|
_openTag($q,$v). |
|
1121
|
|
|
|
|
|
|
join("",map {($_ ne "-pcdata") |
|
1122
|
|
|
|
|
|
|
? ( (ref($c->{$_}) eq "ARRAY") |
|
1123
|
|
|
|
|
|
|
? "<$_>". |
|
1124
|
|
|
|
|
|
|
join("$_>\n<$_>", @{$c->{$_}}). |
|
1125
|
|
|
|
|
|
|
"$_>\n" |
|
1126
|
|
|
|
|
|
|
: toxml($_,{},$c->{$_})."\n" ) |
|
1127
|
|
|
|
|
|
|
: () } |
|
1128
|
|
|
|
|
|
|
keys %{$c} ) . |
|
1129
|
|
|
|
|
|
|
"$c->{-pcdata}$q>" } ######## "NOTYetREady" |
|
1130
|
|
|
|
|
|
|
elsif (ref($c) eq "ARRAY") { |
|
1131
|
|
|
|
|
|
|
if (defined($q) && exists($ty{$q}) && $ty{$q} eq "SEQH") { |
|
1132
|
|
|
|
|
|
|
toxml($q,$v,join("\n",map {toxml($_)} @$c)) |
|
1133
|
|
|
|
|
|
|
} elsif (defined $q) { |
|
1134
|
|
|
|
|
|
|
toxml($q,$v,join("",@{$c})) |
|
1135
|
|
|
|
|
|
|
} else { |
|
1136
|
|
|
|
|
|
|
join("\n",map {(ref($_)?toxml($_):$_)} @$c) |
|
1137
|
|
|
|
|
|
|
} |
|
1138
|
|
|
|
|
|
|
} |
|
1139
|
|
|
|
|
|
|
} |
|
1140
|
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
sub _openTag{ |
|
1143
|
|
|
|
|
|
|
"<$_[0]". join("",map {" $_=\"$_[1]{$_}\""} keys %{$_[1]} ).">" |
|
1144
|
|
|
|
|
|
|
} |
|
1145
|
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
sub _emptyTag{ |
|
1147
|
|
|
|
|
|
|
"<$_[0]". join("",map {" $_=\"$_[1]{$_}\""} keys %{$_[1]} )."/>" |
|
1148
|
|
|
|
|
|
|
} |
|
1149
|
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
sub mkdtskel_fromDTD { |
|
1152
|
|
|
|
|
|
|
my $filename = shift; |
|
1153
|
|
|
|
|
|
|
my $file = ParseDTDFile($filename); |
|
1154
|
|
|
|
|
|
|
|
|
1155
|
|
|
|
|
|
|
print <<'PERL'; |
|
1156
|
|
|
|
|
|
|
#!/usr/bin/perl |
|
1157
|
|
|
|
|
|
|
use warnings; |
|
1158
|
|
|
|
|
|
|
use strict; |
|
1159
|
|
|
|
|
|
|
use XML::DT; |
|
1160
|
|
|
|
|
|
|
my $filename = shift; |
|
1161
|
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
# Variable Reference |
|
1163
|
|
|
|
|
|
|
# |
|
1164
|
|
|
|
|
|
|
# $c - contents after child processing |
|
1165
|
|
|
|
|
|
|
# $q - element name (tag) |
|
1166
|
|
|
|
|
|
|
# %v - hash of attributes |
|
1167
|
|
|
|
|
|
|
|
|
1168
|
|
|
|
|
|
|
my %handler=( |
|
1169
|
|
|
|
|
|
|
# '-outputenc' => 'ISO-8859-1', |
|
1170
|
|
|
|
|
|
|
# '-default' => sub{"<$q>$c$q>"}, |
|
1171
|
|
|
|
|
|
|
PERL |
|
1172
|
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
|
|
1174
|
|
|
|
|
|
|
for (sort keys %{$file}) { |
|
1175
|
|
|
|
|
|
|
print " '$_' => sub { },"; |
|
1176
|
|
|
|
|
|
|
print " # attributes: ", |
|
1177
|
|
|
|
|
|
|
join(", ", keys %{$file->{$_}{attributes}}) if exists($file->{$_}{attributes}); |
|
1178
|
|
|
|
|
|
|
print "\n"; |
|
1179
|
|
|
|
|
|
|
} |
|
1180
|
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
print <<'PERL'; |
|
1183
|
|
|
|
|
|
|
); |
|
1184
|
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
print dt($filename, %handler); |
|
1186
|
|
|
|
|
|
|
PERL |
|
1187
|
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
} |
|
1189
|
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
sub mkdtskel{ |
|
1191
|
|
|
|
|
|
|
my @files = @_; |
|
1192
|
|
|
|
|
|
|
my $name; |
|
1193
|
|
|
|
|
|
|
my $HTML = ""; |
|
1194
|
|
|
|
|
|
|
my %element; |
|
1195
|
|
|
|
|
|
|
my %att; |
|
1196
|
|
|
|
|
|
|
my %mkdtskel = |
|
1197
|
|
|
|
|
|
|
('-default' => sub{ |
|
1198
|
|
|
|
|
|
|
$element{$q}++; |
|
1199
|
|
|
|
|
|
|
for (keys %v) { |
|
1200
|
|
|
|
|
|
|
$att{$q}{$_} = 1 |
|
1201
|
|
|
|
|
|
|
}; |
|
1202
|
|
|
|
|
|
|
""}, |
|
1203
|
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
'-end' => sub{ |
|
1205
|
|
|
|
|
|
|
print <<'END'; |
|
1206
|
|
|
|
|
|
|
#!/usr/bin/perl |
|
1207
|
|
|
|
|
|
|
use XML::DT; |
|
1208
|
|
|
|
|
|
|
use warnings; |
|
1209
|
|
|
|
|
|
|
use strict; |
|
1210
|
|
|
|
|
|
|
my $filename = shift; |
|
1211
|
|
|
|
|
|
|
|
|
1212
|
|
|
|
|
|
|
# Variable Reference |
|
1213
|
|
|
|
|
|
|
# |
|
1214
|
|
|
|
|
|
|
# $c - contents after child processing |
|
1215
|
|
|
|
|
|
|
# $q - element name (tag) |
|
1216
|
|
|
|
|
|
|
# %v - hash of attributes |
|
1217
|
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
my %handler=( |
|
1219
|
|
|
|
|
|
|
# '-outputenc' => 'ISO-8859-1', |
|
1220
|
|
|
|
|
|
|
# '-default' => sub{"<$q>$c$q>"}, |
|
1221
|
|
|
|
|
|
|
END |
|
1222
|
|
|
|
|
|
|
print $HTML; |
|
1223
|
|
|
|
|
|
|
for $name (sort keys %element) { |
|
1224
|
|
|
|
|
|
|
print " '$name' => sub{ }, #"; |
|
1225
|
|
|
|
|
|
|
print " $element{$name} occurrences;"; |
|
1226
|
|
|
|
|
|
|
print ' attributes: ', |
|
1227
|
|
|
|
|
|
|
join(', ', keys %{$att{$name}}) if $att{$name}; |
|
1228
|
|
|
|
|
|
|
# print " \"\$q:\$c\"\n"; |
|
1229
|
|
|
|
|
|
|
print "\n"; |
|
1230
|
|
|
|
|
|
|
} |
|
1231
|
|
|
|
|
|
|
print <<'END'; |
|
1232
|
|
|
|
|
|
|
); |
|
1233
|
|
|
|
|
|
|
print dt($filename, %handler); |
|
1234
|
|
|
|
|
|
|
END |
|
1235
|
|
|
|
|
|
|
} |
|
1236
|
|
|
|
|
|
|
); |
|
1237
|
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
my $file = shift(@files); |
|
1239
|
|
|
|
|
|
|
while($file =~ /^-/){ |
|
1240
|
|
|
|
|
|
|
if ($file eq "-html") { |
|
1241
|
|
|
|
|
|
|
$HTML = " '-html' => 1,\n"; |
|
1242
|
|
|
|
|
|
|
$mkdtskel{'-html'} = 1;} |
|
1243
|
|
|
|
|
|
|
elsif($file eq "-latin1") { $mkdtskel{'-inputenc'}='ISO-8859-1';} |
|
1244
|
|
|
|
|
|
|
else { die("usage mktskel [-html] [-latin1] file \n")} |
|
1245
|
|
|
|
|
|
|
$file=shift(@files)} |
|
1246
|
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
dt($file,%mkdtskel) |
|
1248
|
|
|
|
|
|
|
} |
|
1249
|
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
sub _nodeAttributes { |
|
1253
|
|
|
|
|
|
|
my $node = shift; |
|
1254
|
|
|
|
|
|
|
my %answer = (); |
|
1255
|
|
|
|
|
|
|
my @attrs = $node->getAttributes(); |
|
1256
|
|
|
|
|
|
|
for (@attrs) { |
|
1257
|
|
|
|
|
|
|
if (ref($_) eq "XML::LibXML::Namespace") { |
|
1258
|
|
|
|
|
|
|
# TODO: This should not be ignored, I think. |
|
1259
|
|
|
|
|
|
|
# This sould be converted on a standard attribute with |
|
1260
|
|
|
|
|
|
|
# key 'namespace' and respective contents |
|
1261
|
|
|
|
|
|
|
} else { |
|
1262
|
|
|
|
|
|
|
$answer{$_->getName()} = $_->getValue(); |
|
1263
|
|
|
|
|
|
|
} |
|
1264
|
|
|
|
|
|
|
} |
|
1265
|
|
|
|
|
|
|
return %answer; |
|
1266
|
|
|
|
|
|
|
} |
|
1267
|
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
sub mkdtdskel { |
|
1270
|
|
|
|
|
|
|
my @files = @_; |
|
1271
|
|
|
|
|
|
|
my $name; |
|
1272
|
|
|
|
|
|
|
my %att; |
|
1273
|
|
|
|
|
|
|
my %ele; |
|
1274
|
|
|
|
|
|
|
my %elel; |
|
1275
|
|
|
|
|
|
|
my $root; |
|
1276
|
|
|
|
|
|
|
my %handler=( |
|
1277
|
|
|
|
|
|
|
'-outputenc' => 'ISO-8859-1', |
|
1278
|
|
|
|
|
|
|
'-default' => sub{ |
|
1279
|
|
|
|
|
|
|
$elel{$q}++; |
|
1280
|
|
|
|
|
|
|
$root = $q unless ctxt(1); |
|
1281
|
|
|
|
|
|
|
$ele{ctxt(1)}{$q} ++; |
|
1282
|
|
|
|
|
|
|
for(keys(%v)){$att{$q}{$_} ++ } ; |
|
1283
|
|
|
|
|
|
|
}, |
|
1284
|
|
|
|
|
|
|
'-pcdata' => sub{ if ($c =~ /[^ \t\n]/){ $ele{ctxt(1)}{"#PCDATA"}=1 }}, |
|
1285
|
|
|
|
|
|
|
); |
|
1286
|
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
while($files[0] =~ /^-/){ |
|
1288
|
|
|
|
|
|
|
if ($files[0] eq "-html") { $handler{'-html'} = 1;} |
|
1289
|
|
|
|
|
|
|
elsif($files[0] eq "-latin1") { $handler{'-inputenc'}='ISO-8859-1';} |
|
1290
|
|
|
|
|
|
|
else { die("usage mkdtdskel [-html] [-latin1] file* \n")} |
|
1291
|
|
|
|
|
|
|
shift(@files)} |
|
1292
|
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
for my $filename (@files){ |
|
1294
|
|
|
|
|
|
|
dt($filename,%handler); |
|
1295
|
|
|
|
|
|
|
} |
|
1296
|
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
print "\n\n"; |
|
1298
|
|
|
|
|
|
|
delete $elel{$root}; |
|
1299
|
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
for ($root, keys %elel){ |
|
1301
|
|
|
|
|
|
|
_putele($_, \%ele); |
|
1302
|
|
|
|
|
|
|
for $name (keys(%{$att{$_}})) { |
|
1303
|
|
|
|
|
|
|
print( "\t\n"); |
|
1304
|
|
|
|
|
|
|
print( "\t\n"); |
|
1305
|
|
|
|
|
|
|
} |
|
1306
|
|
|
|
|
|
|
} |
|
1307
|
|
|
|
|
|
|
} |
|
1308
|
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
sub _putele { |
|
1310
|
|
|
|
|
|
|
my ($e,$ele) = @_; |
|
1311
|
|
|
|
|
|
|
my @f ; |
|
1312
|
|
|
|
|
|
|
if ($ele->{$e}) { |
|
1313
|
|
|
|
|
|
|
@f = keys %{$ele->{$e}}; |
|
1314
|
|
|
|
|
|
|
print "
|
|
1315
|
|
|
|
|
|
|
(@f >= 1 && $f[0] eq "#PCDATA" ? "" : "*"), |
|
1316
|
|
|
|
|
|
|
" >\n"; |
|
1317
|
|
|
|
|
|
|
print "\n"; |
|
1318
|
|
|
|
|
|
|
} |
|
1319
|
|
|
|
|
|
|
else { |
|
1320
|
|
|
|
|
|
|
print "\n"; |
|
1321
|
|
|
|
|
|
|
} |
|
1322
|
|
|
|
|
|
|
} |
|
1323
|
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
sub _whitepc { |
|
1325
|
|
|
|
|
|
|
$_[1] eq '-pcdata' and $_[0] =~ /^[ \t\r\n]*$/ |
|
1326
|
|
|
|
|
|
|
} |
|
1327
|
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
sub MMAPON { |
|
1329
|
|
|
|
|
|
|
bless([@_],"mmapon") |
|
1330
|
|
|
|
|
|
|
} |
|
1331
|
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
|
|
1333
|
|
|
|
|
|
|
sub _fromUTF8 { |
|
1334
|
|
|
|
|
|
|
my $string = shift; |
|
1335
|
|
|
|
|
|
|
my $encode = shift; |
|
1336
|
|
|
|
|
|
|
my $ans = eval { XML::LibXML::decodeFromUTF8($encode, $string) }; |
|
1337
|
|
|
|
|
|
|
if ($@) { |
|
1338
|
|
|
|
|
|
|
return $string |
|
1339
|
|
|
|
|
|
|
} else { |
|
1340
|
|
|
|
|
|
|
return $ans |
|
1341
|
|
|
|
|
|
|
} |
|
1342
|
|
|
|
|
|
|
} |
|
1343
|
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
1; |