line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
## -*- cperl -*- |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package XML::DT; |
4
|
6
|
|
|
6
|
|
85560
|
use 5.008006; |
|
6
|
|
|
|
|
19
|
|
|
6
|
|
|
|
|
229
|
|
5
|
|
|
|
|
|
|
|
6
|
6
|
|
|
6
|
|
28
|
use strict; |
|
6
|
|
|
|
|
7
|
|
|
6
|
|
|
|
|
203
|
|
7
|
|
|
|
|
|
|
|
8
|
6
|
|
|
6
|
|
3448
|
use Data::Dumper; |
|
6
|
|
|
|
|
39634
|
|
|
6
|
|
|
|
|
1635
|
|
9
|
6
|
|
|
6
|
|
3037
|
use LWP::Simple; |
|
6
|
|
|
|
|
351835
|
|
|
6
|
|
|
|
|
51
|
|
10
|
6
|
|
|
6
|
|
5153
|
use XML::DTDParser "ParseDTDFile"; |
|
6
|
|
|
|
|
84253
|
|
|
6
|
|
|
|
|
466
|
|
11
|
|
|
|
|
|
|
|
12
|
6
|
|
|
6
|
|
5537
|
use XML::LibXML ':libxml'; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our $PARSER = 'XML::LibXML'; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
use parent 'Exporter'; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use vars qw($c $u %v $q @dtcontext %dtcontextcount @dtatributes |
18
|
|
|
|
|
|
|
@dtattributes ); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
our @EXPORT = qw(&dt &dtstring &dturl &inctxt &ctxt &mkdtskel &inpath |
21
|
|
|
|
|
|
|
&mkdtskel_fromDTD &mkdtdskel &tohtml &toxml &MMAPON $c %v $q $u |
22
|
|
|
|
|
|
|
&xmltree &pathdturl @dtcontext %dtcontextcount |
23
|
|
|
|
|
|
|
@dtatributes @dtattributes &pathdt &pathdtstring |
24
|
|
|
|
|
|
|
&father &gfather &ggfather &root); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
our $VERSION = '0.67'; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=encoding utf-8 |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 NAME |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
XML::DT - a package for down translation of XML files |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 SYNOPSIS |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
use XML::DT; |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
%xml=( 'music' => sub{"Music from: $c\n"}, |
39
|
|
|
|
|
|
|
'lyrics' => sub{"Lyrics from: $v{name}\n"}, |
40
|
|
|
|
|
|
|
'title' => sub{ uc($c) }, |
41
|
|
|
|
|
|
|
'-userdata => { something => 'I like' }, |
42
|
|
|
|
|
|
|
'-default' => sub{"$q:$c"} ); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
print dt($filename,%xml); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head1 ABSTRACT |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
This module is a XML down processor. It maps tag (element) |
49
|
|
|
|
|
|
|
names to functions to process that element and respective |
50
|
|
|
|
|
|
|
contents. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 DESCRIPTION |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
This module processes XML files with an approach similar to |
55
|
|
|
|
|
|
|
OMNIMARK. As XML parser it uses XML::LibXML module in an independent |
56
|
|
|
|
|
|
|
way. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
You can parse HTML files as if they were XML files. For this, you must |
59
|
|
|
|
|
|
|
supply an extra option to the hash: |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
%hander = ( -html => 1, |
62
|
|
|
|
|
|
|
... |
63
|
|
|
|
|
|
|
); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
You can also ask the parser to recover from XML errors: |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
%hander = ( -recover => 1, |
68
|
|
|
|
|
|
|
... |
69
|
|
|
|
|
|
|
); |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head1 Functions |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=head2 dt |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
Down translation function C receives a filename and a set of |
76
|
|
|
|
|
|
|
expressions (functions) defining the processing and associated values |
77
|
|
|
|
|
|
|
for each element. |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head2 dtstring |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
C works in a similar way with C but takes input from a |
82
|
|
|
|
|
|
|
string instead of a file. |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=head2 dturl |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
C works in a similar way with C but takes input from an |
87
|
|
|
|
|
|
|
Internet url instead of a file. |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=head2 pathdt |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
The C function is a C function which can handle a subset |
92
|
|
|
|
|
|
|
of XPath on handler keys. Example: |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
%handler = ( |
95
|
|
|
|
|
|
|
"article/title" => sub{ toxml("h1",{},$c) }, |
96
|
|
|
|
|
|
|
"section/title" => sub{ toxml("h2",{},$c) }, |
97
|
|
|
|
|
|
|
"title" => sub{ $c }, |
98
|
|
|
|
|
|
|
"//image[@type='jpg']" => sub{ "JPEG: " }, |
99
|
|
|
|
|
|
|
"//image[@type='bmp']" => sub{ "BMP: sorry, no bitmaps on the web" }, |
100
|
|
|
|
|
|
|
) |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
pathdt($filename, %handler); |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Here are some examples of valid XPath expressions under XML::DT: |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
/aaa |
107
|
|
|
|
|
|
|
/aaa/bbb |
108
|
|
|
|
|
|
|
//ccc - ccc somewhere (same as "ccc") |
109
|
|
|
|
|
|
|
/*/aaa/* |
110
|
|
|
|
|
|
|
//* - same as "-default" |
111
|
|
|
|
|
|
|
/aaa[@id] - aaa with an attribute id |
112
|
|
|
|
|
|
|
/*[@*] - root with an attribute |
113
|
|
|
|
|
|
|
/aaa[not(@name)] - aaa with no attribute "name" |
114
|
|
|
|
|
|
|
//bbb[@name='foo'] - ... attribute "name" = "foo" |
115
|
|
|
|
|
|
|
/ccc[normalize-space(@name)='bbb'] |
116
|
|
|
|
|
|
|
//*[name()='bbb'] - complex way of saying "//bbb" |
117
|
|
|
|
|
|
|
//*[starts-with(name(),'aa')] - an element named "aa.*" |
118
|
|
|
|
|
|
|
//*[contains(name(),'c')] - an element ".*c.*" |
119
|
|
|
|
|
|
|
//aaa[string-length(name())=4] "...." |
120
|
|
|
|
|
|
|
//aaa[string-length(name())<4] ".{1,4}" |
121
|
|
|
|
|
|
|
//aaa[string-length(name())>5] ".{5,}" |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Note that not all XPath is currently handled by XML::DT. A lot of |
124
|
|
|
|
|
|
|
XPath will never be added to XML::DT because is not in accordance with |
125
|
|
|
|
|
|
|
the down translation model. For more documentation about XPath check |
126
|
|
|
|
|
|
|
the specification at http://www.w3c.org or some tutorials under |
127
|
|
|
|
|
|
|
http://www.zvon.org |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head2 pathdtstring |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
Like the C function but supporting XPath. |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=head2 pathdturl |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
Like the C function but supporting XPath. |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head2 ctxt |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Returns the context element of the currently being processed |
141
|
|
|
|
|
|
|
element. So, if you call C you will get your father element, |
142
|
|
|
|
|
|
|
and so on. |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=head2 inpath |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
C is true if the actual element path matches the |
147
|
|
|
|
|
|
|
provided pattern. This function is meant to be used in the element |
148
|
|
|
|
|
|
|
functions in order to achieve context dependent processing. |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=head2 inctxt |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
C is true if the actual element father matches the |
153
|
|
|
|
|
|
|
provided pattern. |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head2 toxml |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
This is the default "-default" function. It can be used to generate |
158
|
|
|
|
|
|
|
XML based on C<$c> C<$q> and C<%v> variables. Example: add a new |
159
|
|
|
|
|
|
|
attribute to element C without changing it: |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
%handler=( ... |
162
|
|
|
|
|
|
|
ele1 => sub { $v{at1} = "v1"; toxml(); }, |
163
|
|
|
|
|
|
|
) |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
C can also be used with 3 arguments: tag, attributes and contents |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
toxml("a",{href=> "http://local/f.html"}, "example") |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
returns: |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
example |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Empty tags are written as empty tags. If you want an empty tag with opening and |
174
|
|
|
|
|
|
|
closing tags, then use the C. |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head2 tohtml |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
See C. |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head2 xmltree |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
This simple function just makes a HASH reference: |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
{ -c => $c, -q => $q, all_the_other_attributes } |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
The function C understands this structure and makes XML with it. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=head2 mkdtskel |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Used by the mkdtskel script to generate automatically a XML::DT perl |
191
|
|
|
|
|
|
|
script file based on an XML file. Check C manpage for |
192
|
|
|
|
|
|
|
details. |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=head2 mkdtskel_fromDTD |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
Used by the mkdtskel script to generate automatically a XML::DT perl |
197
|
|
|
|
|
|
|
script file based on an DTD file. Check C manpage for |
198
|
|
|
|
|
|
|
details. |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head2 mkdtdskel |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Used by the mkdtskel script to generate automatically a XML::DT perl |
203
|
|
|
|
|
|
|
script file based on a DTD file. Check C manpage for |
204
|
|
|
|
|
|
|
details. |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=head1 Accessing parents |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
With XML::DT you can access an element parent (or grand-parent) |
209
|
|
|
|
|
|
|
attributes, till the root of the XML document. |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
If you use c<$dtattributes[1]{foo} = 'bar'> on a processing function, |
212
|
|
|
|
|
|
|
you are defining the attribute C for that element parent. |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
In the same way, you can use C<$dtattributes[2]> to access the |
215
|
|
|
|
|
|
|
grand-parent. C<$dtattributes[-1]> is, as expected, the XML document |
216
|
|
|
|
|
|
|
root element. |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
There are some shortcuts: |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=over 4 |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=item C |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=item C |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=item C |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
You can use these functions to access to your C, grand-father |
229
|
|
|
|
|
|
|
(C) or great-grand-father (C): |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
father("x"); # returns value for attribute "x" on father element |
232
|
|
|
|
|
|
|
father("x", "value"); # sets value for attribute "x" on father |
233
|
|
|
|
|
|
|
# element |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
You can also use it directly as a reference to C<@dtattributes>: |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
father->{"x"}; # gets the attribute |
238
|
|
|
|
|
|
|
father->{"x"} = "value"; # sets the attribute |
239
|
|
|
|
|
|
|
$attributes = father; # gets all attributes reference |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=item C |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
You can use it as a function to access to your tree root element. |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
root("x"); # gets attribute C on root element |
247
|
|
|
|
|
|
|
root("x", "value"); # sets value for attribute C on root |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
You can also use it directly as a reference to C<$dtattributes[-1]>: |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
root->{"x"}; # gets the attribute x |
252
|
|
|
|
|
|
|
root->{"x"} = "value"; # sets the attribute x |
253
|
|
|
|
|
|
|
$attributes = root; # gets all attributes reference |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=back |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=head1 User provided element processing functions |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
The user must provide an HASH with a function for each element, that |
260
|
|
|
|
|
|
|
computes element output. Functions can use the element name C<$q>, the |
261
|
|
|
|
|
|
|
element content C<$c> and the attribute values hash C<%v>. |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
All those global variables are defined in C<$CALLER::>. |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
Each time an element is find the associated function is called. |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
Content is calculated by concatenation of element contents strings and |
268
|
|
|
|
|
|
|
interior elements return values. |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head2 C<-default> function |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
When a element has no associated function, the function associated |
273
|
|
|
|
|
|
|
with C<-default> called. If no C<-default> function is defined the |
274
|
|
|
|
|
|
|
default function returns a XML like string for the element. |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
When you use C-type> definitions, you often need do set C<-default> |
277
|
|
|
|
|
|
|
function to return just the contents: C. |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=head2 C<-outputenc> option |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
C<-outputenc> defines the output encoding (default is Unicode UTF8). |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=head2 C<-inputenc> option |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
C<-inputenc> forces a input encoding type. Whenever that is possible, |
286
|
|
|
|
|
|
|
define the input encoding in the XML file: |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head2 C<-pcdata> function |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
C<-pcdata> function is used to define transformation over the |
293
|
|
|
|
|
|
|
contents. Typically this function should look at context (see |
294
|
|
|
|
|
|
|
C function) |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
The default C<-pcdata> function is the identity |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=head2 C<-cdata> function |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
You can process C<> in a way different from pcdata. If you |
301
|
|
|
|
|
|
|
define a C<-cdata> method, it will be used. Otherwise, the C<-pcdata> |
302
|
|
|
|
|
|
|
method is called. |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=head2 C<-begin> function |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
Function to be executed before processing XML file. |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
Example of use: initialization of side-effect variables |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=head2 C<-end> function |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
Function to be executed after processing XML file. I can use C<$c> |
313
|
|
|
|
|
|
|
content value. The value returned by C<-end> will be the C return |
314
|
|
|
|
|
|
|
value. |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
Example of use: post-processing of returned contents |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=head2 C<-recover> option |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
If set, the parser will try to recover in XML errors. |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
=head2 C<-html> option |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
If set, the parser will try to recover in errors. Note that this |
325
|
|
|
|
|
|
|
differs from the previous one in the sense it uses some knowledge of |
326
|
|
|
|
|
|
|
the HTML structure for the recovery. |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=head2 C<-userdata> option |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
Use this to pass any information you like to your handlers. The data |
331
|
|
|
|
|
|
|
structure you pass in this option will be available as C<< $u >> in |
332
|
|
|
|
|
|
|
your code. -- New in 0.62. |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=head1 Elements with values other than strings (C<-type>) |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
By default all elements return strings, and contents (C<$c>) is the |
338
|
|
|
|
|
|
|
concatenation of the strings returned by the sub-elements. |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
In some situations the XML text contains values that are better |
341
|
|
|
|
|
|
|
processed as a structured type. |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
The following types (functors) are available: |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=over 4 |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
=item THE_CHILD |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
Return the result of processing the only child of the element. |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=item LAST_CHILD |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
Returns the result of processing the last child of the element. |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
=item STR |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
concatenates all the sub-elements returned values (DEFAULT) all the |
358
|
|
|
|
|
|
|
sub-element should return strings to be concatenated; |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
=item SEQ |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
makes an ARRAY with all the sub elements contents; attributes are |
363
|
|
|
|
|
|
|
ignored (they should be processed in the sub-element). (returns a ref) |
364
|
|
|
|
|
|
|
If you have different types of sub-elements, you should use SEQH |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=item SEQH |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
makes an ARRAY of HASH with all the sub elements (returns a ref); for |
369
|
|
|
|
|
|
|
each sub-element: |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
-q => element name |
372
|
|
|
|
|
|
|
-c => contents |
373
|
|
|
|
|
|
|
at1 => at value1 for each attribute |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
=item MAP |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
makes an HASH with the sub elements; keys are the sub-element names, |
378
|
|
|
|
|
|
|
values are their contents. Attributes are ignored. (they should be |
379
|
|
|
|
|
|
|
processed in the sub-element) (returns a ref) |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=item MULTIMAP |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
makes an HASH of ARRAY; keys are the sub-element names; values are |
384
|
|
|
|
|
|
|
lists of contents; attributes are ignored (they should be processed in |
385
|
|
|
|
|
|
|
the sub-element); (returns a ref) |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
=item MMAPON(element-list) |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
makes an HASH with the sub-elements; keys are the sub-element names, |
390
|
|
|
|
|
|
|
values are their contents; attributes are ignored (they should be |
391
|
|
|
|
|
|
|
processed in the sub-element); for all the elements contained in the |
392
|
|
|
|
|
|
|
element-list, it is created an ARRAY with their contents. (returns a |
393
|
|
|
|
|
|
|
ref) |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
=item XML |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
return a reference to an HASH with: |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
-q => element name |
400
|
|
|
|
|
|
|
-c => contents |
401
|
|
|
|
|
|
|
at1 => at value1 for each attribute |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=item ZERO |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
don't process the sub-elements; return "" |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=back |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
When you use C-type> definitions, you often need do set C<-default> |
410
|
|
|
|
|
|
|
function returning just the contents C. |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
=head2 An example: |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
use XML::DT; |
415
|
|
|
|
|
|
|
%handler = ( contacts => sub{ [ split(";",$c)] }, |
416
|
|
|
|
|
|
|
-default => sub{$c}, |
417
|
|
|
|
|
|
|
-type => { institution => 'MAP', |
418
|
|
|
|
|
|
|
degrees => MMAPON('name') |
419
|
|
|
|
|
|
|
tels => 'SEQ' } |
420
|
|
|
|
|
|
|
); |
421
|
|
|
|
|
|
|
$a = dt ("f.xml", %handler); |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
with the following f.xml |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
U.M. |
428
|
|
|
|
|
|
|
University of Minho |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
- 1111
|
431
|
|
|
|
|
|
|
- 1112
|
432
|
|
|
|
|
|
|
- 1113
|
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
Portugal |
435
|
|
|
|
|
|
|
J.Joao; J.Rocha; J.Ramalho |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
Computer science |
438
|
|
|
|
|
|
|
Informatica |
439
|
|
|
|
|
|
|
history |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
would make $a |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
{ 'name' => [ 'Computer science', |
445
|
|
|
|
|
|
|
'Informatica ', |
446
|
|
|
|
|
|
|
' history ' ], |
447
|
|
|
|
|
|
|
'institution' => { 'tels' => [ 1111, 1112, 1113 ], |
448
|
|
|
|
|
|
|
'name' => 'University of Minho', |
449
|
|
|
|
|
|
|
'where' => 'Portugal', |
450
|
|
|
|
|
|
|
'id' => 'U.M.', |
451
|
|
|
|
|
|
|
'contacts' => [ 'J.Joao', |
452
|
|
|
|
|
|
|
' J.Rocha', |
453
|
|
|
|
|
|
|
' J.Ramalho' ] } }; |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
=head1 DT Skeleton generation |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
It is possible to build an initial processor program based on an example |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
To do this use the function C. |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
Example: |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
perl -MXML::DT -e 'mkdtskel "f.xml"' > f.pl |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=head1 DTD skeleton generation |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
It makes a naive DTD based on an example(s). |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
To do this use the function C. |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
Example: |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
perl -MXML::DT -e 'mkdtdskel "f.xml"' > f.dtd |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=head1 SEE ALSO |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
mkdtskel(1) and mkdtdskel(1) |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=head1 AUTHORS |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
Home for XML::DT; |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
http://natura.di.uminho.pt/~jj/perl/XML/ |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
Jose Joao Almeida, |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
Alberto Manuel Simões, |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
Michel Rodriguez |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
José Carlos Ramalho |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
Mark A. Hillebrand |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
Copyright 1999-2012 Project Natura. |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
This library is free software; you can redistribute it |
503
|
|
|
|
|
|
|
and/or modify it under the same terms as Perl itself. |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=cut |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
our %ty = (); |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
sub dt { |
512
|
|
|
|
|
|
|
my ($file, %xml)=@_; |
513
|
|
|
|
|
|
|
my ($parser, $tree); |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
# Treat -decl option |
516
|
|
|
|
|
|
|
my $declr = ""; |
517
|
|
|
|
|
|
|
if ($xml{-declr}) { |
518
|
|
|
|
|
|
|
if ($xml{-outputenc}) { |
519
|
|
|
|
|
|
|
$declr = "\n"; |
520
|
|
|
|
|
|
|
} else { |
521
|
|
|
|
|
|
|
$declr = "\n"; |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
} |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
%ty = (); |
526
|
|
|
|
|
|
|
%ty = (%{$xml{'-type'}}) if defined($xml{'-type'}); |
527
|
|
|
|
|
|
|
$ty{-ROOT} = "NONE"; |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
&{$xml{-begin}} if $xml{-begin}; |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
# TODO --- how to force encoding with XML::LibXML? |
532
|
|
|
|
|
|
|
# $xml{-inputenc} |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
# create a new LibXML parser |
535
|
|
|
|
|
|
|
$parser = XML::LibXML->new(); |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
#### We don't wan't DT to load everytime the DTD (I Think!) |
538
|
|
|
|
|
|
|
$parser->validation(0); |
539
|
|
|
|
|
|
|
# $parser->expand_xinclude(0); # testing |
540
|
|
|
|
|
|
|
$parser->load_ext_dtd(0); |
541
|
|
|
|
|
|
|
$parser->expand_entities(0); |
542
|
|
|
|
|
|
|
$parser->expand_xincludes(1) if $xml{'-xinclude'}; |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
# parse the file |
545
|
|
|
|
|
|
|
my $doc; |
546
|
|
|
|
|
|
|
if ( $xml{'-recover'}) { |
547
|
|
|
|
|
|
|
$parser->recover(1); |
548
|
|
|
|
|
|
|
eval { |
549
|
|
|
|
|
|
|
local $SIG{__WARN__} = sub{}; |
550
|
|
|
|
|
|
|
$doc = $parser->parse_file($file); |
551
|
|
|
|
|
|
|
}; |
552
|
|
|
|
|
|
|
return undef if !$doc; |
553
|
|
|
|
|
|
|
} |
554
|
|
|
|
|
|
|
elsif ( $xml{'-html'}) { |
555
|
|
|
|
|
|
|
$parser->recover(1); |
556
|
|
|
|
|
|
|
eval { |
557
|
|
|
|
|
|
|
local $SIG{__WARN__} = sub{}; |
558
|
|
|
|
|
|
|
$doc = $parser->parse_html_file($file); |
559
|
|
|
|
|
|
|
}; |
560
|
|
|
|
|
|
|
return undef if !$doc; |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
else { |
563
|
|
|
|
|
|
|
$doc = $parser->parse_file($file) |
564
|
|
|
|
|
|
|
} |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
# get the document root element |
567
|
|
|
|
|
|
|
$tree = $doc->getDocumentElement(); |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
my $return = ""; |
570
|
|
|
|
|
|
|
# execute End action if it exists |
571
|
|
|
|
|
|
|
if($xml{-end}) { |
572
|
|
|
|
|
|
|
$c = _omni("-ROOT", \%xml, $tree); |
573
|
|
|
|
|
|
|
$return = &{$xml{-end}} |
574
|
|
|
|
|
|
|
} else { |
575
|
|
|
|
|
|
|
$return = _omni("-ROOT",\%xml, $tree) |
576
|
|
|
|
|
|
|
} |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
if ($declr) { |
579
|
|
|
|
|
|
|
return $declr.$return; |
580
|
|
|
|
|
|
|
} else { |
581
|
|
|
|
|
|
|
return $return; |
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
} |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
sub ctxt { |
587
|
|
|
|
|
|
|
my $level = $_[0]; |
588
|
|
|
|
|
|
|
$dtcontext[-$level-1]; |
589
|
|
|
|
|
|
|
} |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
sub inpath { |
592
|
|
|
|
|
|
|
my $pattern = shift ; |
593
|
|
|
|
|
|
|
join ("/", @dtcontext) =~ m!\b$pattern\b!; |
594
|
|
|
|
|
|
|
} |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
sub inctxt { |
598
|
|
|
|
|
|
|
my $pattern = shift ; |
599
|
|
|
|
|
|
|
# see if is in root context... |
600
|
|
|
|
|
|
|
return 1 if (($pattern eq "^" && @dtcontext==1) || $pattern eq ".*"); |
601
|
|
|
|
|
|
|
join("/", @dtcontext) =~ m!$pattern/[^/]*$! ; |
602
|
|
|
|
|
|
|
} |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
sub father { |
605
|
|
|
|
|
|
|
my ($a,$b)=@_; |
606
|
|
|
|
|
|
|
if (defined($b)){$dtattributes[1]{$a} = $b} |
607
|
|
|
|
|
|
|
elsif(defined($a)){$dtattributes[1]{$a} } |
608
|
|
|
|
|
|
|
else {$dtattributes[1]} |
609
|
|
|
|
|
|
|
} |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
sub gfather { |
612
|
|
|
|
|
|
|
my ($a,$b)=@_; |
613
|
|
|
|
|
|
|
if (defined($b)){$dtattributes[2]{$a} = $b} |
614
|
|
|
|
|
|
|
elsif(defined($a)){$dtattributes[2]{$a} } |
615
|
|
|
|
|
|
|
else {$dtattributes[2]} |
616
|
|
|
|
|
|
|
} |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
sub ggfather { |
620
|
|
|
|
|
|
|
my ($a,$b)=@_; |
621
|
|
|
|
|
|
|
if (defined($b)){$dtattributes[3]{$a} = $b} |
622
|
|
|
|
|
|
|
elsif(defined($a)){$dtattributes[3]{$a} } |
623
|
|
|
|
|
|
|
else {$dtattributes[3]} |
624
|
|
|
|
|
|
|
} |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
sub root { ### the root |
628
|
|
|
|
|
|
|
my ($a,$b)=@_; |
629
|
|
|
|
|
|
|
if (defined($b)){$dtattributes[-1]{$a} = $b } |
630
|
|
|
|
|
|
|
elsif(defined($a)){$dtattributes[-1]{$a} } |
631
|
|
|
|
|
|
|
else {$dtattributes[-1] } |
632
|
|
|
|
|
|
|
} |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
sub pathdtstring{ |
635
|
|
|
|
|
|
|
my $string = shift; |
636
|
|
|
|
|
|
|
my %h = _pathtodt(@_); |
637
|
|
|
|
|
|
|
return dtstring($string,%h); |
638
|
|
|
|
|
|
|
} |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
sub pathdturl{ |
643
|
|
|
|
|
|
|
my $url = shift; |
644
|
|
|
|
|
|
|
my %h = _pathtodt(@_); |
645
|
|
|
|
|
|
|
return dturl($url,%h); |
646
|
|
|
|
|
|
|
} |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
sub dturl{ |
651
|
|
|
|
|
|
|
my $url = shift; |
652
|
|
|
|
|
|
|
my $contents = get($url); |
653
|
|
|
|
|
|
|
if ($contents) { |
654
|
|
|
|
|
|
|
return dtstring($contents, @_); |
655
|
|
|
|
|
|
|
} else { |
656
|
|
|
|
|
|
|
return undef; |
657
|
|
|
|
|
|
|
} |
658
|
|
|
|
|
|
|
} |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
sub dtstring { |
663
|
|
|
|
|
|
|
my ($string, %xml)=@_; |
664
|
|
|
|
|
|
|
my ($parser, $tree); |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
my $declr = ""; |
667
|
|
|
|
|
|
|
if ($xml{-declr}) { |
668
|
|
|
|
|
|
|
if ($xml{-outputenc}) { |
669
|
|
|
|
|
|
|
$declr = "\n"; |
670
|
|
|
|
|
|
|
} else { |
671
|
|
|
|
|
|
|
$declr = "\n"; |
672
|
|
|
|
|
|
|
} |
673
|
|
|
|
|
|
|
} |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
$xml{'-type'} = {} unless defined $xml{'-type'}; |
676
|
|
|
|
|
|
|
%ty = (%{$xml{'-type'}}, -ROOT => "NONE"); |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
# execute Begin action if it exists |
679
|
|
|
|
|
|
|
if ($xml{-begin}) { |
680
|
|
|
|
|
|
|
&{$xml{-begin}} |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
if ($xml{-inputenc}) { |
684
|
|
|
|
|
|
|
$string = XML::LibXML::encodeToUTF8($xml{-inputenc}, $string); |
685
|
|
|
|
|
|
|
} |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
# create a new LibXML parser |
688
|
|
|
|
|
|
|
$parser = XML::LibXML->new(); |
689
|
|
|
|
|
|
|
$parser->validation(0); |
690
|
|
|
|
|
|
|
$parser->load_ext_dtd(0); |
691
|
|
|
|
|
|
|
$parser->expand_entities(0); |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
# parse the string |
694
|
|
|
|
|
|
|
my $doc; |
695
|
|
|
|
|
|
|
if ( $xml{'-recover'}) { |
696
|
|
|
|
|
|
|
$parser->recover(1); |
697
|
|
|
|
|
|
|
eval { |
698
|
|
|
|
|
|
|
local $SIG{__WARN__} = sub{}; |
699
|
|
|
|
|
|
|
$doc = $parser->parse_string($string); |
700
|
|
|
|
|
|
|
}; |
701
|
|
|
|
|
|
|
return undef if !$doc; |
702
|
|
|
|
|
|
|
} |
703
|
|
|
|
|
|
|
elsif ( $xml{'-html'}) { |
704
|
|
|
|
|
|
|
$parser->recover(1); |
705
|
|
|
|
|
|
|
eval{ |
706
|
|
|
|
|
|
|
local $SIG{__WARN__} = sub{}; |
707
|
|
|
|
|
|
|
$doc = $parser->parse_html_string($string); |
708
|
|
|
|
|
|
|
}; |
709
|
|
|
|
|
|
|
# if ($@) { return undef; } |
710
|
|
|
|
|
|
|
return undef unless defined $doc; |
711
|
|
|
|
|
|
|
} else { |
712
|
|
|
|
|
|
|
$doc = $parser->parse_string($string); |
713
|
|
|
|
|
|
|
} |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
# get the document root element |
716
|
|
|
|
|
|
|
$tree = $doc->getDocumentElement(); |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
my $return; |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
# Check if we have an end function |
721
|
|
|
|
|
|
|
if ($xml{-end}) { |
722
|
|
|
|
|
|
|
$c = _omni("-ROOT", \%xml, $tree); |
723
|
|
|
|
|
|
|
$return = &{$xml{-end}} |
724
|
|
|
|
|
|
|
} else { |
725
|
|
|
|
|
|
|
$return = _omni("-ROOT", \%xml, $tree) |
726
|
|
|
|
|
|
|
} |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
if ($declr) { |
729
|
|
|
|
|
|
|
return $declr.$return; |
730
|
|
|
|
|
|
|
} else { |
731
|
|
|
|
|
|
|
return $return; |
732
|
|
|
|
|
|
|
} |
733
|
|
|
|
|
|
|
} |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
sub pathdt{ |
738
|
|
|
|
|
|
|
my $file = shift; |
739
|
|
|
|
|
|
|
my %h = _pathtodt(@_); |
740
|
|
|
|
|
|
|
return dt($file,%h); |
741
|
|
|
|
|
|
|
} |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
# Parsing dos predicados do XPath |
746
|
|
|
|
|
|
|
sub _testAttr { |
747
|
|
|
|
|
|
|
my $atr = shift; |
748
|
|
|
|
|
|
|
for ($atr) { |
749
|
|
|
|
|
|
|
s/name\(\)/'$q'/g; |
750
|
|
|
|
|
|
|
# s/\@([A-Za-z_]+)/'$v{$1}'/g; |
751
|
|
|
|
|
|
|
s/\@([A-Za-z_]+)/defined $v{$1}?"'$v{$1}'":"''"/ge; |
752
|
|
|
|
|
|
|
s/\@\*/keys %v?"'1'":"''"/ge; |
753
|
|
|
|
|
|
|
if (/^not\((.*)\)$/) { |
754
|
|
|
|
|
|
|
return ! _testAttr($1); |
755
|
|
|
|
|
|
|
} elsif (/^('|")([^\1]*)(\1)\s*=\s*('|")([^\4]*)\4$/) { |
756
|
|
|
|
|
|
|
return ($2 eq $5); |
757
|
|
|
|
|
|
|
} elsif (/^(.*?)normalize-space\((['"])([^\2)]*)\2\)(.*)$/) { |
758
|
|
|
|
|
|
|
my ($back,$forward)=($1,$4); |
759
|
|
|
|
|
|
|
my $x = _normalize_space($3); |
760
|
|
|
|
|
|
|
return _testAttr("$back'$x'$forward"); |
761
|
|
|
|
|
|
|
} elsif (/starts-with\((['"])([^\1))]*)\1,(['"])([^\3))]*)\3\)/) { |
762
|
|
|
|
|
|
|
my $x = _starts_with($2,$4); |
763
|
|
|
|
|
|
|
return $x; |
764
|
|
|
|
|
|
|
} elsif (/contains\((['"])([^\1))]*)\1,(['"])([^\3))]*)\3\)/) { |
765
|
|
|
|
|
|
|
my $x = _contains($2,$4); |
766
|
|
|
|
|
|
|
return $x; |
767
|
|
|
|
|
|
|
} elsif (/^(.*?)string-length\((['"])([^\2]*)\2\)(.*)$/) { |
768
|
|
|
|
|
|
|
my ($back,$forward) = ($1,$4); |
769
|
|
|
|
|
|
|
my $x = length($3); |
770
|
|
|
|
|
|
|
return _testAttr("$back$x$forward"); |
771
|
|
|
|
|
|
|
} elsif (/^(\d+)\s*=(\d+)$/) { |
772
|
|
|
|
|
|
|
return ($1 == $2); |
773
|
|
|
|
|
|
|
} elsif (/^(\d+)\s*<(\d+)$/) { |
774
|
|
|
|
|
|
|
return ($1 < $2); |
775
|
|
|
|
|
|
|
} elsif (/^(\d+)\s*>(\d+)$/) { |
776
|
|
|
|
|
|
|
return ($1 > $2); |
777
|
|
|
|
|
|
|
} elsif (/^(['"])([^\1]*)\1$/) { |
778
|
|
|
|
|
|
|
return $2; |
779
|
|
|
|
|
|
|
} |
780
|
|
|
|
|
|
|
} |
781
|
|
|
|
|
|
|
return 0; #$atr; |
782
|
|
|
|
|
|
|
} |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
# Funcao auxiliar de teste de predicados do XPath |
787
|
|
|
|
|
|
|
sub _starts_with { |
788
|
|
|
|
|
|
|
my ($string,$preffix) = @_; |
789
|
|
|
|
|
|
|
return 0 unless ($string && $preffix); |
790
|
|
|
|
|
|
|
return 1 if ($string =~ m!^$preffix!); |
791
|
|
|
|
|
|
|
return 0; |
792
|
|
|
|
|
|
|
} |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
# Funcao auxiliar de teste de predicados do XPath |
796
|
|
|
|
|
|
|
sub _contains { |
797
|
|
|
|
|
|
|
my ($string,$s) = @_; |
798
|
|
|
|
|
|
|
return 0 unless ($string && $s); |
799
|
|
|
|
|
|
|
return 1 if ($string =~ m!$s!); |
800
|
|
|
|
|
|
|
return 0; |
801
|
|
|
|
|
|
|
} |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
# Funcao auxiliar de teste de predicados do XPath |
805
|
|
|
|
|
|
|
sub _normalize_space { |
806
|
|
|
|
|
|
|
my $z = shift; |
807
|
|
|
|
|
|
|
$z =~ /^\s*(.*?)\s*$/; |
808
|
|
|
|
|
|
|
$z = $1; |
809
|
|
|
|
|
|
|
$z =~ s!\s+! !g; |
810
|
|
|
|
|
|
|
return $z; |
811
|
|
|
|
|
|
|
} |
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
sub _pathtodt { |
815
|
|
|
|
|
|
|
my %h = @_; |
816
|
|
|
|
|
|
|
my %aux=(); |
817
|
|
|
|
|
|
|
my %aux2=(); |
818
|
|
|
|
|
|
|
my %n = (); |
819
|
|
|
|
|
|
|
my $z; |
820
|
|
|
|
|
|
|
for $z (keys %h) { |
821
|
|
|
|
|
|
|
# TODO - Make it more generic |
822
|
|
|
|
|
|
|
if ( $z=~m{\w+(\|\w+)+}) { |
823
|
|
|
|
|
|
|
my @tags = split /\|/, $z; |
824
|
|
|
|
|
|
|
for(@tags) { |
825
|
|
|
|
|
|
|
$aux2{$_}=$h{$z} |
826
|
|
|
|
|
|
|
} |
827
|
|
|
|
|
|
|
} |
828
|
|
|
|
|
|
|
elsif ( $z=~m{(//|/|)(.*)/([^\[]*)(?:\[(.*)\])?} ) { |
829
|
|
|
|
|
|
|
my ($first,$second,$third,$fourth) = ($1,$2,$3,$4); |
830
|
|
|
|
|
|
|
if (($first eq "/") && (!$second)) { |
831
|
|
|
|
|
|
|
$first = ""; |
832
|
|
|
|
|
|
|
$second = '.*'; |
833
|
|
|
|
|
|
|
$third =~ s!\*!-default!; |
834
|
|
|
|
|
|
|
} else { |
835
|
|
|
|
|
|
|
$second =~ s!\*!\[^/\]\+!g; |
836
|
|
|
|
|
|
|
$second =~ s!/$!\(/\.\*\)\?!g; |
837
|
|
|
|
|
|
|
$second =~ s!//!\(/\.\*\)\?/!g; |
838
|
|
|
|
|
|
|
$third =~ s!\*!-default!g; |
839
|
|
|
|
|
|
|
} |
840
|
|
|
|
|
|
|
push( @{$aux{$third}} , [$first,$second,$h{$z},$fourth]); |
841
|
|
|
|
|
|
|
} |
842
|
|
|
|
|
|
|
else { $aux2{$z}=$h{$z};} |
843
|
|
|
|
|
|
|
} |
844
|
|
|
|
|
|
|
for $z (keys %aux){ |
845
|
|
|
|
|
|
|
my $code = sub { |
846
|
|
|
|
|
|
|
my $l; |
847
|
|
|
|
|
|
|
for $l (@{$aux{$z}}) { |
848
|
|
|
|
|
|
|
my $prefix = ""; |
849
|
|
|
|
|
|
|
$prefix = "^" unless (($l->[0]) or ($l->[1])); |
850
|
|
|
|
|
|
|
$prefix = "^" if (($l->[0] eq "/") && ($l->[1])); |
851
|
|
|
|
|
|
|
if ($l->[3]) { |
852
|
|
|
|
|
|
|
if(inctxt("$prefix$l->[1]") && _testAttr($l->[3])) |
853
|
|
|
|
|
|
|
{return &{$l->[2]}; } |
854
|
|
|
|
|
|
|
} else { |
855
|
|
|
|
|
|
|
if(inctxt("$prefix$l->[1]")) {return &{$l->[2]};} |
856
|
|
|
|
|
|
|
} |
857
|
|
|
|
|
|
|
} |
858
|
|
|
|
|
|
|
return &{ $aux2{$z}} if $aux2{$z} ; |
859
|
|
|
|
|
|
|
return &{ $h{-default}} if $h{-default}; |
860
|
|
|
|
|
|
|
&toxml(); |
861
|
|
|
|
|
|
|
}; |
862
|
|
|
|
|
|
|
$n{$z} = $code; |
863
|
|
|
|
|
|
|
} |
864
|
|
|
|
|
|
|
for $z (keys %aux2){ |
865
|
|
|
|
|
|
|
$n{$z} ||= $aux2{$z} ; |
866
|
|
|
|
|
|
|
} |
867
|
|
|
|
|
|
|
return %n; |
868
|
|
|
|
|
|
|
} |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
sub _omni { |
873
|
|
|
|
|
|
|
my ($par, $xml, @l) = @_; |
874
|
|
|
|
|
|
|
my $defaulttype = |
875
|
|
|
|
|
|
|
(exists($xml->{-type}) && exists($xml->{-type}{-default})) |
876
|
|
|
|
|
|
|
? |
877
|
|
|
|
|
|
|
$xml->{-type}{-default} : "STR"; |
878
|
|
|
|
|
|
|
my $type = $ty{$par} || $defaulttype; |
879
|
|
|
|
|
|
|
my %typeargs = (); |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
if (ref($type) eq "mmapon") { |
882
|
|
|
|
|
|
|
$typeargs{$_} = 1 for (@$type); |
883
|
|
|
|
|
|
|
$type = "MMAPON"; |
884
|
|
|
|
|
|
|
} |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
my $r ; |
887
|
|
|
|
|
|
|
if( $type eq 'STR') { $r = "" } |
888
|
|
|
|
|
|
|
elsif( $type eq 'THE_CHILD' or $type eq 'LAST_CHILD') { $r = 0 } |
889
|
|
|
|
|
|
|
elsif( $type eq 'SEQ' or $type eq "ARRAY") { $r = [] } |
890
|
|
|
|
|
|
|
elsif( $type eq 'SEQH' or $type eq "ARRAYOFHASH") { $r = [] } |
891
|
|
|
|
|
|
|
elsif( $type eq 'MAP' or $type eq "HASH") { $r = {} } |
892
|
|
|
|
|
|
|
elsif( $type eq 'MULTIMAP') { $r = {} } |
893
|
|
|
|
|
|
|
elsif( $type eq 'MMAPON' or $type eq "HASHOFARRAY") { $r = {} } |
894
|
|
|
|
|
|
|
elsif( $type eq 'NONE') { $r = "" } |
895
|
|
|
|
|
|
|
elsif( $type eq 'ZERO') { return "" } |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
my ($name, $val, @val, $atr, $aux); |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
$u = $xml->{-userdata}; |
900
|
|
|
|
|
|
|
while(@l) { |
901
|
|
|
|
|
|
|
my $tree = shift @l; |
902
|
|
|
|
|
|
|
next unless $tree; |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
$name = ref($tree) eq "XML::LibXML::CDATASection" ? "-pcdata" : $tree->getName(); |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
if (ref($tree) eq "XML::LibXML::CDATASection") { |
907
|
|
|
|
|
|
|
$val = $tree->getData(); |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
$name = "-cdata"; |
910
|
|
|
|
|
|
|
$aux = (defined($xml->{-outputenc}))?_fromUTF8($val,$xml->{-outputenc}):$val; |
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
if (defined($xml->{-cdata})) { |
913
|
|
|
|
|
|
|
push(@dtcontext,"-cdata"); |
914
|
|
|
|
|
|
|
$c = $aux; |
915
|
|
|
|
|
|
|
$aux = &{$xml->{-cdata}}; |
916
|
|
|
|
|
|
|
pop(@dtcontext); |
917
|
|
|
|
|
|
|
} elsif (defined($xml->{-pcdata})) { |
918
|
|
|
|
|
|
|
push(@dtcontext,"-pcdata"); |
919
|
|
|
|
|
|
|
$c = $aux; |
920
|
|
|
|
|
|
|
$aux = &{$xml->{-pcdata}}; |
921
|
|
|
|
|
|
|
pop(@dtcontext); |
922
|
|
|
|
|
|
|
} |
923
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
} elsif (ref($tree) eq "XML::LibXML::Comment") { |
925
|
|
|
|
|
|
|
### At the moment, treat as Text |
926
|
|
|
|
|
|
|
### We will need to change this, I hope! |
927
|
|
|
|
|
|
|
$val = ""; |
928
|
|
|
|
|
|
|
$name = "-pcdata"; |
929
|
|
|
|
|
|
|
$aux= (defined($xml->{-outputenc}))?_fromUTF8($val, $xml->{-outputenc}):$val; |
930
|
|
|
|
|
|
|
if (defined($xml->{-pcdata})) { |
931
|
|
|
|
|
|
|
push(@dtcontext,"-pcdata"); |
932
|
|
|
|
|
|
|
$c = $aux; |
933
|
|
|
|
|
|
|
$aux = &{$xml->{-pcdata}}; |
934
|
|
|
|
|
|
|
pop(@dtcontext); |
935
|
|
|
|
|
|
|
} |
936
|
|
|
|
|
|
|
} |
937
|
|
|
|
|
|
|
elsif (ref($tree) eq "XML::LibXML::Text") { |
938
|
|
|
|
|
|
|
$val = $tree->getData(); |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
$name = "-pcdata"; |
941
|
|
|
|
|
|
|
$aux = (defined($xml->{-outputenc}))?_fromUTF8($val,$xml->{-outputenc}):$val; |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
if (defined($xml->{-pcdata})) { |
944
|
|
|
|
|
|
|
push(@dtcontext,"-pcdata"); |
945
|
|
|
|
|
|
|
$c = $aux; |
946
|
|
|
|
|
|
|
$aux = &{$xml->{-pcdata}}; |
947
|
|
|
|
|
|
|
pop(@dtcontext); |
948
|
|
|
|
|
|
|
} |
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
} elsif (ref($tree) eq "XML::LibXML::Element") { |
951
|
|
|
|
|
|
|
my %atr = _nodeAttributes($tree); |
952
|
|
|
|
|
|
|
$atr = \%atr; |
953
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
if (exists($xml->{-ignorecase})) { |
955
|
|
|
|
|
|
|
$name = lc($name); |
956
|
|
|
|
|
|
|
for (keys %$atr) { |
957
|
|
|
|
|
|
|
my ($k,$v) = (lc($_),$atr->{$_}); |
958
|
|
|
|
|
|
|
delete($atr->{$_}); |
959
|
|
|
|
|
|
|
$atr->{$k} = $v; |
960
|
|
|
|
|
|
|
} |
961
|
|
|
|
|
|
|
} |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
push(@dtcontext,$name); |
964
|
|
|
|
|
|
|
$dtcontextcount{$name}++; |
965
|
|
|
|
|
|
|
unshift(@dtatributes, $atr); |
966
|
|
|
|
|
|
|
unshift(@dtattributes, $atr); |
967
|
|
|
|
|
|
|
$aux = _omniele($xml, $name, _omni($name, $xml, ($tree->getChildnodes())), $atr); |
968
|
|
|
|
|
|
|
shift(@dtatributes); |
969
|
|
|
|
|
|
|
shift(@dtattributes); |
970
|
|
|
|
|
|
|
pop(@dtcontext); $dtcontextcount{$name}--; |
971
|
|
|
|
|
|
|
} elsif (ref($tree) eq "XML::LibXML::Node") { |
972
|
|
|
|
|
|
|
if ($tree->nodeType == XML_ENTITY_REF_NODE) { |
973
|
|
|
|
|
|
|
# if we get here, is because we are not expanding entities (I think) |
974
|
|
|
|
|
|
|
if ($tree->textContent) { |
975
|
|
|
|
|
|
|
$aux = $tree->textContent; |
976
|
|
|
|
|
|
|
} else { |
977
|
|
|
|
|
|
|
$aux = '&'.$tree->nodeName.';'; |
978
|
|
|
|
|
|
|
} |
979
|
|
|
|
|
|
|
} else { |
980
|
|
|
|
|
|
|
print STDERR "Not handled, generic node of type: [",$tree->nodeType,"]\n"; |
981
|
|
|
|
|
|
|
} |
982
|
|
|
|
|
|
|
} else { |
983
|
|
|
|
|
|
|
print STDERR "Not handled: [",ref($tree),"]\n"; |
984
|
|
|
|
|
|
|
} |
985
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
if ($type eq "STR"){ if (defined($aux)) {$r .= $aux} ;} |
987
|
|
|
|
|
|
|
elsif ($type eq "THE_CHILD" or $type eq "LAST_CHILD"){ |
988
|
|
|
|
|
|
|
$r = $aux unless _whitepc($aux, $name); } |
989
|
|
|
|
|
|
|
elsif ($type eq "SEQ" or $type eq "ARRAY"){ |
990
|
|
|
|
|
|
|
push(@$r, $aux) unless _whitepc($aux, $name);} |
991
|
|
|
|
|
|
|
elsif ($type eq "SEQH" or $type eq "ARRAYHASH"){ |
992
|
|
|
|
|
|
|
push(@$r,{"-c" => $aux, |
993
|
|
|
|
|
|
|
"-q" => $name, |
994
|
|
|
|
|
|
|
_nodeAttributes($tree) |
995
|
|
|
|
|
|
|
}) unless _whitepc($aux,$name); |
996
|
|
|
|
|
|
|
} |
997
|
|
|
|
|
|
|
elsif($type eq "MMAPON"){ |
998
|
|
|
|
|
|
|
if(not _whitepc($aux,$name)){ |
999
|
|
|
|
|
|
|
if(! $typeargs{$name}) { |
1000
|
|
|
|
|
|
|
warn "duplicated tag '$name'\n" if(defined($r->{$name})); |
1001
|
|
|
|
|
|
|
$r->{$name} = $aux } |
1002
|
|
|
|
|
|
|
else { push(@{$r->{$name}},$aux) unless _whitepc($aux,$name)}} |
1003
|
|
|
|
|
|
|
} |
1004
|
|
|
|
|
|
|
elsif($type eq "MAP" or $type eq "HASH"){ |
1005
|
|
|
|
|
|
|
if(not _whitepc($aux,$name)){ |
1006
|
|
|
|
|
|
|
warn "duplicated tag '$name'\n" if(defined($r->{$name})); |
1007
|
|
|
|
|
|
|
$r->{$name} = $aux }} |
1008
|
|
|
|
|
|
|
elsif($type eq "MULTIMAP"){ |
1009
|
|
|
|
|
|
|
push(@{$r->{$name}},$aux) unless _whitepc($aux,$name)} |
1010
|
|
|
|
|
|
|
elsif($type eq "NONE"){ $r = $aux;} |
1011
|
|
|
|
|
|
|
else { $r="undefined type !!!"} |
1012
|
|
|
|
|
|
|
} |
1013
|
|
|
|
|
|
|
$r; |
1014
|
|
|
|
|
|
|
} |
1015
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
sub _omniele { |
1019
|
|
|
|
|
|
|
my $xml = shift; |
1020
|
|
|
|
|
|
|
my $aux; |
1021
|
|
|
|
|
|
|
($q, $c, $aux) = @_; |
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
%v = %$aux; |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
if (defined($xml->{-outputenc})) { |
1026
|
|
|
|
|
|
|
for (keys %v){ |
1027
|
|
|
|
|
|
|
$v{$_} = _fromUTF8($v{$_}, $xml->{-outputenc}) |
1028
|
|
|
|
|
|
|
} |
1029
|
|
|
|
|
|
|
} |
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
if (defined $xml->{$q}) |
1032
|
|
|
|
|
|
|
{ &{$xml->{$q}} } |
1033
|
|
|
|
|
|
|
elsif (defined $xml->{'-default'}) |
1034
|
|
|
|
|
|
|
{ &{$xml->{'-default'}} } |
1035
|
|
|
|
|
|
|
elsif (defined $xml->{'-tohtml'}) |
1036
|
|
|
|
|
|
|
{ tohtml() } |
1037
|
|
|
|
|
|
|
else |
1038
|
|
|
|
|
|
|
{ toxml() } |
1039
|
|
|
|
|
|
|
} |
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
sub xmltree { +{'-c' => $c, '-q' => $q, %v} } |
1044
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
sub tohtml { |
1046
|
|
|
|
|
|
|
my ($q,$v,$c); |
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
if (not @_) { |
1049
|
|
|
|
|
|
|
($q,$v,$c) = ($XML::DT::q, \%XML::DT::v, $XML::DT::c); |
1050
|
|
|
|
|
|
|
} elsif (ref($_[0])) { |
1051
|
|
|
|
|
|
|
$c = shift; |
1052
|
|
|
|
|
|
|
} else { |
1053
|
|
|
|
|
|
|
($q,$v,$c) = @_; |
1054
|
|
|
|
|
|
|
} |
1055
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
if (not ref($c)) { |
1057
|
|
|
|
|
|
|
if ($q eq "-pcdata") { |
1058
|
|
|
|
|
|
|
return $c |
1059
|
|
|
|
|
|
|
} elsif ($q eq "link" || $q eq "br" || $q eq "hr" || $q eq "img") { |
1060
|
|
|
|
|
|
|
return _openTag($q,$v) |
1061
|
|
|
|
|
|
|
} else { |
1062
|
|
|
|
|
|
|
return _openTag($q,$v) . "$c$q>" |
1063
|
|
|
|
|
|
|
} |
1064
|
|
|
|
|
|
|
} |
1065
|
|
|
|
|
|
|
elsif (ref($c) eq "HASH" && $c->{'-q'} && $c->{'-c'}) { |
1066
|
|
|
|
|
|
|
my %a = %$c; |
1067
|
|
|
|
|
|
|
my ($q,$c) = delete @a{"-q","-c"}; |
1068
|
|
|
|
|
|
|
tohtml($q,\%a,(ref($c)?tohtml($c):$c)); |
1069
|
|
|
|
|
|
|
} |
1070
|
|
|
|
|
|
|
elsif (ref($c) eq "HASH") { |
1071
|
|
|
|
|
|
|
_openTag($q,$v). |
1072
|
|
|
|
|
|
|
join("",map {($_ ne "-pcdata") |
1073
|
|
|
|
|
|
|
? ( (ref($c->{$_}) eq "ARRAY") |
1074
|
|
|
|
|
|
|
? "<$_>". |
1075
|
|
|
|
|
|
|
join("$_>\n<$_>", @{$c->{$_}}). |
1076
|
|
|
|
|
|
|
"$_>\n" |
1077
|
|
|
|
|
|
|
: tohtml($_,{},$c->{$_})."\n" ) |
1078
|
|
|
|
|
|
|
: () } |
1079
|
|
|
|
|
|
|
keys %{$c} ) . |
1080
|
|
|
|
|
|
|
"$c->{-pcdata}$q>" } ######## "NOTYetREady" |
1081
|
|
|
|
|
|
|
elsif (ref($c) eq "ARRAY") { |
1082
|
|
|
|
|
|
|
if (defined($q) && exists($ty{$q}) && $ty{$q} eq "SEQH") { |
1083
|
|
|
|
|
|
|
tohtml($q,$v,join("\n",map {tohtml($_)} @$c)) |
1084
|
|
|
|
|
|
|
} elsif (defined $q) { |
1085
|
|
|
|
|
|
|
tohtml($q,$v,join("",@{$c})) |
1086
|
|
|
|
|
|
|
} else { |
1087
|
|
|
|
|
|
|
join("\n",map {(ref($_)?tohtml($_):$_)} @$c) |
1088
|
|
|
|
|
|
|
} |
1089
|
|
|
|
|
|
|
} |
1090
|
|
|
|
|
|
|
} |
1091
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
sub toxml { |
1093
|
|
|
|
|
|
|
my ($q,$v,$c); |
1094
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
if (not @_) { |
1096
|
|
|
|
|
|
|
($q, $v, $c) = ($XML::DT::q, \%XML::DT::v, $XML::DT::c); |
1097
|
|
|
|
|
|
|
} elsif (ref($_[0])) { |
1098
|
|
|
|
|
|
|
$c = shift; |
1099
|
|
|
|
|
|
|
} else { |
1100
|
|
|
|
|
|
|
($q, $v, $c) = @_; |
1101
|
|
|
|
|
|
|
} |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
if (not ref($c)) { |
1104
|
|
|
|
|
|
|
if ($q eq "-pcdata") { |
1105
|
|
|
|
|
|
|
return $c |
1106
|
|
|
|
|
|
|
} elsif ($c eq "") { |
1107
|
|
|
|
|
|
|
return _emptyTag($q,$v) |
1108
|
|
|
|
|
|
|
} else { |
1109
|
|
|
|
|
|
|
return _openTag($q,$v) . "$c$q>" |
1110
|
|
|
|
|
|
|
} |
1111
|
|
|
|
|
|
|
} |
1112
|
|
|
|
|
|
|
elsif (ref($c) eq "HASH" && $c->{'-q'} && $c->{'-c'}) { |
1113
|
|
|
|
|
|
|
my %a = %$c; |
1114
|
|
|
|
|
|
|
my ($q,$c) = delete @a{"-q","-c"}; |
1115
|
|
|
|
|
|
|
### _openTag($q,\%a).toxml($c).). |
1116
|
|
|
|
|
|
|
### toxml($q,\%a,join("\n",map {toxml($_)} @$c)) |
1117
|
|
|
|
|
|
|
toxml($q,\%a,(ref($c)?toxml($c):$c)); |
1118
|
|
|
|
|
|
|
} |
1119
|
|
|
|
|
|
|
elsif (ref($c) eq "HASH") { |
1120
|
|
|
|
|
|
|
_openTag($q,$v). |
1121
|
|
|
|
|
|
|
join("",map {($_ ne "-pcdata") |
1122
|
|
|
|
|
|
|
? ( (ref($c->{$_}) eq "ARRAY") |
1123
|
|
|
|
|
|
|
? "<$_>". |
1124
|
|
|
|
|
|
|
join("$_>\n<$_>", @{$c->{$_}}). |
1125
|
|
|
|
|
|
|
"$_>\n" |
1126
|
|
|
|
|
|
|
: toxml($_,{},$c->{$_})."\n" ) |
1127
|
|
|
|
|
|
|
: () } |
1128
|
|
|
|
|
|
|
keys %{$c} ) . |
1129
|
|
|
|
|
|
|
"$c->{-pcdata}$q>" } ######## "NOTYetREady" |
1130
|
|
|
|
|
|
|
elsif (ref($c) eq "ARRAY") { |
1131
|
|
|
|
|
|
|
if (defined($q) && exists($ty{$q}) && $ty{$q} eq "SEQH") { |
1132
|
|
|
|
|
|
|
toxml($q,$v,join("\n",map {toxml($_)} @$c)) |
1133
|
|
|
|
|
|
|
} elsif (defined $q) { |
1134
|
|
|
|
|
|
|
toxml($q,$v,join("",@{$c})) |
1135
|
|
|
|
|
|
|
} else { |
1136
|
|
|
|
|
|
|
join("\n",map {(ref($_)?toxml($_):$_)} @$c) |
1137
|
|
|
|
|
|
|
} |
1138
|
|
|
|
|
|
|
} |
1139
|
|
|
|
|
|
|
} |
1140
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
sub _openTag{ |
1143
|
|
|
|
|
|
|
"<$_[0]". join("",map {" $_=\"$_[1]{$_}\""} keys %{$_[1]} ).">" |
1144
|
|
|
|
|
|
|
} |
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
sub _emptyTag{ |
1147
|
|
|
|
|
|
|
"<$_[0]". join("",map {" $_=\"$_[1]{$_}\""} keys %{$_[1]} )."/>" |
1148
|
|
|
|
|
|
|
} |
1149
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
sub mkdtskel_fromDTD { |
1152
|
|
|
|
|
|
|
my $filename = shift; |
1153
|
|
|
|
|
|
|
my $file = ParseDTDFile($filename); |
1154
|
|
|
|
|
|
|
|
1155
|
|
|
|
|
|
|
print <<'PERL'; |
1156
|
|
|
|
|
|
|
#!/usr/bin/perl |
1157
|
|
|
|
|
|
|
use warnings; |
1158
|
|
|
|
|
|
|
use strict; |
1159
|
|
|
|
|
|
|
use XML::DT; |
1160
|
|
|
|
|
|
|
my $filename = shift; |
1161
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
# Variable Reference |
1163
|
|
|
|
|
|
|
# |
1164
|
|
|
|
|
|
|
# $c - contents after child processing |
1165
|
|
|
|
|
|
|
# $q - element name (tag) |
1166
|
|
|
|
|
|
|
# %v - hash of attributes |
1167
|
|
|
|
|
|
|
|
1168
|
|
|
|
|
|
|
my %handler=( |
1169
|
|
|
|
|
|
|
# '-outputenc' => 'ISO-8859-1', |
1170
|
|
|
|
|
|
|
# '-default' => sub{"<$q>$c$q>"}, |
1171
|
|
|
|
|
|
|
PERL |
1172
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
|
1174
|
|
|
|
|
|
|
for (sort keys %{$file}) { |
1175
|
|
|
|
|
|
|
print " '$_' => sub { },"; |
1176
|
|
|
|
|
|
|
print " # attributes: ", |
1177
|
|
|
|
|
|
|
join(", ", keys %{$file->{$_}{attributes}}) if exists($file->{$_}{attributes}); |
1178
|
|
|
|
|
|
|
print "\n"; |
1179
|
|
|
|
|
|
|
} |
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
print <<'PERL'; |
1183
|
|
|
|
|
|
|
); |
1184
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
print dt($filename, %handler); |
1186
|
|
|
|
|
|
|
PERL |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
} |
1189
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
sub mkdtskel{ |
1191
|
|
|
|
|
|
|
my @files = @_; |
1192
|
|
|
|
|
|
|
my $name; |
1193
|
|
|
|
|
|
|
my $HTML = ""; |
1194
|
|
|
|
|
|
|
my %element; |
1195
|
|
|
|
|
|
|
my %att; |
1196
|
|
|
|
|
|
|
my %mkdtskel = |
1197
|
|
|
|
|
|
|
('-default' => sub{ |
1198
|
|
|
|
|
|
|
$element{$q}++; |
1199
|
|
|
|
|
|
|
for (keys %v) { |
1200
|
|
|
|
|
|
|
$att{$q}{$_} = 1 |
1201
|
|
|
|
|
|
|
}; |
1202
|
|
|
|
|
|
|
""}, |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
'-end' => sub{ |
1205
|
|
|
|
|
|
|
print <<'END'; |
1206
|
|
|
|
|
|
|
#!/usr/bin/perl |
1207
|
|
|
|
|
|
|
use XML::DT; |
1208
|
|
|
|
|
|
|
use warnings; |
1209
|
|
|
|
|
|
|
use strict; |
1210
|
|
|
|
|
|
|
my $filename = shift; |
1211
|
|
|
|
|
|
|
|
1212
|
|
|
|
|
|
|
# Variable Reference |
1213
|
|
|
|
|
|
|
# |
1214
|
|
|
|
|
|
|
# $c - contents after child processing |
1215
|
|
|
|
|
|
|
# $q - element name (tag) |
1216
|
|
|
|
|
|
|
# %v - hash of attributes |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
my %handler=( |
1219
|
|
|
|
|
|
|
# '-outputenc' => 'ISO-8859-1', |
1220
|
|
|
|
|
|
|
# '-default' => sub{"<$q>$c$q>"}, |
1221
|
|
|
|
|
|
|
END |
1222
|
|
|
|
|
|
|
print $HTML; |
1223
|
|
|
|
|
|
|
for $name (sort keys %element) { |
1224
|
|
|
|
|
|
|
print " '$name' => sub{ }, #"; |
1225
|
|
|
|
|
|
|
print " $element{$name} occurrences;"; |
1226
|
|
|
|
|
|
|
print ' attributes: ', |
1227
|
|
|
|
|
|
|
join(', ', keys %{$att{$name}}) if $att{$name}; |
1228
|
|
|
|
|
|
|
# print " \"\$q:\$c\"\n"; |
1229
|
|
|
|
|
|
|
print "\n"; |
1230
|
|
|
|
|
|
|
} |
1231
|
|
|
|
|
|
|
print <<'END'; |
1232
|
|
|
|
|
|
|
); |
1233
|
|
|
|
|
|
|
print dt($filename, %handler); |
1234
|
|
|
|
|
|
|
END |
1235
|
|
|
|
|
|
|
} |
1236
|
|
|
|
|
|
|
); |
1237
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
my $file = shift(@files); |
1239
|
|
|
|
|
|
|
while($file =~ /^-/){ |
1240
|
|
|
|
|
|
|
if ($file eq "-html") { |
1241
|
|
|
|
|
|
|
$HTML = " '-html' => 1,\n"; |
1242
|
|
|
|
|
|
|
$mkdtskel{'-html'} = 1;} |
1243
|
|
|
|
|
|
|
elsif($file eq "-latin1") { $mkdtskel{'-inputenc'}='ISO-8859-1';} |
1244
|
|
|
|
|
|
|
else { die("usage mktskel [-html] [-latin1] file \n")} |
1245
|
|
|
|
|
|
|
$file=shift(@files)} |
1246
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
dt($file,%mkdtskel) |
1248
|
|
|
|
|
|
|
} |
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
sub _nodeAttributes { |
1253
|
|
|
|
|
|
|
my $node = shift; |
1254
|
|
|
|
|
|
|
my %answer = (); |
1255
|
|
|
|
|
|
|
my @attrs = $node->getAttributes(); |
1256
|
|
|
|
|
|
|
for (@attrs) { |
1257
|
|
|
|
|
|
|
if (ref($_) eq "XML::LibXML::Namespace") { |
1258
|
|
|
|
|
|
|
# TODO: This should not be ignored, I think. |
1259
|
|
|
|
|
|
|
# This sould be converted on a standard attribute with |
1260
|
|
|
|
|
|
|
# key 'namespace' and respective contents |
1261
|
|
|
|
|
|
|
} else { |
1262
|
|
|
|
|
|
|
$answer{$_->getName()} = $_->getValue(); |
1263
|
|
|
|
|
|
|
} |
1264
|
|
|
|
|
|
|
} |
1265
|
|
|
|
|
|
|
return %answer; |
1266
|
|
|
|
|
|
|
} |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
sub mkdtdskel { |
1270
|
|
|
|
|
|
|
my @files = @_; |
1271
|
|
|
|
|
|
|
my $name; |
1272
|
|
|
|
|
|
|
my %att; |
1273
|
|
|
|
|
|
|
my %ele; |
1274
|
|
|
|
|
|
|
my %elel; |
1275
|
|
|
|
|
|
|
my $root; |
1276
|
|
|
|
|
|
|
my %handler=( |
1277
|
|
|
|
|
|
|
'-outputenc' => 'ISO-8859-1', |
1278
|
|
|
|
|
|
|
'-default' => sub{ |
1279
|
|
|
|
|
|
|
$elel{$q}++; |
1280
|
|
|
|
|
|
|
$root = $q unless ctxt(1); |
1281
|
|
|
|
|
|
|
$ele{ctxt(1)}{$q} ++; |
1282
|
|
|
|
|
|
|
for(keys(%v)){$att{$q}{$_} ++ } ; |
1283
|
|
|
|
|
|
|
}, |
1284
|
|
|
|
|
|
|
'-pcdata' => sub{ if ($c =~ /[^ \t\n]/){ $ele{ctxt(1)}{"#PCDATA"}=1 }}, |
1285
|
|
|
|
|
|
|
); |
1286
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
while($files[0] =~ /^-/){ |
1288
|
|
|
|
|
|
|
if ($files[0] eq "-html") { $handler{'-html'} = 1;} |
1289
|
|
|
|
|
|
|
elsif($files[0] eq "-latin1") { $handler{'-inputenc'}='ISO-8859-1';} |
1290
|
|
|
|
|
|
|
else { die("usage mkdtdskel [-html] [-latin1] file* \n")} |
1291
|
|
|
|
|
|
|
shift(@files)} |
1292
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
for my $filename (@files){ |
1294
|
|
|
|
|
|
|
dt($filename,%handler); |
1295
|
|
|
|
|
|
|
} |
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
print "\n\n"; |
1298
|
|
|
|
|
|
|
delete $elel{$root}; |
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
for ($root, keys %elel){ |
1301
|
|
|
|
|
|
|
_putele($_, \%ele); |
1302
|
|
|
|
|
|
|
for $name (keys(%{$att{$_}})) { |
1303
|
|
|
|
|
|
|
print( "\t\n"); |
1304
|
|
|
|
|
|
|
print( "\t\n"); |
1305
|
|
|
|
|
|
|
} |
1306
|
|
|
|
|
|
|
} |
1307
|
|
|
|
|
|
|
} |
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
sub _putele { |
1310
|
|
|
|
|
|
|
my ($e,$ele) = @_; |
1311
|
|
|
|
|
|
|
my @f ; |
1312
|
|
|
|
|
|
|
if ($ele->{$e}) { |
1313
|
|
|
|
|
|
|
@f = keys %{$ele->{$e}}; |
1314
|
|
|
|
|
|
|
print "
|
1315
|
|
|
|
|
|
|
(@f >= 1 && $f[0] eq "#PCDATA" ? "" : "*"), |
1316
|
|
|
|
|
|
|
" >\n"; |
1317
|
|
|
|
|
|
|
print "\n"; |
1318
|
|
|
|
|
|
|
} |
1319
|
|
|
|
|
|
|
else { |
1320
|
|
|
|
|
|
|
print "\n"; |
1321
|
|
|
|
|
|
|
} |
1322
|
|
|
|
|
|
|
} |
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
sub _whitepc { |
1325
|
|
|
|
|
|
|
$_[1] eq '-pcdata' and $_[0] =~ /^[ \t\r\n]*$/ |
1326
|
|
|
|
|
|
|
} |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
sub MMAPON { |
1329
|
|
|
|
|
|
|
bless([@_],"mmapon") |
1330
|
|
|
|
|
|
|
} |
1331
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
|
1333
|
|
|
|
|
|
|
sub _fromUTF8 { |
1334
|
|
|
|
|
|
|
my $string = shift; |
1335
|
|
|
|
|
|
|
my $encode = shift; |
1336
|
|
|
|
|
|
|
my $ans = eval { XML::LibXML::decodeFromUTF8($encode, $string) }; |
1337
|
|
|
|
|
|
|
if ($@) { |
1338
|
|
|
|
|
|
|
return $string |
1339
|
|
|
|
|
|
|
} else { |
1340
|
|
|
|
|
|
|
return $ans |
1341
|
|
|
|
|
|
|
} |
1342
|
|
|
|
|
|
|
} |
1343
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
1; |