line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
=head1 NAME |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
XML::Filter::Glossary - SAX2 filter for keyword lookup and replacement |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 SYNOPSIS |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use XML::SAX::Writer; |
8
|
|
|
|
|
|
|
use XML::Filter::Glossary; |
9
|
|
|
|
|
|
|
use XML::SAX::ParserFactory; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
my $writer = XML::SAX::Writer->new(); |
12
|
|
|
|
|
|
|
my $glossary = XML::Filter::Glossary->new(Handler=>$writer); |
13
|
|
|
|
|
|
|
my $parser = XML::SAX::ParserFactory->parser(Handler=>$glossary); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
$glossary->set_glossary("/usr/home/asc/bookmarks.xbel"); |
16
|
|
|
|
|
|
|
$parser->parse_string("This is \"aaronland\""); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# prints : |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
This is aaronland |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 DESCRIPTION |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
This package is modelled after the UserLand glossary system where words, or phrases, wrapped in double-quotes are compared against a lookup table and are replaced by their corresponding entries. |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
Currently only one type of lookup table is supported : a well-formed XBEL bookmarks file. Support for other kinds of lookup tables may be added at a later date. |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 KEYWORDS |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
Keywords are flagged as being any word, or words, between double quotes which are then looked up in the glossary. Alternately, you may specify keyword phrases with singleton elements that are the property of a user-defined namespace. |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
If no match is found, the text is left unaltered. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
If a match is located, the result is then parsed with Robert Cameron's REX shallow parsing regular expressions. Chunks of balanced markup are then re-inserted into the SAX stream via I. Anything else, including markup not determined to be well-formed, is added as character data. |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=cut |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
package XML::Filter::Glossary; |
42
|
2
|
|
|
2
|
|
146368
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
84
|
|
43
|
|
|
|
|
|
|
|
44
|
2
|
|
|
2
|
|
3211
|
use XML::Filter::Merger; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
use XML::SAX::ParserFactory; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
use vars qw( @ISA ); |
48
|
|
|
|
|
|
|
@ISA = qw( XML::Filter::Merger ); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
$XML::Filter::Glossary::VERSION = '0.2'; |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# REX/Perl 1.0 |
53
|
|
|
|
|
|
|
# Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions", |
54
|
|
|
|
|
|
|
# Technical Report TR 1998-17, School of Computing Science, Simon Fraser |
55
|
|
|
|
|
|
|
# University, November, 1998. |
56
|
|
|
|
|
|
|
# Copyright (c) 1998, Robert D. Cameron. |
57
|
|
|
|
|
|
|
# The following code may be freely used and distributed provided that |
58
|
|
|
|
|
|
|
# this copyright and citation notice remains intact and that modifications |
59
|
|
|
|
|
|
|
# or additions are clearly identified. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
my $TextSE = "[^<]+"; |
62
|
|
|
|
|
|
|
my $UntilHyphen = "[^-]*-"; |
63
|
|
|
|
|
|
|
my $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-"; |
64
|
|
|
|
|
|
|
my $CommentCE = "$Until2Hyphens>?"; |
65
|
|
|
|
|
|
|
my $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+"; |
66
|
|
|
|
|
|
|
my $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>"; |
67
|
|
|
|
|
|
|
my $S = "[ \\n\\t\\r]+"; |
68
|
|
|
|
|
|
|
my $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]"; |
69
|
|
|
|
|
|
|
my $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]"; |
70
|
|
|
|
|
|
|
my $Name = "(?:$NameStrt)(?:$NameChar)*"; |
71
|
|
|
|
|
|
|
my $QuoteSE = "\"[^\"]*\"|'[^']*'"; |
72
|
|
|
|
|
|
|
my $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*"; |
73
|
|
|
|
|
|
|
my $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>"; |
74
|
|
|
|
|
|
|
my $S1 = "[\\n\\r\\t ]"; |
75
|
|
|
|
|
|
|
my $UntilQMs = "[^?]*\\?+"; |
76
|
|
|
|
|
|
|
my $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>"; |
77
|
|
|
|
|
|
|
my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S"; |
78
|
|
|
|
|
|
|
my $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?"; |
79
|
|
|
|
|
|
|
my $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?"; |
80
|
|
|
|
|
|
|
my $PI_CE = "$Name(?:$PI_Tail)?"; |
81
|
|
|
|
|
|
|
my $EndTagCE = "$Name(?:$S)?>?"; |
82
|
|
|
|
|
|
|
my $AttValSE = "\"[^<\"]*\"|'[^<']*'"; |
83
|
|
|
|
|
|
|
my $ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?"; |
84
|
|
|
|
|
|
|
my $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)?"; |
85
|
|
|
|
|
|
|
my $XML_SPE = "$TextSE|$MarkupSPE"; |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# End of REX/Perl 1.0 |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=head1 PACKAGE METHODS |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=head2 __PACKAGE__->new() |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
Inherits from I |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=cut |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=head1 OBJECT METHODS |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=head2 $pkg->set_glossary($path) |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Set the path to your glossary file. |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=cut |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
sub set_glossary { |
106
|
|
|
|
|
|
|
my $self = shift; |
107
|
|
|
|
|
|
|
$self->{'__glossary'} = $_[0]; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head2 $pkg->register_namespace() |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Register data to allow the filter to recognize specific tags as containing data to be used for keyword lookup. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
Valid arguments are |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=over |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=item * |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
B |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=over |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=item * |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
I |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
String. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
The prefix for your glossary namespace. |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=item * |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
I |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
String. |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
The URI for your glossary namespace. |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=item * |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
I |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
String. |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
Default value is "id" |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=back |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# Use syntax |
151
|
|
|
|
|
|
|
$glossary->register_namespace({ |
152
|
|
|
|
|
|
|
Prefix => "g", |
153
|
|
|
|
|
|
|
NamespaceURI => "http://www.aaronland.net/glossary" |
154
|
|
|
|
|
|
|
}); |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# Use syntax |
157
|
|
|
|
|
|
|
$glossary->register_namespace({ |
158
|
|
|
|
|
|
|
Prefix => "g", |
159
|
|
|
|
|
|
|
NamespaceURI => "http://www.aaronland.net/glossary", |
160
|
|
|
|
|
|
|
KeywordAttr => "phrase", |
161
|
|
|
|
|
|
|
}); |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=item * |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
B |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# Toggle back to default double-quote syntax |
168
|
|
|
|
|
|
|
$glossary->register_namespace(0); |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=back |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=cut |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
sub register_namespace { |
175
|
|
|
|
|
|
|
my $self = shift; |
176
|
|
|
|
|
|
|
my $ns = shift; |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
if (! $ns) { |
179
|
|
|
|
|
|
|
$self->{'__nsaware'} = 0; |
180
|
|
|
|
|
|
|
$self->{'__prefix'} = undef; |
181
|
|
|
|
|
|
|
$self->{'__namespace'} = undef; |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
return 1; |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
if (ref($ns) ne "HASH") { |
187
|
|
|
|
|
|
|
print STDERR "Namespace data must be passed as a hash reference.\n"; |
188
|
|
|
|
|
|
|
return 0; |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
if (($ns->{Prefix}) && |
192
|
|
|
|
|
|
|
($ns->{NamespaceURI})) { |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
$self->{'__nsaware'} = 1; |
195
|
|
|
|
|
|
|
$self->{'__prefix'} = $ns->{Prefix}; |
196
|
|
|
|
|
|
|
$self->{'__namespace'} = $ns->{NamespaceURI}; |
197
|
|
|
|
|
|
|
$self->{'__kwattr'} = $ns->{KeywordAttr} || "id"; |
198
|
|
|
|
|
|
|
return 1; |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
sub start_prefix_mapping { |
203
|
|
|
|
|
|
|
my $self = shift; |
204
|
|
|
|
|
|
|
my $data = shift; |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
return if (($data->{'Prefix'} eq $self->{'__prefix'}) && |
207
|
|
|
|
|
|
|
($data->{'NamespaceURI'} eq $self->{'__namespace'})); |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
$self->SUPER::start_prefix_mapping($data); |
210
|
|
|
|
|
|
|
return 1; |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub start_element { |
214
|
|
|
|
|
|
|
my $self = shift; |
215
|
|
|
|
|
|
|
my $data = shift; |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
unless (($self->{'__nsaware'}) && |
218
|
|
|
|
|
|
|
($self->{'__prefix'} eq $data->{'Prefix'})) { |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
$self->_stripnamespace($data) if (! $self->{'__bangns'}); |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
$self->SUPER::start_element($data); |
223
|
|
|
|
|
|
|
return 1; |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
my $keyword = $data->{Attributes}->{'{}'.$self->{'__kwattr'}}->{'Value'} || $data->{'LocalName'}; |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
if (($keyword) && (my $result = $self->lookup_keyword($keyword))) { |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
$self->process_result(\$result); |
233
|
|
|
|
|
|
|
return 1; |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
# |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
$self->SUPER::characters({Data=>$keyword}); |
239
|
|
|
|
|
|
|
return 1; |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
sub end_element { |
243
|
|
|
|
|
|
|
my $self = shift; |
244
|
|
|
|
|
|
|
my $data = shift; |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
unless (($self->{'__nsaware'}) && |
247
|
|
|
|
|
|
|
($self->{'__prefix'} eq $data->{'Prefix'})) { |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
$self->SUPER::end_element($data); |
250
|
|
|
|
|
|
|
return 1; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
sub characters { |
256
|
|
|
|
|
|
|
my $self = shift; |
257
|
|
|
|
|
|
|
my $data = shift; |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
if ($self->{'__nsaware'}) { |
260
|
|
|
|
|
|
|
$self->SUPER::characters($data); |
261
|
|
|
|
|
|
|
return 1; |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
# |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
while (not $data->{Data} =~ m/\G\z/gc) { |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
$data->{Data} =~ m/\G([^"]*)(?:"([^"\\]*(\\.[^"\\]*)*)")*/gcm; |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
my $text = $1; |
271
|
|
|
|
|
|
|
my $keyword = $2; |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
# print STDERR "[$text] [$keyword]\n"; |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
if ($keyword) { |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
if (my $result = $self->lookup_keyword($keyword)) { |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
$self->SUPER::characters({Data=>"$text "}); |
280
|
|
|
|
|
|
|
$self->process_result(\$result); |
281
|
|
|
|
|
|
|
next; |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
# Unable to find a link, so put everything back |
285
|
|
|
|
|
|
|
# the way you found it. |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
$self->SUPER::characters({Data=>"$text \"$keyword\""}); |
288
|
|
|
|
|
|
|
next; |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
# No keyword. Just send back the text as is. |
292
|
|
|
|
|
|
|
$self->SUPER::characters({Data=>$text}); |
293
|
|
|
|
|
|
|
} |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
return 1; |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
sub lookup_keyword { |
299
|
|
|
|
|
|
|
my $self = shift; |
300
|
|
|
|
|
|
|
my $keyword = shift; |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
if (! exists $self->{'__cache'}{$keyword}) { |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
if (! $self->{'__lookup'}) { |
305
|
|
|
|
|
|
|
my $lookup = join("::",__PACKAGE__,"XBEL"); |
306
|
|
|
|
|
|
|
eval "require $lookup;"; |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
$self->{'__lookup'} = $lookup->new(); |
309
|
|
|
|
|
|
|
$self->{'__parser'} = XML::SAX::ParserFactory->parser(Handler=>$self->{'__lookup'}); |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
$self->{'__lookup'}->set_keyword($keyword); |
313
|
|
|
|
|
|
|
$self->{'__parser'}->parse_uri($self->{'__glossary'}); |
314
|
|
|
|
|
|
|
$self->{'__cache'}{$keyword} = $self->{'__lookup'}->result(); |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
return $self->{'__cache'}{$keyword}; |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
sub process_result { |
321
|
|
|
|
|
|
|
my $self = shift; |
322
|
|
|
|
|
|
|
my $result = shift; |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
my $cdata = undef; |
325
|
|
|
|
|
|
|
my $markup = undef; |
326
|
|
|
|
|
|
|
my $element = undef; |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
# Hack Until I figure where to tweak |
329
|
|
|
|
|
|
|
# the REX expressions. Ick ick ick. |
330
|
|
|
|
|
|
|
$$result =~ s/>>
|
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
while (not $$result =~ m/\G\z/gc) { |
333
|
|
|
|
|
|
|
$$result =~ m/\G($TextSE)?($MarkupSPE)*/gcm; |
334
|
|
|
|
|
|
|
# print "PARSE [$1] [$2]\n"; |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
if ($element) { |
337
|
|
|
|
|
|
|
$markup .= $1; |
338
|
|
|
|
|
|
|
} else { |
339
|
|
|
|
|
|
|
$cdata .= $1; |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
if ($2) { |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
if ($cdata) { |
345
|
|
|
|
|
|
|
$self->SUPER::characters({Data=>$cdata}); |
346
|
|
|
|
|
|
|
# print "CDATA '$cdata'\n"; |
347
|
|
|
|
|
|
|
$cdata = undef; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
my $_markup = $2; |
351
|
|
|
|
|
|
|
$markup .= $_markup; |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
$_markup =~ /^<(\/)?([^\s>]+)/; |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
if (($1) && ($element eq $2)) { |
356
|
|
|
|
|
|
|
# print "MARKUP '$markup'\n"; |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
$self->set_include_all_roots( 1 ); |
359
|
|
|
|
|
|
|
XML::SAX::ParserFactory->parser(Handler=>$self)->parse_string($markup); |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
$markup = undef; |
362
|
|
|
|
|
|
|
$element = undef; |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
if ((! $1) && (! $element)) { |
366
|
|
|
|
|
|
|
# print "New Element : $2\n"; |
367
|
|
|
|
|
|
|
$element = $2; |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# Hark, a singleton! |
370
|
|
|
|
|
|
|
if ($_markup =~ /\/>$/) { |
371
|
|
|
|
|
|
|
$self->set_include_all_roots( 1 ); |
372
|
|
|
|
|
|
|
XML::SAX::ParserFactory->parser(Handler=>$self)->parse_string($markup); |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
$markup = undef; |
375
|
|
|
|
|
|
|
$element = undef; |
376
|
|
|
|
|
|
|
} |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
} |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
if ($cdata) { |
384
|
|
|
|
|
|
|
$self->SUPER::characters({Data=>$cdata}); |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
if ($markup) { |
388
|
|
|
|
|
|
|
print STDERR "WARNING\nThere was a bunch of unbalanced markup leftover: '$markup'\n"; |
389
|
|
|
|
|
|
|
$self->SUPER::characters({Data=>$markup}); |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
return 1; |
393
|
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
sub _stripnamespace { |
396
|
|
|
|
|
|
|
my $self = shift; |
397
|
|
|
|
|
|
|
my $data = shift; |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
foreach my $ns (keys %{$data->{Attributes}}) { |
400
|
|
|
|
|
|
|
if ($ns eq "{http://www.w3.org/2000/xmlns/}".$self->{'__prefix'}) { |
401
|
|
|
|
|
|
|
delete $data->{Attributes}{$ns}; |
402
|
|
|
|
|
|
|
$self->{'__bangns'} = 1; |
403
|
|
|
|
|
|
|
last; |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
} |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=head1 VERSION |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
0.2 |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=head1 DATE |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
September 12, 2002 |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=head1 AUTHOR |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Aaron Straup Cope |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=head1 TO DO |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
=over 4 |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=item * |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Support for Netscape bookmarks |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=item * |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
Support for IE Favorites (via XML::Directory::SAX) |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=item * |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
Support for UserLand glossaries (serialized) |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=back |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
=head1 BACKGROUND |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
http://www.la-grange.net/2002/09/04.html |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
http://aaronland.info/weblog/archive/4586 |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=head1 SEE ALSO |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
http://glossary.userland.com/ |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
http://pyxml.sourceforge.net/topics/xbel/ |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
http://www.cs.sfu.ca/~cameron/REX.html |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
L |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
L |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=head1 BUGS |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=over |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=item * |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Certainly, not outside the realm of possibility. |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=back |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=head1 LICENSE |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
Copyright (c) 2002, Aaron Straup Cope. All Rights Reserved. |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
This is free software, you may use it and distribute it under the same terms as Perl itself. |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=cut |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
return 1; |