, "fix the attribute
1080
|
|
|
|
|
|
|
} |
1081
|
|
|
|
|
|
|
my $q= '
|
1082
|
|
|
|
|
|
|
if( _use( 'HTML::Entities::Numbered')) { $$xml=name2hex_xml( $$xml); } |
1083
|
|
|
|
|
|
|
if( $$xml=~ m{$q}) |
1084
|
|
|
|
|
|
|
{ $$xml=~ s{$q}{
|
1085
|
|
|
|
|
|
|
} |
1086
|
|
|
|
|
|
|
else |
1087
|
|
|
|
|
|
|
{ my $encoding= _encoding_from_meta( $tree); |
1088
|
|
|
|
|
|
|
unless( keys %xml_parser_encoding) { %xml_parser_encoding= _xml_parser_encodings(); } |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
if( ! $add_decl) |
1091
|
|
|
|
|
|
|
{ if( $xml_parser_encoding{$encoding}) |
1092
|
|
|
|
|
|
|
{ $add_decl=1; } |
1093
|
|
|
|
|
|
|
elsif( $encoding eq 'euc-jp' && $xml_parser_encoding{'x-euc-jp-jisx0221'}) |
1094
|
|
|
|
|
|
|
{ $encoding="x-euc-jp-jisx0221"; $add_decl=1;} |
1095
|
|
|
|
|
|
|
elsif( $encoding eq 'shift-jis' && $xml_parser_encoding{'x-sjis-jisx0221'}) |
1096
|
|
|
|
|
|
|
{ $encoding="x-sjis-jisx0221"; $add_decl=1;} |
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
if( $add_decl) |
1099
|
|
|
|
|
|
|
{ $$xml=~ s{^(<\?xml.*?\?>)?}{}s; |
1100
|
|
|
|
|
|
|
#warn " added decl (encoding $encoding)\n"; |
1101
|
|
|
|
|
|
|
} |
1102
|
|
|
|
|
|
|
else |
1103
|
|
|
|
|
|
|
{ $$xml=~ s{^(<\?xml.*?\?>)?}{}s; |
1104
|
|
|
|
|
|
|
#warn " converting to utf8 from $encoding\n"; |
1105
|
|
|
|
|
|
|
$$xml= _to_utf8( $encoding, $$xml); |
1106
|
|
|
|
|
|
|
} |
1107
|
|
|
|
|
|
|
} |
1108
|
|
|
|
|
|
|
else |
1109
|
|
|
|
|
|
|
{ $$xml=~ s{^(<\?xml.*?\?>)?}{}s; |
1110
|
|
|
|
|
|
|
#warn " converting to utf8 from $encoding\n"; |
1111
|
|
|
|
|
|
|
$$xml= _to_utf8( $encoding, $$xml); |
1112
|
|
|
|
|
|
|
} |
1113
|
|
|
|
|
|
|
} |
1114
|
|
|
|
|
|
|
} |
1115
|
|
|
|
|
|
|
} |
1116
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
# some versions of HTML::TreeBuilder escape CDATA sections |
1118
|
|
|
|
|
|
|
$$xml=~ s{(<!\[CDATA\[.*?\]\]>)}{_unescape_cdata( $1)}eg; |
1119
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
} |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
sub _xml_parser_encodings
  { # Returns a flat ( encoding_name => 1, ...) list, meant to be stored in a
    # hash, naming the encodings for which an XML/Parser/Encodings/*.enc map
    # file is found in a directory of @INC.
    # iso-8859-1 is always listed: it is included by default and has no map file.
    my @names= ( 'iso-8859-1');
    for my $dir (@INC)
      { my $pattern= File::Spec->catdir( $dir, 'XML', 'Parser', 'Encodings', '*.enc');
        foreach my $map_file (glob( $pattern))
          { push @names, basename( $map_file, '.enc'); } # the encoding name is the file name minus .enc
      }
    return map { $_ => 1 } @names;
  }
1128
|
|
|
|
|
|
|
} |
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
sub _unescape_cdata
  { # Undoes the XML escaping of <, > and & in a string (used on CDATA
    # sections that were escaped by mistake) and returns the result.
    my( $cdata)= @_;
    $cdata=~s{&lt;}{<}g;
    $cdata=~s{&gt;}{>}g;
    $cdata=~s{&amp;}{&}g; # done last, so that &amp;lt; comes out as &lt; and not as <
    return $cdata;
  }
1138
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
sub _as_XML {

    # fork of HTML::Element::as_XML, which is a little too buggy and inconsistent between versions for my liking
    #
    # Serializes the tree rooted at $elt and returns the XML string:
    #  - attribute names that are not acceptable are fixed (on the node itself) instead of dying
    #  - elements listed in the empty element map that have no content are output as empty tags
    #  - text is escaped through _xml_escape, except for CDATA sections which are output as is
    my ($elt) = @_;
    my $xml= '';
    my $empty_element_map = $elt->_empty_element_map;

    my ( $tag, $node, $start );    # per-iteration scratch
    $elt->traverse(
        sub {
            ( $node, $start ) = @_;
            if ( ref $node )
              { # it's an element
                $tag = $node->{'_tag'};
                if ($start)
                  { # on the way in
                    foreach my $att ( grep { ! m{^(_|/$)} } keys %$node )
                      { # fix attribute names instead of dying
                        my $new_att= $att;
                        if( $att=~ m{^\d}) { $new_att= "a$att"; } # a name cannot start with a digit
                        $new_att=~ s{[^\w\d:_-]}{}g;              # drop characters not allowed here
                        $new_att ||= 'a';                         # nothing left: use a default name
                        if( $new_att ne $att) { $node->{$new_att}= delete $node->{$att}; }
                      }

                    if ( $empty_element_map->{$tag} && (!@{ $node->{'_content'} || []}) )
                      { $xml.= $node->starttag_XML( undef, 1 ); }
                    else
                      { $xml.= $node->starttag_XML(undef); }
                  }
                else
                  { # on the way out
                    unless ( $empty_element_map->{$tag} and !@{ $node->{'_content'} || [] } )
                      { $xml.= $node->endtag_XML();
                      } # otherwise it will have been an <... /> tag.
                  }
              }
            elsif( $node=~ /<!\[CDATA\[/) # the text includes at least one CDATA section
              { foreach my $chunk (split /(<!\[CDATA\[.*?\]\]>)/s, $node) # chunks are CDATA sections or normal text
                  { $xml.= $chunk =~ m{<!\[CDATA\[} ? $chunk : _xml_escape( $chunk); }
              }
            else # it's just text
              { $xml .= _xml_escape($node); }
            1; # keep traversing
        }
    );
    return $xml;
}
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
sub _xml_escape
  { # Returns a copy of the text with &, <, >, " and ' replaced by the
    # matching XML entity. Every ampersand is escaped, including one that
    # already starts an entity or a character reference.
    my( $html)= @_;

    $html=~ s{&}{&amp;}g; # first, so the ampersands added below are not escaped again

    # in old versions of HTML::TreeBuilder &amp; can come out as &Amp;
    if( $HTML::TreeBuilder::VERSION && $HTML::TreeBuilder::VERSION <= 3.23) { $html=~ s{&Amp;}{&amp;}g; }

    # simple character escapes
    $html =~ s/</&lt;/g;
    $html =~ s/>/&gt;/g;
    $html =~ s/"/&quot;/g;
    $html =~ s/'/&apos;/g;

    return $html;
  }
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
sub _check_xml
  { # Tries to parse the XML with a fresh XML::Parser.
    # $xml_ref is a reference to the XML string.
    # Returns what the parse returned, or undef if it died (the error is left in $@).
    my( $xml_ref)= @_;
    return scalar eval { XML::Parser->new->parse( $$xml_ref); };
  }
1222
|
|
|
|
|
|
|
|
1223
|
|
|
|
|
|
|
sub _encoding_from_meta
  { # Returns the lower-cased charset declared by the first
    #   <meta http-equiv="content-type" content="text/html; charset=...">
    # element found in $tree, or 'iso-8859-1' when there is none.
    my( $tree)= @_;
    for my $elt ($tree->find( 'meta'))
      { my $equiv= $elt->{'http-equiv'};
        next unless( $equiv && $equiv =~ m{^\s*content-type\s*}i);
        my $content= $elt->{content};
        next unless( $content && $content =~ m{^\s*text/html\s*;\s*charset\s*=\s*(\S*)\s*}i);
        return lc $1; # only the first matching meta is used
      }
    return "iso-8859-1";
  }
1238
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
{ sub _to_utf8
    { # Converts $text from $encoding to utf8, using the first available of
      # Encode, Text::Iconv or Unicode::Map8 + Unicode::String (the text is
      # left as is if none can be loaded), then removes control characters.
      # Returns the resulting string.
      my( $encoding, $text)= @_;
      local $SIG{__DIE__}; # no custom die handler while the conversion modules are tried
      if( _use( 'Encode'))
        { Encode::from_to( $text, $encoding => 'utf8', 0x0400); } # 0x0400 is Encode::FB_XMLCREF
      elsif( _use( 'Text::Iconv'))
        { my $iconv = eval { Text::Iconv->new( $encoding => "utf8") };
          $text= $iconv->convert( $text) if( $iconv);
        }
      elsif( _use( 'Unicode::Map8') && _use( 'Unicode::String'))
        { $text= Unicode::Map8->new( $encoding)->tou( $text)->utf8; }

      # get rid of control chars (tab, line feed and carriage return are kept)
      $text=~ tr/\x00-\x08\x0B\x0C\x0E-\x1F//d;
      return $text;
    }
}
1256
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
sub _indent_xhtml
  { # Indents, in place, the XHTML string referenced by $xhtml: the start tag
    # of each block-level element goes on a new line, indented according to
    # its depth. Comments and CDATA sections are left untouched.
    my( $xhtml)= @_; # $xhtml is a ref
    my %block_tag= map { $_ => 1 } qw( html
                                         head
                                           meta title link script base
                                         body
                                           h1 h2 h3 h4 h5 h6
                                           p br address blockquote pre
                                           ol ul li dd dl dt
                                           table tr td th tbody tfoot thead col colgroup caption
                                           div frame frameset hr
                                     );

    my $level=0; # current depth in block-level elements
    $$xhtml=~ s{( (?:<!(?:--.*?-->|\[CDATA\[.*?\]\]>))                  # ignore comments and CDATA sections
                  | <(\w+)((?:\s+\w+\s*=\s*(?:"[^"]*"|'[^']*'))*\s*/>) # empty tag
                  | <(\w+)                                             # start tag
                  | </(\w+)                                            # end tag
                )
               }
               { if(    $2 && $block_tag{$2}) { # empty block tag: own line, depth unchanged
                                                my $indent= " " x $level;
                                                "\n$indent<$2$3";
                                              }
                 elsif( $4 && $block_tag{$4}) { # block start tag: own line (except for html), one level deeper
                                                my $indent= " " x $level;
                                                $level++ unless( $4=~ m{/>});
                                                my $nl= $4 eq 'html' ? '' : "\n";
                                                "$nl$indent<$4";
                                              }
                 elsif( $5 && $block_tag{$5}) { $level--; "</$5"; } # block end tag: back one level
                 else                         { $1; }               # anything else is output unchanged
               }xesg;
  }
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
sub add_stylesheet
  { # Adds an xml-stylesheet processing instruction (type="text/$type"
    # href="$href") to the twig, through _add_cpi_outside_of_root.
    # $type must be xsl or css, any other value croaks.
    # Returns the twig.
    my( $t, $type, $href)= @_;
    my %is_text_type= ( xsl => 1, css => 1);
    my $stylesheet_pi= $t->{twig_elt_class}->new( $PI);

    croak "unsupported style sheet type '$type'" unless( $is_text_type{$type});
    $stylesheet_pi->_set_pi( 'xml-stylesheet', qq{type="text/$type" href="$href"});

    $t->_add_cpi_outside_of_root( leading_cpi => $stylesheet_pi);
    return $t;
  }
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
{ my %used;       # module => 1 if require ok, 0 otherwise
  my %disallowed; # for testing, refuses to _use modules in this hash

  # makes _use refuse to load the modules (used for testing)
  sub _disallow_use ## no critic (Subroutines::ProhibitNestedSubs);
    { my( @modules)= @_;
      $disallowed{$_}= 1 foreach (@modules);
    }

  # lets _use load the modules again
  sub _allow_use ## no critic (Subroutines::ProhibitNestedSubs);
    { my( @modules)= @_;
      $disallowed{$_}= 0 foreach (@modules);
    }

  # Loads (and imports) $module at run time if it is available.
  # Returns 1 if the module is loaded and, when $version is given, its
  # $VERSION is at least $version. Returns 0 otherwise (module disallowed,
  # not available, or too old).
  # The version is checked on every call, including when the module was
  # already loaded by a previous call.
  sub _use ## no critic (Subroutines::ProhibitNestedSubs);
    { my( $module, $version)= @_;
      $version ||= 0;
      if( $disallowed{$module}) { return 0; }

      if( !$used{$module}) # not loaded yet (a previous failure is tried again)
        { if( eval "require $module") # no critic ProhibitStringyEval
            { $module->import; $used{$module}= 1; }
          else
            { $used{$module}= 0; return 0; }
        }

      if( !$version) { return 1; }

      ## no critic (TestingAndDebugging::ProhibitNoStrict);
      no strict 'refs'; # the version is read through a symbolic reference
      return ${"${module}::VERSION"} >= $version ? 1 : 0;
    }
}
1337
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
# used to solve the [n] predicates while avoiding getting the entire list |
1339
|
|
|
|
|
|
|
# needs a prototype to accept passing bare blocks |
1340
|
|
|
|
|
|
|
sub _first_n(&$@) ## no critic (Subroutines::ProhibitSubroutinePrototypes);
  { # Returns the $position-th item of the list for which the code block
    # (which tests $_) is true, counting from the start of the list when
    # $position is positive and from the end when it is negative.
    # Returns undef if there are not enough matching items, croaks on position 0.
    my $is_wanted= shift;
    my $position= shift;
    croak "illegal position number 0" if( $position == 0);

    my $still_needed= abs $position;
    if( $position > 0)
      { foreach (@_)
          { next unless( &$is_wanted);
            return $_ if( --$still_needed == 0);
          }
      }
    else
      { foreach (reverse @_)
          { next unless( &$is_wanted);
            return $_ if( --$still_needed == 0);
          }
      }
    return undef;
  }
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
sub _slurp_uri
  { # Returns the content found at $uri: anything that starts with a scheme
    # (xxx://) is fetched with LWP::Simple, anything else is read as a file,
    # its name being resolved against $base.
    my( $uri, $base)= @_;
    unless( $uri=~ m{^\w+://})
      { return _slurp( _based_filename( $uri, $base)); }
    _use( 'LWP::Simple');
    return LWP::Simple::get( $uri);
  }
1358
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
sub _based_filename
  { # Resolves $filename against $base (cf. XML/Parser.pm's file_ext_ent_handler):
    # when $base is defined and $filename is relative, the last component of
    # $base is replaced by $filename. Otherwise $filename is returned as is.
    my( $filename, $base)= @_;
    return $filename unless( defined $base);
    return $filename if( $filename =~ m{^(?:[\\/]|\w+:)}); # absolute path, or starts with "xxx:"

    (my $resolved= $base)=~ s{[^\\/:]*$}{$filename};
    return $resolved;
  }
1369
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
sub _slurp
  { # Returns the whole content of the file named $filename.
    # Croaks if the file cannot be opened.
    my( $filename)= @_;
    # 3-arg open: the file name is used as is, nothing in it is taken as part of the open mode
    open( my $to_slurp, '<', $filename) or croak "cannot open '$filename': $!";
    local $/= undef; # no input record separator: a single read gets the whole file
    my $content= <$to_slurp>;
    close $to_slurp;
    return $content;
  }
1379
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
sub _slurp_fh
  { # Returns everything that is left to read from the filehandle $fh
    my( $fh)= @_;
    my $content= do { local $/= undef; <$fh> }; # no record separator: read it all at once
    return $content;
  }
1386
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
# I should really add extra options to allow better configuration of the |
1388
|
|
|
|
|
|
|
# LWP::UserAgent object |
1389
|
|
|
|
|
|
|
# this method forks (except on VMS!) |
1390
|
|
|
|
|
|
|
# - the child gets the data and copies it to the pipe, |
1391
|
|
|
|
|
|
|
# - the parent reads the stream and sends it to XML::Parser |
1392
|
|
|
|
|
|
|
# the data is cut in chunks the size of the XML::Parser::Expat buffer
1393
|
|
|
|
|
|
|
# the method returns the twig and the status |
1394
|
|
|
|
|
|
|
sub _parseurl
  { # Gets the document at $url and parses it with the twig $t, using
    # safe_parse if $safe is true and parse otherwise. $agent is an optional
    # LWP::UserAgent, one is created if it is not provided.
    # Except on VMS the download is done in a forked child, which copies the
    # data to a pipe read by the parser in the parent.
    # Returns the twig, or 0 if $@ is set after the parse.
    # Croaks if LWP is not available, if the pipe or the fork fails, or if
    # the request is not successful.
    my( $t, $safe, $url, $agent)= @_;
    _use( 'LWP') || croak "LWP not available, needed to use parseurl methods";
    if( $^O ne 'VMS')
      { pipe( README, WRITEME) or croak "cannot create connected pipes: $!";
        my $pid= fork;
        if( !defined $pid)
          { # fork failed: there is no child, this process must not run the
            # child code (which ends with exit)
            my $error= $!;
            close README;
            close WRITEME;
            croak "cannot fork: $error";
          }
        if( $pid)
          { # parent code: parse the incoming file
            close WRITEME; # no need to write
            my $result= $safe ? $t->safe_parse( \*README) : $t->parse( \*README);
            close README;
            return $@ ? 0 : $t;
          }
        else
          { # child
            close README; # no need to read
            local $|=1;
            $agent ||= LWP::UserAgent->new;
            my $request = HTTP::Request->new( GET => $url);
            # _pass_url_content is called with chunks of data the same size as
            # the XML::Parser buffer
            my $response = $agent->request( $request,
                             sub { _pass_url_content( \*WRITEME, @_); }, $BUFSIZE);
            $response->is_success or croak "$url ", $response->message;
            close WRITEME;
            CORE::exit(); # CORE is there for mod_perl (which redefines exit)
          }
      }
    else
      { # VMS branch (hard to test!): no fork, the whole content is loaded, then parsed
        local $|=1;
        $agent ||= LWP::UserAgent->new;
        my $request = HTTP::Request->new( GET => $url);
        my $response = $agent->request( $request);
        $response->is_success or croak "$url ", $response->message;
        my $result= $safe ? $t->safe_parse($response->content) : $t->parse($response->content);
        return $@ ? 0 : $t;
      }

  }
1433
|
|
|
|
|
|
|
|
1434
|
|
|
|
|
|
|
# get a chunk of the (hopefully!) XML data from the URL and print it to the filehandle
1435
|
|
|
|
|
|
|
sub _pass_url_content
  { # Prints the chunk $data to the filehandle $fh.
    # The other arguments received ($response, $protocol) are not used.
    my $fh= shift;
    my $data= shift;
    print {$fh} $data;
  }
1439
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
sub add_options
  { # Registers the option names passed as arguments in %valid_option,
    # under the form given to them by _normalize_args
    my %flag_for= map { $_, 1 } @_;
    my %normalized= _normalize_args( %flag_for);
    foreach my $option (keys %normalized) { $valid_option{$option}++; }
  }
1445
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
sub _pretty_print_styles
  { # simply delegates to the function of the same name in XML::Twig::Elt
    return XML::Twig::Elt::_pretty_print_styles();
  }
1447
|
|
|
|
|
|
|
|
1448
|
|
|
|
|
|
|
sub _twig_store_internal_dtd |
1449
|
|
|
|
|
|
|
{ |
1450
|
|
|
|
|
|
|
# warn " in _twig_store_internal_dtd...\n"; # DEBUG handler |
1451
|
|
|
|
|
|
|
my( $p, $string)= @_; |
1452
|
|
|
|
|
|
|
my $t= $p->{twig}; |
1453
|
|
|
|
|
|
|
if( $t->{twig_keep_encoding}) { $string= $p->original_string(); } |
1454
|
|
|
|
|
|
|
$t->{twig_doctype}->{internal} .= $string; |
1455
|
|
|
|
|
|
|
return; |
1456
|
|
|
|
|
|
|
} |
1457
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
sub _twig_stop_storing_internal_dtd |
1459
|
|
|
|
|
|
|
{ # warn " in _twig_stop_storing_internal_dtd...\n"; # DEBUG handler |
1460
|
|
|
|
|
|
|
my $p= shift; |
1461
|
|
|
|
|
|
|
if( @saved_default_handler && defined $saved_default_handler[1]) |
1462
|
|
|
|
|
|
|
{ $p->setHandlers( @saved_default_handler); } |
1463
|
|
|
|
|
|
|
else |
1464
|
|
|
|
|
|
|
{ |
1465
|
|
|
|
|
|
|
$p->setHandlers( Default => undef); |
1466
|
|
|
|
|
|
|
} |
1467
|
|
|
|
|
|
|
$p->{twig}->{twig_doctype}->{internal}=~ s{^\s*\[}{}; |
1468
|
|
|
|
|
|
|
$p->{twig}->{twig_doctype}->{internal}=~ s{\]\s*$}{}; |
1469
|
|
|
|
|
|
|
return; |
1470
|
|
|
|
|
|
|
} |
1471
|
|
|
|
|
|
|
|
1472
|
|
|
|
|
|
|
sub _twig_doctype_fin_print |
1473
|
|
|
|
|
|
|
{ # warn " in _twig_doctype_fin_print...\n"; # DEBUG handler |
1474
|
|
|
|
|
|
|
my( $p)= shift; |
1475
|
|
|
|
|
|
|
if( $p->{twig}->{twig_doctype}->{has_internal} && !$expat_1_95_2) { print ' ]>'; } |
1476
|
|
|
|
|
|
|
return; |
1477
|
|
|
|
|
|
|
} |
1478
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
|
1480
|
|
|
|
|
|
|
sub _normalize_args
  { # Takes a list of ( option => value) pairs and returns it with the
    # option names normalized: the name is split on underscores and each part
    # gets an initial capital (pretty_print becomes PrettyPrint).
    # All the pairs are processed, including those whose name is a false
    # value (like '0').
    my %normalized_args;
    while( @_)
      { my $key= shift;
        $key= join '', map { ucfirst } split /_/, $key;
        #$key= "Twig".$key unless( substr( $key, 0, 4) eq 'Twig');
        $normalized_args{$key}= shift;
      }
    return %normalized_args;
  }
1489
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
sub _is_fh
  { # Returns the argument if it is a filehandle (a GLOB or an IO::Scalar),
    # a false value otherwise
    return unless $_[0];
    my $looks_like_fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar');
    return $_[0] if( $looks_like_fh);
  }
1491
|
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
|
sub _set_handler
  { # Sets $handler for the trigger expression $whole_path, which can hold
    # several alternatives separated by |. Each alternative is set through
    # the first of the _set_*_handler functions that accepts it.
    # Returns the handler previously stored for the first alternative that
    # had one. Croaks if the expression cannot be recognized.
    my( $handlers, $whole_path, $handler)= @_;

    my $H_SPECIAL = qr{($ALL|$DEFAULT|$COMMENT|$TEXT)};
    my $H_PI      = qr{(\?|$PI)\s*(([^\s]*)\s*)};
    my $H_LEVEL   = qr{level \s* \( \s* ([0-9]+) \s* \)}x;
    my $H_REGEXP  = qr{\(\?([\^xism]*)(-[\^xism]*)?:(.*)\)}x;
    my $H_XPATH   = qr{(/?/?$REG_TAG_PART? \s* ($REG_PREDICATE\s*)?)+}x;

    # tried in this order, the first one that returns true wins
    my @setters= ( \&_set_special_handler, \&_set_pi_handler, \&_set_level_handler,
                   \&_set_regexp_handler, \&_set_xpath_handler,
                 );

    my $prev_handler;

    my $to_process= $whole_path; # alternatives are removed from it as they are processed
    #warn "\$to_process: '$to_process\n";
    while( $to_process && $to_process=~ s{^\s*($H_SPECIAL|$H_PI|$H_LEVEL|$H_REGEXP|$H_XPATH)\s*($|\|)}{})
      { my $path= $1;
        #warn "\$to_process: '$to_process' - $path: '$path'\n";
        $prev_handler ||= $handlers->{handlers}->{string}->{$path} || undef; # $prev_handler gets the first found handler

        my $is_set= 0;
        foreach my $setter (@setters)
          { if( $setter->( $handlers, $path, $handler, $prev_handler)) { $is_set= 1; last; } }
        croak "unrecognized expression in handler: '$whole_path'" unless( $is_set);

        # this both takes care of the simple (gi) handlers and store
        # the handler code reference for other handlers
        $handlers->{handlers}->{string}->{$path}= $handler;
      }

    # anything left could not be parsed
    croak "unrecognized expression in handler: '$whole_path'" if( $to_process);

    return $prev_handler;
  }
1526
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
sub _set_special_handler
  { # Stores $handler if $path is one of the special triggers ($ALL,
    # $DEFAULT, $COMMENT or $TEXT). Returns 1 if it was, 0 otherwise.
    my( $handlers, $path, $handler, $prev_handler)= @_;
    return 0 unless( $path =~ m{^\s*($ALL|$DEFAULT|$COMMENT|$TEXT)\s*$}io );
    $handlers->{handlers}->{$1}= $handler;
    return 1;
  }
1537
|
|
|
|
|
|
|
|
1538
|
|
|
|
|
|
|
sub _set_xpath_handler
  { # Adds $handler as an XPath-like handler if _parse_xpath_handler can
    # parse $path. Returns 1 if it could, 0 otherwise.
    my( $handlers, $path, $handler, $prev_handler)= @_;
    my $handler_data= _parse_xpath_handler( $path, $handler);
    return 0 unless( $handler_data);
    _add_handler( $handlers, $handler_data, $path, $prev_handler);
    return 1;
  }
1547
|
|
|
|
|
|
|
|
1548
|
|
|
|
|
|
|
sub _add_handler
  { # Updates the list of handlers stored for the tag of $handler_data:
    #  - if there was a previous handler, entries with the same path are removed
    #  - $handler_data is added, provided it includes a handler
    #  - the list is sorted by decreasing score (type, anchored, steps,
    #    predicates, then tests), entries with the same score being ordered by path
    my( $handlers, $handler_data, $path, $prev_handler)= @_;

    my $tag= $handler_data->{tag};
    my @tag_handlers= $handlers->{xpath_handler}->{$tag} ? @{$handlers->{xpath_handler}->{$tag}} : ();

    @tag_handlers= grep { $_->{path} ne $path } @tag_handlers if( $prev_handler);
    push @tag_handlers, $handler_data if( $handler_data->{handler});

    if( @tag_handlers > 1)
      { my $by_decreasing_score=
          sub { foreach my $criterion (qw( type anchored steps predicates tests))
                  { my $order= ($b->{score}->{$criterion} || 0) <=> ($a->{score}->{$criterion} || 0);
                    return $order if( $order);
                  }
                return $a->{path} cmp $b->{path};
              };
        @tag_handlers= sort $by_decreasing_score @tag_handlers;
      }

    $handlers->{xpath_handler}->{$tag}= \@tag_handlers;
  }
1570
|
|
|
|
|
|
|
|
1571
|
|
|
|
|
|
|
sub _set_pi_handler
  { # Stores $handler if $path is a condition on processing instructions:
    # PI conditions ( '?target' => \&handler or '?' => \&handler
    #             or '#PItarget' => \&handler or '#PI' => \&handler)
    # Returns 1 if it was, 0 otherwise.
    my( $handlers, $path, $handler, $prev_handler)= @_;
    return 0 unless( $path=~ /^\s*(?:\?|$PI)\s*(?:([^\s]*)\s*)$/);
    # the key is the target (the empty string when the condition has no target),
    # either the previous or the new handler can be undef
    $handlers->{pi_handlers}->{$1}= $handler;
    return 1;
  }
1586
|
|
|
|
|
|
|
|
1587
|
|
|
|
|
|
|
sub _set_level_handler
  { # Adds $handler if $path is a level(<n>) condition.
    # Returns 1 if it was, 0 otherwise.
    my( $handlers, $path, $handler, $prev_handler)= @_;
    return 0 unless( $path =~ m{^ \s* level \s* \( \s* ([0-9]+) \s* \) \s* $}ox );

    my $level= $1;
    # true when the last element of the stack has a tag that does not start
    # with # and the stack holds $level + 1 elements
    my $is_at_level= sub { my( $stack)= @_; return( ($stack->[-1]->{$ST_TAG} !~ m{^#}) && (scalar @$stack == $level + 1) ) };
    my %handler_data= ( tag          => '*',
                        score        => { type => $LEVEL_TRIGGER},
                        trigger      => $is_at_level,
                        path         => $path,
                        handler      => $handler,
                        test_on_text => 0,
                      );
    _add_handler( $handlers, \%handler_data, $path, $prev_handler);
    return 1;
  }
1601
|
|
|
|
|
|
|
|
1602
|
|
|
|
|
|
|
sub _set_regexp_handler
  { # Adds $handler if $path is a stringified regexp.
    # Returns 1 if it was, 0 otherwise.
    my( $handlers, $path, $handler, $prev_handler)= @_;
    # if the expression was a regexp it is now a string (it was stringified when it became a hash key)
    return 0 unless( $path=~ m{^\(\?([\^xism]*)(?:-[\^xism]*)?:(.*)\)$});

    my $regexp= qr/(?$1:$2)/; # convert it back into a regexp
    # true when the tag of the last element of the stack matches the regexp
    my $tag_matches= sub { my( $stack)= @_; return( $stack->[-1]->{$ST_TAG} =~ $regexp ) };
    my %handler_data= ( tag          => '*',
                        score        => { type => $REGEXP_TRIGGER},
                        trigger      => $tag_matches,
                        path         => $path,
                        handler      => $handler,
                        test_on_text => 0,
                      );
    _add_handler( $handlers, \%handler_data, $path, $prev_handler);
    return 1;
  }
1617
|
|
|
|
|
|
|
|
1618
|
|
|
|
|
|
|
my $DEBUG_HANDLER= 0; # 0 or 1 (output the handler checking code) or 2 (super verbose)
my $handler_string;   # store the handler itself

# sets the debug level
sub _set_debug_handler
  { $DEBUG_HANDLER= shift; }

# outputs a debug message: it is sent to warn when the debug level is below 3,
# and added to the stored string otherwise
sub _warn_debug_handler
  { $DEBUG_HANDLER < 3 ? warn( @_) : ( $handler_string .= join( '', @_)); }

# returns the messages stored so far and empties the store
sub _return_debug_handler
  { my $stored= $handler_string;
    $handler_string= '';
    return $stored;
  }
1623
|
|
|
|
|
|
|
|
1624
|
|
|
|
|
|
|
sub _parse_xpath_handler
  { # Turns the XPath-like trigger $xpath into the data needed to check it
    # against the stack of open elements.
    # The expression is split in steps and separators (/ or //), then the
    # code of a sub that checks the steps, from the last one to the first
    # one, is generated and compiled.
    # Returns a hash ref with the tag of the last step (tag), the score used
    # to order handlers (score), the compiled sub (trigger), the original
    # expression (path), $handler (handler) and test_on_text.
    # Returns a false value when the expression is not recognized and croaks
    # when the generated code does not compile.
    my( $xpath, $handler)= @_;
    my $xpath_original= $xpath;

    if( $DEBUG_HANDLER >=1) { _warn_debug_handler( "\n\nparsing path '$xpath'\n"); }

    # first check that the whole expression is made of separators, tags and predicates
    my $path_to_check= $xpath;
    $path_to_check=~ s{/?/?$REG_TAG_PART?\s*(?:$REG_PREDICATE\s*)?}{}g;
    if( $DEBUG_HANDLER && $path_to_check=~ /\S/) { _warn_debug_handler( "left: $path_to_check\n"); }
    return if( $path_to_check=~ /\S/);

    (my $xpath_to_display= $xpath)=~ s{(["{}'\[\]\@\$])}{\\$1}g;

    my @xpath_steps;
    my $last_token_is_sep;

    # the tokens are removed from $xpath one by one
    while( $xpath=~ s{^\s*
                       ( (//?)                                      # separator
                        | (?:$REG_TAG_PART\s*(?:$REG_PREDICATE\s*)?) # tag name and optional predicate
                        | (?:$REG_PREDICATE)                         # just a predicate
                       )
                      }
                      {}x
         )
      { # check that we have alternating separators and steps
        if( $2) # found a separator
          { if( $last_token_is_sep) { return 0; } # 2 separators in a row
            $last_token_is_sep= 1;
          }
        else
          { if( defined( $last_token_is_sep) && !$last_token_is_sep) { return 0; } # 2 steps in a row
            $last_token_is_sep= 0;
          }

        push @xpath_steps, $1;
      }
    if( $last_token_is_sep) { return 0; } # expression cannot end with a separator

    my $i=-1; # NOTE(review): not used in this sub, kept as it is in scope for the code compiled below - confirm before removing

    my $perlfunc= _join_n( $NO_WARNINGS . ';',
                           q|my( $stack)= @_; |,
                           q|my @current_elts= (scalar @$stack); |,
                           q|my @new_current_elts; |,
                           q|my $elt; |,
                           ($DEBUG_HANDLER >= 1) && (qq#warn q{checking path '$xpath_to_display'\n};#),
                         );

    my $last_tag='';
    my $anchored= $xpath_original=~ m{^\s*/(?!/)} ? 1 : 0;
    my $score={ type => $XPATH_TRIGGER, anchored => $anchored };
    my $flag= { test_on_text => 0 };
    my $sep='/'; # '/' or '//'
    # steps are processed from the last one to the first one
    while( my $xpath_step= pop @xpath_steps)
      { my( $tag, $predicate)= $xpath_step =~ m{^($REG_TAG_PART)?(?:\[(.*)\])?\s*$};
        $score->{steps}++;
        $tag||='*';

        my $warn_empty_stack= $DEBUG_HANDLER >= 2 ? qq{warn "return with empty stack\\n";} : '';

        if( $predicate)
          { if( $DEBUG_HANDLER >= 2) { _warn_debug_handler( "predicate is: '$predicate'\n"); }
            # changes $predicate (from an XPath expression to a Perl one)
            if( $predicate=~ m{^\s*$REG_NUMBER\s*$}) { croak "position selector [$predicate] not supported on twig_handlers"; }
            _parse_predicate_in_handler( $predicate, $flag, $score);
            if( $DEBUG_HANDLER >= 2) { _warn_debug_handler( "predicate becomes: '$predicate'\n"); }
          }

        my $tag_cond= _tag_cond( $tag);
        my $cond= join( " && ", grep { $_ } $tag_cond, $predicate) || 1;

        # only the tag itself is kept: the .class and #id parts are removed
        if( $css_sel && $tag=~ m{\.}) { $tag=~s{\.[^.]*$}{}; $tag ||='*'; }
        $tag=~ s{(.)#.+$}{$1};

        $last_tag ||= $tag;

        if( $sep eq '/')
          { # the step has to match the parent of a current candidate
            $perlfunc .= sprintf( _join_n( q#foreach my $current_elt (@current_elts) #,
                                           q# { next if( !$current_elt); #,
                                           q# $current_elt--; #,
                                           q# $elt= $stack->[$current_elt]; #,
                                           q# if( %s) { push @new_current_elts, $current_elt;} #,
                                           q# } #,
                                         ),
                                  $cond
                                );
          }
        elsif( $sep eq '//')
          { # the step can match any ancestor of a current candidate
            $perlfunc .= sprintf( _join_n( q#foreach my $current_elt (@current_elts) #,
                                           q# { next if( !$current_elt); #,
                                           q# $current_elt--; #,
                                           q# my $candidate= $current_elt; #,
                                           q# while( $candidate >=0) #,
                                           q# { $elt= $stack->[$candidate]; #,
                                           q# if( %s) { push @new_current_elts, $candidate;} #,
                                           q# $candidate--; #,
                                           q# } #,
                                           q# } #,
                                         ),
                                  $cond
                                );
          }
        my $warn= $DEBUG_HANDLER >= 2 ? _join_n( qq#warn qq%fail at cond '$cond'%;#) : '';
        $perlfunc .= sprintf( _join_n( q#unless( @new_current_elts) { %s return 0; } #,
                                       q#@current_elts= @new_current_elts; #,
                                       q#@new_current_elts=(); #,
                                     ),
                              $warn
                            );

        $sep= pop @xpath_steps;
      }

    if( $anchored) # there should be a better way, but this works
      {
        my $warn= $DEBUG_HANDLER >= 2 ? _join_n( qq#warn qq{fail, stack not empty};#) : '';
        $perlfunc .= sprintf( _join_n( q#if( ! grep { $_ == 0 } @current_elts) { %s return 0;}#), $warn);
      }

    $perlfunc.= qq{warn "handler for '$xpath_to_display' triggered\\n";\n} if( $DEBUG_HANDLER >=2);
    $perlfunc.= qq{return q{$xpath_original};\n};
    _warn_debug_handler( "\nperlfunc:\n$perlfunc\n") if( $DEBUG_HANDLER>=1);
    my $s= eval "sub { $perlfunc }";
    if( $@)
      { # $xpath was consumed by the tokenizer above, the original expression is reported
        croak "wrong handler condition '$xpath_original' ($@);"
      }

    _warn_debug_handler( "last tag: '$last_tag', test_on_text: '$flag->{test_on_text}'\n") if( $DEBUG_HANDLER >=1);
    _warn_debug_handler( "score: ", join( ' ', map { "$_: $score->{$_}" } sort keys %$score), "\n") if( $DEBUG_HANDLER >=1);
    return { tag=> $last_tag, score => $score, trigger => $s, path => $xpath_original, handler => $handler, test_on_text => $flag->{test_on_text} };
  }
1758
|
|
|
|
|
|
|
|
1759
|
|
|
|
|
|
|
# Glue the given strings together with newlines and end the result with a
# final newline. With no arguments the result is the empty string.
sub _join_n {
    my @lines = @_;
    return join "\n", @lines, '';
}
1760
|
|
|
|
|
|
|
|
1761
|
|
|
|
|
|
|
# Build the Perl source of the test for the "tag" part of a step.
#
# The tag part is either "tag#id" or, when $css_sel is on, "tag.class";
# otherwise it is taken as a plain tag name. A tag of '*' (or an empty tag)
# adds no tag test. The generated conditions test $elt, and are joined with
# ' && ' in the order tag, class, id. Returns '' when there is nothing to test.
sub _tag_cond {
    my ($full_tag) = @_;

    my ($tag, $class, $id);
    if ($full_tag =~ m{^(.+)#(.+)$}) {
        # tag#id
        $tag = $1;
        $id  = $2;
    }
    elsif ($css_sel) {
        # tag.class (the class part is optional)
        ($tag, $class) = $full_tag =~ m{^(.*?)(?:\.([^.]*))?$};
    }
    else {
        $tag = $full_tag;
    }

    my @conds;
    push @conds, qq#(\$elt->{'$ST_TAG'} eq "$tag")#           if $tag && $tag ne '*';
    push @conds, qq#(\$elt->{class}=~ m{(^| )$class( |\$)})#  if defined $class;
    push @conds, qq#(\$elt->{id} eq "$id")#                   if defined $id;

    return join ' && ', @conds;
}
1779
|
|
|
|
|
|
|
|
1780
|
|
|
|
|
|
|
# Translate a handler predicate into Perl source, in place.
#
#   $_[0]  the predicate text; it is rewritten through the alias, token by token
#   $_[1]  flags hashref; test_on_text is set to 1 when a text test is found
#   $_[2]  score hashref; predicates is bumped once per token, tests once per
#          and/or
#
# The value of the sub is the value of the substitution itself.
sub _parse_predicate_in_handler {
    my ($flag, $score) = @_[1..2];

    $_[0] =~ s{(   ($REG_STRING)                            # strings
                  |\@($REG_TAG_NAME)(\s* $REG_MATCH \s* $REG_REGEXP) # @att and regexp
                  |\@($REG_TAG_NAME)(?=\s*(?:[><=!]))       # @att followed by a comparison operator
                  |\@($REG_TAG_NAME)                        # @att (not followed by a comparison operator)
                  |=~|!~                                    # matching operators
                  |([><]=?|=|!=)(?=\s*[\d+-])               # test before a number
                  |([><]=?|=|!=)                            # test, other cases
                  |($REG_FUNCTION)                          # no arg functions
                  # this bit is a mess, but it is the only solution with this half-baked parser
                  |(string\(\s*$REG_NAME\s*\)\s*$REG_MATCH\s*$REG_REGEXP)  # string( child)=~ /regexp/
                  |(string\(\s*$REG_NAME\s*\)\s*$REG_COMP\s*$REG_STRING)   # string( child) = "value" (or other test)
                  |(string\(\s*$REG_NAME\s*\)\s*$REG_COMP\s*$REG_NUMBER)   # string( child) = nb (or other test)
                  |(and|or)
                 # |($REG_NAME(?=\s*(and|or|$)))            # nested tag name (needs to be after all other unquoted strings)
                  |($REG_TAG_IN_PREDICATE)                  # nested tag name (needs to be after all other unquoted strings)

               )}
              {
                # copy the captures right away: the tests below run their own matches
                my ($token, $str, $att_re_name, $att_re_regexp, $att, $bare_att,
                    $num_test, $alpha_test, $func, $str_regexp,
                    $str_test_alpha, $str_test_num, $and_or, $tag)
                    = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14);

                $score->{predicates}++;

                # remember tests on text (they are not always allowed)
                $flag->{test_on_text} = 1
                    if $func || $str_regexp || $str_test_num || $str_test_alpha;

                my $perl;
                if (defined $str) {
                    # quoted strings go through untouched
                    $perl = $token;
                }
                elsif ($tag) {
                    $perl = qq{(\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->has_child( '$tag'))};
                }
                elsif ($att) {
                    $perl = $att =~ m{^#}
                          ? qq{ (\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->{att}->{'$att'})}
                          : qq{\$elt->{'$att'}};
                }
                elsif ($att_re_name) {
                    $perl = $att_re_name =~ m{^#}
                          ? qq{ (\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->{att}->{'$att_re_name'}$att_re_regexp)}
                          : qq{\$elt->{'$att_re_name'}$att_re_regexp};
                }
                elsif ($bare_att) {
                    $perl = $bare_att =~ m{^#}
                          ? qq{(\$elt->{'$ST_ELT'} && defined(\$elt->{'$ST_ELT'}->{att}->{'$bare_att'}))}
                          : qq{defined( \$elt->{'$bare_att'})};
                }
                elsif ($num_test && ($num_test eq '=')) {
                    # the other numeric tests fall through and are emitted unchanged
                    $perl = "==";
                }
                elsif ($alpha_test) {
                    $perl = $PERL_ALPHA_TEST{$alpha_test};
                }
                elsif ($func && $func =~ m{^string}) {
                    $perl = "\$elt->{'$ST_ELT'}->text";
                }
                elsif ($str_regexp && $str_regexp =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)}) {
                    $perl = "defined( _first_n { \$_->text $2 $3 } 1, \$elt->{'$ST_ELT'}->_children( '$1'))";
                }
                elsif ($str_test_alpha && $str_test_alpha =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_COMP)\s*($REG_STRING)}) {
                    my ($tag, $op, $str) = ($1, $2, $3);
                    $str =~ s{(?<=.)'(?=.)}{\\'}g;    # escape a quote within the string
                    $str =~ s{^"}{'};
                    $str =~ s{"$}{'};
                    $perl = "defined( _first_n { \$_->text $PERL_ALPHA_TEST{$op} $str } 1, \$elt->{'$ST_ELT'}->children( '$tag'))";
                }
                elsif ($str_test_num && $str_test_num =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_COMP)\s*($REG_NUMBER)}) {
                    my $test = ($2 eq '=') ? '==' : $2;
                    $perl = "defined( _first_n { \$_->text $test $3 } 1, \$elt->{'$ST_ELT'}->children( '$1'))";
                }
                elsif ($and_or) {
                    $score->{tests}++;
                    $perl = $and_or eq 'and' ? '&&' : '||';
                }
                else {
                    $perl = $token;
                }

                $perl;
              }gexs;
}
1842
|
|
|
|
|
|
|
|
1843
|
|
|
|
|
|
|
|
1844
|
|
|
|
|
|
|
# Store $handler in the twig as its character handler (twig_char_handler).
# The value of the call is the handler that was just stored.
sub setCharHandler {
    my ($t, $handler) = @_;
    return $t->{twig_char_handler} = $handler;
}
1848
|
|
|
|
|
|
|
|
1849
|
|
|
|
|
|
|
|
1850
|
|
|
|
|
|
|
# Empty a handlers structure: drop the handlers, path_handlers and
# subpath_handlers entries, then attcond_handlers. When attcond_handlers was
# set, attcond_handlers_exp is replaced by a fresh empty array first.
sub _reset_handlers {
    my ($handlers) = @_;

    for my $slot (qw( handlers path_handlers subpath_handlers )) {
        delete $handlers->{$slot};
    }

    if ($handlers->{attcond_handlers}) {
        $handlers->{attcond_handlers_exp} = [];
    }
    return delete $handlers->{attcond_handlers};
}
1858
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
# Build a new handlers structure from a { path => handler } hashref by
# registering every entry through _set_handler. Returns the new structure,
# or nothing when no hashref is given.
sub _set_handlers {
    my ($handlers) = @_;
    return unless $handlers;

    my $compiled = {};
    for my $path (keys %{$handlers}) {
        _set_handler($compiled, $path, $handlers->{$path});
    }
    return $compiled;
}
1867
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
|
1869
|
|
|
|
|
|
|
# Register one handler for $path in the twig's twig_handlers structure,
# creating that structure first if it does not exist yet.
# Returns whatever _set_handler returns.
sub setTwigHandler {
    my ($t, $path, $handler) = @_;
    $t->{twig_handlers} = {} unless $t->{twig_handlers};
    return _set_handler($t->{twig_handlers}, $path, $handler);
}
1874
|
|
|
|
|
|
|
|
1875
|
|
|
|
|
|
|
# Replace the whole set of twig handlers with the ones in $handlers
# (a { path => handler } hashref) and return the previous structure
# (undef when there was none).
# Note: the structure returned is the one that has just been emptied by
# _reset_handlers.
sub setTwigHandlers {
    my ($t, $handlers) = @_;

    my $previous_handlers = $t->{twig_handlers};
    $previous_handlers = undef unless $previous_handlers;

    _reset_handlers($t->{twig_handlers});
    $t->{twig_handlers} = _set_handlers($handlers);

    return $previous_handlers;
}
1882
|
|
|
|
|
|
|
|
1883
|
|
|
|
|
|
|
# Register one start-tag handler for $path, creating the
# twig_starttag_handlers structure first if needed.
# Returns whatever _set_handler returns.
sub setStartTagHandler {
    my ($t, $path, $handler) = @_;
    $t->{twig_starttag_handlers} = {} unless $t->{twig_starttag_handlers};
    return _set_handler($t->{twig_starttag_handlers}, $path, $handler);
}
1888
|
|
|
|
|
|
|
|
1889
|
|
|
|
|
|
|
# Replace the whole set of start-tag handlers with the ones in $handlers
# and return the previous structure (undef when there was none).
# Note: the structure returned is the one that has just been emptied by
# _reset_handlers.
sub setStartTagHandlers {
    my ($t, $handlers) = @_;

    my $previous_handlers = $t->{twig_starttag_handlers};
    $previous_handlers = undef unless $previous_handlers;

    _reset_handlers($t->{twig_starttag_handlers});
    $t->{twig_starttag_handlers} = _set_handlers($handlers);

    return $previous_handlers;
}
1896
|
|
|
|
|
|
|
|
1897
|
|
|
|
|
|
|
# Register the action to apply to elements matching $path in the
# twig_ignore_elts_handlers structure, creating it first if needed.
# Returns whatever _set_handler returns.
sub setIgnoreEltsHandler {
    my ($t, $path, $action) = @_;
    $t->{twig_ignore_elts_handlers} = {} unless $t->{twig_ignore_elts_handlers};
    return _set_handler($t->{twig_ignore_elts_handlers}, $path, $action);
}
1902
|
|
|
|
|
|
|
|
1903
|
|
|
|
|
|
|
# Replace the whole set of ignore-elts handlers with the ones in $handlers
# and return the previous structure, exactly as it was stored in the twig.
# Note: the structure returned is the one that has just been emptied by
# _reset_handlers.
sub setIgnoreEltsHandlers {
    my ($t, $handlers) = @_;

    my $previous_handlers = $t->{twig_ignore_elts_handlers};

    _reset_handlers($t->{twig_ignore_elts_handlers});
    $t->{twig_ignore_elts_handlers} = _set_handlers($handlers);

    return $previous_handlers;
}
1910
|
|
|
|
|
|
|
|
1911
|
|
|
|
|
|
|
# Register one end-tag handler for $path, creating the
# twig_endtag_handlers structure first if needed.
# Returns whatever _set_handler returns.
sub setEndTagHandler {
    my ($t, $path, $handler) = @_;
    $t->{twig_endtag_handlers} = {} unless $t->{twig_endtag_handlers};
    return _set_handler($t->{twig_endtag_handlers}, $path, $handler);
}
1916
|
|
|
|
|
|
|
|
1917
|
|
|
|
|
|
|
# Replace the whole set of end-tag handlers with the ones in $handlers
# and return the previous structure, exactly as it was stored in the twig.
# Note: the structure returned is the one that has just been emptied by
# _reset_handlers.
sub setEndTagHandlers {
    my ($t, $handlers) = @_;

    my $previous_handlers = $t->{twig_endtag_handlers};

    _reset_handlers($t->{twig_endtag_handlers});
    $t->{twig_endtag_handlers} = _set_handlers($handlers);

    return $previous_handlers;
}
1924
|
|
|
|
|
|
|
|
1925
|
|
|
|
|
|
|
# Replace the twig roots with the ones in $handlers ({ path => value }).
#
# Every path becomes a root; in addition, a path whose value is a code
# reference is also registered as a regular twig handler. Roots that use a
# test on text are rejected (see _check_illegal_twig_roots_handlers).
# Returns the previous roots structure, which has been emptied by
# _reset_handlers.
sub setTwigRoots {
    my ($t, $handlers) = @_;

    my $previous_roots = $t->{twig_roots};
    _reset_handlers($t->{twig_roots});
    $t->{twig_roots} = _set_handlers($handlers);

    _check_illegal_twig_roots_handlers($t->{twig_roots});

    for my $path (keys %{$handlers}) {
        $t->{twig_handlers} ||= {};

        # only code refs double as handlers
        next unless ref($handlers->{$path}) && isa($handlers->{$path}, 'CODE');
        _set_handler($t->{twig_handlers}, $path, $handlers->{$path});
    }

    return $previous_roots;
}
1941
|
|
|
|
|
|
|
|
1942
|
|
|
|
|
|
|
# Croak if any handler stored under xpath_handler in the given handlers
# structure is flagged test_on_text: such conditions are not accepted for
# twig roots. Returns nothing otherwise.
sub _check_illegal_twig_roots_handlers {
    my ($handlers) = @_;

    for my $tag_handlers (values %{$handlers->{xpath_handler}}) {
        croak "string() condition not supported on twig_roots option"
            if grep { $_->{test_on_text} } @$tag_handlers;
    }
    return;
}
1952
|
|
|
|
|
|
|
|
1953
|
|
|
|
|
|
|
|
1954
|
|
|
|
|
|
|
# Init handler: ties the twig to the parser object that is about to run.
#
# Croaks if the twig is already marked as parsing. Otherwise marks it as
# parsing, stores the parser in the twig (weakened when $weakrefs is on),
# drops what a previous parse may have left (dtd, doctype, xmldecl, root)
# and lets the twig set up its output filehandle.
sub _twig_init {
    # warn " in _twig_init...\n"; # DEBUG handler

    my ($p) = @_;
    my $t = $p->{twig};

    croak "cannot reuse a twig that is already parsing" if $t->{twig_parsing};
    $t->{twig_parsing} = 1;

    $t->{twig_parser} = $p;
    weaken($t->{twig_parser}) if $weakrefs;

    # in case they had been created by a previous parse
    for my $leftover (qw( twig_dtd twig_doctype twig_xmldecl twig_root )) {
        delete $t->{$leftover};
    }

    # if needed set the output filehandle
    $t->_set_fh_to_twig_output_fh();
    return;
}
1977
|
|
|
|
|
|
|
|
1978
|
|
|
|
|
|
|
# Run parse inside an eval so that the parser's death is caught.
# Returns the twig when the parse went through; otherwise returns the result
# of _reset_twig_after_error. $@ is left for the caller to inspect.
sub safe_parse {
    my $t = shift;

    eval { $t->parse(@_); };

    return $t unless $@;
    return $t->_reset_twig_after_error;
}
1984
|
|
|
|
|
|
|
|
1985
|
|
|
|
|
|
|
# Run parsefile inside an eval so that the parser's death is caught.
# Returns the twig when the parse went through; otherwise returns the result
# of _reset_twig_after_error. $@ is left for the caller to inspect.
sub safe_parsefile {
    my $t = shift;

    eval { $t->parsefile(@_); };

    return $t unless $@;
    return $t->_reset_twig_after_error;
}
1990
|
|
|
|
|
|
|
|
1991
|
|
|
|
|
|
|
# Put a twig back in a state where it can be used for a new parse.
#
# Clears the parsing flag, drops the per-parse state kept in the twig,
# deletes the current root (if any) and forgets it. Returns the twig.
sub _reset_twig {
    my ($t) = @_;

    $t->{twig_parsing} = 0;

    # twig_stored_space is kept in the list for backward compatibility
    for my $state (qw( twig_current extra_data twig_dtd twig_in_pcdata
                       twig_in_cdata twig_stored_space twig_entity_list )) {
        delete $t->{$state};
    }

    # the whitespace buffer used while building the tree is twig_stored_spaces
    # (plural): empty it too, so that spaces held back by an interrupted parse
    # cannot end up in the next document
    $t->{twig_stored_spaces} = '';

    $t->root->delete if $t->root;
    delete $t->{twig_root};

    return $t;
}
2006
|
|
|
|
|
|
|
|
2007
|
|
|
|
|
|
|
# Reset the twig after a failed parse. Always returns undef, also in list
# context (a one-element list), which is what the safe_parse methods hand
# back to their caller on failure.
sub _reset_twig_after_error {
    my ($t) = @_;
    $t->_reset_twig;
    return undef;
}
2012
|
|
|
|
|
|
|
|
2013
|
|
|
|
|
|
|
|
2014
|
|
|
|
|
|
|
# Decide what happens to the whitespace held in twig_stored_spaces: it is
# either inserted as PCDATA in the current element or thrown away.
#
# The spaces are inserted unless twig_discard_all_spaces is set, and then only
# if the space policy for the current tag is true, or the spaces contain no
# newline, or twig_preserve_space is set. The per-tag policy is computed once
# with _space_policy and cached in twig_space_policy.
# Returns nothing.
sub _add_or_discard_stored_spaces {
    my ($t) = @_;

    $t->{twig_right_after_root} = 0; #XX

    # ugly hack, with ignore on, twig_current can disappear
    my $current = $t->{twig_current};
    return unless $current;

    return unless length $t->{twig_stored_spaces};
    my $current_gi = $XML::Twig::index2gi[$current->{'gi'}];

    unless ($t->{twig_discard_all_spaces}) {
        unless (defined $t->{twig_space_policy}->{$current_gi}) {
            $t->{twig_space_policy}->{$current_gi} = _space_policy($t, $current_gi);
        }

        my $keep =    $t->{twig_space_policy}->{$current_gi}
                   || ($t->{twig_stored_spaces} !~ m{\n})
                   || $t->{twig_preserve_space};
        _insert_pcdata($t, $t->{twig_stored_spaces}) if $keep;
    }

    $t->{twig_stored_spaces} = '';

    return;
}
2034
|
|
|
|
|
|
|
|
2035
|
|
|
|
|
|
|
# Default Start handler: creates the element for an opening tag and hooks it
# into the tree that is being built.
#
# Receives the parser object, the tag name and the flat list of attribute
# name/value pairs. In order, it: deals with pending whitespace, leaves any
# open PCDATA, optionally re-parses the tag from its text, adds DTD defaults,
# filters the input, maps namespaces, creates the element, pushes a context
# on _twig_context_stack, links the element to its parent (or makes it the
# root), records its id, then runs the start-tag and ignore-elts handlers.
# Returns nothing.
sub _twig_start {
    # warn " in _twig_start...\n"; # DEBUG handler

    #foreach my $s (@_) { next if ref $s; warn "$s: ", is_utf8( $s) ? "has flag" : "FLAG NOT SET"; } # YYY

    my ($p, $gi, @att) = @_;
    my $t = $p->{twig};

    # whitespace that was held back is either turned into a pcdata element
    # or dropped, depending on the space policy
    _add_or_discard_stored_spaces($t);
    my $parent = $t->{twig_current};

    # if we were parsing PCDATA then we exit the pcdata
    if ($t->{twig_in_pcdata}) {
        $t->{twig_in_pcdata} = 0;
        delete $parent->{'twig_current'};
        $parent = $parent->{parent};
    }

    # if we choose to keep the encoding then we need to parse the tag
    if (my $tag_parser = $t->{parse_start_tag}) {
        ($gi, @att) = $tag_parser->($p->original_string);
    }
    elsif ($t->{twig_entities_in_attribute}) {
        ($gi, @att) = _parse_start_tag($p->recognized_string);
        $t->{twig_entities_in_attribute} = 0;
    }

    # if we are using an external DTD, we need to fill the default attributes
    _fill_default_atts($t, $gi, \@att) if $t->{twig_read_external_dtd};

    # filter the input data if need be
    if (my $filter = $t->{twig_input_filter}) {
        $gi = $filter->($gi);
        for my $att (@att) {
            $att = $filter->($att);
        }
    }

    my $ns_decl;
    $ns_decl = _replace_ns($t, \$gi, \@att) if $t->{twig_map_xmlns};

    my $elt = $t->{twig_elt_class}->new($gi);
    $elt->set_atts(@att);

    # now we can store the tag and atts
    my $context = { $ST_TAG => $gi, $ST_ELT => $elt, @att };
    $context->{$ST_NS} = $ns_decl if $ns_decl;
    weaken($context->{$ST_ELT}) if $weakrefs;
    push @{$t->{_twig_context_stack}}, $context;

    # the new element becomes the current one
    delete $parent->{'twig_current'} if $parent;
    $t->{twig_current}     = $elt;
    $elt->{'twig_current'} = 1;

    if ($parent) {
        # append the element after the parent's last child
        if (my $prev_sibling = $parent->{last_child}) {
            $prev_sibling->{next_sibling} = $elt;
            $elt->{prev_sibling}          = $prev_sibling;
            weaken($elt->{prev_sibling}) if $XML::Twig::weakrefs;
        }

        $elt->{parent} = $parent;
        weaken($elt->{parent}) if $XML::Twig::weakrefs;

        $parent->{first_child} = $elt unless $parent->{first_child};

        delete $parent->{empty};
        $parent->{last_child} = $elt;
        weaken($parent->{last_child}) if $XML::Twig::weakrefs;
    }
    else {
        # processing root
        $t->set_root($elt);

        # call dtd handler if need be
        if (defined $t->{twig_dtd_handler}) {
            $t->{twig_dtd_handler}->($t, $t->{twig_dtd});
        }

        # set this so we can catch external entities
        # (the handler was modified during DTD processing)
        my $default_handler = $t->{twig_default_print} ? \&_twig_print
                            : $t->{twig_roots}         ? sub { return }
                            :                            \&_twig_default;
        $p->setHandlers(Default => $default_handler);
    }

    # the tag text ends with "/>" for an empty element
    $elt->{empty} = $p->recognized_string =~ m{/\s*>$}s ? 1 : 0;

    $elt->{extra_data} = $t->{extra_data} if $t->{extra_data};
    $t->{extra_data}   = '';

    # if the element is ID-ed then store that info
    my $id = $elt->{'att'}->{$ID};
    if (defined $id) {
        $t->{twig_id_list}->{$id} = $elt;
        weaken($t->{twig_id_list}->{$id}) if $weakrefs;
    }

    # call user handler if need be
    if ($t->{twig_starttag_handlers}) {
        # call all appropriate handlers
        my @handlers = _handler($t, $t->{twig_starttag_handlers}, $gi);

        local $_ = $elt;

        # a handler returning false stops the chain
        for my $handler (@handlers) {
            last unless $handler->($t, $elt);
        }

        # call _all_ handler if needed
        if (my $all = $t->{twig_starttag_handlers}->{handlers}->{$ALL}) {
            $all->($t, $elt);
        }
    }

    # check if the tag is in the list of tags to be ignored
    if ($t->{twig_ignore_elts_handlers}) {
        my @actions = _handler($t, $t->{twig_ignore_elts_handlers}, $gi);

        # only the first handler counts, it contains the action (discard/print/string)
        $t->ignore($elt, $actions[0]) if @actions;
    }

    my $space = $elt->{'att'}->{'xml:space'};
    $t->{twig_preserve_space}++ if $space && ($space eq 'preserve');

    return;
}
2158
|
|
|
|
|
|
|
|
2159
|
|
|
|
|
|
|
# Namespace processing for a start tag.
#
#   $t     the twig
#   $gi    reference to the tag name, updated in place
#   $atts  reference to the flat attribute list, updated in place
#
# Each prefix newly declared on the element is recorded (prefix => uri) and
# its declaration is pushed back on the attribute list, under the mapped
# prefix when there is one and twig_keep_original_prefix is off. The tag and
# attribute names are then either prefixed with their mapped prefix (original
# prefixes kept, with the mapped => original table pushed as '#original_gi')
# or passed through _replace_prefix.
# Returns the hashref of new declarations (undef when there are none).
sub _replace_ns {
    my ($t, $gi, $atts) = @_;

    my $decls;
    for my $new_prefix ($t->parser->new_ns_prefixes) {
        my $uri = $t->parser->expand_ns_prefix($new_prefix);
        $decls->{$new_prefix} = $uri;

        # replace the prefix if it is mapped
        if (!$t->{twig_keep_original_prefix}) {
            my $mapped_prefix = $t->{twig_map_xmlns}->{$uri};
            $new_prefix = $mapped_prefix if $mapped_prefix;
        }

        # now put the namespace declaration back in the element
        my $decl_name = $new_prefix eq '#default' ? "xmlns" : "xmlns:$new_prefix";
        push @$atts, $decl_name => $uri;
    }

    if ($t->{twig_keep_original_prefix}) {
        # things become more complex: we need to find the original prefix
        # and store both prefixes
        my $map_att;

        my $gi_info = $t->_ns_info($$gi);
        if ($gi_info->{mapped_prefix}) {
            $$gi = "$gi_info->{mapped_prefix}:$$gi";
            $map_att->{$gi_info->{mapped_prefix}} = $gi_info->{prefix};
        }

        # the list alternates names and values: only names are touched
        my $is_name = 1;
        for (@$atts) {
            if ($is_name) {
                my $att_info = $t->_ns_info($_);
                if ($att_info->{mapped_prefix}) {
                    $_ = "$att_info->{mapped_prefix}:$_";
                    $map_att->{$att_info->{mapped_prefix}} = $att_info->{prefix};
                }
            }
            $is_name = !$is_name;
        }

        push @$atts, '#original_gi', $map_att if $map_att;
    }
    else {
        $$gi = $t->_replace_prefix($$gi);

        # the list alternates names and values: only names are touched
        my $is_name = 1;
        for (@$atts) {
            $_ = $t->_replace_prefix($_) if $is_name;
            $is_name = !$is_name;
        }
    }

    return $decls;
}
2210
|
|
|
|
|
|
|
|
2211
|
|
|
|
|
|
|
|
2212
|
|
|
|
|
|
|
# Gather namespace information about a name, as a hashref:
#   uri            what the parser's namespace method returns for the name
#   prefix         a prefix currently bound to that uri (see _a_proper_ns_prefix)
#   mapped_prefix  the twig_map_xmlns entry for the uri, or else the prefix
# When the name has no uri only the uri key is present.
# will only work if called from a start or end tag handler
sub _ns_info {
    my ($t, $name) = @_;

    my $p   = $t->parser;
    my $uri = $p->namespace($name);

    my $ns_info = { uri => $uri };
    return $ns_info unless $uri;

    my $prefix = _a_proper_ns_prefix($p, $uri);
    $ns_info->{prefix}        = $prefix;
    $ns_info->{mapped_prefix} = $t->{twig_map_xmlns}->{$uri} || $prefix;

    return $ns_info;
}
2226
|
|
|
|
|
|
|
|
2227
|
|
|
|
|
|
|
# Find a prefix that the parser currently expands to $uri: the first one, in
# the order given by current_ns_prefixes. Returns nothing when no prefix
# matches.
sub _a_proper_ns_prefix {
    my ($p, $uri) = @_;

    for my $candidate ($p->current_ns_prefixes) {
        return $candidate if $p->expand_ns_prefix($candidate) eq $uri;
    }
    return;
}
2235
|
|
|
|
|
|
|
|
2236
|
|
|
|
|
|
|
# returns the uri bound to a prefix in the original document
# only works in a handler
# can be used to deal with xsi:type attributes
#
# The context stack is searched from the innermost element outwards; the
# first true binding found for $prefix is returned. Returns nothing when no
# open element binds the prefix.
sub original_uri {
    my ($t, $prefix) = @_;
    my $ST_NS = '##ns';

    for my $context (reverse @{$t->{_twig_context_stack}}) {
        # most elements declare no namespace: skip them instead of
        # dereferencing a missing declaration table
        my $ns = $context->{$ST_NS} or next;
        return $ns->{$prefix} if $ns->{$prefix};
    }
    return;
}
2246
|
|
|
|
|
|
|
|
2247
|
|
|
|
|
|
|
|
2248
|
|
|
|
|
|
|
# Add to the attribute list the attributes that the DTD gives a default
# value for and that the element does not carry.
#
#   $t     the twig (its twig_dtd holds the attribute declarations)
#   $gi    the tag name
#   $atts  reference to the flat attribute list, extended in place
#
# Only literal defaults are used: they are stored with their quotes, which
# are removed here. Declarations whose default is a keyword such as #IMPLIED
# or #REQUIRED carry no value and are skipped.
# Returns nothing.
sub _fill_default_atts {
    my ($t, $gi, $atts) = @_;

    my $dtd     = $t->{twig_dtd};
    my $attlist = $dtd->{att}->{$gi};
    my %value   = @$atts;

    foreach my $att (keys %$attlist) {
        next if exists $value{$att};

        my $default = $attlist->{$att}->{default};
        next unless defined $default;

        # a usable default is a quoted string; anything else (#IMPLIED,
        # #REQUIRED) must not be turned into a value
        next unless $default =~ m{\A(["'])(.*)\1\z}s;

        push @$atts, $att, $2;
    }
    return;
}
2265
|
|
|
|
|
|
|
|
2266
|
|
|
|
|
|
|
|
2267
|
|
|
|
|
|
|
# the default function to parse a start tag (in keep_encoding mode)
# can be overridden with the parse_start_tag method
# only works for 1-byte character sets
#
# Takes the text of the tag and returns the tag name followed by the flat
# list of attribute names and values (quotes removed). Croaks when no tag
# name can be found at the start of the text.
sub _parse_start_tag {
    my ($string) = @_;

    # get the gi (between < and the first space, / or > character)
    #if( $string=~ s{^<\s*([^\s>/]*)[\s>/]*}{}s)
    $string =~ s{^<\s*($REG_TAG_NAME)\s*[\s>/]}{}s
        or croak "error parsing tag '$string'";
    my $gi = $1;

    # eat the attributes one by one from the front of what is left
    my @atts;
    while ($string =~ s{^([^\s=]*)\s*=\s*(["'])(.*?)\2\s*}{}s) {
        push @atts, $1, $3;
    }

    return $gi, @atts;
}
2284
|
|
|
|
|
|
|
|
2285
|
|
|
|
|
|
|
# Records $elt as the root of the twig and, when $elt is true, points the
# element back at its twig (through a weak reference if weak references are
# enabled). Returns the twig.
sub set_root {
    my ($t, $elt) = @_;

    $t->{twig_root} = $elt;
    return $t unless $elt;

    $elt->{twig} = $t;
    weaken( $elt->{twig}) if $weakrefs;

    return $t;
}
2294
|
|
|
|
|
|
|
|
2295
|
|
|
|
|
|
|
# End-tag handler: closes the current element (and the text node being
# built, if any), makes its parent the current element, then either runs or
# queues the handlers registered for the element, switches the parser
# handlers back when a twig_roots subtree ends, and pops the context stack.
sub _twig_end {
    # warn " in _twig_end...\n"; # DEBUG handler
    my ($p, $gi) = @_;
    my $t = $p->{twig};

    # a text node is pending: hand it to the text handler, if there is one
    if ($t->{twig_in_pcdata} && (my $on_text = $t->{TwigHandlers}->{$TEXT})) {
        local $_ = $t->{twig_current};
        $on_text->( $t, $_) if $_;
    }

    $gi = $t->_replace_prefix( $gi) if $t->{twig_map_xmlns};

    _add_or_discard_stored_spaces( $t);

    # the element being closed is the current one...
    my $closed = $t->{twig_current};
    delete $closed->{'twig_current'};

    # ...unless we were in a text node, in which case it is its parent
    if ($t->{twig_in_pcdata}) {
        $t->{twig_in_pcdata} = 0;
        $closed = $closed->{parent} if $closed->{parent};
        delete $closed->{'twig_current'};
    }

    # the parent becomes the new current element
    my $parent = $closed->{parent};
    $t->{twig_current} = $parent;
    if ($parent) {
        $parent->{'twig_current'} = 1;
        if ($parent->{twig_to_be_normalized}) {
            $parent->normalize;
            $parent->{twig_to_be_normalized} = 0;
        }
    }

    # comments/PIs seen just before the end tag are attached to the element
    if ($t->{extra_data}) {
        $closed->_set_extra_data_before_end_tag( $t->{extra_data});
        $t->{extra_data} = '';
    }

    if ($t->{twig_handlers}) {
        my @handlers = _handler( $t, $t->{twig_handlers}, $gi);

        if ($t->{twig_tdh}) {
            # top-down mode: handlers are only recorded, to be run later
            push @{$t->{twig_handlers_to_trigger}}, [ $closed, \@handlers ] if @handlers;
            if (my $all = $t->{twig_handlers}->{handlers}->{$ALL}) {
                push @{$t->{twig_handlers_to_trigger}}, [ $closed, [$all] ];
            }
        }
        else {
            local $_ = $closed;    # so $_ can be used in the handlers

            # a handler returning false stops the chain
            for my $handler (@handlers) {
                last unless $handler->( $t, $closed);
            }

            # the _all_ handler is called regardless of the chain's outcome
            my $all = $t->{twig_handlers}->{handlers}->{$ALL};
            $all->( $t, $closed) if $all;

            $t->{twig_right_after_root} = 0 if @handlers || $all;
        }
    }

    # leaving a twig_roots subtree: go back to the out-of-root handlers
    if ($t->{twig_root_depth} and ($p->depth == $t->{twig_root_depth})) {
        if ($t->{twig_default_print}) {
            # select the proper fh (and store the currently selected one)
            $t->_set_fh_to_twig_output_fh();
            # NOTE(review): this parses as (!$p->depth) == 1, not as
            # !($p->depth == 1); kept as is since the intent is unclear
            if( !$p->depth==1) { $t->{twig_right_after_root}=1; } #XX
            if ($t->{twig_keep_encoding}) {
                $p->setHandlers( %twig_handlers_roots_print_original);
            }
            else {
                $p->setHandlers( %twig_handlers_roots_print);
            }
        }
        else {
            $p->setHandlers( %twig_handlers_roots);
        }
    }

    my $space = $closed->{'att'}->{'xml:space'};
    $t->{twig_preserve_space}-- if $space && ($space eq 'preserve');

    pop @{$t->{_twig_context_stack}};
    return;
}
2379
|
|
|
|
|
|
|
|
2380
|
|
|
|
|
|
|
# Runs the handlers queued in twig_handlers_to_trigger. Entries are
# [ element, [ handlers ] ] pairs; they are processed in the order given by
# the elements' cmp method, and for each element the chain stops at the
# first handler that returns false. The queue itself is left untouched.
sub _trigger_tdh {
    my ($t) = @_;

    return unless @{$t->{twig_handlers_to_trigger}};

    my @queue = sort { $a->[0]->cmp( $b->[0]) } @{$t->{twig_handlers_to_trigger}};
    for my $entry (@queue) {
        my ($handled_elt, $callbacks) = @$entry;
        for my $callback (@$callbacks) {
            local $_ = $handled_elt;    # handlers can use $_
            last unless $callback->( $t, $handled_elt);
        }
    }

    return;
}
2393
|
|
|
|
|
|
|
|
2394
|
|
|
|
|
|
|
# Returns the list of handlers that can be activated for an element
# (either CODE refs, or 1's for twig_roots); the list is empty when nothing
# applies.
#   $t        - the twig
#   $handlers - handler table (reads its xpath_handler and handlers entries)
#   $gi       - the element name
sub _handler {
    my ($t, $handlers, $gi) = @_;

    # candidates are registered under the element name or under '*'
    my @candidates = map { @$_ } grep { $_ }
        $handlers->{xpath_handler}->{$gi}, $handlers->{xpath_handler}->{'*'};

    # keep those whose trigger accepts the current context
    my @found_handlers = ();
    for my $candidate (@candidates) {
        next unless $candidate->{trigger}->( $t->{_twig_context_stack});
        push @found_handlers, $candidate->{handler};
    }

    # if no handler was found, fall back to the default handler if defined
    if (!@found_handlers && defined $handlers->{handlers}->{$DEFAULT}) {
        push @found_handlers, $handlers->{handlers}->{$DEFAULT};
    }

    # without chaining only the first handler is kept
    if (@found_handlers and $t->{twig_do_not_chain_handlers}) {
        splice @found_handlers, 1;
    }

    return @found_handlers;    # empty if no handler found
}
2421
|
|
|
|
|
|
|
|
2422
|
|
|
|
|
|
|
|
2423
|
|
|
|
|
|
|
# Rewrites a name according to the namespace it is in: the name gets the
# prefix mapped to its namespace uri (through the twig_map_xmlns table or
# %DEFAULT_URI2NS), or failing that the prefix found by _a_proper_ns_prefix.
# Names that are in no namespace are returned unchanged.
sub _replace_prefix {
    my ($t, $name) = @_;
    my $parser = $t->parser;

    my $uri = $parser->namespace( $name);
    # try to get the namespace from default if none is found (for attributes)
    # this should probably be an option
    if (!$uri and ($name !~ /^xml/)) {
        $uri = $parser->expand_ns_prefix( '#default');
    }
    return $name unless $uri;

    my $mapped_prefix = $t->{twig_map_xmlns}->{$uri} || $DEFAULT_URI2NS{$uri};
    return "$mapped_prefix:$name" if $mapped_prefix;

    my $prefix = _a_proper_ns_prefix( $parser, $uri);
    $prefix = '' if $prefix eq '#default';
    return $prefix ? "$prefix:$name" : $name;
}
2442
|
|
|
|
|
|
|
|
2443
|
|
|
|
|
|
|
|
2444
|
|
|
|
|
|
|
# Character-data handler: appends the text to the CDATA or PCDATA node being
# built, or, outside of such a node, either stores it (when it is only
# whitespace) or creates a new PCDATA node that becomes the current element.
sub _twig_char {
    # warn " in _twig_char...\n"; # DEBUG handler
    my ($p, $string) = @_;
    my $t = $p->{twig};

    if ($t->{twig_keep_encoding}) {
        my $use_original = 1;
        if ($t->{twig_in_cdata}) {
            use bytes;    # > perl 5.5
            # TODO original_string does not hold the entire string for long
            # CDATA chunks (I believe due to a bug in XML::Parser), so in that
            # case $string is used, even if it has been converted to utf8
            $use_original = length( $string) < 1024;
        }
        $string = $p->original_string() if $use_original;
    }

    $string = $t->{twig_input_filter}->( $string) if $t->{twig_input_filter};
    $string = $t->{twig_char_handler}->( $string) if $t->{twig_char_handler};

    my $current = $t->{twig_current};

    # text is the continuation of a previously created cdata
    if ($t->{twig_in_cdata}) {
        $current->{cdata} .= $t->{twig_stored_spaces} . $string;
        return;
    }

    # text is the continuation of a previously created pcdata
    if ($t->{twig_in_pcdata}) {
        if ($t->{extra_data}) {
            $current->_push_extra_data_in_pcdata( $t->{extra_data}, length( $current->{pcdata}));
            $t->{extra_data} = '';
        }
        $current->{pcdata} .= $string;
        return;
    }

    # text is just space, which might be discarded later
    if ($string =~ /\A\s*\Z/s) {
        if ($t->{extra_data}) {
            # we got extra data (comment, pi), lets add the spaces to it
            $t->{extra_data} .= $string;
        }
        else {
            # no extra data, just store the spaces
            $t->{twig_stored_spaces} .= $string;
        }
        return;
    }

    # real text: start a new PCDATA node and make it the current element
    my $text_elt = _insert_pcdata( $t, $t->{twig_stored_spaces} . $string);
    delete $current->{'twig_current'};
    $text_elt->{'twig_current'} = 1;
    $t->{twig_current}    = $text_elt;
    $t->{twig_in_pcdata}  = 1;
    if ($t->{extra_data}) {
        $text_elt->_push_extra_data_in_pcdata( $t->{extra_data}, 0);
        $t->{extra_data} = '';
    }
    return;
}
2512
|
|
|
|
|
|
|
|
2513
|
|
|
|
|
|
|
# Start-of-CDATA handler: creates a CDATA element, links it into the tree
# (after the text node being built, or as last child of the current
# element) and makes it the current element.
sub _twig_cdatastart {
    # warn " in _twig_cdatastart...\n"; # DEBUG handler
    my ($p) = @_;
    my $t = $p->{twig};

    $t->{twig_in_cdata} = 1;
    my $cdata   = $t->{twig_elt_class}->new( $CDATA);
    my $current = $t->{twig_current};

    if ($t->{twig_in_pcdata}) {
        # create the node as a sibling of the PCDATA
        $cdata->{prev_sibling} = $current;
        weaken( $cdata->{prev_sibling}) if $XML::Twig::weakrefs;
        $current->{next_sibling} = $cdata;

        my $parent = $current->{parent};
        $cdata->{parent} = $parent;
        weaken( $cdata->{parent}) if $XML::Twig::weakrefs;

        delete $parent->{empty};
        $parent->{last_child} = $cdata;
        weaken( $parent->{last_child}) if $XML::Twig::weakrefs;

        $t->{twig_in_pcdata} = 0;
    }
    else {
        # we have to create a PCDATA element if we need to store spaces
        if ($t->_space_policy($XML::Twig::index2gi[$current->{'gi'}]) && $t->{twig_stored_spaces}) {
            _insert_pcdata( $t, $t->{twig_stored_spaces});
        }
        $t->{twig_stored_spaces} = '';

        # create the node as a child of the current element
        $cdata->{parent} = $current;
        weaken( $cdata->{parent}) if $XML::Twig::weakrefs;

        if (my $previous = $current->{last_child}) {
            $cdata->{prev_sibling} = $previous;
            weaken( $cdata->{prev_sibling}) if $XML::Twig::weakrefs;
            $previous->{next_sibling} = $cdata;
        }
        else {
            $current->{first_child} = $cdata;
        }

        delete $current->{empty};
        $current->{last_child} = $cdata;
        weaken( $current->{last_child}) if $XML::Twig::weakrefs;
    }

    # the CDATA element is now the current one
    delete $current->{'twig_current'};
    $t->{twig_current}      = $cdata;
    $cdata->{'twig_current'} = 1;

    if ($t->{extra_data}) {
        $cdata->set_extra_data( $t->{extra_data});
        $t->{extra_data} = '';
    }
    return;
}
2556
|
|
|
|
|
|
|
|
2557
|
|
|
|
|
|
|
# End-of-CDATA handler: closes the CDATA element, runs the handlers
# registered for CDATA on it, then makes its parent the current element.
sub _twig_cdataend {
    # warn " in _twig_cdataend...\n"; # DEBUG handler
    my ($p) = @_;
    my $t = $p->{twig};

    $t->{twig_in_cdata} = 0;

    my $cdata_elt = $t->{twig_current};
    delete $cdata_elt->{'twig_current'};
    # not a pure no-op: the assignment creates the cdata key when no text
    # was received for the section
    $cdata_elt->{cdata} = $cdata_elt->{cdata};

    # handlers see the CDATA section as one more level of context
    push @{$t->{_twig_context_stack}}, { $ST_TAG => $CDATA };

    if ($t->{twig_handlers}) {
        my @handlers = _handler( $t, $t->{twig_handlers}, $CDATA);
        local $_ = $cdata_elt;    # so we can use $_ in the handlers
        for my $handler (@handlers) {
            last unless $handler->( $t, $cdata_elt);
        }
    }

    pop @{$t->{_twig_context_stack}};

    # back to the parent element
    my $parent = $cdata_elt->{parent};
    $t->{twig_current}       = $parent;
    $parent->{'twig_current'} = 1;

    $t->{twig_long_cdata} = 0;
    return;
}
2588
|
|
|
|
|
|
|
|
2589
|
|
|
|
|
|
|
# Runs the handlers registered for a processing instruction element: first
# the one for the PI's target, then the generic one (registered under the
# empty string). A handler returning false stops the chain. Does nothing
# when no PI handlers are registered at all.
sub _pi_elt_handlers {
    my ($t, $pi) = @_;
    my $registered = $t->{twig_handlers}->{pi_handlers} || return;

    for my $callback ($registered->{$pi->{target}}, $registered->{''}) {
        next unless $callback;
        local $_ = $pi;    # handlers can use $_
        $callback->( $t, $pi) || last;
    }
}
2595
|
|
|
|
|
|
|
|
2596
|
|
|
|
|
|
|
# Returns the text to keep for a processing instruction that is stored as
# extra data: the return value of the handler registered for its target, or
# of the generic PI handler (registered under the empty string), or, when
# there is no handler, the serialised PI.
#   $t      - the twig
#   $target - the PI target
#   $data   - the PI data (may be undef or empty)
sub _pi_text_handler
  { my( $t, $target, $data)= @_;
    if( my $handler= $t->{twig_handlers}->{pi_handlers}->{$target})
      { return $handler->( $t, $target, $data); }
    if( my $handler= $t->{twig_handlers}->{pi_handlers}->{''})
      { return $handler->( $t, $target, $data); }
    # a PI is delimited by <? and ?>: the opening delimiter is needed for
    # the output to be well-formed
    return defined( $data) && $data ne '' ? "<?$target $data?>" : "<?$target?>" ;
  }
2604
|
|
|
|
|
|
|
|
2605
|
|
|
|
|
|
|
# Calls the handler registered for comments, if any, on a comment element
# (which is also available to the handler as $_).
sub _comment_elt_handler {
    my ($t, $comment) = @_;

    my $on_comment = $t->{twig_handlers}->{handlers}->{$COMMENT};
    if ($on_comment) {
        local $_ = $comment;
        $on_comment->( $t, $comment);
    }
}
2610
|
|
|
|
|
|
|
|
2611
|
|
|
|
|
|
|
# Returns the text to keep for a comment that is stored as extra data.
# When a handler is registered for comments it receives the comment text
# and its return value replaces it; an undefined or empty result drops the
# comment altogether (the empty string is returned). Otherwise the comment
# is returned serialised, with its delimiters.
sub _comment_text_handler
  { my( $t, $comment)= @_;
    if( my $handler= $t->{twig_handlers}->{handlers}->{$COMMENT})
      { $comment= $handler->($t, $comment);
        if( !defined $comment || $comment eq '') { return ''; }
      }
    # returning the empty string here would silently lose every kept comment
    return "<!--$comment-->";
  }
2619
|
|
|
|
|
|
|
|
2620
|
|
|
|
|
|
|
|
2621
|
|
|
|
|
|
|
|
2622
|
|
|
|
|
|
|
# Comment handler: delegates to _twig_pi_comment with the comment-specific
# options and method names. In keep_encoding mode the text is taken from
# the original string, minus its first 4 and last 3 characters.
sub _twig_comment {
    # warn " in _twig_comment...\n"; # DEBUG handler
    my ($p, $comment_text) = @_;
    my $t = $p->{twig};

    $comment_text = substr( $p->original_string(), 4, -3) if $t->{twig_keep_encoding};

    $t->_twig_pi_comment(
        $p, $COMMENT, $t->{twig_keep_comments}, $t->{twig_process_comments},
        '_set_comment', '_comment_elt_handler', '_comment_text_handler',
        $comment_text,
    );
    return;
}
2635
|
|
|
|
|
|
|
|
2636
|
|
|
|
|
|
|
# Processing-instruction handler: delegates to _twig_pi_comment with the
# PI-specific options and method names. In keep_encoding mode target and
# data are re-read from the original string (minus its first 2 and last 2
# characters), split on the first run of whitespace.
sub _twig_pi {
    # warn " in _twig_pi...\n"; # DEBUG handler
    my ($p, $target, $data) = @_;
    my $t = $p->{twig};

    if ($t->{twig_keep_encoding}) {
        my $raw_pi = substr( $p->original_string(), 2, -2);
        ($target, $data) = split( /\s+/, $raw_pi, 2);
    }

    $t->_twig_pi_comment(
        $p, $PI, $t->{twig_keep_pi}, $t->{twig_process_pi},
        '_set_pi', '_pi_elt_handlers', '_pi_text_handler',
        $target, $data,
    );
    return;
}
2652
|
|
|
|
|
|
|
|
2653
|
|
|
|
|
|
|
# Common code for comments and processing instructions.
#   $t, $p        - the twig and the parser
#   $type         - tag used to create the element ($COMMENT or $PI)
#   $keep         - true if the item is to be kept as extra data (text)
#   $process      - true if the item is to become an element of the tree
#   $set          - name of the element method that stores @parser_args
#   $elt_handler  - name of the twig method called on the new element
#   $text_handler - name of the twig method that builds the text to keep
#   @parser_args  - what the parser gave (comment text, or PI target and data)
# When neither $keep nor $process is true the item is dropped.
sub _twig_pi_comment {
    my ($t, $p, $type, $keep, $process, $set, $elt_handler, $text_handler, @parser_args) = @_;

    if ($t->{twig_input_filter}) {
        foreach my $arg (@parser_args) {
            $arg = $t->{twig_input_filter}->( $arg);
        }
    }

    # if pi/comments are to be kept then we piggyback them to the current element
    if ($keep) {
        # first add spaces
        if ($t->{twig_stored_spaces}) {
            $t->{extra_data} .= $t->{twig_stored_spaces};
            $t->{twig_stored_spaces} = '';
        }

        my $kept_text = $t->$text_handler( @parser_args);
        $t->{extra_data} .= $kept_text;
    }
    elsif ($process) {
        my $current = $t->{twig_current};    # defined unless we are outside of the root

        my $elt = $t->{twig_elt_class}->new( $type);
        $elt->$set( @parser_args);
        if ($t->{extra_data}) {
            $elt->set_extra_data( $t->{extra_data});
            $t->{extra_data} = '';
        }

        if (!$t->root) {
            # before the root element
            $t->_add_cpi_outside_of_root( leading_cpi => $elt);
        }
        elsif ($t->{twig_in_pcdata}) {
            # create the node as a sibling of the PCDATA
            $elt->paste_after( $current);
            $t->{twig_in_pcdata} = 0;
        }
        elsif ($current) {
            # we have to create a PCDATA element if we need to store spaces
            if ($t->_space_policy($XML::Twig::index2gi[$current->{'gi'}]) && $t->{twig_stored_spaces}) {
                _insert_pcdata( $t, $t->{twig_stored_spaces});
            }
            $t->{twig_stored_spaces} = '';
            # create the node as a child of the current element
            $elt->paste_last_child( $current);
        }
        else {
            # after the root element
            $t->_add_cpi_outside_of_root( trailing_cpi => $elt);
        }

        # the parent of the new element is the current element from now on
        if ($current) {
            delete $current->{'twig_current'};
            my $parent = $elt->{parent};
            $t->{twig_current} = $parent;
            $parent->{'twig_current'} = 1;
        }

        $t->$elt_handler( $elt);
    }
}
2712
|
|
|
|
|
|
|
|
2713
|
|
|
|
|
|
|
|
2714
|
|
|
|
|
|
|
# Adds a comment or pi element found outside of the root element: it is
# pasted as last child of a '#CPI' holder element, created on first use and
# stored in the twig under $type ('leading_cpi' or 'trailing_cpi').
# Returns the twig.
sub _add_cpi_outside_of_root {
    my ($t, $type, $elt) = @_;

    my $holder = ( $t->{$type} ||= $t->{twig_elt_class}->new( '#CPI') );
    $elt->paste_last_child( $holder);

    return $t;
}
2722
|
|
|
|
|
|
|
|
2723
|
|
|
|
|
|
|
# Final handler, called with either the parser or the twig itself: stores
# the trailing comments/PIs and spaces, restores the selected filehandle,
# runs the queued top-down handlers, performs the pending auto-flush (after
# which the root is deleted) and resets the parsing state.
# Returns the twig.
sub _twig_final
  { # warn " in _twig_final...\n"; # DEBUG handler

    my $p= shift;
    my $t= $p->isa( 'XML::Twig') ? $p : $p->{twig};

    # store trailing data
    if( $t->{extra_data}) { $t->{trailing_cpi_text} = $t->{extra_data}; $t->{extra_data}=''; }
    $t->{trailing_spaces}= $t->{twig_stored_spaces} || '';

    # restore the selected filehandle if needed
    $t->_set_fh_to_selected_fh();

    $t->_trigger_tdh if( $t->{twig_tdh});

    select $t->{twig_original_selected_fh} if($t->{twig_original_selected_fh}); # probably dodgy

    if( exists $t->{twig_autoflush_data})
      { my @args;
        push @args,  $t->{twig_autoflush_data}->{fh}      if( $t->{twig_autoflush_data}->{fh});
        push @args,  @{$t->{twig_autoflush_data}->{args}} if( $t->{twig_autoflush_data}->{args});
        $t->flush( @args);
        delete $t->{twig_autoflush_data};
        $t->root->delete if $t->root;
      }

    # tries to clean-up (probably not very well at the moment): all 4 fields
    # are left in the twig, with an undefined value
    #undef $p->{twig};
    @{$t}{ qw( twig_parser twig_parsing _twig_context_stack twig_current) }=();

    return $t;
  }
2759
|
|
|
|
|
|
|
|
2760
|
|
|
|
|
|
|
# Creates a PCDATA element holding $string, appends it to the children of
# the current element, and resets the stored spaces. The current element
# is not changed. Returns the new element.
sub _insert_pcdata {
    my ($t, $string) = @_;

    my $parent = $t->{twig_current};    # always defined

    # create a new PCDATA element, through the constructor of the element
    # class when an alternate class is in use, directly otherwise
    my $text_elt;
    if (exists $t->{twig_alt_elt_class}) {
        $text_elt = $t->{twig_elt_class}->new( $PCDATA);
        $text_elt->{pcdata} = $string;
    }
    else {
        $text_elt = bless( { gi => $XML::Twig::gi2index{$PCDATA}, pcdata => $string }, 'XML::Twig::Elt');
    }

    # link it after the last child, or as only child
    my $previous = $parent->{last_child};
    if ($previous) {
        $previous->{next_sibling} = $text_elt;
        $text_elt->{prev_sibling} = $previous;
        weaken( $text_elt->{prev_sibling}) if $XML::Twig::weakrefs;
    }
    else {
        $parent->{first_child} = $text_elt;
    }

    $text_elt->{parent} = $parent;
    weaken( $text_elt->{parent}) if $XML::Twig::weakrefs;

    delete $parent->{empty};
    $parent->{last_child} = $text_elt;
    weaken( $parent->{last_child}) if $XML::Twig::weakrefs;

    $t->{twig_stored_spaces} = '';
    return $text_elt;
}
2785
|
|
|
|
|
|
|
|
2786
|
|
|
|
|
|
|
# Tells what to do with whitespace-only text in element $gi: returns 1 to
# keep it, 0 to discard it, undef when no space option applies.
# Precedence, highest first: discard_spaces_in for this element,
# keep_spaces_in for this element, keep_spaces, discard_spaces.
sub _space_policy {
    my ($t, $gi) = @_;

    my $keep_in    = $t->{twig_keep_spaces_in};
    my $discard_in = $t->{twig_discard_spaces_in};

    my $policy = ($discard_in && $discard_in->{$gi}) ? 0
               : ($keep_in && $keep_in->{$gi})       ? 1
               : $t->{twig_keep_spaces}              ? 1
               : $t->{twig_discard_spaces}           ? 0
               :                                       undef;
    return $policy;
}
2797
|
|
|
|
|
|
|
|
2798
|
|
|
|
|
|
|
|
2799
|
|
|
|
|
|
|
sub _twig_entity |
2800
|
|
|
|
|
|
|
{ # warn " in _twig_entity...\n"; # DEBUG handler |
2801
|
|
|
|
|
|
|
my( $p, $name, $val, $sysid, $pubid, $ndata, $param)= @_; |
2802
|
|
|
|
|
|
|
my $t=$p->{twig}; |
2803
|
|
|
|
|
|
|
|
2804
|
|
|
|
|
|
|
#{ no warnings; my $base= $p->base; warn "_twig_entity called: expand: '$t->{twig_expand_external_ents}', base: '$base', name: '$name', val: '$val', sysid: '$sysid', pubid: '$pubid', ndata: '$ndata', param: '$param'\n";} |
2805
|
|
|
|
|
|
|
|
2806
|
|
|
|
|
|
|
my $missing_entity=0; |
2807
|
|
|
|
|
|
|
|
2808
|
|
|
|
|
|
|
if( $sysid) |
2809
|
|
|
|
|
|
|
{ if($ndata) |
2810
|
|
|
|
|
|
|
{ if( ! -f _based_filename( $sysid, $p->base)) { $missing_entity= 1; } |
2811
|
|
|
|
|
|
|
} |
2812
|
|
|
|
|
|
|
else |
2813
|
|
|
|
|
|
|
{ if( $t->{twig_expand_external_ents}) |
2814
|
|
|
|
|
|
|
{ $val= eval { _slurp_uri( $sysid, $p->base) }; |
2815
|
|
|
|
|
|
|
if( ! defined $val) |
2816
|
|
|
|
|
|
|
{ if( $t->{twig_extern_ent_nofail}) |
2817
|
|
|
|
|
|
|
{ $missing_entity= 1; } |
2818
|
|
|
|
|
|
|
else |
2819
|
|
|
|
|
|
|
{ _croak( "cannot load SYSTEM entity '$name' from '$sysid': $@", 3); } |
2820
|
|
|
|
|
|
|
} |
2821
|
|
|
|
|
|
|
} |
2822
|
|
|
|
|
|
|
} |
2823
|
|
|
|
|
|
|
} |
2824
|
|
|
|
|
|
|
|
2825
|
|
|
|
|
|
|
my $ent=XML::Twig::Entity->new( $name, $val, $sysid, $pubid, $ndata, $param); |
2826
|
|
|
|
|
|
|
if( $missing_entity) { $t->{twig_missing_system_entities}->{$name}= $ent; } |
2827
|
|
|
|
|
|
|
|
2828
|
|
|
|
|
|
|
my $entity_list= $t->entity_list; |
2829
|
|
|
|
|
|
|
if( $entity_list) { $entity_list->add( $ent); } |
2830
|
|
|
|
|
|
|
|
2831
|
|
|
|
|
|
|
if( $parser_version > 2.27) |
2832
|
|
|
|
|
|
|
{ # this is really ugly, but with some versions of XML::Parser the value |
2833
|
|
|
|
|
|
|
# of the entity is not properly returned by the default handler |
2834
|
|
|
|
|
|
|
my $ent_decl= $ent->text; |
2835
|
|
|
|
|
|
|
if( $t->{twig_keep_encoding}) |
2836
|
|
|
|
|
|
|
{ if( defined $ent->{val} && ($ent_decl !~ /["']/)) |
2837
|
|
|
|
|
|
|
{ my $val= $ent->{val}; |
2838
|
|
|
|
|
|
|
$ent_decl .= $val =~ /"/ ? qq{'$val' } : qq{"$val" }; |
2839
|
|
|
|
|
|
|
} |
2840
|
|
|
|
|
|
|
# for my solaris box (perl 5.6.1, XML::Parser 2.31, expat?) |
2841
|
|
|
|
|
|
|
$t->{twig_doctype}->{internal}=~ s{
|
2842
|
|
|
|
|
|
|
} |
2843
|
|
|
|
|
|
|
$t->{twig_doctype}->{internal} .= $ent_decl |
2844
|
|
|
|
|
|
|
unless( $t->{twig_doctype}->{internal}=~ m{
|
2845
|
|
|
|
|
|
|
} |
2846
|
|
|
|
|
|
|
|
2847
|
|
|
|
|
|
|
return; |
2848
|
|
|
|
|
|
|
} |
2849
|
|
|
|
|
|
|
|
2850
|
|
|
|
|
|
|
# Notation-declaration handler: creates the notation object, adds it to the
# notation list of the twig (if there is one) and appends the text of the
# declaration to the internal DTD.
sub _twig_notation {
    my ($p, $name, $base, $sysid, $pubid) = @_;
    my $t = $p->{twig};

    my $notation = XML::Twig::Notation->new( $name, $base, $sysid, $pubid);

    my $known_notations = $t->notation_list();
    $known_notations->add( $notation) if $known_notations;

    # internal should get the recognized_string, but XML::Parser does not provide it
    # so we need to re-create it ( $notation->text) and stick it there.
    $t->{twig_doctype}->{internal} .= $notation->text;

    return;
}
2864
|
|
|
|
|
|
|
|
2865
|
|
|
|
|
|
|
|
2866
|
|
|
|
|
|
|
# External-entity handler. With the no_expand option the entity reference
# is inserted in the tree and the empty string is returned; otherwise the
# content given by the external entity handler is returned. When that
# handler fails (dies or returns undef) the result is the empty string
# with the extern_ent_nofail option, an exception without it.
sub _twig_extern_ent {
    # warn " in _twig_extern_ent...I (", $_[0]->original_string, ")\n"; # DEBUG handler
    my ($p, $base, $sysid, $pubid) = @_;
    my $t = $p->{twig};

    if ($t->{twig_no_expand}) {
        my $ent_name = $t->{twig_keep_encoding} ? $p->original_string : $p->recognized_string;
        _twig_insert_ent( $t, $ent_name);
        return '';
    }

    my $ent_content = eval { $t->{twig_ext_ent_handler}->( $p, $base, $sysid) };
    return $ent_content if defined $ent_content;

    # the entity could not be loaded
    my $ent_name      = $p->recognized_string;
    my $file          = _based_filename( $sysid, $base);
    my $error_message = "cannot expand $ent_name - cannot load '$file'";
    return "" if $t->{twig_extern_ent_nofail};

    _croak( $error_message);
    return $ent_content;
}
2886
|
|
|
|
|
|
|
|
2887
|
|
|
|
|
|
|
# I use this so I can change the $Carp::CarpLevel (which determines how many
# call frames to skip when reporting an error).
#   $message - the error message
#   $level   - number of extra frames to skip (defaults to 0)
# Always dies, through croak.
sub _croak
  { my( $message, $level)= @_;
    # localized, so the setting applies to this error only and does not leak
    # into later, unrelated calls to carp/croak
    local $Carp::CarpLevel= $level || 0;
    croak $message;
  }
2893
|
|
|
|
|
|
|
|
2894
|
|
|
|
|
|
|
# XML-declaration handler: stores version, encoding and standalone in the
# twig_xmldecl hash of the twig. The hash is reused if it already exists
# (it could have been set by set_output_encoding).
sub _twig_xmldecl {
    # warn " in _twig_xmldecl...\n"; # DEBUG handler
    my ($p, $version, $encoding, $standalone) = @_;
    my $t = $p->{twig};

    my $decl = ( $t->{twig_xmldecl} ||= {} );
    @{$decl}{ qw( version encoding standalone) } = ( $version, $encoding, $standalone);

    return;
}
2905
|
|
|
|
|
|
|
|
2906
|
|
|
|
|
|
|
# Doctype handler: stores name, system id and public id of the doctype,
# prepares the storage of the internal subset and, when the
# read_external_dtd option is set and there is a system id, loads the
# external DTD by parsing a small document built around it.
sub _twig_doctype
  { # warn " in _twig_doctype...\n"; # DEBUG handler
    my( $p, $name, $sysid, $pub, $internal)= @_;
    my $t=$p->{twig};
    $t->{twig_doctype}||= {};                   # create
    $t->{twig_doctype}->{name}= $name;          # always there
    $t->{twig_doctype}->{sysid}= $sysid;        #
    $t->{twig_doctype}->{pub}= $pub;            #

    # now let's try to cope with XML::Parser 2.28 and above
    if( $parser_version > 2.27)
      { # the internal subset is collected by dedicated handlers until the
        # end of the doctype; the previous handlers are saved
        @saved_default_handler= $p->setHandlers( Default     => \&_twig_store_internal_dtd,
                                                 Entity      => \&_twig_entity,
                                               );
        $p->setHandlers( DoctypeFin  => \&_twig_stop_storing_internal_dtd);
        $t->{twig_doctype}->{internal}='';
      }
    else
      # for XML::Parser before 2.28
      { $internal||='';
        $internal=~ s{^\s*\[}{};
        $internal=~ s{]\s*$}{};
        $t->{twig_doctype}->{internal}=$internal;
      }

    # now check if we want to get the DTD info
    if( $t->{twig_read_external_dtd} && $sysid)
      { # let's build a fake document with an internal DTD
        if( $t->{DTDBase})
          { _use( 'File::Spec');
            $sysid=File::Spec->catfile($t->{DTDBase}, $sysid);
          }
        my $dtd= _slurp_uri( $sysid);
        # if the DTD includes an XML declaration, it needs to be moved before the DOCTYPE bit
        # in both cases the DTD has to be embedded in the document as its
        # internal subset, or there would be nothing to load
        if( $dtd=~ s{^(\s*<\?xml(\s+\w+\s*=\s*("[^"]*"|'[^']*'))*\s*\?>)}{})
          { $dtd= "$1<!DOCTYPE $name [$dtd]><$name/>"; }
        else
          { $dtd= "<!DOCTYPE $name [$dtd]><$name/>"; }

        $t->save_global_state();            # save the globals (they will be reset by the following new)
        my $t_dtd= XML::Twig->new( load_DTD => 1, ParseParamEnt => 1, error_context => $t->{ErrorContext} || 0);          # create a temp twig
        $t_dtd->parse( $dtd);               # parse it
        $t->{twig_dtd}= $t_dtd->{twig_dtd}; # grab the dtd info
        #$t->{twig_dtd_is_external}=1;
        $t->entity_list->_add_list( $t_dtd->entity_list) if( $t_dtd->entity_list); # grab the entity info
        $t->notation_list->_add_list( $t_dtd->notation_list) if( $t_dtd->notation_list); # grab the notation info
        $t->restore_global_state();
      }
    return;
  }
2956
|
|
|
|
|
|
|
|
2957
|
|
|
|
|
|
|
# Element-declaration handler: records the element name (in declaration
# order) and its content model in the twig_dtd hash of the twig, and, for
# XML::Parser above 2.27, appends the text of the declaration to the
# internal DTD when that text ends on a declaration boundary.
sub _twig_element
  { # warn " in _twig_element...\n"; # DEBUG handler

    my( $p, $name, $model)= @_;
    my $t=$p->{twig};
    $t->{twig_dtd}||= {};                      # may create the dtd
    $t->{twig_dtd}->{model}||= {};             # may create the model hash
    $t->{twig_dtd}->{elt_list}||= [];          # ordered list of elements
    push @{$t->{twig_dtd}->{elt_list}}, $name; # store the elt
    $t->{twig_dtd}->{model}->{$name}= $model;  # store the model
    if( ($parser_version > 2.27) && ($t->{twig_doctype}->{internal}=~ m{(^|>)\s*$}) )
      { my $text= $XML::Twig::Elt::keep_encoding ? $p->original_string : $p->recognized_string;
        unless( $text)
          { # this version of XML::Parser does not return the text in the *_string method
            # we need to rebuild it
            $text= "<!ELEMENT $name $model>";
          }
        $t->{twig_doctype}->{internal} .= $text;
      }
    return;
  }
2978
|
|
|
|
|
|
|
|
2979
|
|
|
|
|
|
|
# handler called for each attribute declared in an ATTLIST declaration:
# stores the type, default and fixed status of the attribute in
# $t->{twig_dtd}->{att}->{$gi}->{$att} and, when the parser does not give
# the text of the declaration, rebuilds it in the stored internal subset
#   $p       - the parser object, the twig is found in $p->{twig}
#   $gi      - name of the element the attribute belongs to
#   $att     - name of the attribute
#   $type    - declared type of the attribute
#   $default - default value (may be undef)
#   $fixed   - true if the attribute is #FIXED
# returns nothing
sub _twig_attlist
  { # warn " in _twig_attlist...\n"; # DEBUG handler

    my( $p, $gi, $att, $type, $default, $fixed)= @_;
    #warn "in attlist: gi: '$gi', att: '$att', type: '$type', default: '$default', fixed: '$fixed'\n";
    my $t=$p->{twig};
    $t->{twig_dtd}||= {};                      # create dtd if need be
    $t->{twig_dtd}->{$gi}||= {};               # create elt if need be
    #$t->{twig_dtd}->{$gi}->{att}||= {};        # create att if need be
    if( ($parser_version > 2.27) && ($t->{twig_doctype}->{internal}=~ m{(^|>)\s*$}) )
      { my $text= $XML::Twig::Elt::keep_encoding ? $p->original_string : $p->recognized_string;
        unless( $text)
          { # this version of XML::Parser does not return the text in the *_string method
            # we need to rebuild it
            my $att_decl="$att $type";
            $att_decl .= " #FIXED"   if( $fixed);
            $att_decl .= " $default" if( defined $default);
            # 2 cases: there is already an attlist on that element or not
            if( $t->{twig_dtd}->{att}->{$gi})
              { # there is already an attlist, add to it
                # (the new attribute is aligned under the first one, $gi is
                # quoted so a name including a '.' is matched literally)
                $t->{twig_doctype}->{internal}=~ s{(<!ATTLIST\s*\Q$gi\E )(.*?)\n?>}
                                                  { "$1$2\n" . ' ' x length( $1) . "$att_decl\n>"}es;
              }
            else
              { # create the attlist
                $t->{twig_doctype}->{internal}.= "<!ATTLIST $gi $att_decl>"
              }
          }
      }
    $t->{twig_dtd}->{att}->{$gi}->{$att}= {} ;
    $t->{twig_dtd}->{att}->{$gi}->{$att}->{type}= $type;
    $t->{twig_dtd}->{att}->{$gi}->{$att}->{default}= $default if( defined $default);
    $t->{twig_dtd}->{att}->{$gi}->{$att}->{fixed}= $fixed;
    return;
  }
3014
|
|
|
|
|
|
|
|
3015
|
|
|
|
|
|
|
# default handler: only 2 kinds of strings are processed,
#  - whitespace found when the root exists and there is no current element,
#    which is appended to $t->{twig_stored_spaces}
#  - a string that is exactly one entity reference (&name;)
#   $p      - the parser object, the twig is found in $p->{twig}
#   $string - the string received from the parser
# in the entity case, when the original string is not a tag, returns either
# the entity name (keep_encoding mode) or the entity element created
sub _twig_default
  { # warn " in _twig_default...\n"; # DEBUG handler

    my( $p, $string)= @_;
    my $t= $p->{twig};

    # spaces after the closing tag (root created, no twig_current any more)
    if( $t->{twig_root} && ! $t->{twig_current} && $string=~ m{^\s+$}m)
      { $t->{twig_stored_spaces} .= $string; }

    # the rest is done only for an entity
    if( $string=~ m{^&([^;]*);$})
      { my $original= $p->original_string;
        if( ($original=~ m{^<}) && ($original=~ m{>$}) )
          { # the original string is a tag, so the entity is in an attribute
            $t->{twig_entities_in_attribute}=1 if( $t->{twig_do_not_escape_amp_in_atts});
          }
        else
          { # the entity has to be pure pcdata, or we have a problem
            my $ent;
            if( $t->{twig_keep_encoding})
              { # the reference is stored as regular text, the name
                # (without the & and ;) is returned
                _twig_char( $p, $string);
                $ent= substr( $string, 1, -1);
              }
            else
              { $ent= _twig_insert_ent( $t, $string); }

            return $ent;
          }
      }
  }
3048
|
|
|
|
|
|
|
|
3049
|
|
|
|
|
|
|
# creates an entity element (class $t->{twig_elt_class}, tag $ENT) holding
# $string in its ent field and links it in the tree being built:
#  - after the current element if the twig is in a #PCDATA (the parent of
#    the #PCDATA then becomes the current element)
#  - as last child of the current element otherwise
#   $t      - the twig
#   $string - the text of the entity reference
# returns the entity element
sub _twig_insert_ent
  {
    my( $t, $string)=@_;

    my $current= $t->{twig_current};

    my $ent=  $t->{twig_elt_class}->new( $ENT);
    $ent->{ent}=  $string;

    _add_or_discard_stored_spaces( $t);

    if( $t->{twig_in_pcdata})
      { # the entity is created as a sibling of the #PCDATA
        my $parent= $current->{parent};

        $ent->{prev_sibling}= $current;
        if( $XML::Twig::weakrefs) { weaken( $ent->{prev_sibling}); }
        $current->{next_sibling}= $ent;

        $ent->{parent}= $parent;
        if( $XML::Twig::weakrefs) { weaken( $ent->{parent}); }

        delete $parent->{empty};
        $parent->{last_child}= $ent;
        if( $XML::Twig::weakrefs) { weaken( $parent->{last_child}); }

        # we left pcdata: the parent is now the twig_current
        delete $current->{'twig_current'};
        $t->{twig_current}= $parent;
        $t->{twig_in_pcdata}=0;
      }
    else
      { # the entity is created as a child of the current element
        $ent->{parent}= $current;
        if( $XML::Twig::weakrefs) { weaken( $ent->{parent}); }

        my $prev_sibling= $current->{last_child};
        if( $prev_sibling)
          { $ent->{prev_sibling}= $prev_sibling;
            if( $XML::Twig::weakrefs) { weaken( $ent->{prev_sibling}); }
            $prev_sibling->{next_sibling}= $ent;
          }
        elsif( $current)
          { $current->{first_child}= $ent; }

        if( $current)
          { delete $current->{empty};
            $current->{last_child}= $ent;
            if( $XML::Twig::weakrefs) { weaken( $current->{last_child}); }
          }
      }

    # meant to trigger entity handler, does not seem to be activated at this time
    #if( my $handler= $t->{twig_handlers}->{gi}->{$ENT})
    #  { local $_= $ent; $handler->( $t, $ent); }

    return $ent;
  }
3092
|
|
|
|
|
|
|
|
3093
|
|
|
|
|
|
|
# returns the content of the twig_parser field of the twig
sub parser
  { my $t= shift;
    return $t->{twig_parser};
  }
3095
|
|
|
|
|
|
|
|
3096
|
|
|
|
|
|
|
# returns the declaration text (or a default one) |
3097
|
|
|
|
|
|
|
# returns the text of the XML declaration
#  - '' if the twig has neither a stored declaration (twig_xmldecl) nor an
#    output_encoding
#  - the declaration rebuilt from the stored version, encoding and
#    standalone fields, followed by a newline, if a declaration is stored
#  - a default version="1.0" declaration using output_encoding otherwise
# the result goes through the XML::Twig::Elt output filter if there is one
sub xmldecl
  { my $t= shift;
    return '' unless( $t->{twig_xmldecl} || $t->{output_encoding});
    my $decl_string;
    my $decl= $t->{twig_xmldecl};
    if( $decl)
      { my $version= $decl->{version};
        $decl_string= q{<?xml};
        $decl_string .= qq{ version="$version"};

        # encoding can either have been set (in $t->{output_encoding})
        # or come from the document (in $decl->{encoding})
        # the output encoding wins when both are there
        if( $t->{output_encoding})
          { my $encoding= $t->{output_encoding};
            $decl_string .= qq{ encoding="$encoding"};
          }
        elsif( $decl->{encoding})
          { my $encoding= $decl->{encoding};
            $decl_string .= qq{ encoding="$encoding"};
          }

        # standalone is output only if it was set, even to a false value
        if( defined( $decl->{standalone}))
          { $decl_string .= q{ standalone="};
            $decl_string .= $decl->{standalone} ? "yes" : "no";
            $decl_string .= q{"};
          }

        $decl_string .= "?>\n";
      }
    else
      { my $encoding= $t->{output_encoding};
        $decl_string= qq{<?xml version="1.0" encoding="$encoding"?>};
      }

    my $output_filter= XML::Twig::Elt::output_filter();
    return $output_filter ? $output_filter->( $decl_string) : $decl_string;
  }
3134
|
|
|
|
|
|
|
|
3135
|
|
|
|
|
|
|
# sets fields of the doctype data of the twig ($t->{twig_doctype}, created
# if it is not defined); an undef argument leaves its field untouched
#   $name     - stored in the name field
#   $system   - stored in the sysid field
#   $public   - stored in the pub field
#   $internal - stored in the internal field
sub set_doctype
  { my( $t, $name, $system, $public, $internal)= @_;
    unless( defined $t->{twig_doctype}) { $t->{twig_doctype}= {}; }
    my $doctype= $t->{twig_doctype};
    defined $name     and $doctype->{name}     = $name;
    defined $system   and $doctype->{sysid}    = $system;
    defined $public   and $doctype->{pub}      = $public;
    defined $internal and $doctype->{internal} = $internal;
  }
3144
|
|
|
|
|
|
|
|
3145
|
|
|
|
|
|
|
# returns the name field of the doctype data, or '' if the twig has no
# doctype data or if the field is false
sub doctype_name
  { my( $t)= @_;
    my $doctype= $t->{twig_doctype};
    return '' unless( $doctype);
    return $doctype->{name} ? $doctype->{name} : '';
  }
3150
|
|
|
|
|
|
|
|
3151
|
|
|
|
|
|
|
# returns the sysid field of the doctype data, or '' if the twig has no
# doctype data or if the field is false
sub system_id
  { my( $t)= @_;
    my $doctype= $t->{twig_doctype};
    return '' unless( $doctype);
    return $doctype->{sysid} ? $doctype->{sysid} : '';
  }
3156
|
|
|
|
|
|
|
|
3157
|
|
|
|
|
|
|
# returns the pub field of the doctype data, or '' if the twig has no
# doctype data or if the field is false
sub public_id
  { my( $t)= @_;
    my $doctype= $t->{twig_doctype};
    return '' unless( $doctype);
    return $doctype->{pub} ? $doctype->{pub} : '';
  }
3162
|
|
|
|
|
|
|
|
3163
|
|
|
|
|
|
|
# returns the internal field of the doctype data, or '' if the twig has no
# doctype data or if the field is false
sub internal_subset
  { my( $t)= @_;
    my $doctype= $t->{twig_doctype};
    return '' unless( $doctype);
    return $doctype->{internal} ? $doctype->{internal} : '';
  }
3168
|
|
|
|
|
|
|
|
3169
|
|
|
|
|
|
|
# return the dtd object |
3170
|
|
|
|
|
|
|
# returns the content of the twig_dtd field of the twig
sub dtd
  { my( $t)= @_;
    return $t->{twig_dtd};
  }
3174
|
|
|
|
|
|
|
|
3175
|
|
|
|
|
|
|
# return an element model, or the list of element models |
3176
|
|
|
|
|
|
|
# with a true element name: returns the model stored for that element in
# the dtd; otherwise: returns the sorted list of the names of the elements
# that have a model in the dtd
sub model
  { my( $t, $elt)= @_;
    if( $elt)
      { return $t->dtd->{model}->{$elt}; }
    else
      { return (sort keys %{$t->dtd->{model}}); }
  }
3182
|
|
|
|
|
|
|
|
3183
|
|
|
|
|
|
|
|
3184
|
|
|
|
|
|
|
# return the entity_list object |
3185
|
|
|
|
|
|
|
# returns the content of the twig_entity_list field of the twig
sub entity_list
  { my( $t)= @_;
    return $t->{twig_entity_list};
  }
3189
|
|
|
|
|
|
|
|
3190
|
|
|
|
|
|
|
# return the list of entity names |
3191
|
|
|
|
|
|
|
# returns the result of calling entity_names on the entity list of the twig
sub entity_names
  { my( $t)= @_;
    my $entity_list= $t->entity_list;
    return $entity_list->entity_names;
  }
3195
|
|
|
|
|
|
|
|
3196
|
|
|
|
|
|
|
# return the entity object |
3197
|
|
|
|
|
|
|
# returns the result of calling ent, with the entity name, on the entity
# list of the twig
sub entity
  { my( $t, $entity_name)= @_;
    my $entity_list= $t->entity_list;
    return $entity_list->ent( $entity_name);
  }
3202
|
|
|
|
|
|
|
|
3203
|
|
|
|
|
|
|
# return the notation_list object |
3204
|
|
|
|
|
|
|
# returns the content of the twig_notation_list field of the twig
sub notation_list
  { my( $t)= @_;
    return $t->{twig_notation_list};
  }
3208
|
|
|
|
|
|
|
|
3209
|
|
|
|
|
|
|
# return the list of notation names |
3210
|
|
|
|
|
|
|
# returns the result of calling notation_names on the notation list of
# the twig
sub notation_names
  { my( $t)= @_;
    my $notation_list= $t->notation_list;
    return $notation_list->notation_names;
  }
3214
|
|
|
|
|
|
|
|
3215
|
|
|
|
|
|
|
# return the notation object |
3216
|
|
|
|
|
|
|
# returns the result of calling notation, with the notation name, on the
# notation list of the twig
sub notation
  { my( $t, $notation_name)= @_;
    my $notation_list= $t->notation_list;
    return $notation_list->notation( $notation_name);
  }
3221
|
|
|
|
|
|
|
|
3222
|
|
|
|
|
|
|
|
3223
|
|
|
|
|
|
|
|
3224
|
|
|
|
|
|
|
|
3225
|
|
|
|
|
|
|
# prints the prolog of the document
# the filehandle used is the first argument if it is a GLOB or an
# IO::Scalar, otherwise $t->{twig_output_fh}, the currently selected
# filehandle or STDOUT (the first true one)
# the remaining arguments are passed to the prolog method
sub print_prolog
  { my $t= shift;

    my $fh;
    if( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar'))
      { $fh= shift; }
    else
      { $fh= $t->{twig_output_fh} || select() || \*STDOUT; }

    ## no critic (TestingAndDebugging::ProhibitNoStrict);
    no strict 'refs';
    print {$fh} $t->prolog( @_);
  }
3232
|
|
|
|
|
|
|
|
3233
|
|
|
|
|
|
|
# returns the text of the prolog:
#  - '' if the no_prolog field of the twig is true
#  - the XML declaration alone if the no_dtd_output field is defined
#  - the XML declaration followed by the doctype otherwise (the arguments
#    are passed to the doctype method)
sub prolog
  { my $t= shift;
    return '' if( $t->{no_prolog});

    my $prolog= $t->xmldecl;
    unless( defined $t->{no_dtd_output}) { $prolog= $prolog . $t->doctype( @_); }
    return $prolog;
  }
3241
|
|
|
|
|
|
|
|
3242
|
|
|
|
|
|
|
# returns the text of the doctype declaration, '' if there is nothing to
# output
# the declaration is built from the doctype data of the twig (name, pub,
# sysid and internal fields)
# with the UpdateDTD option (as returned by _normalize_args) the entity and
# notation declarations of the internal subset are replaced by the text of
# the current entity and notation lists
# a non-empty result goes through the XML::Twig::Elt output filter if there
# is one
sub doctype
  { my $t= shift;
    my %args= _normalize_args( @_);
    my $update_dtd = $args{UpdateDTD} || '';
    my $doctype_text='';

    my $doctype= $t->{twig_doctype};

    if( $doctype)
      { $doctype_text .= qq{<!DOCTYPE $doctype->{name}} if( $doctype->{name});
        $doctype_text .= qq{ PUBLIC "$doctype->{pub}"}  if( $doctype->{pub});
        $doctype_text .= qq{ SYSTEM}                    if( $doctype->{sysid} && !$doctype->{pub});
        $doctype_text .= qq{ "$doctype->{sysid}"}       if( $doctype->{sysid});
      }

    if( $update_dtd)
      { if( $doctype)
          { my $internal=$doctype->{internal};
            # awful hack, but at least it works a little better than what was there before
            if( $internal)
              { # remove entity and notation declarations (they will be re-generated from the updated entity list)
                # only entities declared with a quoted value or a SYSTEM id are removed
                $internal=~ s{<! \s* ENTITY \s+ [^\s>]+ \s+ ( ("[^"]*"|'[^']*') \s* | SYSTEM [^>]*) >\s*}{}xg;
                $internal=~ s{<! \s* NOTATION .*? >\s*}{}sxg;
                $internal=~ s{^\n}{};
              }
            $internal .= $t->entity_list->text ||'' if( $t->entity_list);
            $internal .= $t->notation_list->text ||'' if( $t->notation_list);
            if( $internal) { $doctype_text .= "[\n$internal]>\n"; }
          }
        elsif( !$t->{'twig_dtd'} && ( keys %{$t->entity_list} || keys %{$t->notation_list} ) )
          { # no doctype, no dtd, but entities or notations: the doctype is
            # created, named after the root
            $doctype_text .= "<!DOCTYPE " . $t->root->gi . " [\n" . $t->entity_list->text . $t->notation_list->text . "\n]>";
          }
        else
          { # NOTE(review): the content of the twig_dtd field is used as the
            # start of the text here, which looks odd if it is a hash
            # reference - to be confirmed
            $doctype_text= $t->{twig_dtd};
            $doctype_text .= $t->dtd_text;
          }
      }
    elsif( $doctype)
      { if( my $internal= $doctype->{internal})
          { # add opening and closing brackets if not already there
            # plus some spaces and newlines for a nice formatting
            # I test it here because I can't remember which version of
            # XML::Parser need it or not, nor guess which one will in the
            # future, so this about the best I can do
            $internal=~ s{^\s*(\[\s*)?}{ [\n};
            $internal=~ s{\s*(\]\s*(>\s*)?)?\s*$}{\n]>\n};

            # XML::Parser does not include the NOTATION declarations in the DTD
            # at least in the current version. So put them back
            #if( $t->notation_list && $internal !~ m{<!\s*NOTATION})
            #  { $internal=~ s{(\n]>\n)$}{ "\n" . $t->notation_list->text . $1}es; }

            $doctype_text .= $internal;
          }
      }

    if( $doctype_text)
      {
        # terrible hack, as I can't figure out in which case the darn prolog
        # should get an extra > (depends on XML::Parser and expat versions)
        $doctype_text=~ s/(>\s*)*$/>\n/; # if($doctype_text);

        my $output_filter= XML::Twig::Elt::output_filter();
        return $output_filter ? $output_filter->( $doctype_text) : $doctype_text;
      }
    else
      { return $doctype_text; }
  }
3309
|
|
|
|
|
|
|
|
3310
|
|
|
|
|
|
|
# returns '' if the leading_cpi field of the twig is false, otherwise the
# result of calling sprint( 1) on it
sub _leading_cpi
  { my( $t)= @_;
    my $leading_cpi= $t->{leading_cpi};
    return '' unless( $leading_cpi);
    return $leading_cpi->sprint( 1);
  }
3315
|
|
|
|
|
|
|
|
3316
|
|
|
|
|
|
|
# returns '' if the trailing_cpi field of the twig is false, otherwise the
# result of calling sprint( 1) on it
sub _trailing_cpi
  { my( $t)= @_;
    my $trailing_cpi= $t->{trailing_cpi};
    return '' unless( $trailing_cpi);
    return $trailing_cpi->sprint( 1);
  }
3321
|
|
|
|
|
|
|
|
3322
|
|
|
|
|
|
|
# returns the trailing_cpi_text field of the twig, or '' if it is false
sub _trailing_cpi_text
  { my( $t)= @_;
    my $text= $t->{trailing_cpi_text};
    return $text ? $text : '';
  }
3326
|
|
|
|
|
|
|
|
3327
|
|
|
|
|
|
|
# prints the twig to a file
#   $filename - name of the file to create
#   the remaining arguments are passed to print
# the file is opened in '>:utf8' mode, except when the twig keeps the
# original encoding and _use_perlio() is false, then '>' is used
# croaks if the file cannot be created, or if closing it fails (errors on
# buffered writes are only reported when the file is closed)
# returns the twig
sub print_to_file
  { my( $t, $filename)= (shift, shift);
    my $out_fh;
#    open( $out_fh, ">$filename") or _croak( "cannot create file $filename: $!");     # < perl 5.8
    my $mode= $t->{twig_keep_encoding} && ! _use_perlio() ? '>' : '>:utf8';           # >= perl 5.8
    open( $out_fh, $mode, $filename) or _croak( "cannot create file $filename: $!");  # >= perl 5.8
    $t->print( $out_fh, @_);
    close( $out_fh) or _croak( "cannot write to file $filename: $!");
    return $t;
  }
3337
|
|
|
|
|
|
|
|
3338
|
|
|
|
|
|
|
# probably only works on *nix (at least the chmod bit) |
3339
|
|
|
|
|
|
|
# first print to a temporary file, then rename that file to the desired file name, then change permissions |
3340
|
|
|
|
|
|
|
# to the original file permissions (or to the current umask) |
3341
|
|
|
|
|
|
|
# prints the twig to a temporary file created in the directory of
# $filename, then renames that file to $filename and sets its permissions
#   $filename - name of the final file
#   the remaining arguments are passed to print_to_file
# the permissions are those of the existing file if there is one, otherwise
# the default ones for a new file (0666 restricted by the current umask)
# croaks if File::Temp is not available or if the temporary file cannot be
# renamed (it is then removed)
# returns the twig
sub safe_print_to_file
  { my( $t, $filename)= (shift, shift);
    my $perm= -f $filename ? (stat $filename)[2] & 07777 : 0666 & ~umask();
    XML::Twig::_use( 'File::Temp') || croak "need File::Temp to use safe_print_to_file\n";
    my $tmpdir= dirname( $filename);
    my( $fh, $tmpfilename) = File::Temp::tempfile( DIR => $tmpdir);
    close $fh; # only the name is needed, print_to_file opens the file itself
    $t->print_to_file( $tmpfilename, @_);
    unless( rename( $tmpfilename, $filename))
      { my $error= $!;      # saved before unlink gets a chance to change it
        unlink $tmpfilename;
        _croak( "cannot move temporary file to $filename: $error");
      }
    chmod $perm, $filename;
    return $t;
  }
3352
|
|
|
|
|
|
|
|
3353
|
|
|
|
|
|
|
|
3354
|
|
|
|
|
|
|
# prints the whole document: prolog, leading comments and pi's, root, then
# trailing comments, pi's and (when spaces are kept) trailing spaces
#   the first argument can be a filehandle (a GLOB or an IO::Scalar), which
#   is selected during the call
#   the PrettyPrint and EmptyTags options (as returned by _normalize_args)
#   are set during the call, then the values returned by the setters are
#   restored when they are defined
# returns the twig
sub print
  { my $t= shift;
    my $fh= ( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar')) ? shift : undef;
    my %args= _normalize_args( @_);

    my( $old_select, $old_pretty, $old_empty_tag);
    if( defined $fh)                { $old_select    = select( $fh); }
    if( defined $args{PrettyPrint}) { $old_pretty    = $t->set_pretty_print( $args{PrettyPrint}); }
    if( defined $args{EmptyTags})   { $old_empty_tag = $t->set_empty_tag_style( $args{EmptyTags}); }

    #if( !$t->{encoding} || lc( $t->{encoding}) eq 'utf-8') { my $out= $fh || \*STDOUT; binmode $out, ':utf8'; }

    if( $perl_version > 5.006 && ! $t->{twig_keep_encoding} && _use_perlio() ) { binmode( $fh || \*STDOUT, ":utf8" ); }

    my $before_root= $t->prolog( %args) . $t->_leading_cpi( %args);
    print $before_root;

    $t->{twig_root}->print;

    my $after_root= $t->_trailing_cpi        # trailing comments and pi's (elements, in 'process' mode)
                  . $t->_trailing_cpi_text   # trailing comments and pi's (in 'keep' mode)
                  . ( ($t->{twig_keep_spaces}||'') && ($t->{trailing_spaces} || ''))
                  ;
    print $after_root;

    if( defined $old_pretty)    { $t->set_pretty_print( $old_pretty); }
    if( defined $old_empty_tag) { $t->set_empty_tag_style( $old_empty_tag); }
    if( $fh) { select $old_select; }

    return $t;
  }
3381
|
|
|
|
|
|
|
|
3382
|
|
|
|
|
|
|
|
3383
|
|
|
|
|
|
|
# outputs the part of the tree that has been built, then deletes it
#   optional first arguments: a filehandle (a GLOB or an IO::Scalar), which
#   is selected during the call, then a reference, the element to flush up to
#   the PrettyPrint and EmptyTags options (as returned by _normalize_args)
#   are set during the call
# the elements that include the last element to flush only get their start
# tag output and are marked as flushed, the other ones are output through
# their _flush method and deleted
# returns the twig, or nothing if the twig was already completely flushed
sub flush
  { my $t= shift;

    $t->_trigger_tdh if $t->{twig_tdh};

    return if( $t->{twig_completely_flushed});

    my $fh= ( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar')) ? shift : undef;
    my $old_select;
    if( defined $fh) { $old_select= select( $fh); }
    my $up_to= ref( $_[0]) ? shift : undef;
    my %args= _normalize_args( @_);

    my( $old_pretty, $old_empty_tag_style);
    if( defined $args{PrettyPrint})
      { $old_pretty= $t->set_pretty_print( delete $args{PrettyPrint}); }
    if( $args{EmptyTags})
      { $old_empty_tag_style= $t->set_empty_tag_style( delete $args{EmptyTags}); }

    # the "real" last element processed, as _twig_end has closed it
    my $last_elt;
    my $flush_trailing_data=0;
    if( $up_to)
      { $last_elt= $up_to; }
    elsif( $t->{twig_current})
      { $last_elt= $t->{twig_current}->{last_child}; }
    else
      { # no current element: the whole document is flushed
        $last_elt= $t->{twig_root};
        $flush_trailing_data=1;
        $t->{twig_completely_flushed}=1;
      }

    # flush the DTD unless it has already been flushed (ie root has been flushed)
    my $node= $t->{twig_root};
    unless( $node->{'flushed'})
      { # store flush info so we can auto-flush later
        if( $t->{twig_autoflush})
          { my %autoflush_data;
            if( $fh) { $autoflush_data{fh}  = $fh; }
            if( @_)  { $autoflush_data{args}= \@_; }
            $t->{twig_autoflush_data}= \%autoflush_data;
          }
        $t->print_prolog( %args);
        print $t->_leading_cpi;
      }

    while( $node)
      { my $next_node;
        if( $last_elt && $last_elt->in( $node))
          { # an ancestor of the last element: just output the front tag
            unless( $node->{'flushed'})
              { print $node->start_tag();
                $node->{'flushed'}=1;
              }
            $next_node= $node->{first_child};
          }
        else
          { # an element before the last one or the last one,
            $next_node= $node->{next_sibling};
            $node->_flush();
            $node->delete;
            last if( $last_elt && ($node == $last_elt));
          }
        $node= $next_node;
      }

    if( $flush_trailing_data)
      { print $t->_trailing_cpi        # trailing comments and pi's (elements, in 'process' mode)
            , $t->_trailing_cpi_text   # trailing comments and pi's (in 'keep' mode)
      }

    if( defined $old_select)          { select $old_select; }
    if( defined $old_pretty)          { $t->set_pretty_print( $old_pretty); }
    if( defined $old_empty_tag_style) { $t->set_empty_tag_style( $old_empty_tag_style); }

    # remove from the id list the ids that no longer have an element
    if( my $ids= $t->{twig_id_list})
      { foreach my $id (keys %$ids)
          { delete $t->{twig_id_list}->{$id} unless( defined $ids->{$id}); }
      }

    return $t;
  }
3473
|
|
|
|
|
|
|
|
3474
|
|
|
|
|
|
|
|
3475
|
|
|
|
|
|
|
# flushes up to an element |
3476
|
|
|
|
|
|
|
# this method just reorders the arguments and calls flush |
3477
|
|
|
|
|
|
|
# calls flush with the element to flush up to
#   $up_to is the first argument, it can be followed by a filehandle (a GLOB
#   or an IO::Scalar), which is moved in front of it in the call to flush
#   the remaining arguments are passed as is
# returns the twig
sub flush_up_to
  { my( $t, $up_to)= ( shift, shift);

    # flush wants the filehandle first
    my @fh= ( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar')) ? ( shift) : ();
    $t->flush( @fh, $up_to, @_);

    return $t;
  }
3489
|
|
|
|
|
|
|
|
3490
|
|
|
|
|
|
|
|
3491
|
|
|
|
|
|
|
# same as print except the entire document text is returned as a string |
3492
|
|
|
|
|
|
|
# returns the text of the whole document: prolog, leading comments and pi's,
# root (if there is one), trailing comments and pi's, and the trailing
# spaces when spaces are kept
#   the PrettyPrint and EmptyTags options (as returned by _normalize_args)
#   are set during the call, then the values returned by the setters are
#   restored when they are defined
sub sprint
  { my $t= shift;
    my %args= _normalize_args( @_);

    my( $old_pretty, $old_empty_tag_style);
    if( defined $args{PrettyPrint})
      { $old_pretty= $t->set_pretty_print( delete $args{PrettyPrint}); }
    if( defined $args{EmptyTags})
      { $old_empty_tag_style= $t->set_empty_tag_style( delete $args{EmptyTags}); }

    my $string= $t->prolog( %args);                                 # xml declaration and doctype
    $string  .= $t->_leading_cpi( %args);                           # leading comments and pi's in 'process' mode
    $string  .= ( $t->{twig_root} && $t->{twig_root}->sprint) || '';
    $string  .= $t->_trailing_cpi;                                  # trailing comments and pi's (elements, in 'process' mode)
    $string  .= $t->_trailing_cpi_text;                             # trailing comments and pi's (in 'keep' mode)

    if( $t->{twig_keep_spaces} && $t->{trailing_spaces}) { $string .= $t->{trailing_spaces}; }

    if( defined $old_pretty)          { $t->set_pretty_print( $old_pretty); }
    if( defined $old_empty_tag_style) { $t->set_empty_tag_style( $old_empty_tag_style); }

    return $string;
  }
3521
|
|
|
|
|
|
|
|
3522
|
|
|
|
|
|
|
|
3523
|
|
|
|
|
|
|
# this method discards useless elements in a tree |
3524
|
|
|
|
|
|
|
# it does the same thing as a flush except it does not print it |
3525
|
|
|
|
|
|
|
# the second argument is an element, the last purged element |
3526
|
|
|
|
|
|
|
# (this argument is usually set through the purge_up_to method) |
3527
|
|
|
|
|
|
|
# deletes the part of the tree that has been built, without printing it
#   $up_to (optional) - the last element to purge
# the elements that include the last element to purge are only marked as
# flushed, the other ones are deleted
# returns the twig
sub purge
  { my( $t, $up_to)= @_;

    $t->_trigger_tdh if $t->{twig_tdh};

    # the "real" last element processed, as _twig_end has closed it
    my $last_elt= $up_to              ? $up_to
                : $t->{twig_current}  ? $t->{twig_current}->{last_child}
                :                       $t->{twig_root}
                ;

    my $node= $t->{twig_root};

    while( $node)
      { my $next_node;
        if( $last_elt && $last_elt->in( $node))
          { # an ancestor of the last element, it stays in the tree
            $node->{'flushed'}=1;
            $next_node= $node->{first_child};
          }
        else
          { # an element before the last one or the last one,
            $next_node= $node->{next_sibling};
            $node->delete;
            last if( $last_elt && ($node == $last_elt) );
          }
        $node= $next_node;
      }

    # remove from the id list the ids that no longer have an element
    if( my $ids= $t->{twig_id_list})
      { foreach my $id (keys %$ids)
          { delete $t->{twig_id_list}->{$id} unless( defined $ids->{$id}); }
      }

    return $t;
  }
3564
|
|
|
|
|
|
|
|
3565
|
|
|
|
|
|
|
# purges up to an element. This method just calls purge |
3566
|
|
|
|
|
|
|
# passes all its arguments to purge and returns its result
sub purge_up_to
  { my( $t, @purge_args)= @_;
    return $t->purge( @purge_args);
  }
3570
|
|
|
|
|
|
|
|
3571
|
|
|
|
|
|
|
# returns the content of the twig_root field of the twig
sub root
  { my $t= shift;
    return $t->{twig_root};
  }
3573
|
|
|
|
|
|
|
|
3574
|
|
|
|
|
|
|
# returns the result of calling normalize on the root of the twig
sub normalize
  { my $t= shift;
    my $root= $t->root;
    return $root->normalize;
  }
3576
|
|
|
|
|
|
|
|
3577
|
|
|
|
|
|
|
|
3578
|
|
|
|
|
|
|
# create accessor methods on attribute names |
3579
|
|
|
|
|
|
|
{ my %accessor; # names of the accessors already created here, so creating them again is not an error

  # creates, for each name in the arguments, a method that returns the
  # attribute of that name of an element, after setting it if the method
  # gets an argument (the method is an lvalue sub)
  # the methods are created in the twig_elt_class of the twig when called
  # on a twig, in XML::Twig::Elt when called on a class
  # croaks if a method of that name exists and was not created here
  # returns the twig or class it was called on
  sub att_accessors
    {
      my $twig_or_class= shift;
      my $elt_class= 'XML::Twig::Elt';
      if( ref $twig_or_class) { $elt_class= $twig_or_class->{twig_elt_class}; }

      ## no critic (TestingAndDebugging::ProhibitNoStrict);
      no strict 'refs';
      foreach my $att (@_)
        { next if( $accessor{$att}); # created by a previous call

          if( $elt_class->can( $att))
            { _croak( "attempt to redefine existing method $att using att_accessors"); }

          *{"$elt_class\::$att"}=
              sub
                  :lvalue                                  # > perl 5.5
                { my $elt= shift;
                  if( @_) { $elt->{att}->{$att}= $_[0]; }
                  $elt->{att}->{$att};
                };
          $accessor{$att}=1;
        }
      return $twig_or_class;
    }
}
3606
|
|
|
|
|
|
|
|
3607
|
|
|
|
|
|
|
{ my %accessor; # names of the accessors already created here, so creating them again is not an error

  # creates methods that return the children of an element in list context
  # (children method), its first child in scalar context (first_child method)
  # the argument is either a hash ref, in which case each key is the name
  # of a method and its value the expression passed to children or
  # first_child, or a list of names, each used both as method name and
  # as expression
  # the methods are created in the twig_elt_class of the twig when called
  # on a twig, in XML::Twig::Elt when called on a class
  # croaks if a method of that name exists and was not created here
  # returns the twig or class it was called on
  sub elt_accessors
    {
      my $twig_or_class= shift;
      my $elt_class= 'XML::Twig::Elt';
      if( ref $twig_or_class) { $elt_class= $twig_or_class->{twig_elt_class}; }

      my %exp_to_alias= ref( $_[0]) && isa( $_[0], 'HASH') ? %{$_[0]}
                                                            : map { $_ => $_ } @_;

      ## no critic (TestingAndDebugging::ProhibitNoStrict);
      no strict 'refs';
      foreach my $alias (keys %exp_to_alias)
        { next if( $accessor{$alias}); # created by a previous call

          if( $elt_class->can( $alias))
            { _croak( "attempt to redefine existing method $alias using elt_accessors"); }

          my $exp= $exp_to_alias{$alias};
          *{"$elt_class\::$alias"}=
              sub
                { my $elt= shift;
                  return wantarray ? $elt->children( $exp) : $elt->first_child( $exp);
                };
          $accessor{$alias}=1;
        }
      return $twig_or_class;
    }
}
3636
|
|
|
|
|
|
|
|
3637
|
|
|
|
|
|
|
{ my %accessor; # memorize accessor names so re-creating them won't trigger an error

  # creates methods that return the result of calling field on an element
  # the argument is either a hash ref, in which case each key is the name
  # of a method and its value the expression passed to field, or a list of
  # names, each used both as method name and as expression
  # the methods are created in the twig_elt_class of the twig when called
  # on a twig, in XML::Twig::Elt when called on a class
  # croaks if a method of that name exists and was not created here
  # returns the twig or class it was called on
  sub field_accessors
    {
      my $twig_or_class= shift;
      my $elt_class= ref $twig_or_class ? $twig_or_class->{twig_elt_class}
                                        : 'XML::Twig::Elt'
                                        ;
      my %exp_to_alias= ref( $_[0]) && isa( $_[0], 'HASH') ? %{$_[0]}
                                                            : map { $_ => $_ } @_;

      ## no critic (TestingAndDebugging::ProhibitNoStrict);
      no strict 'refs';
      while( my( $alias, $exp)= each %exp_to_alias )
        { # the method checked and created is $alias, so that's the name
          # that goes in the message (as in elt_accessors)
          if( $elt_class->can( $alias) && !$accessor{$alias})
            { _croak( "attempt to redefine existing method $alias using field_accessors"); }
          if( !$accessor{$alias})
            { *{"$elt_class\::$alias"}=
                  sub
                    { my $elt= shift;
                      $elt->field( $exp)
                    };
              $accessor{$alias}=1;
            }
        }
      return $twig_or_class;
    }
}
3664
|
|
|
|
|
|
|
|
3665
|
|
|
|
|
|
|
# first_elt( $cond)
# Returns the root if it passes $cond, otherwise whatever the root's
# next_elt( $cond) returns. Returns undef when the twig has no root.
sub first_elt {
    my ($t, $cond) = @_;

    my $root = $t->root;
    return undef unless $root;    # explicit undef, as callers have always received

    return $root->passes( $cond) ? $root : $root->next_elt( $cond);
}
3671
|
|
|
|
|
|
|
|
3672
|
|
|
|
|
|
|
# last_elt( $cond)
# Returns the result of the root's last_descendant( $cond), or undef when the
# twig has no root.
sub last_elt {
    my ($t, $cond) = @_;

    my $root = $t->root;
    return undef unless $root;

    return $root->last_descendant( $cond);
}
3677
|
|
|
|
|
|
|
|
3678
|
|
|
|
|
|
|
# next_n_elt( $offset, $cond)
# Delegates to the root's next_n_elt. When the root itself matches $cond the
# offset is first reduced by one, so the root counts as one of the matches.
sub next_n_elt {
    my ($t, $offset, $cond) = @_;

    if ($t->root->matches( $cond)) {
        $offset--;
    }

    return $t->root->next_n_elt( $offset, $cond);
}
3683
|
|
|
|
|
|
|
|
3684
|
|
|
|
|
|
|
# get_xpath( [$elt_array_ref,] @xpath_args)
# Without a leading array ref, runs get_xpath on the root. With one, runs
# get_xpath on every element of the array and returns the combined results
# sorted and de-duplicated by _unique_elts.
sub get_xpath {
    my $twig = shift;

    unless (isa( $_[0], 'ARRAY')) {
        return $twig->root->get_xpath( @_);
    }

    my $start_elts = shift;
    return _unique_elts( map { $_->get_xpath( @_) } @{$start_elts});
}
3693
|
|
|
|
|
|
|
|
3694
|
|
|
|
|
|
|
# Takes a list of elements and returns them sorted with the elements' own
# cmp method, adjacent duplicates (same reference) removed.
sub _unique_elts {
    my @unique;
    foreach my $elt (sort { $a->cmp( $b) } @_) {
        next if @unique && ($unique[-1] == $elt);
        push @unique, $elt;
    }
    return @unique;
}
3702
|
|
|
|
|
|
|
|
3703
|
|
|
|
|
|
|
# findvalue( [$elt_array_ref,] @xpath_args)
# Without a leading array ref, returns the root's findvalue. With one,
# returns the concatenation (no separator) of findvalue called on each
# element of the array.
sub findvalue {
    my $twig = shift;

    unless (isa( $_[0], 'ARRAY')) {
        return $twig->root->findvalue( @_);
    }

    my $start_elts = shift;
    return join( '', map { $_->findvalue( @_) } @{$start_elts});
}
3712
|
|
|
|
|
|
|
|
3713
|
|
|
|
|
|
|
# findvalues( [$elt_array_ref,] @xpath_args)
# Without a leading array ref, returns the root's findvalues. With one,
# returns the list of findvalues results for each element of the array, in
# array order.
sub findvalues {
    my $twig = shift;

    unless (isa( $_[0], 'ARRAY')) {
        return $twig->root->findvalues( @_);
    }

    my $start_elts = shift;
    return map { $_->findvalues( @_) } @{$start_elts};
}
3722
|
|
|
|
|
|
|
|
3723
|
|
|
|
|
|
|
# Passes its arguments to the XML::Twig::Elt->set_id_seed class method and
# returns the twig.
sub set_id_seed {
    my $t = shift;

    XML::Twig::Elt->set_id_seed( @_);

    return $t;
}
3728
|
|
|
|
|
|
|
|
3729
|
|
|
|
|
|
|
# index( $name)         returns the array ref stored for index $name, or undef
# index( $name, $index) returns element $index of that array, or undef
#
# This is a pure lookup: asking for an element of an index that does not
# exist returns undef without creating an empty index as a side effect (a
# plain nested dereference would autovivify $twig->{_twig_index}{$name}, and
# a later index( $name) would then return an empty array ref instead of
# undef).
sub index {
    my ($twig, $name, $index) = @_;

    my $all_indexes = $twig->{_twig_index};
    my $elts        = $all_indexes ? $all_indexes->{$name} : undef;

    return $elts unless defined $index;
    return $elts ? $elts->[$index] : undef;
}
3734
|
|
|
|
|
|
|
|
3735
|
|
|
|
|
|
|
# Returns a one-element list holding the root. If a (true) condition is given
# and the root does not pass it, returns the empty list instead.
sub children {
    my ($t, $cond) = @_;
    my $root = $t->root;

    if ($cond && !$root->passes( $cond)) {
        return ();
    }
    return ($root);
}
3745
|
|
|
|
|
|
|
|
3746
|
|
|
|
|
|
|
# Returns the root, with no condition check.
sub _children {
    return ($_[0]->root);
}
3747
|
|
|
|
|
|
|
|
3748
|
|
|
|
|
|
|
# child( $nb, @cond)
# Odd at the twig level, but provided for completeness: it is what resolves
# (nonsensical) XPath queries such as /doc[1]. Returns item number $nb of the
# list produced by $t->children( @cond).
sub child {
    my ($t, $nb) = (shift, shift);

    # kept as a list slice so the out-of-range result is whatever this perl
    # gives for a list slice
    return ($t->children( @_))[$nb];
}
3755
|
|
|
|
|
|
|
|
3756
|
|
|
|
|
|
|
# descendants( $cond)
# Returns the root's descendants( $cond), preceded by the root itself when
# the root passes $cond.
sub descendants {
    my ($t, $cond) = @_;
    my $root = $t->root;

    return $root->passes( $cond)
         ? ($root, $root->descendants( $cond))
         : ($root->descendants( $cond));
}
3764
|
|
|
|
|
|
|
|
3765
|
|
|
|
|
|
|
# Calls simplify on the root with the remaining arguments and returns its
# result.
sub simplify {
    my $t = shift;
    return $t->root->simplify( @_);
}
3766
|
|
|
|
|
|
|
# Calls subs_text on the root with the remaining arguments and returns its
# result.
sub subs_text {
    my $t = shift;
    return $t->root->subs_text( @_);
}
3767
|
|
|
|
|
|
|
# Calls trim on the root with the remaining arguments and returns its result.
sub trim {
    my $t = shift;
    return $t->root->trim( @_);
}
3768
|
|
|
|
|
|
|
|
3769
|
|
|
|
|
|
|
|
3770
|
|
|
|
|
|
|
# set_keep_encoding( $keep)
# Records $keep in both the twig_keep_encoding and NoExpand slots of the
# twig, then hands $keep to XML::Twig::Elt::set_keep_encoding and returns
# that function's result.
sub set_keep_encoding {
    my ($t, $keep) = @_;

    for my $slot (qw( twig_keep_encoding NoExpand)) {
        $t->{$slot} = $keep;
    }

    return XML::Twig::Elt::set_keep_encoding( $keep);
}
3776
|
|
|
|
|
|
|
|
3777
|
|
|
|
|
|
|
# Forwards the whole argument list (invocant included) to
# XML::Twig::Elt::set_expand_external_entities and returns its result.
sub set_expand_external_entities {
    return XML::Twig::Elt::set_expand_external_entities( @_);
}
3779
|
|
|
|
|
|
|
|
3780
|
|
|
|
|
|
|
# Sets the twig's twig_escape_gt flag to 1, then calls
# XML::Twig::Elt::escape_gt with the remaining arguments and returns its
# result.
sub escape_gt {
    my $t = shift;

    $t->{twig_escape_gt} = 1;

    return XML::Twig::Elt::escape_gt( @_);
}
3782
|
|
|
|
|
|
|
|
3783
|
|
|
|
|
|
|
# Sets the twig's twig_escape_gt flag to 0, then calls
# XML::Twig::Elt::do_not_escape_gt with the remaining arguments and returns
# its result.
sub do_not_escape_gt {
    my $t = shift;

    $t->{twig_escape_gt} = 0;

    return XML::Twig::Elt::do_not_escape_gt( @_);
}
3785
|
|
|
|
|
|
|
|
3786
|
|
|
|
|
|
|
# elt_id( $id)
# Returns the entry stored under $id in the twig's twig_id_list table.
sub elt_id {
    my ($t, $id) = @_;
    return $t->{twig_id_list}->{$id};
}
3788
|
|
|
|
|
|
|
|
3789
|
|
|
|
|
|
|
# change_gi( $old_gi, $new_gi)
# Renames a tag in the package-level tables %XML::Twig::gi2index and
# @XML::Twig::index2gi, so for now the change applies to ALL twigs.
# Returns nothing when $old_gi has no (true) index, the twig otherwise.
sub change_gi {
    my ($twig, $old_gi, $new_gi) = @_;

    my $index = $XML::Twig::gi2index{$old_gi};
    return unless $index;

    delete $XML::Twig::gi2index{$old_gi};
    $XML::Twig::gi2index{$new_gi} = $index;
    $XML::Twig::index2gi[$index]  = $new_gi;

    return $twig;
}
3799
|
|
|
|
|
|
|
|
3800
|
|
|
|
|
|
|
|
3801
|
|
|
|
|
|
|
# Builds the text of the DTD from the stored (possibly updated) data.
#
# Returns '' when the twig has no twig_doctype. Otherwise returns a DOCTYPE
# declaration with an internal subset holding, for each tag in
# $dtd->{elt_list}: its ELEMENT declaration (model from $dtd->{model}) and,
# if it has attributes in $dtd->{att}, an ATTLIST declaration with the
# attributes sorted by name; then the text of the entity list, if any.
#
# NOTE(review): the '<!DOCTYPE ', '<!ELEMENT $gi ' and '<!ATTLIST $gi'
# literals were truncated in the copy this block was restored from and have
# been rebuilt from the surrounding code and DTD syntax - confirm against the
# upstream source (including the amount of leading whitespace on attribute
# lines).
sub dtd_text {
    my $t       = shift;
    my $dtd     = $t->{twig_dtd};
    my $doctype = $t->{twig_doctype} or return '';

    my $string = "<!DOCTYPE " . $doctype->{name};
    $string .= " [\n";

    foreach my $gi (@{$dtd->{elt_list}}) {
        $string .= "<!ELEMENT $gi " . $dtd->{model}->{$gi} . ">\n";

        my $attlist = $dtd->{att}->{$gi};
        next unless $attlist;

        $string .= "<!ATTLIST $gi\n";
        foreach my $att (sort keys %{$attlist}) {
            if ($attlist->{$att}->{fixed}) {
                $string .= " $att $attlist->{$att}->{type} #FIXED $attlist->{$att}->{default}";
            }
            else {
                $string .= " $att $attlist->{$att}->{type} $attlist->{$att}->{default}";
            }
            $string .= "\n";
        }
        $string .= ">\n";
    }

    $string .= $t->entity_list->text if( $t->entity_list);
    $string .= "\n]>\n";
    return $string;
}
3830
|
|
|
|
|
|
|
|
3831
|
|
|
|
|
|
|
# dtd_print( [$fh])
# Prints the text returned by dtd_text, to the filehandle given as first
# argument when that argument is a GLOB or an IO::Scalar, otherwise to the
# currently selected handle. Returns the twig.
sub dtd_print {
    my $t = shift;

    my $fh;
    if (isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar')) {
        $fh = shift;
    }

    if ($fh) {
        print {$fh} $t->dtd_text;
    }
    else {
        print $t->dtd_text;
    }

    return $t;
}
3839
|
|
|
|
|
|
|
|
3840
|
|
|
|
|
|
|
# Builds, at compile time, the subs that call expat directly: one method per
# name below, each forwarding its arguments to the method of the same name on
# $t->{twig_parser}. The generated methods croak when $t->{twig_parsing} is
# not set.
BEGIN {
    my @expat_methods = qw(
        depth in_element within_element context
        current_line current_column current_byte
        recognized_string original_string
        xpcroak xpcarp
        base current_element element_index
        xml_escape
        position_in_context
    );

    for my $method (@expat_methods) {
        ## no critic (TestingAndDebugging::ProhibitNoStrict);
        no strict 'refs';
        *{$method} = sub {
            my $t = shift;
            unless ($t->{twig_parsing}) {
                _croak( "calling $method after parsing is finished");
            }
            return $t->{twig_parser}->$method(@_);
        };
    }
}
3859
|
|
|
|
|
|
|
|
3860
|
|
|
|
|
|
|
# path( $gi)
# Returns "/" followed by the parser's current context and $gi, joined with
# "/". When twig_map_xmlns is set, every step first goes through
# _replace_prefix.
sub path {
    my ($t, $gi) = @_;

    my @steps = ($t->{twig_parser}->context, $gi);
    if ($t->{twig_map_xmlns}) {
        @steps = map { $t->_replace_prefix( $_) } @steps;
    }

    return "/" . join( "/", @steps);
}
3867
|
|
|
|
|
|
|
|
3868
|
|
|
|
|
|
|
# Calls finish on the underlying parser and returns its result.
sub finish {
    my ($t) = @_;
    return $t->{twig_parser}->finish;
}
3872
|
|
|
|
|
|
|
|
3873
|
|
|
|
|
|
|
# finish_print( [$fh])
# Finishes the parse by printing the rest of the document rather than
# building it.
#
# With a defined $fh, that handle is select-ed and the previously selected
# handle, if true, is remembered in twig_original_selected_fh; without one,
# _set_fh_to_twig_output_fh is called. The parser handlers are then replaced
# by %twig_handlers_finish_print when twig_keep_encoding is set and by
# %twig_handlers_finish_print_original otherwise. Returns the twig.
sub finish_print {
    my ($t, $fh) = @_;

    if (defined $fh) {
        my $old_fh = select $fh;
        $t->{twig_original_selected_fh} = $old_fh if( $old_fh);
    }
    else {
        $t->_set_fh_to_twig_output_fh();
    }

    my $p = $t->{twig_parser};
    if ($t->{twig_keep_encoding}) {
        $p->setHandlers( %twig_handlers_finish_print);
    }
    else {
        $p->setHandlers( %twig_handlers_finish_print_original);
    }

    return $t;
}
3891
|
|
|
|
|
|
|
|
3892
|
|
|
|
|
|
|
# Forwards the whole argument list (invocant included) to
# XML::Twig::Elt::set_remove_cdata and returns its result.
sub set_remove_cdata {
    return XML::Twig::Elt::set_remove_cdata( @_);
}
3893
|
|
|
|
|
|
|
|
3894
|
|
|
|
|
|
|
# Forwards the whole argument list (invocant included) to
# XML::Twig::Elt::output_filter and returns its result.
sub output_filter {
    return XML::Twig::Elt::output_filter( @_);
}
3895
|
|
|
|
|
|
|
# Forwards the whole argument list (invocant included) to
# XML::Twig::Elt::set_output_filter and returns its result.
sub set_output_filter {
    return XML::Twig::Elt::set_output_filter( @_);
}
3896
|
|
|
|
|
|
|
|
3897
|
|
|
|
|
|
|
# Forwards the whole argument list (invocant included) to
# XML::Twig::Elt::output_text_filter and returns its result.
sub output_text_filter {
    return XML::Twig::Elt::output_text_filter( @_);
}
3898
|
|
|
|
|
|
|
# Forwards the whole argument list (invocant included) to
# XML::Twig::Elt::set_output_text_filter and returns its result.
sub set_output_text_filter {
    return XML::Twig::Elt::set_output_text_filter( @_);
}
3899
|
|
|
|
|
|
|
|
3900
|
|
|
|
|
|
|
# set_input_filter( $input_filter)
# Installs a new input filter and returns the previous one. Accepted values:
#   - a false value or a code ref, stored as is
#   - the string 'latin1', replaced by the result of latin1()
#   - a key of %filter, replaced by the matching entry
# Anything else croaks with "invalid input filter".
sub set_input_filter {
    my ($t, $input_filter) = @_;
    my $previous = $t->{twig_input_filter};

    if (!$input_filter || isa( $input_filter, 'CODE')) {
        $t->{twig_input_filter} = $input_filter;
    }
    elsif ($input_filter eq 'latin1') {
        $t->{twig_input_filter} = latin1();
    }
    elsif ($filter{$input_filter}) {
        $t->{twig_input_filter} = $filter{$input_filter};
    }
    else {
        _croak( "invalid input filter: $input_filter");
    }

    return $previous;
}
3914
|
|
|
|
|
|
|
|
3915
|
|
|
|
|
|
|
# Forwards the whole argument list (invocant included) to
# XML::Twig::Elt::set_empty_tag_style and returns its result.
sub set_empty_tag_style {
    return XML::Twig::Elt::set_empty_tag_style( @_);
}
3917
|
|
|
|
|
|
|
|
3918
|
|
|
|
|
|
|
# Forwards the whole argument list (invocant included) to
# XML::Twig::Elt::set_pretty_print and returns its result.
sub set_pretty_print {
    return XML::Twig::Elt::set_pretty_print( @_);
}
3920
|
|
|
|
|
|
|
|
3921
|
|
|
|
|
|
|
# Forwards the whole argument list (invocant included) to
# XML::Twig::Elt::set_quote and returns its result.
sub set_quote {
    return XML::Twig::Elt::set_quote( @_);
}
3923
|
|
|
|
|
|
|
|
3924
|
|
|
|
|
|
|
# Forwards the whole argument list (invocant included) to
# XML::Twig::Elt::set_indent and returns its result.
sub set_indent {
    return XML::Twig::Elt::set_indent( @_);
}
3926
|
|
|
|
|
|
|
|
3927
|
|
|
|
|
|
|
# Drops the invocant, then forwards the remaining arguments to
# XML::Twig::Elt::set_keep_atts_order and returns its result.
sub set_keep_atts_order {
    shift;    # the invocant is not passed on
    return XML::Twig::Elt::set_keep_atts_order( @_);
}
3929
|
|
|
|
|
|
|
|
3930
|
|
|
|
|
|
|
# Forwards the whole argument list (invocant included) to
# XML::Twig::Elt::keep_atts_order and returns its result.
sub keep_atts_order {
    return XML::Twig::Elt::keep_atts_order( @_);
}
3932
|
|
|
|
|
|
|
|
3933
|
|
|
|
|
|
|
# Forwards the whole argument list (invocant included) to
# XML::Twig::Elt::set_do_not_escape_amp_in_atts and returns its result.
sub set_do_not_escape_amp_in_atts {
    return XML::Twig::Elt::set_do_not_escape_amp_in_atts( @_);
}
3935
|
|
|
|
|
|
|
|
3936
|
|
|
|
|
|
|
# Saving and restoring the package globals (the ones in XML::Twig::Elt).
#
# save_global_state stores the value of XML::Twig::Elt::global_state() in the
# twig's twig_saved_state slot. It should probably return the XML::Twig
# object itself, but returns the saved state instead, for backward
# compatibility.
sub save_global_state {
    my $t = shift;

    my $state = XML::Twig::Elt::global_state();
    $t->{twig_saved_state} = $state;

    return $state;
}
3943
|
|
|
|
|
|
|
|
3944
|
|
|
|
|
|
|
# Hands the state kept in the twig's twig_saved_state slot to
# XML::Twig::Elt::set_global_state; the value of that call is the return
# value.
sub restore_global_state {
    my $t = shift;
    return XML::Twig::Elt::set_global_state( $t->{twig_saved_state});
}
3948
|
|
|
|
|
|
|
|
3949
|
|
|
|
|
|
|
# Returns the result of XML::Twig::Elt::global_state(), called without
# arguments.
sub global_state {
    return XML::Twig::Elt::global_state();
}
3951
|
|
|
|
|
|
|
|
3952
|
|
|
|
|
|
|
# set_global_state( $state)
# Passes $state (the argument after the invocant) to
# XML::Twig::Elt::set_global_state and returns its result.
sub set_global_state {
    my (undef, $state) = @_;
    return XML::Twig::Elt::set_global_state( $state);
}
3954
|
|
|
|
|
|
|
|
3955
|
|
|
|
|
|
|
# Explicitly runs the twig's DESTROY method. Returns nothing.
sub dispose {
    my ($t) = @_;
    $t->DESTROY;
    return;
}
3960
|
|
|
|
|
|
|
|
3961
|
|
|
|
|
|
|
# Tears the twig down: deletes the root element (when it is an
# XML::Twig::Elt), clears the slots that take part in circular references,
# then empties the object hash.
sub DESTROY {
    my $t = shift;

    if ($t->{twig_root} && isa( $t->{twig_root}, 'XML::Twig::Elt')) {
        $t->{twig_root}->delete;
    }

    # added to break circular references
    undef $t->{twig};
    if ($t->{twig_root}) {
        undef $t->{twig_root}->{twig};
    }
    undef $t->{twig_parser};

    # prevents memory leaks (especially when using mod_perl)
    undef %$t;
    undef $t;
}
3974
|
|
|
|
|
|
|
|
3975
|
|
|
|
|
|
|
# Returns true if perl was compiled using perlio, found by looking for
# "useperlio=define" in the output of `perl -V`. If no perl executable is
# available (see _this_perl) returns true: these days perlio should be used.
sub _use_perlio {
    my $perl = _this_perl();
    return 1 unless $perl;
    return grep { /useperlio=define/ } `$perl -V`;
}
3981
|
|
|
|
|
|
|
|
3982
|
|
|
|
|
|
|
# Returns the path to the perl executable, built from %Config following the
# recipe in perlvar, or '' when no file exists at that path (when perl is not
# available, e.g. PDK).
sub _this_perl {
    my $secure_perl_path = $Config{perlpath};

    # everywhere but on VMS, make sure the path carries the executable suffix
    if ($^O ne 'VMS' && $secure_perl_path !~ m/$Config{_exe}$/i) {
        $secure_perl_path .= $Config{_exe};
    }

    unless (-f $secure_perl_path) {
        $secure_perl_path = '';
    }

    return $secure_perl_path;
}
3991
|
|
|
|
|
|
|
|
3992
|
|
|
|
|
|
|
# |
3993
|
|
|
|
|
|
|
# non standard handlers |
3994
|
|
|
|
|
|
|
# |
3995
|
|
|
|
|
|
|
|
3996
|
|
|
|
|
|
|
# kludge: expat 1.95.2 calls both Default AND Doctype handlers
# so if the default handler finds '<!DOCTYPE' then it must
# unset itself (_twig_print_doctype will reset it)
#
# Default handler: prints the recognized string, except for '<!DOCTYPE', in
# which case it removes the Default and Entity handlers and sets
# $expat_1_95_2.
#
# NOTE(review): the '<!DOCTYPE' literal (here and in the comment above) was
# truncated in the copy this block was restored from; confirm against the
# upstream source.
sub _twig_print_check_doctype {
    # warn " in _twig_print_check_doctype...\n"; # DEBUG handler

    my $p      = shift;
    my $string = $p->recognized_string();

    if ($string eq '<!DOCTYPE') {
        $p->setHandlers( Default => undef);
        $p->setHandlers( Entity => undef);
        $expat_1_95_2 = 1;
    }
    else {
        print $string;
    }

    return;
}
4015
|
|
|
|
|
|
|
|
4016
|
|
|
|
|
|
|
|
4017
|
|
|
|
|
|
|
# Default handler: prints the recognized string.
# Two exceptions:
#   - with $expat_1_95_2 set, the first '[' seen is swallowed (and recorded
#     in the twig's expat_1_95_2_seen_bracket flag), otherwise the opening
#     square bracket of the doctype gets printed twice
#   - while twig_right_after_root is set, strings without any non-whitespace
#     character are not printed
sub _twig_print {
    # warn " in _twig_print...\n"; # DEBUG handler
    my $p = shift;

    if ($expat_1_95_2 && ($p->recognized_string eq '[') && !$p->{twig}->{expat_1_95_2_seen_bracket}) {
        $p->{twig}->{expat_1_95_2_seen_bracket} = 1;
        return;
    }

    if ($p->{twig}->{twig_right_after_root}) {
        my $s = $p->recognized_string();
        print $s if $s =~ m{\S};
    }
    else {
        print $p->recognized_string();
    }

    return;
}
4032
|
|
|
|
|
|
|
# recognized_string does not seem to work for entities, go figure!
# So this handler prints them anyway: it builds an XML::Twig::Entity from the
# handler arguments (parser excluded) and prints it.
sub _twig_print_entity {
    # warn " in _twig_print_entity...\n"; # DEBUG handler
    my $p = shift;
    return XML::Twig::Entity->new( @_)->print;
}
4039
|
|
|
|
|
|
|
|
4040
|
|
|
|
|
|
|
# kludge: expat 1.95.2 calls both Default AND Doctype handlers
# so if the default handler finds '<!DOCTYPE' then it must
# unset itself (_twig_print_doctype will reset it)
#
# Same as _twig_print_check_doctype, but works on the original string:
# prints it, except for '<!DOCTYPE', in which case it removes the Default and
# Entity handlers and sets $expat_1_95_2.
#
# NOTE(review): the '<!DOCTYPE' literal (here and in the comment above) was
# truncated in the copy this block was restored from; confirm against the
# upstream source.
sub _twig_print_original_check_doctype {
    # warn " in _twig_print_original_check_doctype...\n"; # DEBUG handler

    my $p      = shift;
    my $string = $p->original_string();

    if ($string eq '<!DOCTYPE') {
        $p->setHandlers( Default => undef);
        $p->setHandlers( Entity => undef);
        $expat_1_95_2 = 1;
    }
    else {
        print $string;
    }

    return;
}
4058
|
|
|
|
|
|
|
|
4059
|
|
|
|
|
|
|
# Default handler: prints the original string of the current event.
sub _twig_print_original {
    # warn " in _twig_print_original...\n"; # DEBUG handler
    my ($p) = @_;
    print $p->original_string();
    return;
}
4065
|
|
|
|
|
|
|
|
4066
|
|
|
|
|
|
|
|
4067
|
|
|
|
|
|
|
# Doctype handler used when printing original strings.
#
# When a name is given, rebuilds and prints the start of the doctype
# declaration (name, PUBLIC/SYSTEM identifiers, then ' [' for an internal
# subset unless $expat_1_95_2 is set, or '>' when there is no internal subset
# and $expat_1_95_2 is not set) and records $internal in the twig's doctype
# data. In every case it then installs _twig_print_original as the Default
# handler.
#
# NOTE(review): the line appending '<!DOCTYPE $name' was truncated in the
# copy this block was restored from; it is rebuilt here as an unconditional
# append inside the if( $name) block - confirm against the upstream source.
sub _twig_print_original_doctype {
    # warn " in _twig_print_original_doctype...\n"; # DEBUG handler

    my ($p, $name, $sysid, $pubid, $internal) = @_;

    if ($name) {
        # with recent versions of XML::Parser original_string does not work,
        # hence we need to rebuild the doctype declaration
        my $doctype = '';
        $doctype .= qq{<!DOCTYPE $name};
        $doctype .= qq{ PUBLIC "$pubid"} if( $pubid);
        $doctype .= qq{ SYSTEM}          if( $sysid && !$pubid);
        $doctype .= qq{ "$sysid"}        if( $sysid);
        $doctype .= ' ['                 if( $internal && !$expat_1_95_2);
        $doctype .= qq{>}                unless( $internal || $expat_1_95_2);
        $p->{twig}->{twig_doctype}->{has_internal} = $internal;
        print $doctype;
    }

    $p->setHandlers( Default => \&_twig_print_original);
    return;
}
4087
|
|
|
|
|
|
|
|
4088
|
|
|
|
|
|
|
# Doctype handler used when printing recognized strings.
#
# When a name is given, rebuilds and prints the start of the doctype
# declaration (name, PUBLIC/SYSTEM identifiers, then ' [' for an internal
# subset, or '>' when there is no internal subset and $expat_1_95_2 is not
# set) and records $internal in the twig's doctype data. In every case it
# then installs _twig_print as the Default handler.
#
# NOTE(review): the line appending '<!DOCTYPE $name' was truncated in the
# copy this block was restored from; it is rebuilt here as an unconditional
# append inside the if( $name) block - confirm against the upstream source.
sub _twig_print_doctype {
    # warn " in _twig_print_doctype...\n"; # DEBUG handler

    my ($p, $name, $sysid, $pubid, $internal) = @_;

    if ($name) {
        # with recent versions of XML::Parser original_string does not work,
        # hence we need to rebuild the doctype declaration
        my $doctype = '';
        $doctype .= qq{<!DOCTYPE $name};
        $doctype .= qq{ PUBLIC "$pubid"} if( $pubid);
        $doctype .= qq{ SYSTEM}          if( $sysid && !$pubid);
        $doctype .= qq{ "$sysid"}        if( $sysid);
        $doctype .= ' ['                 if( $internal);
        $doctype .= qq{>}                unless( $internal || $expat_1_95_2);
        $p->{twig}->{twig_doctype}->{has_internal} = $internal;
        print $doctype;
    }

    $p->setHandlers( Default => \&_twig_print);
    return;
}
4107
|
|
|
|
|
|
|
|
4108
|
|
|
|
|
|
|
|
4109
|
|
|
|
|
|
|
# Default handler: prints the original string of the current event.
sub _twig_print_original_default {
    # warn " in _twig_print_original_default...\n"; # DEBUG handler
    my ($p) = @_;
    print $p->original_string();
    return;
}
4115
|
|
|
|
|
|
|
|
4116
|
|
|
|
|
|
|
# End handler that prints the original string of the current event; exists to
# account for the case where the element is empty.
sub _twig_print_end_original {
    # warn " in _twig_print_end_original...\n"; # DEBUG handler
    my ($p) = @_;
    print $p->original_string();
    return;
}
4123
|
|
|
|
|
|
|
|
4124
|
|
|
|
|
|
|
# Start handler in use while looking for twig roots.
#
# Pushes a context entry (tag, attributes and, when twig_map_xmlns is set and
# we are below the document root, the namespace declarations returned by
# _replace_ns) on the twig's context stack. Then:
#   - if the element triggers one of the twig_roots handlers, the regular
#     handlers are restored and the element is handed over to _twig_start
#   - otherwise the start tag is printed when twig_default_print is set
#     (original or recognized string, depending on twig_keep_encoding); at
#     depth 0 the element also goes through _twig_start and the root is
#     marked as flushed; deeper, start tag handlers are run and can veto the
#     printing by returning a false value
sub _twig_start_check_roots {
    # warn " in _twig_start_check_roots...\n"; # DEBUG handler
    my $p  = shift;
    my $gi = shift;

    my $t = $p->{twig};

    my $fh = $t->{twig_output_fh} || select() || \*STDOUT;

    my $ns_decl;
    if ($p->depth != 0 && $t->{twig_map_xmlns}) {
        $ns_decl = _replace_ns( $t, \$gi, \@_);
    }

    my $context = { $ST_TAG => $gi, @_};
    if ($ns_decl) {
        $context->{$ST_NS} = $ns_decl;
    }
    push @{$t->{_twig_context_stack}}, $context;
    my %att = @_;

    if (_handler( $t, $t->{twig_roots}, $gi)) {
        $p->setHandlers( %twig_handlers);    # restore regular handlers
        $t->{twig_root_depth} = $p->depth;
        pop @{$t->{_twig_context_stack}};    # will be pushed back in _twig_start
        _twig_start( $p, $gi, @_);
        return;
    }

    # $tag will always be true if it needs to be printed (the tag string is never empty)
    my $tag = '';
    if ($t->{twig_default_print}) {
        $tag = $t->{twig_keep_encoding} ? $p->original_string
                                        : $p->recognized_string;
    }

    if ($p->depth == 0) {
        ## no critic (TestingAndDebugging::ProhibitNoStrict);
        no strict 'refs';
        print {$fh} $tag if( $tag);
        pop @{$t->{_twig_context_stack}};    # will be pushed back in _twig_start
        _twig_start( $p, $gi, @_);
        $t->root->{'flushed'} = 1;    # or the root start tag gets output the first time we flush
    }
    elsif ($t->{twig_starttag_handlers}) {
        # look for start tag handlers; the first one returning false stops the chain
        my @handlers = _handler( $t, $t->{twig_starttag_handlers}, $gi);
        my $last_handler_res;
        foreach my $handler (@handlers) {
            $last_handler_res = $handler->($t, $gi, %att);
            last unless $last_handler_res;
        }
        ## no critic (TestingAndDebugging::ProhibitNoStrict);
        no strict 'refs';
        print {$fh} $tag if( $tag && (!@handlers || $last_handler_res));
    }
    else {
        ## no critic (TestingAndDebugging::ProhibitNoStrict);
        no strict 'refs';
        print {$fh} $tag if( $tag);
    }

    return;
}
4186
|
|
|
|
|
|
|
|
4187
|
|
|
|
|
|
|
# End handler in use while looking for twig roots.
#
# Runs the end tag handlers matching the element, if any (the chain stops at
# the first one returning a false value), prints the end tag when
# twig_default_print is set, goes through _twig_end and marks the root's end
# tag as flushed when back at depth 0, and finally pops the context stack.
sub _twig_end_check_roots {
    # warn " in _twig_end_check_roots...\n"; # DEBUG handler

    my ($p, $gi, %att) = @_;
    my $t = $p->{twig};

    # $tag can be empty (), hence the undef and the tests for defined
    my $tag;
    if ($t->{twig_default_print}) {
        $tag = $t->{twig_keep_encoding} ? $p->original_string
                                        : $p->recognized_string;
    }
    my $fh = $t->{twig_output_fh} || select() || \*STDOUT;

    if ($t->{twig_endtag_handlers}) {
        # look for end tag handlers
        my @handlers = _handler( $t, $t->{twig_endtag_handlers}, $gi);
        my $last_handler_res = 1;
        foreach my $handler (@handlers) {
            $last_handler_res = $handler->($t, $gi) || last;
        }
    }

    {
        ## no critic (TestingAndDebugging::ProhibitNoStrict);
        no strict 'refs';
        print {$fh} $tag if( defined $tag);
    }

    if ($p->depth == 0) {
        _twig_end( $p, $gi);
        $t->root->{end_tag_flushed} = 1;
    }

    pop @{$t->{_twig_context_stack}};
    return;
}
4223
|
|
|
|
|
|
|
|
4224
|
|
|
|
|
|
|
# Processing instruction handler in use while looking for twig roots.
#
# If a pi handler is registered for the target (or for the '' catch-all
# target), the pi is processed by _twig_pi with the twig_process_pi and
# twig_keep_pi flags temporarily forced to (1, 0). Otherwise the pi is
# printed when twig_default_print is set.
sub _twig_pi_check_roots {
    # warn " in _twig_pi_check_roots...\n"; # DEBUG handler
    my ($p, $target, $data) = @_;
    my $t = $p->{twig};

    my $pi;
    if ($t->{twig_default_print}) {
        $pi = $t->{twig_keep_encoding} ? $p->original_string
                                       : $p->recognized_string;
    }
    my $fh = $t->{twig_output_fh} || select() || \*STDOUT;

    my $handler = $t->{twig_handlers}->{pi_handlers}->{$target}
               || $t->{twig_handlers}->{pi_handlers}->{''};

    if ($handler) {
        # if handler is called on pi, then it needs to be processed as a regular node
        my @flags = qw( twig_process_pi twig_keep_pi);
        my @save  = @{$t}{@flags};    # save pi related flags
        @{$t}{@flags} = (1, 0);       # override them, pi needs to be processed
        _twig_pi( @_);                # call handler on the pi
        @{$t}{@flags} = @save;        # restore flags
    }
    else {
        ## no critic (TestingAndDebugging::ProhibitNoStrict);
        no strict 'refs';
        print {$fh} $pi if( defined( $pi));
    }

    return;
}
4251
|
|
|
|
|
|
|
|
4252
|
|
|
|
|
|
|
|
4253
|
|
|
|
|
|
|
# _output_ignored( $p)
# Outputs the string of the current parser event (original or recognized
# string, depending on twig_keep_encoding) according to twig_ignore_action:
#   'print'       prints it
#   'string'      appends it to the twig's twig_buffered_string
#   a scalar ref  appends it to the referenced scalar
# Any other action croaks.
sub _output_ignored {
    my ($t, $p) = @_;
    my $action = $t->{twig_ignore_action};

    my $get_string = $t->{twig_keep_encoding} ? 'original_string' : 'recognized_string';

    if ($action eq 'print') {
        print $p->$get_string;
    }
    else {
        my $buffer_ref = $action eq 'string'                          ? \$t->{twig_buffered_string}
                       : ref( $action) && ref( $action) eq 'SCALAR'   ? $action
                       :                                                _croak( "wrong ignore action: $action");

        $$buffer_ref .= $p->$get_string;
    }
}
4272
|
|
|
|
|
|
|
|
4273
|
|
|
|
|
|
|
|
4274
|
|
|
|
|
|
|
|
4275
|
|
|
|
|
|
|
# Start handler in use while an element is being ignored: goes one level
# deeper in the ignored subtree and, unless the ignore action is 'discard',
# outputs the start tag through _output_ignored.
sub _twig_ignore_start {
    # warn " in _twig_ignore_start...\n"; # DEBUG handler

    my ($p, $gi) = @_;
    my $t = $p->{twig};

    $t->{twig_ignore_level}++;

    my $action = $t->{twig_ignore_action};
    if ($action ne 'discard') {
        $t->_output_ignored( $p);
    }

    return;
}
4286
|
|
|
|
|
|
|
|
4287
|
|
|
|
|
|
|
# End handler in use while an element is being ignored.
#
# Outputs the end tag through _output_ignored (unless the ignore action is
# 'discard') and goes one level up. When the level reaches 0 the ignored
# element is closed: it becomes the current element and is cut from the tree,
# the context stack is cut back to the current depth, the handlers saved in
# twig_saved_handlers are reinstalled, the matching end tag handlers are run
# and the context stack is popped.
sub _twig_ignore_end {
    # warn " in _twig_ignore_end...\n"; # DEBUG handler

    my ($p, $gi) = @_;
    my $t = $p->{twig};

    my $action = $t->{twig_ignore_action};
    if ($action ne 'discard') {
        $t->_output_ignored( $p);
    }

    $t->{twig_ignore_level}--;

    # still inside the ignored element: nothing more to do
    return if $t->{twig_ignore_level};

    $t->{twig_current} = $t->{twig_ignore_elt};
    $t->{twig_current}->{'twig_current'} = 1;

    # there could possibly be a memory leak here (delete would avoid it,
    # but could also delete elements that should not be deleted)
    $t->{twig_ignore_elt}->cut;

    # restore the saved stack to the current level
    splice( @{$t->{_twig_context_stack}}, $p->depth + 1);

    $p->setHandlers( @{$t->{twig_saved_handlers}});

    # test for handlers
    if ($t->{twig_endtag_handlers}) {
        # look for end tag handlers
        my @handlers = _handler( $t, $t->{twig_endtag_handlers}, $gi);
        my $last_handler_res = 1;
        foreach my $handler (@handlers) {
            $last_handler_res = $handler->($t, $gi) || last;
        }
    }
    pop @{$t->{_twig_context_stack}};

    return;
}
4323
|
|
|
|
|
|
|
|
4324
|
|
|
|
|
|
|
#sub _dump_stack { my( $stack)= @_; return join( ":", map { $_->{$ST_TAG} } @$stack); } |
4325
|
|
|
|
|
|
|
|
4326
|
|
|
|
|
|
|
sub ignore {
    # Put the parser in "ignore" mode for $elt (the current element when no
    # XML::Twig::Elt is passed).  $action is 'print', 'string' or a reference
    # to a scalar; any other value (or none) means 'discard'.
    my ( $t, $elt, $action ) = @_;
    my $current = $t->{twig_current};

    unless ( $elt && ref($elt) && isa( $elt, 'XML::Twig::Elt' ) ) {
        $elt = $current;
    }

    # the ($elt == $current->{last_child}) test is needed because the current
    # element is set to the parent _before_ handlers are called
    my $can_be_ignored = ( $elt == $current )
        || ( $current->{last_child} && ( $elt == $current->{last_child} ) )
        || $current->in($elt);
    if ( !$can_be_ignored ) {
        _croak("element to be ignored must be ancestor of current element");
    }

    # distance, in the context stack, between the current and ignored elements
    if ( $current == $elt ) {
        $t->{twig_ignore_level} = 1;
    }
    else {
        $t->{twig_ignore_level} = $t->_level_in_stack($current) - $t->_level_in_stack($elt) + 1;
    }
    $t->{twig_ignore_elt} = $elt;    # saved so it can be removed later

    # anything that is not a recognized output action becomes 'discard'
    $action ||= 'discard';
    my $is_output_action = $action eq 'print'
        || $action eq 'string'
        || ( ref($action) && ref($action) eq 'SCALAR' );
    $action = 'discard' unless $is_output_action;
    $t->{twig_ignore_action} = $action;

    # install the "ignore" handlers, remembering the ones they replace
    my $p              = $t->{twig_parser};
    my @saved_handlers = $p->setHandlers(%twig_handlers_ignore);

    my $get_string = $t->{twig_keep_encoding} ? 'original_string' : 'recognized_string';

    if ( $action eq 'print' ) {
        $p->setHandlers( Default => sub { print $_[0]->$get_string; } );
        $t->_output_ignored( $p, $action );
    }
    elsif ( $action ne 'discard' ) {
        # 'string' buffers in the twig, a scalar ref buffers in the caller's scalar
        my $string_ref;
        if ( $action eq 'string' ) {
            $t->{twig_buffered_string} = '' unless exists $t->{twig_buffered_string};
            $string_ref = \$t->{twig_buffered_string};
        }
        elsif ( ref($action) && ref($action) eq 'SCALAR' ) {
            $string_ref = $action;
        }
        $p->setHandlers( Default => sub { $$string_ref .= $_[0]->$get_string; } );
        $t->_output_ignored( $p, $action );
    }

    $t->{twig_saved_handlers} = \@saved_handlers;    # restored when ignoring ends
}
4376
|
|
|
|
|
|
|
|
4377
|
|
|
|
|
|
|
sub _level_in_stack {
    # Return the 1-based position of $elt in the twig context stack, or
    # nothing (undef in scalar context) when $elt is not on the stack.
    my ( $t, $elt ) = @_;
    my $level = 1;
    foreach my $elt_in_stack ( @{ $t->{_twig_context_stack} } ) {
        if ( $elt_in_stack->{$ST_ELT} && ( $elt == $elt_in_stack->{$ST_ELT} ) ) {
            return $level;
        }
        $level++;
    }

    # explicit: the value of a sub that ends on a loop is unspecified
    return;
}
4385
|
|
|
|
|
|
|
|
4386
|
|
|
|
|
|
|
|
4387
|
|
|
|
|
|
|
|
4388
|
|
|
|
|
|
|
# select $t->{twig_output_fh} and store the current selected fh |
4389
|
|
|
|
|
|
|
sub _set_fh_to_twig_output_fh {
    # Select the twig output filehandle (if there is one and it is not
    # already selected), remembering the previously selected filehandle.
    my ($t) = @_;
    my $twig_fh = $t->{twig_output_fh};
    if ( $twig_fh && !$t->{twig_output_fh_selected} ) {
        $t->{twig_selected_fh}        = select();    # what was selected before
        $t->{twig_output_fh_selected} = 1;
        select $twig_fh;
    }
}
4399
|
|
|
|
|
|
|
|
4400
|
|
|
|
|
|
|
# select the fh that was stored in $t->{twig_selected_fh} |
4401
|
|
|
|
|
|
|
# (before $t->{twig_output_fh} was selected) |
4402
|
|
|
|
|
|
|
sub _set_fh_to_selected_fh {
    # Re-select the filehandle stored in twig_selected_fh; does nothing
    # when the twig has no output filehandle.
    my ($t) = @_;
    return unless ( $t->{twig_output_fh} );
    my $previous_fh = $t->{twig_selected_fh};
    $t->{twig_output_fh_selected} = 0;
    select $previous_fh;
    return;
}
4410
|
|
|
|
|
|
|
|
4411
|
|
|
|
|
|
|
|
4412
|
|
|
|
|
|
|
sub encoding {
    # encoding recorded in the XML declaration, if there is a declaration
    my ($t) = @_;
    my $decl = $t->{twig_xmldecl};
    return $decl->{encoding} if ($decl);
}
4414
|
|
|
|
|
|
|
|
4415
|
|
|
|
|
|
|
sub set_encoding {
    # Store the encoding in the XML declaration, creating the declaration
    # (with version "1.0") when needed.  Returns the twig.
    my ( $t, $encoding ) = @_;
    $t->{twig_xmldecl} ||= {};
    if ( !$t->xml_version ) { $t->set_xml_version("1.0"); }
    $t->{twig_xmldecl}->{encoding} = $encoding;
    return $t;
}
4422
|
|
|
|
|
|
|
|
4423
|
|
|
|
|
|
|
sub output_encoding {
    # encoding last stored by set_output_encoding
    my ($t) = @_;
    return $t->{output_encoding};
}
4425
|
|
|
|
|
|
|
|
4426
|
|
|
|
|
|
|
sub set_output_encoding {
    # Record the output encoding and (re)install an encoding output filter
    # when the encoding is not UTF-8, when the original encoding is kept,
    # or when an output filter is already set.  Returns the twig.
    my ( $t, $encoding ) = @_;
    my $current_filter = $t->output_filter || '';

    my $is_non_utf8 = $encoding && $encoding !~ m{^utf-?8$}i;
    if ( $is_non_utf8 || $t->{twig_keep_encoding} || $current_filter ) {
        $t->set_output_filter( _encoding_filter( $encoding || '' ) );
    }

    $t->{output_encoding} = $encoding;
    return $t;
}
4436
|
|
|
|
|
|
|
|
4437
|
|
|
|
|
|
|
sub xml_version {
    # version recorded in the XML declaration, if there is a declaration
    my ($t) = @_;
    my $decl = $t->{twig_xmldecl};
    return $decl->{version} if ($decl);
}
4439
|
|
|
|
|
|
|
|
4440
|
|
|
|
|
|
|
sub set_xml_version {
    # Store the version in the XML declaration (created when missing).
    # Returns the twig.
    my ( $t, $version ) = @_;
    $t->{twig_xmldecl} = {} unless $t->{twig_xmldecl};
    $t->{twig_xmldecl}->{version} = $version;
    return $t;
}
4446
|
|
|
|
|
|
|
|
4447
|
|
|
|
|
|
|
sub standalone {
    # standalone value recorded in the XML declaration, if there is a declaration
    my ($t) = @_;
    my $decl = $t->{twig_xmldecl};
    return $decl->{standalone} if ($decl);
}
4449
|
|
|
|
|
|
|
|
4450
|
|
|
|
|
|
|
sub set_standalone {
    # Store the standalone value in the XML declaration, creating the
    # declaration (with version "1.0") when needed.  Returns the twig.
    my ( $t, $standalone ) = @_;
    $t->{twig_xmldecl} ||= {};
    if ( !$t->xml_version ) { $t->set_xml_version("1.0"); }
    $t->{twig_xmldecl}->{standalone} = $standalone;
    return $t;
}
4457
|
|
|
|
|
|
|
|
4458
|
|
|
|
|
|
|
|
4459
|
|
|
|
|
|
|
# SAX methods |
4460
|
|
|
|
|
|
|
|
4461
|
|
|
|
|
|
|
sub toSAX1 {
    # Send the whole twig to a SAX1 handler; refused while the twig still
    # has a parser attached.
    my ( $t, @args ) = @_;
    if ( defined $t->{twig_parser} ) {
        _croak("cannot use toSAX1 while parsing (use flush_toSAX1)");
    }
    return $t->_toSAX(
        @args,
        \&XML::Twig::Elt::_start_tag_data_SAX1,
        \&XML::Twig::Elt::_end_tag_data_SAX1
    );
}
4467
|
|
|
|
|
|
|
|
4468
|
|
|
|
|
|
|
sub toSAX2 {
    # Send the whole twig to a SAX2 handler; refused while the twig still
    # has a parser attached.
    my ( $t, @args ) = @_;
    if ( defined $t->{twig_parser} ) {
        _croak("cannot use toSAX2 while parsing (use flush_toSAX2)");
    }
    return $t->_toSAX(
        @args,
        \&XML::Twig::Elt::_start_tag_data_SAX2,
        \&XML::Twig::Elt::_end_tag_data_SAX2
    );
}
4474
|
|
|
|
|
|
|
|
4475
|
|
|
|
|
|
|
|
4476
|
|
|
|
|
|
|
sub _toSAX {
    # Drive a SAX handler through the document: start_document, prolog,
    # the root element (if any), then end_document.  Handler methods are
    # only called when the handler implements them.
    my ( $t, $handler, $start_tag_data, $end_tag_data ) = @_;

    my $start_document = $handler->can('start_document');
    $start_document->($handler) if ($start_document);

    $t->_prolog_toSAX($handler);

    if ( $t->root ) {
        $t->root->_toSAX( $handler, $start_tag_data, $end_tag_data );
    }

    # kept last: its value is what the sub returns
    my $end_document = $handler->can('end_document');
    $end_document->($handler) if ($end_document);
}
4488
|
|
|
|
|
|
|
|
4489
|
|
|
|
|
|
|
|
4490
|
|
|
|
|
|
|
sub flush_toSAX1 {
    # flush the twig to a SAX1 handler
    my ( $t, @args ) = @_;
    return $t->_flush_toSAX(
        @args,
        \&XML::Twig::Elt::_start_tag_data_SAX1,
        \&XML::Twig::Elt::_end_tag_data_SAX1
    );
}
4495
|
|
|
|
|
|
|
|
4496
|
|
|
|
|
|
|
sub flush_toSAX2 {
    # flush the twig to a SAX2 handler
    my ( $t, @args ) = @_;
    return $t->_flush_toSAX(
        @args,
        \&XML::Twig::Elt::_start_tag_data_SAX2,
        \&XML::Twig::Elt::_end_tag_data_SAX2
    );
}
4501
|
|
|
|
|
|
|
|
4502
|
|
|
|
|
|
|
sub _flush_toSAX {
    # Send to the SAX handler everything up to the last processed element,
    # deleting elements once they have been sent in full.  Elements that
    # still enclose the last element only get their start tag sent (once,
    # tracked by the 'flushed' field).
    my ( $t, $handler, $start_tag_data, $end_tag_data ) = @_;

    # the "real" last element processed, as _twig_end has closed it
    my $last_elt = $t->{twig_current} ? $t->{twig_current}->{last_child} : $t->{twig_root};

    my $elt = $t->{twig_root};
    if ( !$elt->{'flushed'} ) {
        # root not flushed yet: nothing has been sent, start the document
        my $start_document = $handler->can('start_document');
        $start_document->($handler) if ($start_document);

        # flush the DTD
        $t->_prolog_toSAX($handler);
    }

    while ($elt) {
        my $next_elt;
        if ( $last_elt && $last_elt->in($elt) ) {
            # still open: send the start tag only, then go down
            if ( !$elt->{'flushed'} ) {
                my $start_element = $handler->can('start_element');
                if ($start_element) {
                    my $tag_data = $start_tag_data->($elt);
                    $start_element->( $handler, $tag_data ) if ($tag_data);
                }
                $elt->{'flushed'} = 1;
            }
            $next_elt = $elt->{first_child};
        }
        else {
            # an element before the last one or the last one: send it whole
            $next_elt = $elt->{next_sibling};
            $elt->_toSAX( $handler, $start_tag_data, $end_tag_data );
            $elt->delete;
            last if ( $last_elt && ( $elt == $last_elt ) );
        }
        $elt = $next_elt;
    }

    # the document only ends once parsing is over
    if ( !$t->{twig_parsing} ) {
        if ( my $end_document = $handler->can('end_document') ) {
            $end_document->($handler);
        }
    }
}
4549
|
|
|
|
|
|
|
|
4550
|
|
|
|
|
|
|
|
4551
|
|
|
|
|
|
|
sub _prolog_toSAX {
    # send the XML declaration, then the DTD, to the SAX handler
    my ( $t, $handler ) = @_;
    $t->_xmldecl_toSAX($handler);
    return $t->_DTD_toSAX($handler);
}
4556
|
|
|
|
|
|
|
|
4557
|
|
|
|
|
|
|
sub _xmldecl_toSAX {
    # Pass the XML declaration fields (Version, Encoding, Standalone) to the
    # handler's xml_decl method, when the handler has one.
    my ( $t, $handler ) = @_;
    my $decl = $t->{twig_xmldecl};
    my %data = (
        Version    => $decl->{version},
        Encoding   => $decl->{encoding},
        Standalone => $decl->{standalone},
    );
    if ( my $xml_decl = $handler->can('xml_decl') ) {
        $xml_decl->( $handler, \%data );
    }
}
4567
|
|
|
|
|
|
|
|
4568
|
|
|
|
|
|
|
sub _DTD_toSAX {
    # Send start_dtd / end_dtd events for the doctype (nothing is sent when
    # the twig has no doctype).  The internal subset is not exported.
    my ( $t, $handler ) = @_;
    my $doctype = $t->{twig_doctype};
    return unless ($doctype);

    my %data = (
        Name     => $doctype->{name},
        PublicId => $doctype->{pub},
        SystemId => $doctype->{sysid},
    );

    my $start_dtd = $handler->can('start_dtd');
    $start_dtd->( $handler, \%data ) if ($start_dtd);

    # I should call code to export the internal subset here

    if ( my $end_dtd = $handler->can('end_dtd') ) {
        $end_dtd->($handler);
    }
}
4585
|
|
|
|
|
|
|
|
4586
|
|
|
|
|
|
|
# input/output filters |
4587
|
|
|
|
|
|
|
|
4588
|
|
|
|
|
|
|
sub latin1 {
    # Return a code reference converting to ISO-8859-15, built with the
    # first available of Encode, Text::Iconv or Unicode::Map8 +
    # Unicode::String; falls back on the regexp-based regexp2latin1 when
    # none of them can be loaded.
    local $SIG{__DIE__};    # no user __DIE__ handler while probing for modules
    if ( _use('Encode') ) {
        return encode_convert('ISO-8859-15');
    }
    elsif ( _use('Text::Iconv') ) {
        return iconv_convert('ISO-8859-15');
    }
    elsif ( _use('Unicode::Map8') && _use('Unicode::String') ) {
        return unicode_convert('ISO-8859-15');
    }
    else {
        # reference to the regexp2latin1 sub (was garbled into an invalid token)
        return \&regexp2latin1;
    }
}
4599
|
|
|
|
|
|
|
|
4600
|
|
|
|
|
|
|
sub _encoding_filter {
    # Return a filter (code reference) converting to the given encoding,
    # using the first available conversion module; croaks when none is
    # installed.  Can be called with the encoding as first or second argument.
    my $encoding = $_[1] || $_[0];
    {
        # the __DIE__ handler is only disabled while probing for modules
        local $SIG{__DIE__};
        return encode_convert($encoding) if ( _use('Encode') );
        return iconv_convert($encoding)  if ( _use('Text::Iconv') );
        if ( _use('Unicode::Map8') && _use('Unicode::String') ) {
            return unicode_convert($encoding);
        }
    }
    _croak("Encode, Text::Iconv or Unicode::Map8 and Unicode::String need to be installed in order to use encoding options");
}
4615
|
|
|
|
|
|
|
|
4616
|
|
|
|
|
|
|
# shamelessly lifted from XML::TyePYX (works only with XML::Parse 2.27) |
4617
|
|
|
|
|
|
|
sub regexp2latin1 {
    # Replace each 2-byte sequence whose first byte is in \xc0-\xc3 by the
    # single character it encodes; the rest of the text is left alone.
    my ($text) = @_;
    $text =~ s{([\xc0-\xc3])(.)}{ chr( ( ( ord($1) & 0x03 ) << 6 ) | ( ord($2) & 0x3F ) ) }ge;
    return $text;
}
4625
|
|
|
|
|
|
|
|
4626
|
|
|
|
|
|
|
|
4627
|
|
|
|
|
|
|
sub html_encode {
    # encode the argument with HTML::Entities (which must be installed)
    unless ( _use('HTML::Entities') ) {
        croak "cannot use html_encode: missing HTML::Entities";
    }
    return HTML::Entities::encode_entities( $_[0] );
}
4631
|
|
|
|
|
|
|
|
4632
|
|
|
|
|
|
|
sub safe_encode {
    # Return the string with non-ASCII characters replaced: through Encode
    # (with the $FB_HTMLCREF fallback) on perl 5.8+, through
    # _XmlUtf8Decode on older perls.
    my ($str) = @_;
    if ( $perl_version >= 5.008 ) {
        $str = encode( ascii => $str, $FB_HTMLCREF );
    }
    else {
        # the no utf8 makes the regexp work in 5.6
        no utf8;    # = perl 5.6
        $str =~ s{([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)}
                 {_XmlUtf8Decode($1)}egs;
    }
    return $str;
}
4644
|
|
|
|
|
|
|
|
4645
|
|
|
|
|
|
|
sub safe_encode_hex {
    # Same as safe_encode, with the $FB_XMLCREF fallback on perl 5.8+ and
    # the hex flag passed to _XmlUtf8Decode on older perls.
    my ($str) = @_;
    if ( $perl_version >= 5.008 ) {
        $str = encode( ascii => $str, $FB_XMLCREF );
    }
    else {
        # the no utf8 makes the regexp work in 5.6
        no utf8;    # = perl 5.6
        $str =~ s{([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)}
                 {_XmlUtf8Decode($1, 1)}egs;
    }
    return $str;
}
4657
|
|
|
|
|
|
|
|
4658
|
|
|
|
|
|
|
# this one shamelessly lifted from XML::DOM |
4659
|
|
|
|
|
|
|
# does NOT work on 5.8.0 |
4660
|
|
|
|
|
|
|
sub _XmlUtf8Decode {
    # Turn one UTF-8 encoded character (1 to 4 bytes) into a numeric
    # character reference: "&#NNN;" by default, "&#xHH;" when $hex is true.
    # Croaks when the string is not 1 to 4 bytes long.
    my ( $str, $hex ) = @_;
    my $len = length($str);
    my $n;

    if ( $len == 2 ) {
        my @n = unpack "C2", $str;
        $n = ( ( $n[0] & 0x3f ) << 6 ) + ( $n[1] & 0x3f );
    }
    elsif ( $len == 3 ) {
        my @n = unpack "C3", $str;
        $n = ( ( $n[0] & 0x1f ) << 12 ) + ( ( $n[1] & 0x3f ) << 6 ) + ( $n[2] & 0x3f );
    }
    elsif ( $len == 4 ) {
        my @n = unpack "C4", $str;
        $n = ( ( $n[0] & 0x0f ) << 18 ) + ( ( $n[1] & 0x3f ) << 12 )
           + ( ( $n[2] & 0x3f ) << 6 ) + ( $n[3] & 0x3f );
    }
    elsif ( $len == 1 ) {    # just to be complete...
        $n = ord($str);
    }
    else {
        croak "bad value [$str] for _XmlUtf8Decode";
    }

    # the reference needs its "&#" / "&#x" prefix to be a valid character
    # reference
    my $char = $hex ? sprintf( "&#x%x;", $n ) : "&#$n;";
    return $char;
}
4686
|
|
|
|
|
|
|
|
4687
|
|
|
|
|
|
|
|
4688
|
|
|
|
|
|
|
sub unicode_convert {
    # Build a filter (code reference) converting to $enc with Unicode::Map8
    # and Unicode::String.  The encoding is the second argument when there
    # is one (method call), the first otherwise (function call).
    my $enc = $_[1] || $_[0];
    unless ( _use('Unicode::Map8') ) {
        croak "Unicode::Map8 not available, needed for encoding filter: $!";
    }
    unless ( _use('Unicode::String') ) {
        croak "Unicode::String not available, needed for encoding filter: $!";
    }
    Unicode::String->import( qw(utf8) );

    # the filter is compiled from a string so that $NO_WARNINGS applies to it
    my $sub = eval qq{ { $NO_WARNINGS;
                         my \$cnv;
                         BEGIN {  \$cnv= Unicode::Map8->new(\$enc)
                                     or croak "Can't create converter to \$enc";
                               }
                         sub { return  \$cnv->to8 (utf8(\$_[0])->ucs2); }
                       }
                     };
    croak $@ unless ($sub);
    return $sub;
}
4704
|
|
|
|
|
|
|
|
4705
|
|
|
|
|
|
|
sub iconv_convert {
    # Build a filter (code reference) converting from utf8 to $enc with
    # Text::Iconv.  The encoding is the second argument when there is one
    # (method call), the first otherwise (function call).
    my $enc = $_[1] || $_[0];
    unless ( _use('Text::Iconv') ) {
        croak "Text::Iconv not available, needed for encoding filter: $!";
    }

    # the filter is compiled from a string so that $NO_WARNINGS applies to it
    my $sub = eval qq{ { $NO_WARNINGS;
                         my \$cnv;
                         BEGIN { \$cnv = Text::Iconv->new( 'utf8', \$enc)
                                     or croak "Can't create iconv converter to \$enc";
                               }
                         sub { return  \$cnv->convert( \$_[0]); }
                       }
                     };
    return $sub if ($sub);

    # compilation failed: give a clearer message for unsupported encodings
    if ( $@ =~ m{^Unsupported conversion: Invalid argument} ) {
        croak "Unsupported encoding: $enc";
    }
    croak $@;
}
4725
|
|
|
|
|
|
|
|
4726
|
|
|
|
|
|
|
sub encode_convert {
    # Build a filter (code reference) encoding its argument to $enc with
    # Encode.  The encoding is the second argument when there is one
    # (method call), the first otherwise (function call).
    my $enc = $_[1] ? $_[1] : $_[0];

    # The filter is compiled from a string so that $NO_WARNINGS applies to
    # it.  $enc is escaped so the generated sub closes over the variable
    # (as unicode_convert and iconv_convert do) instead of having its value
    # pasted into the source, where a quote, '$' or '@' in the encoding
    # name would break or alter the generated code.
    my $sub = eval qq{sub { $NO_WARNINGS; return encode( \$enc, \$_[0]); } };
    croak "can't create Encode-based filter: $@" unless ($sub);
    return $sub;
}
4732
|
|
|
|
|
|
|
|
4733
|
|
|
|
|
|
|
|
4734
|
|
|
|
|
|
|
# XML::XPath compatibility |
4735
|
|
|
|
|
|
|
sub getRootNode {
    my ($t) = @_;
    return $t;    # the twig is its own root node
}

sub getParentNode {
    return undef;    # the twig has no parent
}

sub getChildNodes {
    # the only child of the twig is the root element
    my ($t) = @_;
    my @children = ( $t->root );
    return @children if wantarray;
    return \@children;
}
4738
|
|
|
|
|
|
|
|
4739
|
|
|
|
|
|
|
sub _weakrefs {
    return $weakrefs;
}

# for testing purposes
sub _set_weakrefs {
    my ($flag) = @_;
    $weakrefs = $flag || 0;
    XML::Twig::Elt::set_destroy() if !$weakrefs;
}
4741
|
|
|
|
|
|
|
|
4742
|
|
|
|
|
|
|
sub _dump {
    # Return a dump of the twig: a "document" line followed by the dump of
    # the root element (when there is one); extra arguments are passed on.
    my ( $t, @args ) = @_;

    my $dump = "document\n";    # should dump twig level data here
    if ( $t->root ) {
        $dump .= $t->root->_dump(@args);
    }

    return $dump;
}
4752
|
|
|
|
|
|
|
|
4753
|
|
|
|
|
|
|
|
4754
|
|
|
|
|
|
|
1; |
4755
|
|
|
|
|
|
|
|
4756
|
|
|
|
|
|
|
###################################################################### |
4757
|
|
|
|
|
|
|
package XML::Twig::Entity_list;
######################################################################

*isa= *UNIVERSAL::isa;

# create an empty entity list; 'updated' is set by every modification
sub new {
    my $class = shift;
    my $self  = { entities => {}, updated => 0 };

    bless $self, $class;
    return $self;
}

# create an XML::Twig::Entity from the arguments and add it to the list
sub add_new_ent {
    my $ent_list = shift;
    my $ent      = XML::Twig::Entity->new(@_);
    $ent_list->add($ent);
    return $ent_list;
}

# add all the entities of another list (entities with the same name are replaced)
sub _add_list {
    my ( $ent_list, $to_add ) = @_;
    my $ents_to_add = $to_add->{entities};
    return $ent_list unless ( $ents_to_add && %$ents_to_add );
    @{ $ent_list->{entities} }{ keys %$ents_to_add } = values %$ents_to_add;
    $ent_list->{updated} = 1;
    return $ent_list;
}

# add an entity, stored under its name
sub add {
    my ( $ent_list, $ent ) = @_;
    $ent_list->{entities}->{ $ent->{name} } = $ent;
    $ent_list->{updated} = 1;
    return $ent_list;
}

# return the entity with the given name
sub ent {
    my ( $ent_list, $ent_name ) = @_;
    return $ent_list->{entities}->{$ent_name};
}

# can be called with an entity or with an entity name
sub delete {
    my ( $ent_list, $ent_or_name ) = @_;
    my $name = isa( ref($ent_or_name), 'XML::Twig::Entity' )
        ? $ent_or_name->{name}    # an entity: use its name
        : $ent_or_name;           # not an entity, must be a name then
    delete $ent_list->{entities}->{$name};
    $ent_list->{updated} = 1;
    return $ent_list;
}

# print all the entities, sorted by name, to $fh (or to the selected filehandle)
sub print {
    my ( $ent_list, $fh ) = @_;
    my $old_select = defined $fh ? select $fh : undef;

    foreach my $ent_name ( sort keys %{ $ent_list->{entities} } ) {
        my $ent = $ent_list->{entities}->{$ent_name};

        # we have to test what the entity is or un-defined entities can creep in
        if ( isa( $ent, 'XML::Twig::Entity' ) ) { $ent->print(); }
    }
    select $old_select if ( defined $old_select );
    return $ent_list;
}

# return the text of all the entities, sorted by name, one per line
sub text {
    my ($ent_list) = @_;
    my $entities = $ent_list->{entities};

    # same test as in print: skip the un-defined entities that can creep
    # in, instead of dying on a method call on an undefined value
    return join "\n",
        map  { $_->text }
        grep { isa( $_, 'XML::Twig::Entity' ) }
        map  { $entities->{$_} }
        sort keys %$entities;
}

# return the list of entity names
sub entity_names {
    my $ent_list = shift;
    return ( sort keys %{ $ent_list->{entities} } );
}

# return the list of entities, sorted by name
sub list {
    my ($ent_list) = @_;
    return map { $ent_list->{entities}->{$_} } sort keys %{ $ent_list->{entities} };
}

1;
4847
|
|
|
|
|
|
|
|
4848
|
|
|
|
|
|
|
###################################################################### |
4849
|
|
|
|
|
|
|
package XML::Twig::Entity;
######################################################################

#*isa= *UNIVERSAL::isa;

# create an entity; only the defined fields are stored
sub new {
    my ( $class, $name, $val, $sysid, $pubid, $ndata, $param ) = @_;
    $class = ref($class) || $class;

    my $self = {};

    $self->{name}  = $name;
    $self->{val}   = $val   if ( defined $val );
    $self->{sysid} = $sysid if ( defined $sysid );
    $self->{pubid} = $pubid if ( defined $pubid );
    $self->{ndata} = $ndata if ( defined $ndata );
    $self->{param} = $param if ( defined $param );

    bless $self, $class;
    return $self;
}

# accessors: sysid, pubid, ndata and param return '' when the field is not defined
sub name  { return $_[0]->{name}; }
sub val   { return $_[0]->{val}; }
sub sysid { return defined( $_[0]->{sysid} ) ? $_[0]->{sysid} : ''; }
sub pubid { return defined( $_[0]->{pubid} ) ? $_[0]->{pubid} : ''; }
sub ndata { return defined( $_[0]->{ndata} ) ? $_[0]->{ndata} : ''; }
sub param { return defined( $_[0]->{param} ) ? $_[0]->{param} : ''; }

# print the entity declaration, followed by a newline, to $fh (or to the
# selected filehandle)
sub print {
    my ( $ent, $fh ) = @_;
    my $text = $ent->text;
    if   ($fh) { print {$fh} $text . "\n"; }
    else       { print $text . "\n"; }
}

sub sprint {
    my ($ent) = @_;
    return $ent->text;
}

# return the entity declaration, '' for an entity without a name
sub text {
    my ($ent) = @_;
    #warn "text called: '", $ent->_dump, "'\n";
    return '' if ( !$ent->{name} );
    my @tokens;
    push @tokens, '<!ENTITY';    # opening of the declaration, closed by the final '>'

    push @tokens, '%' if ( $ent->{param} );
    push @tokens, $ent->{name};

    if ( defined $ent->{val} && !defined( $ent->{sysid} ) && !defined( $ent->{pubid} ) ) {
        # internal entity: just the quoted value
        push @tokens, _quoted_val( $ent->{val} );
    }
    elsif ( defined $ent->{sysid} ) {
        # external entity: PUBLIC + public id or SYSTEM, then the system id
        push @tokens, 'PUBLIC', _quoted_val( $ent->{pubid} ) if ( $ent->{pubid} );
        push @tokens, 'SYSTEM' unless ( $ent->{pubid} );
        push @tokens, _quoted_val( $ent->{sysid} );
        push @tokens, 'NDATA', $ent->{ndata} if ( $ent->{ndata} );
    }
    return join( ' ', @tokens ) . '>';
}

# quote the value with double quotes, or with single quotes when it
# contains a double quote
sub _quoted_val {
    my $q = $_[0] =~ m{"} ? q{'} : q{"};
    return qq{$q$_[0]$q};
}

# return the defined fields as "field => 'value'" pairs, sorted by field name
sub _dump {
    my ($ent) = @_;
    return join( " - ", map { "$_ => '$ent->{$_}'" } grep { defined $ent->{$_} } sort keys %$ent );
}

1;
4923
|
|
|
|
|
|
|
|
4924
|
|
|
|
|
|
|
###################################################################### |
4925
|
|
|
|
|
|
|
package XML::Twig::Notation_list;
######################################################################

*isa= *UNIVERSAL::isa;

# create an empty notation list; 'updated' is set by every modification
sub new {
    my $class = shift;
    my $self  = { notations => {}, updated => 0 };

    bless $self, $class;
    return $self;
}

# create an XML::Twig::Notation from the arguments and add it to the list
sub add_new_notation {
    my $notation_list = shift;
    my $notation      = XML::Twig::Notation->new(@_);
    $notation_list->add($notation);
    return $notation_list;
}

# add all the notations of another list (notations with the same name are replaced)
sub _add_list {
    my ( $notation_list, $to_add ) = @_;
    my $notations_to_add = $to_add->{notations};
    return $notation_list unless ( $notations_to_add && %$notations_to_add );
    @{ $notation_list->{notations} }{ keys %$notations_to_add } = values %$notations_to_add;
    $notation_list->{updated} = 1;
    return $notation_list;
}

# add a notation, stored under its name
sub add {
    my ( $notation_list, $notation ) = @_;
    $notation_list->{notations}->{ $notation->{name} } = $notation;
    $notation_list->{updated} = 1;
    return $notation_list;
}

# return the notation with the given name
sub notation {
    my ( $notation_list, $notation_name ) = @_;
    return $notation_list->{notations}->{$notation_name};
}

# can be called with a notation or with a notation name
sub delete {
    my ( $notation_list, $notation_or_name ) = @_;
    my $name = isa( ref($notation_or_name), 'XML::Twig::Notation' )
        ? $notation_or_name->{name}    # a notation: use its name
        : $notation_or_name;           # not a notation, must be a name then
    delete $notation_list->{notations}->{$name};
    $notation_list->{updated} = 1;
    return $notation_list;
}

# print all the notations, sorted by name, to $fh (or to the selected filehandle)
sub print {
    my ( $notation_list, $fh ) = @_;
    my $old_select = defined $fh ? select $fh : undef;

    foreach my $notation_name ( sort keys %{ $notation_list->{notations} } ) {
        my $notation = $notation_list->{notations}->{$notation_name};

        # we have to test what the notation is or un-defined notations can creep in
        if ( isa( $notation, 'XML::Twig::Notation' ) ) { $notation->print(); }
    }
    select $old_select if ( defined $old_select );
    return $notation_list;
}

# return the text of all the notations, sorted by name, one per line
sub text {
    my ($notation_list) = @_;
    my $notations = $notation_list->{notations};

    # same test as in print: skip the un-defined notations that can creep
    # in, instead of dying on a method call on an undefined value
    return join "\n",
        map  { $_->text }
        grep { isa( $_, 'XML::Twig::Notation' ) }
        map  { $notations->{$_} }
        sort keys %$notations;
}

# return the list of notation names
sub notation_names {
    my $notation_list = shift;
    return ( sort keys %{ $notation_list->{notations} } );
}

# return the list of notations, sorted by name
sub list {
    my ($notation_list) = @_;
    return map { $notation_list->{notations}->{$_} } sort keys %{ $notation_list->{notations} };
}

1;
5015
|
|
|
|
|
|
|
|
5016
|
|
|
|
|
|
|
###################################################################### |
5017
|
|
|
|
|
|
|
package XML::Twig::Notation; |
5018
|
|
|
|
|
|
|
###################################################################### |
5019
|
|
|
|
|
|
|
|
5020
|
|
|
|
|
|
|
#*isa= *UNIVERSAL::isa; |
5021
|
|
|
|
|
|
|
|
5022
|
|
|
|
|
|
|
BEGIN {
    # sprint is an alias for text
    *sprint= *text;
}
5025
|
|
|
|
|
|
|
|
5026
|
|
|
|
|
|
|
# Create a notation object.
# $class can be a class name or an existing object (its class is then used).
# $name is always stored; base, sysid and pubid are stored only when defined.
sub new {
    my( $class, $name, $base, $sysid, $pubid)= @_;
    $class= ref( $class) || $class;

    my $self= { name => $name };
    if( defined $base )  { $self->{base}=  $base;  }
    if( defined $sysid)  { $self->{sysid}= $sysid; }
    if( defined $pubid)  { $self->{pubid}= $pubid; }

    return bless $self, $class;
}
5040
|
|
|
|
|
|
|
|
5041
|
|
|
|
|
|
|
|
5042
|
|
|
|
|
|
|
# accessor: name of the notation
sub name {
    my( $notation)= @_;
    return $notation->{name};
}
5043
|
|
|
|
|
|
|
# accessor: base of the notation (undef if none was given)
sub base {
    my( $notation)= @_;
    return $notation->{base};
}
5044
|
|
|
|
|
|
|
# accessor: system id of the notation (undef if none was given)
sub sysid {
    my( $notation)= @_;
    return $notation->{sysid};
}
5045
|
|
|
|
|
|
|
# accessor: public id of the notation (undef if none was given)
sub pubid {
    my( $notation)= @_;
    return $notation->{pubid};
}
5046
|
|
|
|
|
|
|
|
5047
|
|
|
|
|
|
|
|
5048
|
|
|
|
|
|
|
# Print the text of the notation, followed by a newline, to $fh if one is
# given, to the currently selected filehandle otherwise.
sub print {
    my( $notation, $fh)= @_;
    my $line= $notation->text . "\n";
    return $fh ? print( {$fh} $line) : print( $line);
}
5054
|
|
|
|
|
|
|
|
5055
|
|
|
|
|
|
|
# Return the notation declaration as text, e.g. <!NOTATION gif SYSTEM "gif.exe">
# Returns '' if the notation has no name.
# A public id is output as PUBLIC "pubid", followed by the system id if there
# is one; without a public id the system id is output as SYSTEM "sysid".
sub text {
    my( $notation)= @_;
    return '' if( !$notation->{name});

    # the opening '<!NOTATION' literal had been lost from this line
    my @tokens= ( '<!NOTATION', $notation->{name});

    if( $notation->{pubid})    { push @tokens, 'PUBLIC', _quoted_val( $notation->{pubid}); }
    elsif( $notation->{sysid}) { push @tokens, 'SYSTEM'; }

    if( $notation->{sysid})    { push @tokens, _quoted_val( $notation->{sysid}); }

    return join( ' ', @tokens) . '>';
}
5067
|
|
|
|
|
|
|
|
5068
|
|
|
|
|
|
|
# Quote a value: double quotes are used unless the value itself contains a
# double quote, in which case single quotes are used.
sub _quoted_val {
    my $quote= q{"};
    if( $_[0]=~ m{"}) { $quote= q{'}; }
    return $quote . $_[0] . $quote;
}
5072
|
|
|
|
|
|
|
|
5073
|
|
|
|
|
|
|
# Debugging helper: the defined fields of the notation, sorted by field name,
# as "field => 'value'" pairs joined by " - ".
sub _dump {
    my( $notation)= @_;
    my @fields;
    foreach my $field (sort keys %$notation) {
        next unless defined $notation->{$field};
        push @fields, "$field => '$notation->{$field}'";
    }
    return join( " - ", @fields);
}
5075
|
|
|
|
|
|
|
|
5076
|
|
|
|
|
|
|
1; |
5077
|
|
|
|
|
|
|
|
5078
|
|
|
|
|
|
|
###################################################################### |
5079
|
|
|
|
|
|
|
package XML::Twig::Elt; |
5080
|
|
|
|
|
|
|
###################################################################### |
5081
|
|
|
|
|
|
|
|
5082
|
|
|
|
|
|
|
use Carp; |
5083
|
|
|
|
|
|
|
*isa= *UNIVERSAL::isa; |
5084
|
|
|
|
|
|
|
|
5085
|
|
|
|
|
|
|
# delimiters used when outputting CDATA sections, processing instructions
# and comments (the markup in these literals had been lost, along with the
# $COMMENT_END declaration)
my $CDATA_START   = "<![CDATA[";
my $CDATA_END     = "]]>";
my $PI_START      = "<?";
my $PI_END        = "?>";
my $COMMENT_START = "<!--";
my $COMMENT_END   = "-->";
5091
|
|
|
|
|
|
|
|
5092
|
|
|
|
|
|
|
my $XMLNS_URI = 'http://www.w3.org/2000/xmlns/'; |
5093
|
|
|
|
|
|
|
|
5094
|
|
|
|
|
|
|
|
5095
|
|
|
|
|
|
|
BEGIN {
    # aliases for methods
    *tag                  = *gi;
    *name                 = *gi;
    *set_tag              = *set_gi;
    *set_name             = *set_gi;
    *find_nodes           = *get_xpath; # as in XML::DOM
    *findnodes            = *get_xpath; # as in XML::LibXML
    *field                = *first_child_text;
    *trimmed_field        = *first_child_trimmed_text;
    *is_field             = *contains_only_text;
    *is                   = *passes;
    *matches              = *passes;
    *has_child            = *first_child;
    *has_children         = *first_child;
    *all_children_pass    = *all_children_are;
    *all_children_match   = *all_children_are;
    *getElementsByTagName = *descendants;
    *find_by_tag_name     = *descendants_or_self;
    *unwrap               = *erase;
    *inner_xml            = *xml_string;
    *outer_xml            = *sprint;
    *add_class            = *add_to_class;

    # "_is" spellings of the "_matches" methods
    *first_child_is       = *first_child_matches;
    *last_child_is        = *last_child_matches;
    *next_sibling_is      = *next_sibling_matches;
    *prev_sibling_is      = *prev_sibling_matches;
    *next_elt_is          = *next_elt_matches;
    *prev_elt_is          = *prev_elt_matches;
    *parent_is            = *parent_matches;
    *child_is             = *child_matches;
    *inherited_att        = *inherit_att;

    *sort_children_by_value = *sort_children_on_value;

    *has_atts             = *att_nb;

    # imports from XML::Twig
    *_is_fh               = *XML::Twig::_is_fh;

    # XML::XPath compatibility
    *string_value         = *text;
    *toString             = *sprint;
    *getName              = *gi;
    *getRootNode          = *twig;
    *getNextSibling       = *_next_sibling;
    *getPreviousSibling   = *_prev_sibling;
    *isElementNode        = *is_elt;
    *isTextNode           = *is_text;
    *isPI                 = *is_pi;
    *isPINode             = *is_pi;
    *isProcessingInstructionNode = *is_pi;
    *isComment            = *is_comment;
    *isCommentNode        = *is_comment;
    *getTarget            = *target;
    *getFirstChild        = *_first_child;
    *getLastChild         = *_last_child;

    # try using weak references: Scalar::Util::weaken is used if available,
    # the WeakRef module is the fallback
    {
        # a __DIE__ handler must not fire because an optional module is missing
        local $SIG{__DIE__};
        if( eval 'require Scalar::Util' && defined( &Scalar::Util::weaken) ) {
            Scalar::Util->import( qw(weaken));
        }
        elsif( eval 'require WeakRef') {
            WeakRef->import;
        }
    }
}
5163
|
|
|
|
|
|
|
|
5164
|
|
|
|
|
|
|
|
5165
|
|
|
|
|
|
|
# can be called as XML::Twig::Elt->new( [[$gi, $atts, [@content]]) |
5166
|
|
|
|
|
|
|
# - gi is an optional gi given to the element |
5167
|
|
|
|
|
|
|
# - $atts is a hashref to attributes for the element |
5168
|
|
|
|
|
|
|
# - @content is an optional list of text and elements that will |
5169
|
|
|
|
|
|
|
# be inserted under the element |
5170
|
|
|
|
|
|
|
# Create a new element.
# - with no argument: an empty, untyped element is returned
# - with a single argument that starts with '<' (after optional whitespace):
#   the argument is parsed as XML, see parse
# - otherwise: new( $gi, [$atts,] @content) where $atts is an optional hashref
#   of attributes and @content the text or elements placed under the element.
# The attribute hash is modified: the $CDATA, $ASIS and $EMPTY pseudo-attributes
# are removed from it once they have been used.
sub new {
    my $class= shift;
    $class= ref $class || $class;
    my $elt= bless {}, $class;

    return $elt unless @_;

    if( @_ == 1 && $_[0]=~ m{^\s*<}) { return $class->parse( @_); }

    # if a gi is passed then use it
    my $gi= shift;
    $elt->{gi}=$XML::Twig::gi2index{$gi} or $elt->set_gi( $gi);

    my $atts;
    if( ref $_[0] eq 'HASH') { $atts= shift; }

    # the $CDATA pseudo-attribute asks for the content to be wrapped in a
    # CDATA section: build the section, then the element around it
    if( $atts && defined $atts->{$CDATA}) {
        delete $atts->{$CDATA};

        my $cdata= $class->new( $CDATA => @_);
        return $class->new( $gi, $atts, $cdata);
    }

    if( $gi eq $PCDATA) {
        if( grep { ref $_ } @_) { croak "element $PCDATA can only be created from text"; }
        $elt->{pcdata}= join '', @_;
    }
    elsif( $gi eq $ENT) {
        $elt->{ent}= shift;
    }
    elsif( $gi eq $CDATA) {
        if( grep { ref $_ } @_) { croak "element $CDATA can only be created from text"; }
        $elt->{cdata}= join '', @_;
    }
    elsif( $gi eq $COMMENT) {
        if( grep { ref $_ } @_) { croak "element $COMMENT can only be created from text"; }
        $elt->{comment}= join '', @_;
    }
    elsif( $gi eq $PI) {
        if( grep { ref $_ } @_) { croak "element $PI can only be created from text"; }
        # the first remaining argument is the target, the rest is the data
        $elt->_set_pi( shift, join '', @_);
    }
    elsif( @_) {
        # the rest of the arguments are the content of the element
        $elt->set_content( @_);
    }
    else {
        $elt->{empty}= 1;
    }

    if( $atts) {
        # the attribute hash can be used to pass the asis status
        if( defined $atts->{$ASIS})  { $elt->set_asis( $atts->{$ASIS} ); delete $atts->{$ASIS}; }
        if( defined $atts->{$EMPTY}) { $elt->{empty}= $atts->{$EMPTY}; delete $atts->{$EMPTY}; }
        if( keys %$atts)             { $elt->set_atts( $atts); }
        if( $atts->{$ID})            { $elt->_set_id( $atts->{$ID}); }
    }

    return $elt;
}
5230
|
|
|
|
|
|
|
|
5231
|
|
|
|
|
|
|
# optimized version of $elt->new( PCDATA, $text); |
5232
|
|
|
|
|
|
|
# optimized version of $elt->new( PCDATA, $text);
# called as $class_or_elt->_new_pcdata( $text)
sub _new_pcdata {
    my $class= ref $_[0] || $_[0];
    my $elt= bless {}, $class;
    $elt->{gi}=$XML::Twig::gi2index{$PCDATA} or $elt->set_gi( $PCDATA);
    $elt->{pcdata}= $_[1];
    return $elt;
}
5241
|
|
|
|
|
|
|
|
5242
|
|
|
|
|
|
|
# this function creates an XML::Twig::Elt from a string
5243
|
|
|
|
|
|
|
# it is quite clumsy at the moment, as it just creates a |
5244
|
|
|
|
|
|
|
# new twig then returns its root |
5245
|
|
|
|
|
|
|
# there might also be memory leaks there |
5246
|
|
|
|
|
|
|
# additional arguments are passed to new XML::Twig |
5247
|
|
|
|
|
|
|
# Create an element from an XML string: $class_or_elt->parse( $string, %args)
# A new twig is created with %args and parses the string; its root is
# detached from the twig and returned.
sub parse {
    my( $class, $string, %args)= @_;
    $class= ref( $class) || $class;

    my $t= XML::Twig->new(%args);
    $t->parse( $string);
    my $elt= $t->root;

    # clean-up the node: it must not keep references to the temporary twig
    delete $elt->{twig};         # get rid of the twig data
    delete $elt->{twig_current}; # better get rid of this too
    if( $t->{twig_id_list}) { $elt->{twig_id_list}= $t->{twig_id_list}; }

    $elt->cut;
    undef $t->{twig_root};
    return $elt;
}
5263
|
|
|
|
|
|
|
|
5264
|
|
|
|
|
|
|
# Replace the content of the element by the result of parsing $xml.
# @args are passed to the parser (see parse). Returns the element.
sub set_inner_xml {
    my( $elt, $xml, @args)= @_;

    # $xml is a fragment that can hold text or several elements, so it is
    # wrapped in a dummy element to make it a well-formed document (the
    # wrapper tags had been lost from the string); the wrapper is erased below
    my $new_elt= $elt->parse( "<dummy>$xml</dummy>", @args);

    $elt->cut_children;
    $new_elt->paste_first_child( $elt);
    $new_elt->erase;
    return $elt;
}
5272
|
|
|
|
|
|
|
|
5273
|
|
|
|
|
|
|
# Replace the element itself by the result of parsing $xml.
# @args are passed to the parser (see parse).
# Returns the (erased) wrapper element used during the replacement.
sub set_outer_xml {
    my( $elt, $xml, @args)= @_;

    # $xml is wrapped in a dummy element so that fragments with text or
    # several elements can be parsed (the wrapper tags had been lost from the
    # string); the wrapper takes the place of the element, then is erased
    my $new_elt= $elt->parse( "<dummy>$xml</dummy>", @args);

    $elt->cut_children;
    $new_elt->replace( $elt);
    $new_elt->erase;
    return $new_elt;
}
5281
|
|
|
|
|
|
|
|
5282
|
|
|
|
|
|
|
|
5283
|
|
|
|
|
|
|
# Replace the content of the element by the result of parsing $html as HTML.
# Returns the element.
sub set_inner_html {
    my( $elt, $html)= @_;

    # the fragment is wrapped in an html element (the wrapper tags had been
    # lost from the string); the code below expects the parsed root to hold
    # a head and a body
    my $t= XML::Twig->new->parse_html( "<html>$html</html>");
    my $new_elt= $t->root;

    if( $elt->tag eq 'head') {
        # keep only the content of the head
        $new_elt->first_child( 'head')->unwrap;
        $new_elt->first_child( 'body')->cut;
    }
    elsif( $elt->tag ne 'html') {
        # keep only the content of the body
        $new_elt->first_child( 'head')->cut;
        $new_elt->first_child( 'body')->unwrap;
    }

    $new_elt->cut;
    $elt->cut_children;
    $new_elt->paste_first_child( $elt);
    $new_elt->erase;
    return $elt;
}
5301
|
|
|
|
|
|
|
|
5302
|
|
|
|
|
|
|
# Set the gi (tag) of the element. Elements store the index of their gi:
# a gi seen for the first time gets an entry in @XML::Twig::index2gi and
# %XML::Twig::gi2index. Returns the element.
sub set_gi {
    my( $elt, $gi)= @_;

    if( ! defined $XML::Twig::gi2index{$gi}) {
        # new gi, create entries in %gi2index and @index2gi
        push @XML::Twig::index2gi, $gi;
        $XML::Twig::gi2index{$gi}= $#XML::Twig::index2gi;
    }

    $elt->{gi}= $XML::Twig::gi2index{$gi};
    return $elt;
}
5312
|
|
|
|
|
|
|
|
5313
|
|
|
|
|
|
|
# return the gi (tag) of the element, looked up from its stored index
sub gi {
    my( $elt)= @_;
    return $XML::Twig::index2gi[$elt->{gi}];
}
5314
|
|
|
|
|
|
|
|
5315
|
|
|
|
|
|
|
# local part of the element name (the gi without its namespace prefix)
sub local_name {
    my( $elt)= @_;
    my $gi= $XML::Twig::index2gi[$elt->{'gi'}];
    return _local_name( $gi);
}
5319
|
|
|
|
|
|
|
|
5320
|
|
|
|
|
|
|
# namespace prefix of the element name ('' if the gi has no prefix)
sub ns_prefix {
    my( $elt)= @_;
    my $gi= $XML::Twig::index2gi[$elt->{'gi'}];
    return _ns_prefix( $gi);
}
5324
|
|
|
|
|
|
|
|
5325
|
|
|
|
|
|
|
# namespace prefix for any qname (can be used for elements or attributes) |
5326
|
|
|
|
|
|
|
# namespace prefix for any qname (can be used for elements or attributes):
# the part before the first ':', or '' when the qname has no ':'
# this is a plain function, not a method
sub _ns_prefix {
    my( $qname)= @_;
    return $qname=~ m{^([^:]*):} ? $1 : ''; # should it be '' ?
}
5333
|
|
|
|
|
|
|
|
5334
|
|
|
|
|
|
|
# local name for any qname (can be used for elements or attributes) |
5335
|
|
|
|
|
|
|
# local name for any qname (can be used for elements or attributes):
# the part after the first ':', or the whole qname when it has no ':'
sub _local_name {
    my( $qname)= @_;
    my $colon= index( $qname, ':');
    return $colon < 0 ? $qname : substr( $qname, $colon + 1);
}
5340
|
|
|
|
|
|
|
|
5341
|
|
|
|
|
|
|
#sub get_namespace |
5342
|
|
|
|
|
|
|
# Return the namespace uri bound to a prefix in the scope of the element.
# $prefix is optional and defaults to the prefix of the element.
# The uri comes from %DEFAULT_NS, or from the relevant xmlns / xmlns:prefix
# attribute (looked up with _inherit_att_through_cut); '' if none is found.
sub namespace ## no critic (Subroutines::ProhibitNestedSubs);
{
    my( $elt, $prefix)= @_;
    if( ! defined $prefix) { $prefix= $elt->ns_prefix; }

    my $ns_att= "xmlns";
    if( $prefix) { $ns_att= "xmlns:$prefix"; }

    return $DEFAULT_NS{$prefix} || $elt->_inherit_att_through_cut( $ns_att) || '';
}
5349
|
|
|
|
|
|
|
|
5350
|
|
|
|
|
|
|
# Declare on the element all the namespaces that are in scope for it (as
# returned by _current_ns_prefix_map) and that it does not declare itself.
# Attributes already set on the element are left untouched.
# Returns the element.
sub declare_missing_ns ## no critic (Subroutines::ProhibitNestedSubs);
{
    my( $root)= @_;
    my $map= $root->_current_ns_prefix_map;

    foreach my $prefix (keys %$map) {
        # '#default' is the key used for the default namespace
        my $prefix_att= $prefix eq '#default' ? 'xmlns' : "xmlns:$prefix";
        if( ! $root->{'att'}->{$prefix_att}) {
            $root->set_att( $prefix_att => $map->{$prefix});
        }
    }
    return $root;
}
5362
|
|
|
|
|
|
|
|
5363
|
|
|
|
|
|
|
# Return a hashref prefix => uri of the namespace declarations in scope for
# the element ('#default' is the key for the default namespace).
# The element and its ancestors are examined, and the innermost declaration
# of a prefix wins. A cut element is followed through $elt->{former}->{parent}.
# The result is undef when no declaration is found.
sub _current_ns_prefix_map {
    my( $elt)= @_;
    my $map;

    while( $elt) {
        foreach my $att ($elt->att_names) {
            my $prefix;
            if( $att eq 'xmlns')               { $prefix= '#default'; }
            elsif( $att=~ m{^xmlns:(.*)$})     { $prefix= $1; }
            else                               { next; }

            # declarations found first are closer to the element: keep them
            if( ! exists $map->{$prefix}) { $map->{$prefix}= $elt->{'att'}->{$att}; }
        }
        $elt= $elt->{parent} || ($elt->{former} && $elt->{former}->{parent});
    }
    return $map;
}
5378
|
|
|
|
|
|
|
|
5379
|
|
|
|
|
|
|
# Declare a namespace on the element: the uri is bound to $prefix, or
# becomes the default namespace if no (or an empty) prefix is given.
# Returns the element.
sub set_ns_decl {
    my( $elt, $uri, $prefix)= @_;
    my $ns_att= 'xmlns';
    if( $prefix) { $ns_att= "xmlns:$prefix"; }
    $elt->set_att( $ns_att => $uri);
    return $elt;
}
5385
|
|
|
|
|
|
|
|
5386
|
|
|
|
|
|
|
# Make $uri the default namespace of the (sub)tree rooted at $root:
# - prefixed elements that are in the namespace lose their prefix
# - the declarations of the namespace in the subtree are removed
# - the namespace is declared as the default one on $root
# Returns $root.
sub set_ns_as_default {
    my( $root, $uri)= @_;
    my @ns_decl_to_remove;

    foreach my $elt ($root->descendants_or_self) {
        # ns_prefix is the method; _ns_prefix is a function on qnames, and
        # calling it as a method tests the stringified object, which is
        # always true
        if( $elt->ns_prefix && $elt->namespace eq $uri) {
            $elt->set_tag( $elt->local_name);
        }

        # store any namespace declaration for that uri; the regexp is anchored
        # so that only xmlns and xmlns:* attributes are considered
        foreach my $ns_decl (grep { m{^xmlns(?::|$)} && $elt->{'att'}->{$_} eq $uri } $elt->att_names) {
            push @ns_decl_to_remove, [$elt, $ns_decl];
        }
    }

    # now remove the ns declarations (if done earlier then descendants of an
    # element with the ns declaration are not considered being in the namespace)
    foreach my $ns_decl_to_remove (@ns_decl_to_remove) {
        my( $elt, $ns_decl)= @$ns_decl_to_remove;
        $elt->del_att( $ns_decl);
    }

    # the default declaration is set last: set before the removal, it would be
    # deleted again when $root already had xmlns="$uri"
    $root->set_ns_decl( $uri);

    return $root;
}
5406
|
|
|
|
|
|
|
|
5407
|
|
|
|
|
|
|
|
5408
|
|
|
|
|
|
|
|
5409
|
|
|
|
|
|
|
# return #ELT for an element and #PCDATA... for others |
5410
|
|
|
|
|
|
|
# return #ELT for an element and #PCDATA... for others
# (a gi index at or above $XML::Twig::SPECIAL_GI denotes a regular element)
sub get_type {
    my( $elt)= @_;
    my $gi_nb= $elt->{gi}; # the number, not the string
    if( $gi_nb >= $XML::Twig::SPECIAL_GI) { return $ELT; }
    return $elt->gi;
}
5415
|
|
|
|
|
|
|
|
5416
|
|
|
|
|
|
|
# return the gi if it's a "real" element, 0 otherwise |
5417
|
|
|
|
|
|
|
# return the gi if it's a "real" element, 0 otherwise
# (gi indexes below $XML::Twig::SPECIAL_GI are not regular elements)
sub is_elt {
    my( $elt)= @_;
    return 0 if( $elt->{gi} < $XML::Twig::SPECIAL_GI);
    return $elt->gi;
}
5423
|
|
|
|
|
|
|
|
5424
|
|
|
|
|
|
|
|
5425
|
|
|
|
|
|
|
# true if the element has a pcdata field (even an undefined one)
sub is_pcdata {
    my( $elt)= @_;
    return exists $elt->{'pcdata'};
}
5429
|
|
|
|
|
|
|
|
5430
|
|
|
|
|
|
|
# true if the element has a cdata field (even an undefined one)
sub is_cdata {
    my( $elt)= @_;
    return exists $elt->{'cdata'};
}
5434
|
|
|
|
|
|
|
|
5435
|
|
|
|
|
|
|
# true if the element is a processing instruction (it has a target field)
sub is_pi {
    my( $elt)= @_;
    return exists $elt->{'target'};
}
5439
|
|
|
|
|
|
|
|
5440
|
|
|
|
|
|
|
# true if the element is a comment (it has a comment field)
sub is_comment {
    my( $elt)= @_;
    return exists $elt->{'comment'};
}
5444
|
|
|
|
|
|
|
|
5445
|
|
|
|
|
|
|
# true if the element is an entity: it has an ent field, or a true
# ent_name field (the value of ent_name is then returned)
sub is_ent {
    my( $elt)= @_;
    return( exists( $elt->{ent}) || $elt->{ent_name});
}
5449
|
|
|
|
|
|
|
|
5450
|
|
|
|
|
|
|
|
5451
|
|
|
|
|
|
|
# true if the element is text: either PCDATA or CDATA
sub is_text {
    my( $elt)= @_;
    return( exists( $elt->{'pcdata'}) || exists( $elt->{'cdata'}));
}
5455
|
|
|
|
|
|
|
|
5456
|
|
|
|
|
|
|
# return the empty flag of the element, 0 if it is not set
sub is_empty {
    my( $elt)= @_;
    return $elt->{empty} || 0;
}
5458
|
|
|
|
|
|
|
|
5459
|
|
|
|
|
|
|
# set the empty flag of the element to the optional second argument
# (1 if it is missing or undef); returns the element
sub set_empty {
    my( $elt, $empty)= @_;
    if( ! defined( $empty)) { $empty= 1; }
    $elt->{empty}= $empty;
    return $elt;
}
5461
|
|
|
|
|
|
|
|
5462
|
|
|
|
|
|
|
# remove the empty flag of the element if it is set to a true value;
# returns the element
sub set_not_empty {
    my( $elt)= @_;
    if( $elt->{'empty'}) { delete $elt->{empty}; }
    return $elt;
}
5464
|
|
|
|
|
|
|
|
5465
|
|
|
|
|
|
|
|
5466
|
|
|
|
|
|
|
# Mark the element and all the elements returned by _descendants as "asis".
# CDATA sections become PCDATA: their gi is changed and their text is copied
# to the pcdata field (the cdata field is left in place).
# Returns the element.
sub set_asis {
    my( $elt)= @_;

    foreach my $descendant ($elt, $elt->_descendants ) {
        $descendant->{asis}= 1;
        next unless exists $descendant->{'cdata'};
        $descendant->{gi}=$XML::Twig::gi2index{$PCDATA} or $descendant->set_gi( $PCDATA);
        $descendant->{pcdata}= $descendant->{cdata};
    }
    return $elt;
}
5479
|
|
|
|
|
|
|
|
5480
|
|
|
|
|
|
|
# Remove the asis flag from the element and from its descendants.
# Returns the element.
sub set_not_asis {
    my( $elt)= @_;
    foreach my $descendant ($elt, $elt->descendants) {
        if( $descendant->{asis}) { delete $descendant->{asis}; }
    }
    return $elt;
}
5486
|
|
|
|
|
|
|
|
5487
|
|
|
|
|
|
|
# return the asis flag of the element (undef if it is not set)
sub is_asis {
    my( $elt)= @_;
    return $elt->{asis};
}
5489
|
|
|
|
|
|
|
|
5490
|
|
|
|
|
|
|
# Tell whether the element is closed, i.e. no longer being parsed:
# - returns nothing if the element has no twig
# - returns 1 if the twig has no current element
# - otherwise returns the result of $current->in( $elt)
sub closed {
    my( $elt)= @_;

    my $t= $elt->twig;
    return unless $t;

    my $curr_elt= $t->{twig_current};
    if( ! $curr_elt) { return 1; }
    return $curr_elt->in( $elt);
}
5497
|
|
|
|
|
|
|
|
5498
|
|
|
|
|
|
|
# Set the text of a PCDATA element. If comments or PIs are stored within the
# text (extra_data_in_pcdata), an attempt is made to keep them at a
# sensible position in the new text. Returns the element.
sub set_pcdata {
    my( $elt, $pcdata)= @_;

    # must be done before the text is replaced: the old text is needed
    _try_moving_extra_data( $elt, $pcdata) if( $elt->{extra_data_in_pcdata});

    $elt->{pcdata}= $pcdata;
    return $elt;
}
5507
|
|
|
|
|
|
|
|
5508
|
|
|
|
|
|
|
# return the extra_data_in_pcdata field of the element
sub _extra_data_in_pcdata {
    my( $elt)= @_;
    return $elt->{extra_data_in_pcdata};
}
5509
|
|
|
|
|
|
|
# set the extra_data_in_pcdata field of the element; returns the element
sub _set_extra_data_in_pcdata {
    my( $elt, $extra_data)= @_;
    $elt->{extra_data_in_pcdata}= $extra_data;
    return $elt;
}
5510
|
|
|
|
|
|
|
# delete the extra_data_in_pcdata field of the element; returns the element
sub _del_extra_data_in_pcdata {
    my( $elt)= @_;
    delete $elt->{extra_data_in_pcdata};
    return $elt;
}
5511
|
|
|
|
|
|
|
# add an entry { text => $text, offset => $offset } at the start of the
# extra_data_in_pcdata list of the element, creating the list if needed
# called as _unshift_extra_data_in_pcdata( $elt, $text, $offset)
sub _unshift_extra_data_in_pcdata {
    my( $e, $text, $offset)= @_;
    $e->{extra_data_in_pcdata}||=[];
    unshift @{$e->{extra_data_in_pcdata}}, { text => $text, offset => $offset };
}
5516
|
|
|
|
|
|
|
# add an entry { text => $text, offset => $offset } at the end of the
# extra_data_in_pcdata list of the element, creating the list if needed
# called as _push_extra_data_in_pcdata( $elt, $text, $offset)
sub _push_extra_data_in_pcdata {
    my( $e, $text, $offset)= @_;
    $e->{extra_data_in_pcdata}||=[];
    push @{$e->{extra_data_in_pcdata}}, { text => $text, offset => $offset };
}
5521
|
|
|
|
|
|
|
|
5522
|
|
|
|
|
|
|
# return the extra_data_before_end_tag field of the element ('' if not set)
sub _extra_data_before_end_tag {
    my( $elt)= @_;
    return $elt->{extra_data_before_end_tag} || '';
}
5523
|
|
|
|
|
|
|
# set the extra_data_before_end_tag field of the element; returns the element
sub _set_extra_data_before_end_tag {
    my( $elt, $data)= @_;
    $elt->{extra_data_before_end_tag}= $data;
    return $elt;
}
5524
|
|
|
|
|
|
|
# delete the extra_data_before_end_tag field of the element; returns the element
sub _del_extra_data_before_end_tag {
    my( $elt)= @_;
    delete $elt->{extra_data_before_end_tag};
    return $elt;
}
5525
|
|
|
|
|
|
|
# add $data in front of the extra_data_before_end_tag field of the element
# (the field is simply set to $data if it was empty); returns the element
sub _prefix_extra_data_before_end_tag {
    my( $elt, $data)= @_;
    my $existing= $elt->{extra_data_before_end_tag};
    $elt->{extra_data_before_end_tag}= $existing ? $data . $existing : $data;
    return $elt;
}
5533
|
|
|
|
|
|
|
|
5534
|
|
|
|
|
|
|
# internal, in cases where we know there is no extra_data (inlined anyway!) |
5535
|
|
|
|
|
|
|
# set the pcdata field without looking at extra data; returns the new text
sub _set_pcdata {
    my( $elt, $pcdata)= @_;
    return $elt->{pcdata}= $pcdata;
}
5536
|
|
|
|
|
|
|
|
5537
|
|
|
|
|
|
|
# try to figure out if we can keep the extra_data around |
5538
|
|
|
|
|
|
|
# try to figure out if we can keep the extra_data around
# $modified is the new text, the element still holds the initial text:
# - the initial text is found in the new one: all the offsets are shifted
# - the new text is found in the initial one: the offsets are shifted back
#   and the extra data that falls outside of the new text is dropped
# - otherwise the texts are compared word by word, then char by char, and
#   the extra data is deleted if both comparisons fail
sub _try_moving_extra_data {
    my( $elt, $modified)=@_;
    my $initial= $elt->{pcdata};
    my $cpis=    $elt->{extra_data_in_pcdata};

    my $offset= index( $modified, $initial);
    if( $offset != -1) {
        # text has been added
        foreach my $cpi (@$cpis) { $cpi->{offset} += $offset; }
    }
    elsif( ($offset= index( $initial, $modified)) != -1) {
        # text has been cut
        my $len= length( $modified);
        foreach my $cpi (@$cpis) { $cpi->{offset} -= $offset; }
        $elt->_set_extra_data_in_pcdata( [ grep { $_->{offset} >= 0 && $_->{offset} < $len } @$cpis ]);
    }
    else {
           _match_extra_data_words( $elt, $initial, $modified)
        || _match_extra_data_chars( $elt, $initial, $modified)
        || $elt->_del_extra_data_in_pcdata;
    }
}
5559
|
|
|
|
|
|
|
|
5560
|
|
|
|
|
|
|
# try to relocate the extra data by comparing the initial and the modified
# texts split at word boundaries; returns the result of _match_extra_data
sub _match_extra_data_words {
    my( $elt, $initial, $modified)= @_;
    my $initial_words=  [ split /\b/, $initial  ];
    my $modified_words= [ split /\b/, $modified ];
    return _match_extra_data( $elt, length( $initial), $initial_words, $modified_words);
}
5567
|
|
|
|
|
|
|
|
5568
|
|
|
|
|
|
|
# try to relocate the extra data by comparing the initial and the modified
# texts character by character; returns the result of _match_extra_data
sub _match_extra_data_chars {
    my( $elt, $initial, $modified)= @_;
    my $initial_chars=  [ split //, $initial  ];
    my $modified_chars= [ split //, $modified ];
    return _match_extra_data( $elt, length( $initial), $initial_chars, $modified_chars);
}
5575
|
|
|
|
|
|
|
|
5576
|
|
|
|
|
|
|
# Update the offsets of the extra data stored in the text of the element.
# $length is the length of the initial text; $initial and $modified are refs
# to the lists of tokens (words or chars) of the initial and modified texts.
# Both lists are consumed by _pos_offset.
# Returns 1 if the offsets could be updated, 0 if the token lists could not
# be matched.
sub _match_extra_data {
    my( $elt, $length, $initial, $modified)= @_;

    my $cpis= $elt->{extra_data_in_pcdata};

    if( @$initial <= @$modified) {
        # tokens have been added: the extra data is moved forward
        my( $ok, $positions, $offsets)= _pos_offset( $initial, $modified);
        return 0 unless $ok;

        my $offset= 0;
        my $pos= shift @$positions;
        foreach my $cpi (@$cpis) {
            # move to the last insertion point at or before the extra data
            while( $cpi->{offset} >= $pos) {
                $offset= shift @$offsets;
                $pos= shift @$positions || $length +1;
            }
            $cpi->{offset} += $offset;
        }
        return 1;
    }

    # tokens have been removed: the extra data is moved back, or dropped
    my( $ok, $positions, $offsets)= _pos_offset( $modified, $initial);
    return 0 unless $ok;

    my $offset= 0;
    my $pos= shift @$positions;
    my $prev_pos= 0;

    foreach my $cpi (@$cpis) {
        while( $cpi->{offset} >= $pos) {
            $offset= shift @$offsets;
            $prev_pos= $pos;
            $pos= shift @$positions || $length +1;
        }
        $cpi->{offset} -= $offset;
        # entries that end up before the previous position lose their text
        # and are filtered out below
        if( $cpi->{offset} < $prev_pos) { delete $cpi->{text}; }
    }
    $elt->_set_extra_data_in_pcdata( [ grep { exists $_->{text} } @$cpis ]);
    return 1;
}
5621
|
|
|
|
|
|
|
|
5622
|
|
|
|
|
|
|
|
5623
|
|
|
|
|
|
|
# Compare 2 lists of tokens, the short one being the long one with some
# tokens missing. Both lists are consumed.
# Returns ( 1, \@pos, \@offset) where @pos holds the positions (in chars, in
# the short text) at which tokens are missing and @offset the difference
# between the position in the long text and in the short one at that point.
# Returns 0 if the short list cannot be matched against the long one.
sub _pos_offset {
    my( $short, $long)= @_;
    my( @pos, @offset);
    my $short_length= 0;   # chars consumed from the short text
    my $long_length=  0;   # chars consumed from the long text

    while( @$short) {
        my $short_token= shift @$short;
        my $long_token=  shift @$long;

        if( $short_token ne $long_token) {
            # skip tokens of the long list until the lists are in sync again
            while( @$long && $short_token ne $long_token) {
                $long_length += length( $long_token);
                $long_token= shift @$long;
            }
            if( !@$long && $short_token ne $long_token) { return 0; }
            push @pos, $short_length;
            push @offset, $long_length - $short_length;
        }

        my $length= length( $short_token);
        $short_length += $length;
        $long_length  += $length;
    }
    return( 1, \@pos, \@offset);
}
5645
|
|
|
|
|
|
|
|
5646
|
|
|
|
|
|
|
# append text to the pcdata of the element; returns the element
sub append_pcdata {
    my( $elt, $text)= @_;
    $elt->{'pcdata'} .= $text;
    return $elt;
}
5650
|
|
|
|
|
|
|
|
5651
|
|
|
|
|
|
|
# return the pcdata (text) of the element
sub pcdata {
    my( $elt)= @_;
    return $elt->{pcdata};
}
5652
|
|
|
|
|
|
|
|
5653
|
|
|
|
|
|
|
|
5654
|
|
|
|
|
|
|
# append text to the extra_data field of the element; returns the element
sub append_extra_data {
    my( $elt, $extra_data)= @_;
    $elt->{extra_data} .= $extra_data;
    return $elt;
}
5658
|
|
|
|
|
|
|
|
5659
|
|
|
|
|
|
|
# set the extra_data field of the element; returns the element
sub set_extra_data {
    my( $elt, $extra_data)= @_;
    $elt->{extra_data}= $extra_data;
    return $elt;
}
5663
|
|
|
|
|
|
|
# return the extra_data field of the element ('' if it is not set)
sub extra_data {
    my( $elt)= @_;
    return $elt->{extra_data} || '';
}
5664
|
|
|
|
|
|
|
|
5665
|
|
|
|
|
|
|
# set the target of a processing instruction; returns the element
sub set_target {
    my $elt= shift;
    ( $elt->{target})= @_;
    return $elt;
}
5670
|
|
|
|
|
|
|
# return the target of a processing instruction
sub target {
    my( $elt)= @_;
    return $elt->{target};
}
5671
|
|
|
|
|
|
|
|
5672
|
|
|
|
|
|
|
# set the data of a processing instruction; returns the element
sub set_data {
    my( $elt, $data)= @_;
    $elt->{'data'}= $data;
    return $elt;
}
5676
|
|
|
|
|
|
|
# return the data of a processing instruction
sub data {
    my( $elt)= @_;
    return $elt->{data};
}
5677
|
|
|
|
|
|
|
|
5678
|
|
|
|
|
|
|
# Turn the element into a processing instruction (its children are cut if
# it was not one already) and set its target and data.
# Called as $elt->set_pi( $target, $data); returns the result of _set_pi.
sub set_pi {
    my( $elt, @target_and_data)= @_;

    if( $elt->{gi} != $XML::Twig::gi2index{$PI}) {
        $elt->cut_children;
        $elt->{gi}=$XML::Twig::gi2index{$PI} or $elt->set_gi( $PI);
    }
    return $elt->_set_pi( @target_and_data);
}
5686
|
|
|
|
|
|
|
|
5687
|
|
|
|
|
|
|
# set the target and the data of a processing instruction, without checking
# the type of the element; returns the element
sub _set_pi {
    my( $elt, $target, $data)= @_;
    $elt->set_target( $target);
    $elt->{data}= $data;
    return $elt;
}
5692
|
|
|
|
|
|
|
|
5693
|
|
|
|
|
|
|
# return the processing instruction as a string: the target, followed by a
# space and the data if there is any, between $PI_START and $PI_END
sub pi_string {
    my( $elt)= @_;
    my $data= $elt->{data};
    my $has_data= defined( $data) && $data ne '';
    return $PI_START . $elt->{target} . ($has_data ? " $data" : '') . $PI_END;
}
5699
|
|
|
|
|
|
|
|
5700
|
|
|
|
|
|
|
# Turn the element into a comment (its children are cut if it was not one
# already) and set the text of the comment. Returns the element.
sub set_comment {
    my( $elt, $comment)= @_;

    if( $elt->{gi} != $XML::Twig::gi2index{$COMMENT}) {
        $elt->cut_children;
        $elt->{gi}=$XML::Twig::gi2index{$COMMENT} or $elt->set_gi( $COMMENT);
    }
    $elt->{comment}= $comment;
    return $elt;
}
5709
|
|
|
|
|
|
|
|
5710
|
|
|
|
|
|
|
# set the text of a comment, without checking the type of the element;
# returns the element
sub _set_comment {
    my( $elt, $comment)= @_;
    $elt->{comment}= $comment;
    return $elt;
}
5711
|
|
|
|
|
|
|
# return the text of a comment
sub comment {
    my( $elt)= @_;
    return $elt->{comment};
}
5712
|
|
|
|
|
|
|
# return the comment as a string: its escaped text between $COMMENT_START
# and $COMMENT_END
sub comment_string {
    my( $elt)= @_;
    my $escaped= _comment_escaped_string( $elt->{comment});
    return $COMMENT_START . $escaped . $COMMENT_END;
}
5713
|
|
|
|
|
|
|
# comments are escaped so that they neither start nor end with '-', nor contain '--'
5714
|
|
|
|
|
|
|
# Return the text of a comment made safe for output: a space is added
# before a leading '-', after a trailing '-', and between consecutive '-'.
sub _comment_escaped_string {
    my( $c)= @_;
    $c=~ s{^-}{ -};
    $c=~ s{-$}{- };
    # every '-' followed by another '-' gets a space after it; replacing
    # pairs ('--' => '- -') left a '--' in runs of 3 or more dashes
    $c=~ s{-(?=-)}{- }g;
    return $c;
}
5721
|
|
|
|
|
|
|
|
5722
|
|
|
|
|
|
|
# set the entity reference held by the element; returns the element
sub set_ent {
    my( $elt, $ent)= @_;
    $elt->{ent}= $ent;
    return $elt;
}
5723
|
|
|
|
|
|
|
# return the entity reference held by the element
sub ent {
    my( $elt)= @_;
    return $elt->{ent};
}
5724
|
|
|
|
|
|
|
# return the ent field of the element without its first and last characters
sub ent_name {
    my( $elt)= @_;
    return substr( $elt->{ent}, 1, -1);
}
5725
|
|
|
|
|
|
|
|
5726
|
|
|
|
|
|
|
# Set the text of a CDATA section: $elt->set_cdata( $text)
# If the element is not a CDATA section, its children are replaced by a new
# CDATA section holding the text.
# Returns the element in both cases (the text used to be returned when the
# element was already a CDATA section, unlike the other setters and unlike
# the other branch of this method).
sub set_cdata {
    my $elt= shift;

    unless( $elt->{gi} == $XML::Twig::gi2index{$CDATA}) {
        $elt->cut_children;
        $elt->insert_new_elt( first_child => $CDATA, @_);
        return $elt;
    }

    $elt->{cdata}= $_[0];
    return $elt;
}
5736
|
|
|
|
|
|
|
|
5737
|
|
|
|
|
|
|
# Stores the CDATA content without checking the element type; returns the element.
sub _set_cdata {
    my( $elt, $cdata)= @_;
    $elt->{cdata}= $cdata;
    return $elt;
}
5741
|
|
|
|
|
|
|
|
5742
|
|
|
|
|
|
|
# Appends a string to the CDATA content of the element; returns the element.
sub append_cdata {
    my( $elt, $more)= @_;
    $elt->{cdata} .= $more;
    return $elt;
}
5746
|
|
|
|
|
|
|
# Returns the CDATA content stored in the element.
sub cdata {
    my( $elt)= @_;
    return $elt->{cdata};
}
5747
|
|
|
|
|
|
|
|
5748
|
|
|
|
|
|
|
|
5749
|
|
|
|
|
|
|
# Returns the element if it is an element (is_elt) and none of its
# children is one, 0 otherwise.
sub contains_only_text {
    my( $elt)= @_;
    $elt->is_elt or return 0;
    for my $kid ( $elt->_children) {
        return 0 if $kid->is_elt;
    }
    return $elt;
}
5756
|
|
|
|
|
|
|
|
5757
|
|
|
|
|
|
|
# Returns 0 as soon as a child of the element fails $child->is( $exp).
# Otherwise returns the number of children, or 1 when there are none
# (so the result is always true in that case).
sub contains_only {
    my( $elt, $exp)= @_;
    my @kids;
    for( my $kid= $elt->{first_child}; $kid; $kid= $kid->{next_sibling}) {
        push @kids, $kid;
    }
    for my $kid (@kids) {
        $kid->is( $exp) or return 0;
    }
    return scalar( @kids) || 1;
}
5764
|
|
|
|
|
|
|
|
5765
|
|
|
|
|
|
|
# Returns the only child of the element if there is exactly one child and
# it passes $exp, 0 otherwise.
sub contains_a_single {
    my( $elt, $exp)= @_;
    my $only= $elt->{first_child};
    return 0 unless $only;
    return 0 unless $only->passes( $exp);
    return $only->{next_sibling} ? 0 : $only;
}
5772
|
|
|
|
|
|
|
|
5773
|
|
|
|
|
|
|
|
5774
|
|
|
|
|
|
|
# Follows the parent links up to the topmost element (the one without a
# parent) and returns it.
sub root {
    my( $node)= @_;
    $node= $node->{parent} while $node->{parent};
    return $node;
}
5779
|
|
|
|
|
|
|
|
5780
|
|
|
|
|
|
|
# Same as root, except that when an element has no parent the parent
# recorded under {former}->{parent} is followed instead, if there is one.
sub _root_through_cut {
    my( $node)= @_;
    while( 1) {
        my $up= $node->{parent} || ( $node->{former} && $node->{former}->{parent});
        last unless $up;
        $node= $up;
    }
    return $node;
}
5785
|
|
|
|
|
|
|
|
5786
|
|
|
|
|
|
|
# Returns the twig field of the root of the tree the element is in.
sub twig {
    my( $elt)= @_;
    return $elt->root->{twig};
}
5791
|
|
|
|
|
|
|
|
5792
|
|
|
|
|
|
|
# Returns the twig field of the root found by _root_through_cut.
sub _twig_through_cut {
    my( $elt)= @_;
    return $elt->_root_through_cut->{twig};
}
5797
|
|
|
|
|
|
|
|
5798
|
|
|
|
|
|
|
|
5799
|
|
|
|
|
|
|
# used for navigation |
5800
|
|
|
|
|
|
|
# returns undef or the element, depending on whether $elt passes $cond |
5801
|
|
|
|
|
|
|
# $cond can be |
5802
|
|
|
|
|
|
|
# - empty: the element passes the condition |
5803
|
|
|
|
|
|
|
# - ELT ('#ELT'): the element passes the condition if it is a "real" element |
5804
|
|
|
|
|
|
|
# - TEXT ('#TEXT'): the element passes if it is a CDATA or PCDATA element |
5805
|
|
|
|
|
|
|
# - a string with an XPath condition (only a subset of XPath is actually |
5806
|
|
|
|
|
|
|
# supported). |
5807
|
|
|
|
|
|
|
# - a regexp: the element passes if its gi matches the regexp |
5808
|
|
|
|
|
|
|
# - a code ref: the element passes if the code, applied on the element, |
5809
|
|
|
|
|
|
|
# returns true |
5810
|
|
|
|
|
|
|
|
5811
|
|
|
|
|
|
|
my %cond_cache; # expression => coderef |
5812
|
|
|
|
|
|
|
|
5813
|
|
|
|
|
|
|
# Empties the cache that maps condition expressions to compiled code references.
sub reset_cond_cache {
    return %cond_cache= ();
}
5814
|
|
|
|
|
|
|
|
5815
|
|
|
|
|
|
|
{ |
5816
|
|
|
|
|
|
|
# Compiles a navigation condition into a code reference.
# - a code reference is returned as is
# - a Regexp is matched against the gi of the element
# - a string is consumed piece by piece, each piece generating a test,
#   the tests are then or-ed together
# A leading '!' in the condition negates the test.
# The generated sub returns the element it is called on when the test
# succeeds, undef otherwise.
# Croaks if the condition cannot be parsed or if the generated code does
# not compile.
sub _install_cond {
    my $cond= shift;
    my $test;
    my $init= '';

    my $original_cond= $cond;          # kept untouched for the error messages

    my $not= ( $cond=~ s{^\s*!}{}) ? '!' : '';

    return $cond if( ref $cond eq 'CODE');

    if( ref $cond eq 'Regexp') {
        $test= qq{(\$_[0]->gi=~ /$cond/)};
    }
    else {
        # the condition is a string: every pass through the loop removes
        # one piece (up to $SEP) from the start of it
        my @tests;
        while( $cond) {
            if( $cond=~ s{$ELT$SEP}{}) {
                push @tests, qq{\$_[0]->is_elt};
            }
            elsif( $cond=~ s{$TEXT$SEP}{}) {
                push @tests, qq{\$_[0]->is_text};
            }
            elsif( $cond=~ s{^\s*($REG_TAG_PART)$SEP}{}) {
                push @tests, _gi_test( $1);
            }
            elsif( $cond=~ s{^\s*($REG_REGEXP)$SEP}{}) {
                # /regexp/ on the gi
                push @tests, qq{ \$_[0]->gi=~ $1 };
            }
            elsif( $cond=~ s{^\s*($REG_TAG_PART)?\s* # $1
                             \[\s*(-?)\s*(\d+)\s*\] # [$2]
                             $SEP}{}xo
                 ) {
                # position test: the element is counted among its previous
                # siblings, or its next siblings when the index is negative
                my( $gi, $neg, $index)= ( $1, $2, $3);
                my $siblings= $neg ? q{$_[0]->_next_siblings} : q{$_[0]->_prev_siblings};
                if( $gi && ( $gi ne '*')) {
                    push @tests, _and( _gi_test( $gi), qq{ (scalar( grep { \$_->gi eq "$gi" } $siblings) + 1 == $index)});
                }
                else {
                    push @tests, qq{(scalar( $siblings) + 1 == $index)};
                }
            }
            elsif( $cond=~ s{^\s*($REG_TAG_PART?)\s*($REG_PREDICATE)$SEP}{}) {
                my( $gi, $predicate)= ( $1, $2);
                push @tests, _and( _gi_test( $gi), _parse_predicate_in_step( $predicate));
            }
            elsif( $cond=~ s{^\s*($REG_NAKED_PREDICATE)$SEP}{}) {
                push @tests, _parse_predicate_in_step( $1);
            }
            else {
                croak "wrong navigation condition '$original_cond' ($@)";
            }
        }
        $test= @tests > 1 ? '(' . join( '||', map { "($_)" } @tests) . ')' : $tests[0];
    }

    # the test is wrapped in an anonymous sub, which is then compiled
    my $sub= qq{sub { $NO_WARNINGS; $init; return $not($test) ? \$_[0] : undef; } };
    my $s= eval $sub;
    if( $@) {
        croak "wrong navigation condition '$original_cond' ($@);"
    }
    return $s;
}
5877
|
|
|
|
|
|
|
|
5878
|
|
|
|
|
|
|
# Returns the Perl code (as a string) that tests the tag part of a
# condition: a gi, optionally followed by .class or #id.
# When a gi is already in %XML::Twig::gi2index the generated code compares
# indexes, which is faster, otherwise it compares the gi strings (the gi
# might be created later).
sub _gi_test {
    my( $full_gi)= @_;

    # shortcut when the whole string is a known gi (this includes the
    # case where the gi itself includes a dot)
    if( my $full_index= $XML::Twig::gi2index{$full_gi}) {
        return qq{\$_[0]->{gi} == $full_index};
    }

    my( $gi, $class, $id)= $full_gi=~ m{^(.*?)(?:[.]([^.]*)|[#](.*))?$};

    # no gi, or '*': any gi passes
    my $gi_test= 1;
    if( $gi && $gi ne '*') {
        my $index= $XML::Twig::gi2index{$gi};
        $gi_test= $index ? qq{\$_[0]->{gi} == $index}
                         : qq{ \$_[0]->gi eq "$gi"};
    }

    my $class_test= $class
                  ? qq{ defined( \$_[0]->{att}->{class}) && \$_[0]->{att}->{class}=~ m{\\b$class\\b} }
                  : '';

    my $id_test= $id
               ? qq{ defined( \$_[0]->{att}->{$ID}) && \$_[0]->{att}->{$ID} eq '$id' }
               : '';

    return _and( $gi_test, $class_test, $id_test);
}
5919
|
|
|
|
|
|
|
|
5920
|
|
|
|
|
|
|
|
5921
|
|
|
|
|
|
|
# input: the original predicate |
5922
|
|
|
|
|
|
|
# Converts a predicate (the part of a condition between square brackets)
# into Perl code, returned as a string between parens.
# The brackets are removed, then the tokens recognized by the big regexp
# are replaced one by one by their Perl equivalent; anything else is left
# untouched.
sub _parse_predicate_in_step {
    my $cond= shift;
    my %PERL_ALPHA_TEST= ( '='  => ' eq ', '!=' => ' ne ',
                           '>'  => ' gt ', '>=' => ' ge ',
                           '<'  => ' lt ', '<=' => ' le ',
                         );

    $cond=~ s{^\s*\[\s*}{};
    $cond=~ s{\s*\]\s*$}{};
    $cond=~ s{( ($REG_STRING|$REG_REGEXP) # strings or regexps
               |\@($REG_TAG_NAME)(?=\s*(?:[><=!]|!~|=~)) # @att (followed by a comparison operator)
               |\@($REG_TAG_NAME) # @att (not followed by a comparison operator)
               |=~|!~ # matching operators
               |([><]=?|=|!=)(?=\s*[\d+-]) # test before a number
               |([><]=?|=|!=) # test, other cases
               |($REG_FUNCTION) # no arg functions
               # this bit is a mess, but it is the only solution with this half-baked parser
               |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*$REG_MATCH\s*$REG_REGEXP) # string( child) =~ /regexp/
               |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*!?=\s*$REG_VALUE) # string( child) = "value" (or !=)
               |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*[<>]=?\s*$REG_VALUE) # string( child) > "value"
               |(and|or)
              )}
             { my( $token, $string, $att, $bare_att, $num_test, $alpha_test, $func, $string_regexp, $string_eq, $string_test, $and_or)
                 = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);

               if( defined $string) {
                   $token                     # strings and regexps are kept as is
               }
               elsif( $att) {
                   "( \$_[0]->{att} && exists( \$_[0]->{att}->{'$att'}) && \$_[0]->{att}->{'$att'})";
               }
               elsif( $bare_att) {
                   "(\$_[0]->{att} && defined( \$_[0]->{att}->{'$bare_att'}))";
               }
               elsif( $num_test && ($num_test eq '=') ) {
                   "=="                       # the other numeric tests are unchanged
               }
               elsif( $alpha_test) {
                   $PERL_ALPHA_TEST{$alpha_test}
               }
               elsif( $func && $func=~ m{^(?:string|text)}) {
                   "\$_[0]->text";
               }
               elsif( $string_regexp && $string_regexp =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)}) {
                   "(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $2 $3) } 1, \$_[0]->_children)";
               }
               elsif( $string_eq && $string_eq =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*(!?=)\s*($REG_VALUE)}) {
                   "(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $PERL_ALPHA_TEST{$2} $3) } 1, \$_[0]->_children)";
               }
               elsif( $string_test && $string_test =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*([<>]=?)\s*($REG_VALUE)}) {
                   "(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $2 $3) } 1, \$_[0]->_children)";
               }
               elsif( $and_or) {
                   $and_or eq 'and' ? '&&' : '||' ;
               }
               else {
                   $token;
               }
             }gexs;
    return "($cond)";
}
5962
|
|
|
|
|
|
|
|
5963
|
|
|
|
|
|
|
|
5964
|
|
|
|
|
|
|
# Maps the '=' and '!=' operators to their Perl string equivalents
# ('eq' and 'ne'), any other operator is returned unchanged.
sub _op {
    my( $op)= @_;
    my %perl_op= ( '=' => 'eq', '!=' => 'ne');
    return exists $perl_op{$op} ? $perl_op{$op} : $op;
}
5970
|
|
|
|
|
|
|
|
5971
|
|
|
|
|
|
|
# Returns the element when there is no condition, otherwise the result of
# the compiled condition applied to the element.
# Compiled conditions are kept in %cond_cache.
sub passes {
    my( $elt, $cond)= @_;
    $cond or return $elt;
    $cond_cache{$cond} ||= _install_cond( $cond);
    return $cond_cache{$cond}->( $elt);
}
5977
|
|
|
|
|
|
|
} |
5978
|
|
|
|
|
|
|
|
5979
|
|
|
|
|
|
|
# Sets the parent link of the element; the link is weakened when
# $XML::Twig::weakrefs is true.
sub set_parent {
    my( $elt, $parent)= @_;
    $elt->{parent}= $parent;
    weaken( $elt->{parent}) if $XML::Twig::weakrefs;
}
5983
|
|
|
|
|
|
|
|
5984
|
|
|
|
|
|
|
# Without a condition: returns the parent of the element.
# With a condition: returns the closest ancestor that passes it, or
# nothing (undef in scalar context) when no ancestor does.
sub parent {
    my( $elt, $cond)= @_;
    return $elt->{parent} unless $cond;
    while( 1) {
        $elt= $elt->{parent} or return;
        return $elt if $elt->passes( $cond);
    }
}
5990
|
|
|
|
|
|
|
|
5991
|
|
|
|
|
|
|
# Sets the first_child link of the element (returns the child).
sub set_first_child {
    my( $elt, $child)= @_;
    return $elt->{first_child}= $child;
}
5994
|
|
|
|
|
|
|
|
5995
|
|
|
|
|
|
|
# Returns the first child of the element or, when a condition is given,
# the first child that passes it (undef if there is none).
sub first_child {
    my( $elt, $cond)= @_;
    my $child= $elt->{first_child};
    return $child unless $cond;
    my $matches= ( $cond_cache{$cond} ||= _install_cond( $cond));
    $child= $child->{next_sibling} until( !$child || $matches->( $child));
    return $child;
}
6004
|
|
|
|
|
|
|
|
6005
|
|
|
|
|
|
|
# Raw accessor for the first_child link.
sub _first_child {
    my( $elt)= @_;
    return $elt->{first_child};
}
6006
|
|
|
|
|
|
|
# Raw accessor for the last_child link.
sub _last_child {
    my( $elt)= @_;
    return $elt->{last_child};
}
6007
|
|
|
|
|
|
|
# Raw accessor for the next_sibling link.
sub _next_sibling {
    my( $elt)= @_;
    return $elt->{next_sibling};
}
6008
|
|
|
|
|
|
|
# Raw accessor for the prev_sibling link.
sub _prev_sibling {
    my( $elt)= @_;
    return $elt->{prev_sibling};
}
6009
|
|
|
|
|
|
|
# Raw accessor for the parent link.
sub _parent {
    my( $elt)= @_;
    return $elt->{parent};
}
6010
|
|
|
|
|
|
|
# Returns the list of all the siblings after the element, closest first.
sub _next_siblings {
    my( $elt)= @_;
    my @siblings;
    for( my $sib= $elt->{next_sibling}; $sib; $sib= $sib->{next_sibling}) {
        push @siblings, $sib;
    }
    return @siblings;
}
6011
|
|
|
|
|
|
|
# Returns the list of all the siblings before the element, closest first.
sub _prev_siblings {
    my( $elt)= @_;
    my @siblings;
    for( my $sib= $elt->{prev_sibling}; $sib; $sib= $sib->{prev_sibling}) {
        push @siblings, $sib;
    }
    return @siblings;
}
6012
|
|
|
|
|
|
|
|
6013
|
|
|
|
|
|
|
# sets a field |
6014
|
|
|
|
|
|
|
# arguments $record, $cond, @content |
6015
|
|
|
|
|
|
|
# Sets the content of a field of a record.
# The field is the first child of the record that passes $cond: if it
# exists its content is replaced, otherwise a new last child is created,
# its tag being the name found at the start of $cond.
# Croaks when no name can be extracted from $cond.
# Returns the field.
sub set_field {
    my( $record, $cond, @content)= @_;
    my $field= $record->first_child( $cond);
    if( $field) {
        $field->set_content( @content);
        return $field;
    }
    $cond=~ m{^\s*($REG_TAG_NAME)}
        or croak "can't create a field name from $cond";
    my $gi= $1;
    $field= $record->insert_new_elt( last_child => $gi, @content);
    return $field;
}
6031
|
|
|
|
|
|
|
|
6032
|
|
|
|
|
|
|
# Sets the last_child link of the element and removes its 'empty' flag;
# the link is weakened when $XML::Twig::weakrefs is true.
# The flag used to be deleted through '$_->[0]' instead of '$_[0]': the
# element kept its flag, and the unrelated $_ was used (and could be
# modified) instead.
sub set_last_child {
    my( $elt, $child)= @_;
    $elt->{last_child}= $child;
    delete $elt->{empty};
    if( $XML::Twig::weakrefs) { weaken( $elt->{last_child}); }
}
6037
|
|
|
|
|
|
|
|
6038
|
|
|
|
|
|
|
# Returns the last child of the element or, when a condition is given,
# the last child that passes it (undef if there is none).
sub last_child {
    my( $elt, $cond)= @_;
    return $elt->{last_child} unless $cond;
    my $matches= ( $cond_cache{$cond} ||= _install_cond( $cond));
    my $child= $elt->{last_child};
    $child= $child->{prev_sibling} until( !$child || $matches->( $child));
    return $child;
}
6047
|
|
|
|
|
|
|
|
6048
|
|
|
|
|
|
|
|
6049
|
|
|
|
|
|
|
# Sets the prev_sibling link of the element; the link is weakened when
# $XML::Twig::weakrefs is true.
sub set_prev_sibling {
    my( $elt, $sibling)= @_;
    $elt->{prev_sibling}= $sibling;
    weaken( $elt->{prev_sibling}) if $XML::Twig::weakrefs;
}
6053
|
|
|
|
|
|
|
|
6054
|
|
|
|
|
|
|
# Returns the previous sibling of the element or, when a condition is
# given, the closest previous sibling that passes it (undef if none).
sub prev_sibling {
    my( $elt, $cond)= @_;
    return $elt->{prev_sibling} unless $cond;
    my $matches= ( $cond_cache{$cond} ||= _install_cond( $cond));
    my $sibling= $elt->{prev_sibling};
    $sibling= $sibling->{prev_sibling} until( !$sibling || $matches->( $sibling));
    return $sibling;
}
6063
|
|
|
|
|
|
|
|
6064
|
|
|
|
|
|
|
# Sets the next_sibling link of the element (returns the sibling).
sub set_next_sibling {
    my( $elt, $sibling)= @_;
    return $elt->{next_sibling}= $sibling;
}
6065
|
|
|
|
|
|
|
|
6066
|
|
|
|
|
|
|
# Returns the next sibling of the element or, when a condition is given,
# the closest next sibling that passes it (undef if none).
sub next_sibling {
    my( $elt, $cond)= @_;
    return $elt->{next_sibling} unless $cond;
    my $matches= ( $cond_cache{$cond} ||= _install_cond( $cond));
    my $sibling= $elt->{next_sibling};
    $sibling= $sibling->{next_sibling} until( !$sibling || $matches->( $sibling));
    return $sibling;
}
6075
|
|
|
|
|
|
|
|
6076
|
|
|
|
|
|
|
# methods dealing with the class attribute, convenient if you work with xhtml |
6077
|
|
|
|
|
|
|
# Returns the value of the class attribute of the element.
sub class {
    my( $elt)= @_;
    $elt->{att}->{class};
}
6078
|
|
|
|
|
|
|
# lvalue version of class. separate from class to avoid problem like RT# |
6079
|
|
|
|
|
|
|
# lvalue access to the class attribute: the result can be assigned to.
sub lclass :lvalue {
    my( $elt)= @_;
    $elt->{att}->{class};
}
6082
|
|
|
|
|
|
|
|
6083
|
|
|
|
|
|
|
# Sets the class attribute of the element, through set_att.
sub set_class {
    my $elt= shift;
    my $class= shift;
    return $elt->set_att( class => $class);
}
6084
|
|
|
|
|
|
|
|
6085
|
|
|
|
|
|
|
# adds a class to an element |
6086
|
|
|
|
|
|
|
# Adds a class to the class attribute of the element (nothing is done
# when $new_class is false). The classes are stored sorted, separated by
# a single space.
# The existing attribute is split with split ' ', which ignores leading
# whitespace: the previous split /\s+/ created an empty class name (and
# so a leading space in the result) for a value like ' a b'. An existing
# class of '0' is also kept instead of being dropped.
sub add_to_class {
    my( $elt, $new_class)= @_;
    return $elt unless $new_class;
    my $class= $elt->class;
    my %class= map { $_ => 1 } split ' ', ( defined $class ? $class : '');
    $class{$new_class}= 1;
    return $elt->set_class( join( ' ', sort keys %class));
}
6094
|
|
|
|
|
|
|
|
6095
|
|
|
|
|
|
|
# Removes a class from the class attribute of the element (nothing is
# done when $class_to_remove is false). The remaining classes are stored
# sorted, separated by a single space.
# The existing attribute is split with split ' ', which ignores leading
# whitespace: the previous split /\s+/ created an empty class name (and
# so a leading space in the result) for a value like ' a b'. An existing
# class of '0' is also kept instead of being dropped.
sub remove_class {
    my( $elt, $class_to_remove)= @_;
    return $elt unless $class_to_remove;
    my $class= $elt->class;
    my %class= map { $_ => 1 } split ' ', ( defined $class ? $class : '');
    delete $class{$class_to_remove};
    return $elt->set_class( join( ' ', sort keys %class));
}
6103
|
|
|
|
|
|
|
|
6104
|
|
|
|
|
|
|
# Sets the class of the element to the value of one of its attributes.
sub att_to_class {
    my $elt= shift;
    my $att= shift;
    return $elt->set_class( $elt->{att}->{$att});
}
6105
|
|
|
|
|
|
|
# Adds the value of one of the attributes of the element to its class.
sub add_att_to_class {
    my $elt= shift;
    my $att= shift;
    return $elt->add_to_class( $elt->{att}->{$att});
}
6106
|
|
|
|
|
|
|
# Adds the value of an attribute to the class of the element, then
# deletes the attribute.
sub move_att_to_class {
    my( $elt, $att)= @_;
    my $value= $elt->{att}->{$att};
    $elt->add_to_class( $value);
    return $elt->del_att( $att);
}
6109
|
|
|
|
|
|
|
# Sets the class of the element to its tag.
sub tag_to_class {
    my $elt= shift;
    return $elt->set_class( $elt->tag);
}
6110
|
|
|
|
|
|
|
# Adds the tag of the element to its class.
sub add_tag_to_class {
    my $elt= shift;
    return $elt->add_to_class( $elt->tag);
}
6111
|
|
|
|
|
|
|
# Adds the current tag of the element to its class, then changes the tag.
sub set_tag_class {
    my $elt= shift;
    my $new_tag= shift;
    $elt->add_tag_to_class;
    return $elt->set_tag( $new_tag);
}
6112
|
|
|
|
|
|
|
|
6113
|
|
|
|
|
|
|
# Changes the element into a span, its class being set to its former tag.
# The class is left alone when the element is already a span that has a
# class, as it would otherwise be replaced by 'span'.
sub tag_to_span {
    my $elt= shift;
    if( $elt->tag ne 'span' || !$elt->class) {
        $elt->set_class( $elt->tag);
    }
    return $elt->set_tag( 'span');
}
6118
|
|
|
|
|
|
|
|
6119
|
|
|
|
|
|
|
# Changes the element into a div, its class being set to its former tag.
# The class is left alone when the element is already a div that has a
# class, as it would otherwise be replaced by 'div'.
sub tag_to_div {
    my $elt= shift;
    if( $elt->tag ne 'div' || !$elt->class) {
        $elt->set_class( $elt->tag);
    }
    return $elt->set_tag( 'div');
}
6124
|
|
|
|
|
|
|
|
6125
|
|
|
|
|
|
|
# Returns the element if $class is one of the whitespace-separated classes
# of its class attribute, 0 if it is not, and nothing when the element has
# no class attribute.
sub in_class {
    my( $elt, $class)= @_;
    defined( $elt->class) or return;
    if( $elt->class=~ m{(?:^|\s)\Q$class\E(?:\s|$)}) { return $elt; }
    return 0;
}
6131
|
|
|
|
|
|
|
|
6132
|
|
|
|
|
|
|
|
6133
|
|
|
|
|
|
|
# get or set all attributes |
6134
|
|
|
|
|
|
|
# argument can be a hash or a hashref |
6135
|
|
|
|
|
|
|
# Replaces all the attributes of the element.
# The attributes are given either as a hash reference (single argument)
# or as a list of name => value pairs.
# The new attribute hash is tied to Tie::IxHash when keep_atts_order()
# is true. If the new attributes include the id attribute, the id of the
# element is registered. Returns the element.
sub set_atts {
    my( $elt, @args)= @_;
    my %atts;
    if( keep_atts_order()) { tie %atts, 'Tie::IxHash'; }
    if( @args == 1) { %atts= %{$args[0]}; }
    else            { %atts= @args;       }
    $elt->{att}= \%atts;
    $elt->_set_id( $atts{$ID}) if exists $atts{$ID};
    return $elt;
}
6144
|
|
|
|
|
|
|
|
6145
|
|
|
|
|
|
|
# Returns the attribute hash (a reference) of the element.
sub atts {
    my( $elt)= @_;
    return $elt->{att};
}
6146
|
|
|
|
|
|
|
# Returns the sorted list of the attribute names of the element.
sub att_names {
    my( $elt)= @_;
    my @names= sort keys %{$elt->{att}};
    return @names;
}
6147
|
|
|
|
|
|
|
# Replaces the attributes of the element by an empty hash; returns the element.
sub del_atts {
    my( $elt)= @_;
    $elt->{att}= {};
    return $elt;
}
6148
|
|
|
|
|
|
|
|
6149
|
|
|
|
|
|
|
# get or set a single attribute (set works for several atts) |
6150
|
|
|
|
|
|
|
# Sets one or several attributes, given as a list of name => value pairs.
# Croaks when the first argument is a reference and the second one is
# false (for instance when called with a hash reference).
# The attribute hash is created if needed, tied to Tie::IxHash when
# keep_atts_order() is true. Setting the id attribute also registers the
# id of the element. Returns the element.
sub set_att {
    my( $elt, @pairs)= @_;

    if( $pairs[0] && ref( $pairs[0]) && !$pairs[1]) {
        croak "improper call to set_att, usage is \$elt->set_att( att1 => 'val1', att2 => 'val2',...)";
    }

    if( !$elt->{att}) {
        $elt->{att}= {};
        if( keep_atts_order()) { tie %{$elt->{att}}, 'Tie::IxHash'; }
    }

    while( my( $att, $val)= splice( @pairs, 0, 2)) {
        $elt->{att}->{$att}= $val;
        $elt->_set_id( $val) if $att eq $ID;
    }
    return $elt;
}
6168
|
|
|
|
|
|
|
|
6169
|
|
|
|
|
|
|
# Returns the value of an attribute of the element.
sub att {
    my( $elt, $att)= @_;
    $elt->{att}->{$att};
}
6170
|
|
|
|
|
|
|
# lvalue version of att. separate from att to avoid problem like RT# |
6171
|
|
|
|
|
|
|
# lvalue access to an attribute: the result can be assigned to.
sub latt :lvalue {
    my( $elt, $att)= @_;
    $elt->{att}->{$att};
}
6174
|
|
|
|
|
|
|
|
6175
|
|
|
|
|
|
|
# Deletes the attributes whose names are given; returns the element.
sub del_att {
    my( $elt, @names)= @_;
    foreach my $name (@names) {
        delete $elt->{att}->{$name};
    }
    return $elt;
}
6180
|
|
|
|
|
|
|
|
6181
|
|
|
|
|
|
|
# Returns true if the attribute exists in the element, even if its value
# is not defined.
sub att_exists {
    my( $elt, $att)= @_;
    return exists $elt->{att}->{$att};
}
6182
|
|
|
|
|
|
|
|
6183
|
|
|
|
|
|
|
# delete an attribute from all descendants of an element |
6184
|
|
|
|
|
|
|
# Deletes an attribute from the element and from all its descendants that
# have it; returns the element.
sub strip_att {
    my( $elt, $att)= @_;
    foreach my $holder ( $elt->descendants_or_self( qq{*[\@$att]})) {
        $holder->del_att( $att);
    }
    return $elt;
}
6189
|
|
|
|
|
|
|
|
6190
|
|
|
|
|
|
|
# Renames an attribute: the old attribute is deleted and its value is set
# under the new name. Nothing is done when the old attribute has no
# defined value. Returns the element.
sub change_att_name {
    my( $elt, $old_name, $new_name)= @_;
    my $value= $elt->{att}->{$old_name};
    if( defined $value) {
        my $stripped= $elt->del_att( $old_name);
        $stripped->set_att( $new_name => $value);
    }
    return $elt;
}
6198
|
|
|
|
|
|
|
|
6199
|
|
|
|
|
|
|
# Renames every attribute whose name is not already in lower case to the
# lower case version of its name; returns the element.
sub lc_attnames {
    my( $elt)= @_;
    for my $name ( $elt->att_names) {
        next if $name eq lc $name;
        $elt->change_att_name( $name, lc $name);
    }
    return $elt;
}
6205
|
|
|
|
|
|
|
|
6206
|
|
|
|
|
|
|
# Sets the twig_current flag of the element.
sub set_twig_current {
    my( $elt)= @_;
    return $elt->{twig_current}= 1;
}
6207
|
|
|
|
|
|
|
# Removes the twig_current flag of the element (returns the former value).
sub del_twig_current {
    my( $elt)= @_;
    return delete $elt->{twig_current};
}
6208
|
|
|
|
|
|
|
|
6209
|
|
|
|
|
|
|
|
6210
|
|
|
|
|
|
|
# get or set the id attribute |
6211
|
|
|
|
|
|
|
# Sets the id of the element: a previous id is first removed through
# del_id, then the id attribute is set and the element is registered
# under the new id. Returns the element.
sub set_id {
    my( $elt, $id)= @_;
    if( exists $elt->{att}->{$ID}) { $elt->del_id(); }
    $elt->set_att( $ID, $id);
    $elt->_set_id( $id);
    return $elt;
}
6218
|
|
|
|
|
|
|
|
6219
|
|
|
|
|
|
|
# only set id, does not update the attribute value |
6220
|
|
|
|
|
|
|
# Registers the element under $id in the twig_id_list of its twig, or of
# the element itself when it has no twig. The attribute value is not
# updated. The entry is weakened when $XML::Twig::weakrefs is true.
# Returns the element.
sub _set_id {
    my( $elt, $id)= @_;
    my $owner= $elt->twig || $elt;
    $owner->{twig_id_list}->{$id}= $elt;
    weaken( $owner->{twig_id_list}->{$id}) if $XML::Twig::weakrefs;
    return $elt;
}
6227
|
|
|
|
|
|
|
|
6228
|
|
|
|
|
|
|
# Returns the value of the id attribute (named by $ID) of the element.
sub id {
    my( $elt)= @_;
    return $elt->{att}->{$ID};
}
6229
|
|
|
|
|
|
|
|
6230
|
|
|
|
|
|
|
# methods used to add ids to elements that don't have one |
6231
|
|
|
|
|
|
|
BEGIN {
    # state shared by set_id_seed and add_id: generated ids are the seed
    # followed by a counter
    my $id_nb  = "0001";
    my $id_seed= "twig_id_";

    # Changes the prefix of the generated ids and resets the counter to 1.
    sub set_id_seed ## no critic (Subroutines::ProhibitNestedSubs);
    {
        my $new_seed= $_[1];
        $id_seed= $new_seed;
        $id_nb= 1;
    }

    # Returns the id of the element, after creating it if the element has
    # none. A new id is either the result of the code reference passed as
    # argument (called with the element), or the seed followed by the
    # counter, which is then incremented.
    sub add_id ## no critic (Subroutines::ProhibitNestedSubs);
    {
        my( $elt, $generator)= @_;
        my $current= $elt->{att}->{$ID};
        return $current if defined $current;

        my $id= $generator && ref( $generator) && isa( $generator, 'CODE')
              ? $generator->( $elt)
              : $id_seed . $id_nb++;
        $elt->set_id( $id);
        return $id;
    }
}
6249
|
|
|
|
|
|
|
|
6250
|
|
|
|
|
|
|
|
6251
|
|
|
|
|
|
|
|
6252
|
|
|
|
|
|
|
# delete the id attribute and remove the element from the id list |
6253
|
|
|
|
|
|
|
# Deletes the id attribute of the element and removes the element from
# the id list of the twig (the optional argument, or the twig of the
# element). Nothing is done when the element has no id attribute.
# Returns the element.
sub del_id {
    my( $elt, $twig)= @_;
    return $elt unless exists $elt->{att}->{$ID};

    my $id= $elt->{att}->{$ID};
    delete $elt->{att}->{$ID};

    my $t= $twig || $elt->twig;
    return $elt unless $t;

    delete $t->{twig_id_list}->{$id} if exists $t->{twig_id_list}->{$id};
    return $elt;
}
6266
|
|
|
|
|
|
|
|
6267
|
|
|
|
|
|
|
# return the list of children |
6268
|
|
|
|
|
|
|
# Returns the list of the children of the element, restricted to those
# that pass the optional condition (the arguments are passed to
# first_child and next_sibling).
sub children {
    my( $elt, @cond)= @_;
    my @children;
    for( my $child= $elt->first_child( @cond); $child; $child= $child->next_sibling( @cond)) {
        push @children, $child;
    }
    return @children;
}
6278
|
|
|
|
|
|
|
|
6279
|
|
|
|
|
|
|
# Returns the list of all the children of the element, using the
# first_child and next_sibling links directly.
sub _children {
    my( $elt)= @_;
    my @children= ();
    for( my $child= $elt->{first_child}; $child; $child= $child->{next_sibling}) {
        push @children, $child;
    }
    return @children;
}
6289
|
|
|
|
|
|
|
|
6290
|
|
|
|
|
|
|
# Returns a list of copies of the children of the element, restricted to
# those that pass the optional condition.
sub children_copy {
    my( $elt, @cond)= @_;
    my @copies;
    for( my $child= $elt->first_child( @cond); $child; $child= $child->next_sibling( @cond)) {
        push @copies, $child->copy;
    }
    return @copies;
}
6300
|
|
|
|
|
|
|
|
6301
|
|
|
|
|
|
|
|
6302
|
|
|
|
|
|
|
# Returns the number of children of the element that pass the condition.
sub children_count {
    my( $elt, $cond)= @_;
    my $count= 0;
    for( my $child= $elt->{first_child}; $child; $child= $child->{next_sibling}) {
        $count++ if $child->passes( $cond);
    }
    return $count;
}
6313
|
|
|
|
|
|
|
|
6314
|
|
|
|
|
|
|
# Returns the text of the children of the element (restricted by the
# optional condition): as a list in list context, concatenated in a
# single string otherwise.
sub children_text {
    my( $elt, @cond)= @_;
    my @texts= map { $_->text } $elt->children( @cond);
    return wantarray() ? @texts : join( '', @texts);
}
6320
|
|
|
|
|
|
|
|
6321
|
|
|
|
|
|
|
# Returns the trimmed text of the children of the element (restricted by
# the optional condition): as a list in list context, concatenated in a
# single string otherwise.
sub children_trimmed_text {
    my( $elt, @cond)= @_;
    my @texts= map { $_->trimmed_text } $elt->children( @cond);
    return wantarray() ? @texts : join( '', @texts);
}
6327
|
|
|
|
|
|
|
|
6328
|
|
|
|
|
|
|
# Returns the element if all its children pass the condition (this
# includes the case where it has no child), 0 otherwise.
sub all_children_are {
    my( $parent, $cond)= @_;
    for my $child ( $parent->_children) {
        $child->passes( $cond) or return 0;
    }
    return $parent;
}
6334
|
|
|
|
|
|
|
|
6335
|
|
|
|
|
|
|
|
6336
|
|
|
|
|
|
|
# Returns the list of the ancestors of the element that pass the
# condition, from the parent up.
sub ancestors {
    my( $elt, $cond)= @_;
    my @ancestors;
    for( my $up= $elt->{parent}; $up; $up= $up->{parent}) {
        push @ancestors, $up if $up->passes( $cond);
    }
    return @ancestors;
}
6345
|
|
|
|
|
|
|
|
6346
|
|
|
|
|
|
|
# Returns the list of the elements that pass the condition among the
# element itself and its ancestors, from the element up.
sub ancestors_or_self {
    my( $elt, $cond)= @_;
    my @ancestors;
    for( ; $elt; $elt= $elt->{parent}) {
        push @ancestors, $elt if $elt->passes( $cond);
    }
    return @ancestors;
}
6355
|
|
|
|
|
|
|
|
6356
|
|
|
|
|
|
|
|
6357
|
|
|
|
|
|
|
# Returns the list of the ancestors of the element, from the parent up,
# starting with the element itself when $include_self is true.
sub _ancestors {
    my( $elt, $include_self)= @_;
    my @ancestors;
    push @ancestors, $elt if $include_self;
    for( my $up= $elt->{parent}; $up; $up= $up->{parent}) {
        push @ancestors, $up;
    }
    return @ancestors;
}
6363
|
|
|
|
|
|
|
|
6364
|
|
|
|
|
|
|
|
6365
|
|
|
|
|
|
|
# Returns the value of an attribute, taken from the element or from the
# closest ancestor where the attribute is defined.
# When a list of tags is given, only the elements with one of those tags
# are considered. Returns undef if the attribute is not found.
sub inherit_att {
    my( $elt, $att, @tags)= @_;
    my %tags;
    @tags{ @tags}= (1) x @tags;

    while( 1) {
        my $value= $elt->{att}->{$att};
        if( defined( $value) && ( !%tags || $tags{$XML::Twig::index2gi[$elt->{gi}]})) {
            return $value;
        }
        $elt= $elt->{parent} or last;
    }
    return undef;
}
6378
|
|
|
|
|
|
|
|
6379
|
|
|
|
|
|
|
# Same as inherit_att, except that when an element has no parent the
# parent recorded under {former}->{parent} is followed instead, if there
# is one.
sub _inherit_att_through_cut {
    my( $elt, $att, @tags)= @_;
    my %tags;
    @tags{ @tags}= (1) x @tags;

    while( 1) {
        my $value= $elt->{att}->{$att};
        if( defined( $value) && ( !%tags || $tags{$XML::Twig::index2gi[$elt->{gi}]})) {
            return $value;
        }
        $elt= $elt->{parent} || ( $elt->{former} && $elt->{former}->{parent});
        last unless $elt;
    }
    return undef;
}
6392
|
|
|
|
|
|
|
|
6393
|
|
|
|
|
|
|
|
6394
|
|
|
|
|
|
|
# Returns the sorted list of the namespace prefixes used in the tag and
# attribute names of the element and of its ancestors.
# Prefixes that start with 'xml' are ignored; the empty prefix is part of
# the list when $elt->namespace( '') is true.
sub current_ns_prefixes {
    my( $elt)= @_;
    my %seen;
    $seen{''}= 1 if $elt->namespace( '');
    for( ; $elt; $elt= $elt->{parent}) {
        foreach my $name ( $XML::Twig::index2gi[$elt->{gi}], $elt->att_names) {
            my( $prefix)= $name=~ m{^([^:]+):} or next;
            next if $prefix=~ m{^xml};
            $seen{$prefix}= 1;
        }
    }
    my @prefixes= sort keys %seen;
    return @prefixes;
}
6406
|
|
|
|
|
|
|
|
6407
|
|
|
|
|
|
|
# kinda counter-intuitive actually: |
6408
|
|
|
|
|
|
|
# the next element is found by looking for the next open tag after the |
6409
|
|
|
|
|
|
|
# current one, which is the first child, if it exists, or the next sibling |
6410
|
|
|
|
|
|
|
# or the first next sibling of an ancestor |
6411
|
|
|
|
|
|
|
# optional arguments are: |
6412
|
|
|
|
|
|
|
# - $subtree_root: a reference to an element, when the next element is not |
6413
|
|
|
|
|
|
|
# within $subtree_root anymore then next_elt returns undef |
6414
|
|
|
|
|
|
|
# - $cond: a condition, next_elt returns the next element matching the condition |
6415
|
|
|
|
|
|
|
|
6416
|
|
|
|
|
|
|
sub next_elt
  { my( $elt, @args)= @_;

    # an optional element given first limits the search to its subtree
    my $subtree_root= 0;
    if( ref( $args[0]) && isa( $args[0], 'XML::Twig::Elt')) { $subtree_root= shift @args; }
    my $cond= shift @args;

    # optimization: a condition that is a known tag name is checked by
    # comparing tag indexes, any other condition goes through a cached test sub
    my( $ind, $test_cond);
    if( $cond)
      { $ind= $XML::Twig::gi2index{$cond};
        if( !defined $ind)
          { $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); }
      }

    while( 1)
      { my $next_elt= $elt->{first_child};      # simplest case: the elt has a child
        if( !$next_elt)
          { # a childless subtree root has nothing after it within the subtree
            return undef if( $subtree_root && ($elt == $subtree_root));

            $next_elt= $elt->{next_sibling};
            if( !$next_elt)
              { # no child, no sibling: use the next sibling of the closest
                # ancestor that has one, without climbing past the subtree root
                my $ancestor= $elt->{parent} || return undef;
                until( $ancestor->{next_sibling})
                  { return undef if( $subtree_root && ($subtree_root == $ancestor));
                    $ancestor= $ancestor->{parent} || return undef;
                  }
                return undef if( $subtree_root && ($subtree_root == $ancestor));
                $next_elt= $ancestor->{next_sibling};
              }
          }

        $elt= $next_elt;

        # keep going until the candidate satisfies the condition (if any)
        return $elt if( ! defined $elt || ! defined $cond);
        return $elt if( defined $ind && ($elt->{gi} eq $ind));
        return $elt if( defined $test_cond && ($test_cond->( $elt)));
      }
  }
6466
|
|
|
|
|
|
|
|
6467
|
|
|
|
|
|
|
# return the next_elt within the element |
6468
|
|
|
|
|
|
|
# just call next_elt with the element as first and second argument |
6469
|
|
|
|
|
|
|
sub first_descendant
  { my $elt= $_[0];
    # the element is passed again in the argument list, in front of the
    # caller's own arguments
    return $elt->next_elt( @_);
  }
6470
|
|
|
|
|
|
|
|
6471
|
|
|
|
|
|
|
# get the last descendant, # then return the element found or call prev_elt with the condition |
6472
|
|
|
|
|
|
|
sub last_descendant
  { my( $elt, $cond)= @_;
    my $deepest= $elt->_last_descendant;

    # without a condition, or when the deepest element matches, we are done
    return $deepest if( !$cond);
    return $deepest if( $deepest->matches( $cond));

    # otherwise walk backwards from it, using $elt as first argument to prev_elt
    return $deepest->prev_elt( $elt, $cond);
  }
6480
|
|
|
|
|
|
|
|
6481
|
|
|
|
|
|
|
# no argument allowed here, just go down the last_child recursively |
6482
|
|
|
|
|
|
|
sub _last_descendant
  { my $elt= shift;
    # follow the last_child links all the way down
    $elt= $elt->{last_child} while( $elt->{last_child});
    return $elt;
  }
6487
|
|
|
|
|
|
|
|
6488
|
|
|
|
|
|
|
# counter-intuitive too: |
6489
|
|
|
|
|
|
|
# the previous element is found by looking |
6490
|
|
|
|
|
|
|
# for the first open tag backwards from the current one |
6491
|
|
|
|
|
|
|
# it's the last descendant of the previous sibling |
6492
|
|
|
|
|
|
|
# if it exists, otherwise it's simply the parent |
6493
|
|
|
|
|
|
|
sub prev_elt
  { my( $elt, @args)= @_;

    # an optional element given first limits the search to its subtree
    my $subtree_root= 0;
    if( defined $args[0] and (ref( $args[0]) && isa( $args[0], 'XML::Twig::Elt')))
      { $subtree_root= shift @args;
        return undef if( $elt == $subtree_root);
      }
    my $cond= shift @args;

    while( 1)
      { return undef if( $elt == $subtree_root);

        my $candidate= $elt->{prev_sibling};
        if( $candidate)
          { # last descendant of the previous sibling
            $candidate= $candidate->{last_child} while( $candidate->{last_child});
          }
        else
          { # no previous sibling: the previous element is the parent
            $candidate= $elt->{parent} || return undef;
          }

        $elt= $candidate;
        return $elt if( $elt->passes( $cond));
      }
  }
6516
|
|
|
|
|
|
|
|
6517
|
|
|
|
|
|
|
# Returns the next sibling of the element or, when it has none, the next
# sibling of its closest ancestor that has one; undef if there is no such
# element.
sub _following_elt
  { my( $elt)= @_;
    $elt= $elt->{parent} while( $elt && !$elt->{next_sibling});
    return undef unless( $elt);
    return $elt->{next_sibling};
  }
6523
|
|
|
|
|
|
|
|
6524
|
|
|
|
|
|
|
# Returns the first element found after the end of the current one (see
# _following_elt) when no condition is given or when that element matches it;
# otherwise returns whatever next_elt( $cond) finds from there.
sub following_elt
  { my( $elt, $cond)= @_;
    my $following= $elt->_following_elt;
    return undef unless( $following);
    if( $cond && !$following->matches( $cond))
      { return $following->next_elt( $cond); }
    return $following;
  }
6530
|
|
|
|
|
|
|
|
6531
|
|
|
|
|
|
|
# Returns the list of elements obtained by calling following_elt once and then
# next_elt repeatedly (all with the same condition) until nothing is returned.
sub following_elts
  { my( $elt, $cond)= @_;
    $cond= undef unless( $cond);      # any false condition means no condition

    my @followings;
    my $current= $elt->following_elt( $cond);
    while( $current)
      { push @followings, $current;
        $current= $current->next_elt( $cond);
      }

    return () unless( @followings);
    return( @followings);
  }
6544
|
|
|
|
|
|
|
|
6545
|
|
|
|
|
|
|
# Returns the last descendant of the previous sibling of the element or, when
# it has none, of the previous sibling of its closest ancestor that has one;
# undef if there is no such sibling.
sub _preceding_elt
  { my( $elt)= @_;
    $elt= $elt->{parent} while( $elt && !$elt->{prev_sibling});
    return undef unless( $elt);
    return $elt->{prev_sibling}->_last_descendant;
  }
6551
|
|
|
|
|
|
|
|
6552
|
|
|
|
|
|
|
# Returns the element given by _preceding_elt when no condition is given or
# when that element matches it; otherwise returns whatever prev_elt( $cond)
# finds from there.
sub preceding_elt
  { my( $elt, $cond)= @_;
    my $preceding= $elt->_preceding_elt;
    return undef unless( $preceding);
    if( $cond && !$preceding->matches( $cond))
      { return $preceding->prev_elt( $cond); }
    return $preceding;
  }
6558
|
|
|
|
|
|
|
|
6559
|
|
|
|
|
|
|
# Returns the list of elements obtained by calling preceding_elt once and then
# prev_elt repeatedly (all with the same condition) until nothing is returned;
# the closest element comes first.
sub preceding_elts
  { my( $elt, $cond)= @_;
    $cond= undef unless( $cond);      # any false condition means no condition

    my @precedings;
    my $current= $elt->preceding_elt( $cond);
    while( $current)
      { push @precedings, $current;
        $current= $current->prev_elt( $cond);
      }

    return () unless( @precedings);
    return( @precedings);
  }
6572
|
|
|
|
|
|
|
|
6573
|
|
|
|
|
|
|
# used in get_xpath |
6574
|
|
|
|
|
|
|
sub _self
  { my( $elt, $cond)= @_;
    # no condition: the element itself, otherwise the result of matches
    return $elt unless( $cond);
    return $elt->matches( $cond);
  }
6578
|
|
|
|
|
|
|
|
6579
|
|
|
|
|
|
|
# Calls next_elt $offset times in a row, passing it the remaining arguments,
# and returns the element reached. Returns undef when $offset is false or
# when the walk runs out of elements.
sub next_n_elt
  { my( $elt, $offset, @next_elt_args)= @_;
    return undef unless( $offset);
    foreach (1..$offset)
      { $elt= $elt->next_elt( @next_elt_args);
        return undef unless( $elt);
      }
    return $elt;
  }
6586
|
|
|
|
|
|
|
|
6587
|
|
|
|
|
|
|
# checks whether $elt is included in $ancestor, returns the ancestor in that case (0 otherwise) |
6588
|
|
|
|
|
|
|
sub in
  { my( $elt, $ancestor)= @_;

    # $ancestor is either an element (compared by identity) or a condition
    # (checked with matches)
    my $is_element= ref( $ancestor) && isa( $ancestor, 'XML::Twig::Elt');

    while( $elt= $elt->{parent})
      { if( $is_element)
          { return $elt if( $elt == $ancestor); }
        else
          { return $elt if( $elt->matches( $ancestor)); }
      }
    return 0;
  }
6600
|
|
|
|
|
|
|
|
6601
|
|
|
|
|
|
|
# Text of the element returned by first_child (called with the arguments
# received), or the empty string when there is none.
sub first_child_text
  { my( $elt, @cond)= @_;
    my $child= $elt->first_child( @cond);
    return $child ? $child->text : '';
  }
6606
|
|
|
|
|
|
|
|
6607
|
|
|
|
|
|
|
# Returns the list made of the results of $elt->field for each argument, in
# the order of the arguments.
sub fields
  { my( $elt, @conditions)= @_;
    my @values;
    foreach my $cond (@conditions)
      { push @values, $elt->field( $cond); }
    return @values;
  }
6611
|
|
|
|
|
|
|
|
6612
|
|
|
|
|
|
|
# Trimmed text of the element returned by first_child (called with the
# arguments received), or the empty string when there is none.
sub first_child_trimmed_text
  { my( $elt, @cond)= @_;
    my $child= $elt->first_child( @cond);
    return $child ? $child->trimmed_text : '';
  }
6617
|
|
|
|
|
|
|
|
6618
|
|
|
|
|
|
|
# Result of passes (called with the arguments received) on the first child of
# the element, or undef when the element has no child.
sub first_child_matches
  { my( $elt, @cond)= @_;
    my $child= $elt->{first_child};
    return undef unless( $child);
    return $child->passes( @cond);
  }
6623
|
|
|
|
|
|
|
|
6624
|
|
|
|
|
|
|
# Text of the element returned by last_child (called with the arguments
# received), or the empty string when there is none.
sub last_child_text
  { my( $elt, @cond)= @_;
    my $child= $elt->last_child( @cond);
    return $child ? $child->text : '';
  }
6629
|
|
|
|
|
|
|
|
6630
|
|
|
|
|
|
|
# Trimmed text of the element returned by last_child (called with the
# arguments received), or the empty string when there is none.
sub last_child_trimmed_text
  { my( $elt, @cond)= @_;
    my $child= $elt->last_child( @cond);
    return $child ? $child->trimmed_text : '';
  }
6635
|
|
|
|
|
|
|
|
6636
|
|
|
|
|
|
|
# Result of passes (called with the arguments received) on the last child of
# the element, or undef when the element has no child.
sub last_child_matches
  { my( $elt, @cond)= @_;
    my $child= $elt->{last_child};
    return undef unless( $child);
    return $child->passes( @cond);
  }
6641
|
|
|
|
|
|
|
|
6642
|
|
|
|
|
|
|
# Text of the element returned by child (called with the arguments received),
# or the empty string when there is none.
sub child_text
  { my( $elt, @child_args)= @_;
    my $child= $elt->child( @child_args);
    return $child ? $child->text : '';
  }
6647
|
|
|
|
|
|
|
|
6648
|
|
|
|
|
|
|
# Trimmed text of the element returned by child (called with the arguments
# received), or the empty string when there is none.
sub child_trimmed_text
  { my( $elt, @child_args)= @_;
    my $child= $elt->child( @child_args);
    return $child ? $child->trimmed_text : '';
  }
6653
|
|
|
|
|
|
|
|
6654
|
|
|
|
|
|
|
# Result of passes (called with the arguments left after the child number) on
# the element returned by child( $nb), or undef when there is no such child.
sub child_matches
  { my( $elt, $nb, @cond)= @_;
    my $child= $elt->child( $nb);
    return undef unless( $child);
    return $child->passes( @cond);
  }
6660
|
|
|
|
|
|
|
|
6661
|
|
|
|
|
|
|
# Text of the element returned by _prev_sibling (called with the arguments
# received), or the empty string when there is none.
sub prev_sibling_text
  { my( $elt, @cond)= @_;
    my $sibling= $elt->_prev_sibling( @cond);
    return $sibling ? $sibling->text : '';
  }
6666
|
|
|
|
|
|
|
|
6667
|
|
|
|
|
|
|
# Trimmed text of the element returned by _prev_sibling (called with the
# arguments received), or the empty string when there is none.
sub prev_sibling_trimmed_text
  { my( $elt, @cond)= @_;
    my $sibling= $elt->_prev_sibling( @cond);
    return $sibling ? $sibling->trimmed_text : '';
  }
6672
|
|
|
|
|
|
|
|
6673
|
|
|
|
|
|
|
# Result of passes (called with the arguments received) on the previous
# sibling of the element, or undef when it has none.
sub prev_sibling_matches
  { my( $elt, @cond)= @_;
    my $sibling= $elt->{prev_sibling};
    return undef unless( $sibling);
    return $sibling->passes( @cond);
  }
6678
|
|
|
|
|
|
|
|
6679
|
|
|
|
|
|
|
# Text of the element returned by next_sibling (called with the arguments
# received), or the empty string when there is none.
sub next_sibling_text
  { my( $elt, @cond)= @_;
    my $sibling= $elt->next_sibling( @cond);
    return $sibling ? $sibling->text : '';
  }
6684
|
|
|
|
|
|
|
|
6685
|
|
|
|
|
|
|
# Trimmed text of the element returned by next_sibling (called with the
# arguments received), or the empty string when there is none.
sub next_sibling_trimmed_text
  { my( $elt, @cond)= @_;
    my $sibling= $elt->next_sibling( @cond);
    return $sibling ? $sibling->trimmed_text : '';
  }
6690
|
|
|
|
|
|
|
|
6691
|
|
|
|
|
|
|
# Result of passes (called with the arguments received) on the next sibling
# of the element, or undef when it has none.
sub next_sibling_matches
  { my( $elt, @cond)= @_;
    my $sibling= $elt->{next_sibling};
    return undef unless( $sibling);
    return $sibling->passes( @cond);
  }
6696
|
|
|
|
|
|
|
|
6697
|
|
|
|
|
|
|
# Text of the element returned by prev_elt (called with the arguments
# received), or the empty string when there is none.
sub prev_elt_text
  { my( $elt, @args)= @_;
    my $previous= $elt->prev_elt( @args);
    return $previous ? $previous->text : '';
  }
6702
|
|
|
|
|
|
|
|
6703
|
|
|
|
|
|
|
# Trimmed text of the element returned by prev_elt (called with the arguments
# received), or the empty string when there is none.
sub prev_elt_trimmed_text
  { my( $elt, @args)= @_;
    my $previous= $elt->prev_elt( @args);
    return $previous ? $previous->trimmed_text : '';
  }
6708
|
|
|
|
|
|
|
|
6709
|
|
|
|
|
|
|
# Result of passes (called with the arguments received) on the element
# returned by prev_elt (called without argument), or undef when there is none.
sub prev_elt_matches
  { my( $elt, @cond)= @_;
    my $previous= $elt->prev_elt;
    return undef unless( $previous);
    return $previous->passes( @cond);
  }
6714
|
|
|
|
|
|
|
|
6715
|
|
|
|
|
|
|
# Text of the element returned by next_elt (called with the arguments
# received), or the empty string when there is none.
sub next_elt_text
  { my( $elt, @args)= @_;
    my $next= $elt->next_elt( @args);
    return $next ? $next->text : '';
  }
6720
|
|
|
|
|
|
|
|
6721
|
|
|
|
|
|
|
# Trimmed text of the element returned by next_elt (called with the arguments
# received), or the empty string when there is none.
sub next_elt_trimmed_text
  { my( $elt, @args)= @_;
    my $next= $elt->next_elt( @args);
    return $next ? $next->trimmed_text : '';
  }
6726
|
|
|
|
|
|
|
|
6727
|
|
|
|
|
|
|
# Result of passes (called with the arguments received) on the element
# returned by next_elt (called without argument), or undef when there is none.
sub next_elt_matches
  { my( $elt, @cond)= @_;
    my $next= $elt->next_elt;
    return undef unless( $next);
    return $next->passes( @cond);
  }
6732
|
|
|
|
|
|
|
|
6733
|
|
|
|
|
|
|
# Text of the element returned by parent (called with the arguments
# received), or the empty string when there is none.
sub parent_text
  { my( $elt, @cond)= @_;
    my $parent= $elt->parent( @cond);
    return $parent ? $parent->text : '';
  }
6738
|
|
|
|
|
|
|
|
6739
|
|
|
|
|
|
|
# Trimmed text of the element returned by parent (called with the arguments
# received), or the empty string when there is none.
sub parent_trimmed_text
  { my( $elt, @cond)= @_;
    my $parent= $elt->parent( @cond);
    return $parent ? $parent->trimmed_text : '';
  }
6744
|
|
|
|
|
|
|
|
6745
|
|
|
|
|
|
|
# Result of passes (called with the arguments received) on the parent of the
# element, or undef when it has none.
sub parent_matches
  { my( $elt, @cond)= @_;
    my $parent= $elt->{parent};
    return undef unless( $parent);
    return $parent->passes( @cond);
  }
6750
|
|
|
|
|
|
|
|
6751
|
|
|
|
|
|
|
# Returns the element when it is the one its parent returns from first_child
# (called with the arguments received), 0 otherwise, including when the
# element has no parent.
sub is_first_child
  { my( $elt, @cond)= @_;
    my $parent= $elt->{parent};
    return 0 unless( $parent);
    my $first_child= $parent->first_child( @cond);
    return 0 unless( $first_child);
    return 0 unless( $first_child == $elt);
    return $elt;
  }
6757
|
|
|
|
|
|
|
|
6758
|
|
|
|
|
|
|
# Returns the element when it is the one its parent returns from last_child
# (called with the arguments received), 0 otherwise, including when the
# element has no parent.
sub is_last_child
  { my( $elt, @cond)= @_;
    my $parent= $elt->{parent};
    return 0 unless( $parent);
    my $last_child= $parent->last_child( @cond);
    return 0 unless( $last_child);
    return 0 unless( $last_child == $elt);
    return $elt;
  }
6764
|
|
|
|
|
|
|
|
6765
|
|
|
|
|
|
|
# returns the depth level of the element |
6766
|
|
|
|
|
|
|
# if 2 parameters are used then counts the 2nd element name in the |
6767
|
|
|
|
|
|
|
# ancestors list |
6768
|
|
|
|
|
|
|
sub level
  { my( $elt, $cond)= @_;
    my $level= 0;
    # count the ancestors, all of them or only those matching the condition
    while( $elt= $elt->{parent})
      { $level++ if( !$cond || $elt->matches( $cond)); }
    return $level;
  }
6775
|
|
|
|
|
|
|
|
6776
|
|
|
|
|
|
|
# checks whether $elt has an ancestor that satisfies $cond, returns the ancestor |
6777
|
|
|
|
|
|
|
sub in_context
  { my( $elt, $cond, $level)= @_;

    # a false $level means no limit: starting from -1 the countdown never hits 0
    my $remaining= $level ? $level : -1;

    for( ; $remaining; $remaining--)
      { $elt= $elt->{parent};
        return 0 unless( $elt);
        return $elt if( $elt->matches( $cond));
      }
    return 0;
  }
6788
|
|
|
|
|
|
|
|
6789
|
|
|
|
|
|
|
# Returns the elements of the subtree rooted at $subtree_root in document
# order (an element, then its children, then its next sibling), without the
# root itself unless $include_self is true. No condition is applied.
sub _descendants
  { my( $subtree_root, $include_self)= @_;
    my @descendants;
    push @descendants, $subtree_root if( $include_self);

    my $current= $subtree_root;
    WALK: while( 1)
      { my $next= $current->{first_child};          # simplest case: go down
        unless( $next)
          { # a childless subtree root: nothing (more) to visit
            last WALK if( $current == $subtree_root);

            $next= $current->{next_sibling};
            unless( $next)
              { # backtrack to the closest ancestor with a next sibling,
                # without leaving the subtree
                my $ancestor= $current->{parent} or last WALK;
                until( $ancestor->{next_sibling})
                  { last WALK if( $subtree_root == $ancestor);
                    $ancestor= $ancestor->{parent} or last WALK;
                  }
                last WALK if( $subtree_root == $ancestor);
                $next= $ancestor->{next_sibling};
              }
          }
        last WALK unless( $next);
        $current= $next;
        push @descendants, $current;
      }
    return @descendants;
  }
6827
|
|
|
|
|
|
|
|
6828
|
|
|
|
|
|
|
|
6829
|
|
|
|
|
|
|
# Returns the descendants of $subtree_root in document order, all of them or
# only those satisfying $cond.
sub descendants
  { my( $subtree_root, $cond)= @_;
    my @descendants=();
    my $elt= $subtree_root;

    # this branch is pure optimization for speed: when there is no condition,
    # or when the condition is a known tag name (replaced by its index), the
    # tree is walked right here
    my $ind;
    if( !$cond || ( defined ( $ind= $XML::Twig::gi2index{$cond})) )
      { WALK: while( 1)
          { my $next_elt= $elt->{first_child};      # simplest case: the elt has a child
            unless( $next_elt)
              { # a childless subtree root: nothing (more) to visit
                last WALK if( $elt == $subtree_root);

                $next_elt= $elt->{next_sibling};
                unless( $next_elt)
                  { # backtrack until we find an ancestor with a next sibling,
                    # without leaving the subtree
                    my $ancestor= $elt->{parent} or last WALK;
                    until( $ancestor->{next_sibling})
                      { last WALK if( $subtree_root == $ancestor);
                        $ancestor= $ancestor->{parent} or last WALK;
                      }
                    last WALK if( $subtree_root == $ancestor);
                    $next_elt= $ancestor->{next_sibling};
                  }
              }
            $elt= $next_elt or last WALK;
            push @descendants, $elt if( !$cond || ($elt->{gi} eq $ind));
          }
      }
    else
      { # branch for a complex condition: use the regular (slow but simple) way
        while( $elt= $elt->next_elt( $subtree_root, $cond))
          { push @descendants, $elt; }
      }
    return @descendants;
  }
6880
|
|
|
|
|
|
|
|
6881
|
|
|
|
|
|
|
|
6882
|
|
|
|
|
|
|
# Returns the element itself (when it passes the condition) followed by what
# its descendants method returns for the same condition.
sub descendants_or_self
  { my( $elt, $cond)= @_;
    my @found;
    if( $elt->passes( $cond)) { push @found, $elt; }
    push @found, $elt->descendants( $cond);
    return @found;
  }
6888
|
|
|
|
|
|
|
|
6889
|
|
|
|
|
|
|
# Returns the sibling found by calling next_sibling ($nb > 0) or prev_sibling
# ($nb < 0) abs( $nb) times with the remaining arguments, or undef when there
# are not enough siblings. For $nb == 0 returns $elt->passes( <first
# remaining argument>).
sub sibling
  { my( $elt, $nb, @cond)= @_;

    unless( $nb > 0 || $nb < 0)            # $nb == 0
      { return $elt->passes( $cond[0]); }

    my $step= $nb > 0 ? 'next_sibling' : 'prev_sibling';
    foreach (1..abs( $nb))
      { $elt= $elt->$step( @cond) or return undef; }
    return $elt;
  }
6904
|
|
|
|
|
|
|
|
6905
|
|
|
|
|
|
|
# Text of the element returned by sibling (called with all the arguments
# received), or undef when it returns a false value.
sub sibling_text
  { my $sibling= sibling( @_);
    return undef unless( $sibling);
    return $sibling->text;
  }
6909
|
|
|
|
|
|
|
|
6910
|
|
|
|
|
|
|
|
6911
|
|
|
|
|
|
|
# Returns the child at position $nb among those selected by the remaining
# arguments: 0 is the first one, 1 the second one..., -1 is the last one,
# -2 the one before it... Returns undef when there is no such child.
sub child
  { my( $elt, $nb, @cond)= @_;

    # start from one end of the children list, then hop sibling to sibling
    my $forward= ($nb >= 0);
    my $start=   $forward ? 'first_child'  : 'last_child';
    my $step=    $forward ? 'next_sibling' : 'prev_sibling';
    my $hops=    $forward ? $nb            : -$nb - 1;

    $elt= $elt->$start( @cond) or return undef;
    foreach (1..$hops)
      { $elt= $elt->$step( @cond) or return undef; }
    return $elt;
  }
6926
|
|
|
|
|
|
|
|
6927
|
|
|
|
|
|
|
# Returns the list of elements reached by calling prev_sibling repeatedly
# (with the arguments received), ordered from the furthest to the closest.
sub prev_siblings
  { my( $elt, @cond)= @_;
    my @closest_first;
    while( $elt= $elt->prev_sibling( @cond))
      { push @closest_first, $elt; }
    my @siblings= reverse @closest_first;
    return @siblings;
  }
6934
|
|
|
|
|
|
|
|
6935
|
|
|
|
|
|
|
# Returns what the parent's children method returns (for the arguments
# received), minus the element itself.
sub siblings
  { my( $elt, @cond)= @_;
    my $parent= $elt->{parent};
    my @others= grep { $_ ne $elt } $parent->children( @cond);
    return @others;
  }
6939
|
|
|
|
|
|
|
|
6940
|
|
|
|
|
|
|
# Returns the position of the element among its siblings: 1 plus the number
# of elements reached by calling prev_sibling repeatedly with the arguments
# received. Returns 0 when a condition is given and the element does not
# match it.
sub pos
  { my( $elt, @cond)= @_;
    return 0 if( $cond[0] && !$elt->matches( @cond));
    my $rank= 1;
    while( $elt= $elt->prev_sibling( @cond))
      { $rank++; }
    return $rank;
  }
6947
|
|
|
|
|
|
|
|
6948
|
|
|
|
|
|
|
|
6949
|
|
|
|
|
|
|
# Returns the list of elements reached by calling next_sibling repeatedly
# (with the arguments received), in the order they are found.
sub next_siblings
  { my( $elt, @cond)= @_;
    my @siblings;
    for( my $sibling= $elt->next_sibling( @cond); $sibling; $sibling= $sibling->next_sibling( @cond))
      { push @siblings, $sibling; }
    return @siblings;
  }
6956
|
|
|
|
|
|
|
|
6957
|
|
|
|
|
|
|
|
6958
|
|
|
|
|
|
|
# used by get_xpath: parses the xpath expression and generates a sub that performs the |
6959
|
|
|
|
|
|
|
# search |
6960
|
|
|
|
|
|
|
{ # XPath axis name => name of the element method that follows that axis
  my %axis2method;
  BEGIN { %axis2method= ( child                => 'children',
                          descendant           => 'descendants',
                          'descendant-or-self' => 'descendants_or_self',
                          parent               => 'parent_is',
                          ancestor             => 'ancestors',
                          'ancestor-or-self'   => 'ancestors_or_self',
                          'following-sibling'  => 'next_siblings',
                          'preceding-sibling'  => 'prev_siblings',
                          following            => 'following_elts',
                          preceding            => 'preceding_elts',
                          self                 => '_self',
                        );
        }

  # Turns an XPath-like expression into Perl code, one statement per location
  # step, each of them updating @results, then evals that code.
  # Returns the resulting code reference, which takes an element and returns
  # XML::Twig::_unique_elts( @results).
  # Errors are reported through _croak_and_doublecheck_xpath.
  # $type is accepted but not used here.
  sub _install_xpath
    { my( $xpath_exp, $type)= @_;
      my $original_exp= $xpath_exp;
      my $sub= 'my $elt= shift; my @results;';

      # grab the root if expression starts with a /
      if( $xpath_exp=~ s{^/}{})
        { $sub .= '@results= ($elt->twig) || croak "cannot use an XPath query starting with a / on a node not attached to a whole twig";'; }
      elsif( $xpath_exp=~ s{^\./}{})
        { $sub .= '@results= ($elt);'; }
      else
        { $sub .= '@results= ($elt);'; }

      #warn "xpath_exp= '$xpath_exp'\n";

      # each iteration consumes one step: optional leading / (the second one
      # of a //), optional axis, tag (or * or . or ..) and predicates
      while( $xpath_exp &&
             $xpath_exp=~s{^\s*(/?)
                            # the xxx=~/regexp/ is a pain as it includes /
                            (\s*(?:(?:($REG_AXIS)::)?(\*|$REG_TAG_PART|\.\.|\.)\s*)?($REG_PREDICATE_ALT*)
                            )
                            (/|$)}{}xo)

        { my( $wildcard, $sub_exp, $axis, $gi, $predicates)= ($1, $2, $3, $4, $5);
          if( $axis && ! $gi)
            { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp"); }

          # grab a parent
          if( $sub_exp eq '..')
            { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp") if( $wildcard);
              $sub .= '@results= map { $_->{parent}} @results;';
            }
          # test the element itself
          elsif( $sub_exp=~ m{^\.(.*)$}s)
            { $sub .= "\@results= grep { \$_->matches( q{$1}) } \@results;" }
          # grab children
          else
            {
              if( !$axis)
                { $axis= $wildcard ? 'descendant' : 'child'; }
              if( !$gi or $gi eq '*') { $gi=''; }
              my $function;

              # "special" predicates, that return just one element
              if( $predicates && ($predicates =~ m{^\s*\[\s*((-\s*)?\d+)\s*\]\s*$}))
                { # single positional predicate: [n]
                  my $offset= $1;
                  $offset-- if( $offset > 0);
                  $function=  $axis eq 'descendant' ? "next_n_elt( $offset, '$gi')"
                           :  $axis eq 'child'      ? "child( $offset, '$gi')"
                           :                          _croak_and_doublecheck_xpath( $original_exp, "error [$1] not supported along axis '$axis'")
                           ;
                  $sub .= "\@results= grep { \$_ } map { \$_->$function } \@results;"
                }
              elsif( $predicates && ($predicates =~ m{^\s*\[\s*last\s*\(\s*\)\s*\]\s*$}) )
                { # last()
                  _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp, usage of // and last() not supported") if( $wildcard);
                  $sub .= "\@results= map { \$_->last_child( '$gi') } \@results;";
                }
              else
                { # follow the axis
                  #warn "axis: '$axis' - method: '$axis2method{$axis}' - gi: '$gi'\n";

                  my $follow_axis= " \$_->$axis2method{$axis}( '$gi')";
                  my $step= $follow_axis;

                  # now filter using the predicate
                  while( $predicates=~ s{^\s*($REG_PREDICATE_ALT)\s*}{}o)
                    { my $pred= $1;
                      $pred=~ s{^\s*\[\s*}{};
                      $pred=~ s{\s*\]\s*$}{};
                      my $test="";
                      if( $pred=~ m{^(-?\s*\d+)$})
                        { # positional predicate applied to the step built so far
                          my $pos= $1;
                          if( $step=~ m{^\s*grep(.*) (\$_->\w+\(\s*'[^']*'\s*\))})
                            { $step= "XML::Twig::_first_n $1 $pos, $2"; }
                          else
                            { if( $pos > 0) { $pos--; }
                              $step= "($step)[$pos]";
                            }
                          #warn "number predicate '$pos' - generated step '$step'\n";
                        }
                      else
                        { # the predicate is consumed one token at a time, each
                          # token adding its Perl translation to $test
                          my $syntax_error=0;
                          do
                            { if( $pred =~ s{^string\(\s*\)\s*=\s*($REG_STRING)\s*}{}o)  # string()="string" pred
                                { $test .= "\$_->text eq $1"; }
                              elsif( $pred =~ s{^string\(\s*\)\s*!=\s*($REG_STRING)\s*}{}o)  # string()!="string" pred
                                { $test .= "\$_->text ne $1"; }
                              elsif( $pred =~ s{^string\(\s*\)\s*=\s*($REG_NUMBER)\s*}{}o)  # string()=number pred
                                { $test .= "\$_->text eq $1"; }
                              elsif( $pred =~ s{^string\(\s*\)\s*!=\s*($REG_NUMBER)\s*}{}o)  # string()!=number pred
                                { $test .= "\$_->text ne $1"; }
                              elsif( $pred =~ s{^string\(\s*\)\s*(>|<|>=|<=)\s*($REG_NUMBER)\s*}{}o)  # string() compared to a number
                                { $test .= "\$_->text $1 $2"; }

                              elsif( $pred =~ s{^string\(\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)\s*}{}o)  # string()=~/regex/ pred
                                { my( $match, $regexp)= ($1, $2);
                                  $test .= "\$_->text $match $regexp";
                                }
                              elsif( $pred =~ s{^string\(\s*\)\s*}{}o)  # string() pred
                                { $test .= "\$_->text"; }
                              elsif( $pred=~ s{^@($REG_TAG_NAME)\s*($REG_OP)\s*($REG_STRING|$REG_NUMBER)}{}o)  # @att="val" pred
                                { my( $att, $oper, $val)= ($1, _op( $2), $3);
                                  $test .= qq{((defined \$_->{'att'}->{"$att"})  && (\$_->{'att'}->{"$att"} $oper $val))};
                                }
                              elsif( $pred =~ s{^@($REG_TAG_NAME)\s*($REG_MATCH)\s*($REG_REGEXP)\s*}{}o)  # @att=~/regex/ pred XXX
                                { my( $att, $match, $regexp)= ($1, $2, $3);
                                  $test .= qq{((defined \$_->{'att'}->{"$att"})  && (\$_->{'att'}->{"$att"} $match $regexp))};
                                }
                              elsif( $pred=~ s{^@($REG_TAG_NAME)\s*}{}o)  # @att pred
                                { $test .= qq{(defined \$_->{'att'}->{"$1"})}; }
                              elsif( $pred=~ s{^\s*(?:not|!)\s*@($REG_TAG_NAME)\s*}{}o)  # not @att pred
                                { $test .= qq{((\$_->is_elt) && (not defined \$_->{'att'}->{"$1"}))}; }
                              elsif( $pred=~ s{^\s*([()])}{})  # ( or ) (just add to the test)
                                { $test .= qq{$1}; }
                              elsif( $pred=~ s{^\s*(and|or)\s*}{})
                                { $test .= lc " $1 "; }
                              else
                                { $syntax_error=1; }

                            } while( !$syntax_error && $pred);
                          _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp at $pred") if( $pred);
                          $step= " grep { $test } $step ";
                        }
                    }
                  #warn "step: '$step'";
                  $sub .= "\@results= grep defined, map { $step } \@results;";
                }
            }
        }

      # anything left could not be parsed as a step
      if( $xpath_exp)
        { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp around $xpath_exp"); }

      $sub .= q{return XML::Twig::_unique_elts( @results); };
      #warn "generated: '$sub'\n";
      my $s= eval "sub { $NO_WARNINGS; $sub }";
      if( $@)
        { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp ($@);") }
      return( $s);
    }
}
7119
|
|
|
|
|
|
|
|
7120
|
|
|
|
|
|
|
# Report an xpath compilation error to the caller (always croaks).
#   $xpath_expression: the expression that could not be turned into code
#   remaining args   : message lines, joined with newlines
# When XML::Twig::XPath is loaded and its parser accepts the expression,
# a hint about using 'findnodes' is appended to the message.
sub _croak_and_doublecheck_xpath
  { my( $xpath_expression, @lines)= @_;
    my $mess= join( "\n", @lines);

    if( $XML::Twig::XPath::VERSION || 0)
      { my $check_twig= XML::Twig::XPath->new;
        # the eval protects against the full parser dying on the expression
        my $parsed= eval { $check_twig->{twig_xp}->_parse( $xpath_expression) };
        if( $parsed)
          { $mess .= "\nthe expression is a valid XPath statement, and you are using XML::Twig::XPath, but"
                   . "\nyou are using either 'find_nodes' or 'get_xpath' where the method you likely wanted"
                   . "\nto use is 'findnodes', which is the only one that uses the full XPath engine\n";
          }
      }

    croak $mess;
  }
7133
|
|
|
|
|
|
|
|
7134
|
|
|
|
|
|
|
|
7135
|
|
|
|
|
|
|
|
7136
|
|
|
|
|
|
|
{ # compiled xpath expressions are cached for the life of the program
  my %xpath;    # xpath_expression => subroutine_code;

  # Evaluate $xpath_exp starting from $elt.
  #   without $offset: returns whatever the compiled code returns
  #                    (called in the caller's context)
  #   with $offset   : returns only the result at that index
  sub get_xpath
    { my( $elt, $xpath_exp, $offset)= @_;

      my $sub= $xpath{$xpath_exp};
      unless( $sub)
        { $sub= $xpath{$xpath_exp}= _install_xpath( $xpath_exp); }

      if( defined $offset)
        { my @res= $sub->( $elt);
          return $res[$offset];
        }

      return $sub->( $elt);
    }
}
7146
|
|
|
|
|
|
|
|
7147
|
|
|
|
|
|
|
|
7148
|
|
|
|
|
|
|
# Return the text of every element matched by get_xpath (the arguments after
# the element are passed through to get_xpath unchanged).
sub findvalues
  { my( $elt, @xpath_args)= @_;
    my @found= $elt->get_xpath( @xpath_args);
    return map { $_->text } @found;
  }
7152
|
|
|
|
|
|
|
|
7153
|
|
|
|
|
|
|
# Return, as a single string, the concatenated text of every element matched
# by get_xpath (the arguments after the element are passed to get_xpath).
sub findvalue
  { my( $elt, @xpath_args)= @_;
    my @texts= map { $_->text } $elt->get_xpath( @xpath_args);
    return join '', @texts;
  }
7157
|
|
|
|
|
|
|
|
7158
|
|
|
|
|
|
|
|
7159
|
|
|
|
|
|
|
# XML::XPath compatibility |
7160
|
|
|
|
|
|
|
# XML::XPath compatibility: look up an element by id through the twig
sub getElementById
  { my( $elt, $id)= @_;
    return $elt->twig->elt_id( $id);
  }
7161
|
|
|
|
|
|
|
# XML::XPath compatibility: the children of the element, in document order,
# as a list in list context or as an array reference in scalar context
sub getChildNodes
  { my @children;
    for( my $child= $_[0]->{first_child}; $child; $child= $child->{next_sibling})
      { push @children, $child; }
    return @children if( wantarray);
    return \@children;
  }
7162
|
|
|
|
|
|
|
|
7163
|
|
|
|
|
|
|
# accessors for the 'flushed' marker stored in the element hash
sub _flushed
  { my( $elt)= @_;
    return $elt->{flushed};
  }

sub _set_flushed
  { my( $elt)= @_;
    return $elt->{flushed}= 1;
  }

sub _del_flushed
  { my( $elt)= @_;
    return delete $elt->{flushed};
  }
7166
|
|
|
|
|
|
|
|
7167
|
|
|
|
|
|
|
# Detach the element from its tree.
# Returns the element, except when it has no parent and is not the root of a
# fully parsed twig (an orphan element), in which case nothing is returned.
# The previous parent/sibling links are kept in $elt->{former}.
sub cut
  { my $elt= shift;
    my $parent= $elt->{parent};

    if( ! $parent && $elt->is_elt)
      { # are we cutting the root?
        my $t= $elt->{twig};
        # cutting an orphan, returning $elt would break backward compatibility
        return unless( $t && ! $t->{twig_parsing});
        # cutting the root
        delete $t->{twig_root};
        delete $elt->{twig};
        return $elt;
      }

    # save the old links, that'll make it easier for some loops
    foreach my $link ( qw(parent prev_sibling next_sibling) )
      { $elt->{former}->{$link}= $elt->{$link};
        if( $XML::Twig::weakrefs) { weaken( $elt->{former}->{$link}); }
      }

    # if we cut the current element then its parent becomes the current elt
    if( $elt->{twig_current})
      { my $twig_current= $elt->{parent};
        $elt->twig->{twig_current}= $twig_current;
        $twig_current->{'twig_current'}=1;
        delete $elt->{'twig_current'};
      }

    # the element's own links are not modified until the neighbours are fixed
    my $prev_sibling= $elt->{prev_sibling};
    my $next_sibling= $elt->{next_sibling};

    if( $parent->{first_child} && $parent->{first_child} == $elt)
      { $parent->{first_child}= $next_sibling;
        # cutting can make the parent empty
        if( ! $parent->{first_child}) { $parent->{empty}= 1; }
      }

    if( $parent->{last_child} && $parent->{last_child} == $elt)
      { delete $parent->{empty};
        $parent->{last_child}= $prev_sibling;
        if( $XML::Twig::weakrefs) { weaken( $parent->{last_child}); }
      }

    # link the 2 neighbours together
    if( $prev_sibling)
      { $prev_sibling->{next_sibling}= $next_sibling; }
    if( $next_sibling)
      { $next_sibling->{prev_sibling}= $prev_sibling;
        if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling}); }
      }

    # the element is now on its own
    $elt->{parent}= undef;
    if( $XML::Twig::weakrefs) { weaken( $elt->{parent}); }
    $elt->{prev_sibling}= undef;
    if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling}); }
    $elt->{next_sibling}= undef;

    # merge 2 (now) consecutive text nodes if they are of the same type
    # (type can be PCDATA or CDATA)
    if(    $prev_sibling && $next_sibling && $prev_sibling->is_text
        && ( $XML::Twig::index2gi[$prev_sibling->{'gi'}] eq $XML::Twig::index2gi[$next_sibling->{'gi'}])
      )
      { $prev_sibling->merge_text( $next_sibling); }

    return $elt;
  }
7224
|
|
|
|
|
|
|
|
7225
|
|
|
|
|
|
|
|
7226
|
|
|
|
|
|
|
# links the element had just before it was last cut
sub former_next_sibling
  { my( $elt)= @_;
    return $elt->{former}->{next_sibling};
  }

sub former_prev_sibling
  { my( $elt)= @_;
    return $elt->{former}->{prev_sibling};
  }

sub former_parent
  { my( $elt)= @_;
    return $elt->{former}->{parent};
  }
7229
|
|
|
|
|
|
|
|
7230
|
|
|
|
|
|
|
# Cut the children of the element selected by the (optional) $exp condition
# and return them; the element is marked empty when it has no child left.
sub cut_children
  { my( $elt, $exp)= @_;
    my @cut= $elt->children( $exp);
    $_->cut for @cut;
    $elt->{empty}= 1 unless( $elt->has_children);
    return @cut;
  }
7237
|
|
|
|
|
|
|
|
7238
|
|
|
|
|
|
|
# Cut the descendants of the element selected by the (optional) $exp
# condition and return them; the element is marked empty when it has no
# child left.
# The descendants are collected once, before any of them is cut, so the
# elements that are cut are exactly the ones that are returned.
sub cut_descendants
  { my( $elt, $exp)= @_;
    my @descendants= $elt->descendants( $exp);
    foreach my $descendant (@descendants) { $descendant->cut; }
    if( ! $elt->has_children) { $elt->{empty}= 1; }
    return @descendants;
  }
7245
|
|
|
|
|
|
|
|
7246
|
|
|
|
|
|
|
|
7247
|
|
|
|
|
|
|
# Remove the element but keep its children, which take its place in the tree.
# Croaks if the element is still being parsed, or if it has no parent and
# does not have exactly one child (which then becomes the root of the twig).
# Returns the erased element.
sub erase
  { my $elt= shift;

    # you cannot erase the current element
    croak "trying to erase an element before it has been completely parsed"
      if( $elt->{twig_current});

    my $parent= $elt->{parent};

    unless( $parent)
      { # trying to erase the root (of a twig or of a cut/new element)
        my @children;
        for( my $child= $elt->{first_child}; $child; $child= $child->{next_sibling})
          { push @children, $child; }
        croak "can only erase an element with no parent if it has a single child"
          unless( @children == 1);

        $elt->_move_extra_data_after_erase;
        my( $child)= @children;
        $child->{parent}= undef;
        if( $XML::Twig::weakrefs) { weaken( $child->{parent}); }
        $elt->twig->set_root( $child);
        return $elt;
      }

    # normal case
    $elt->_move_extra_data_after_erase;
    my @children;
    for( my $child= $elt->{first_child}; $child; $child= $child->{next_sibling})
      { push @children, $child; }

    unless( @children)
      { # no children, just cut the elt
        $elt->delete;
        return $elt;
      }

    # elt has children, move them up

    # the first child may need to be merged with a previous text
    my $first_child= shift @children;
    $first_child->move( before => $elt);
    my $prev= $first_child->{prev_sibling};
    if(    $prev && $prev->is_text
        && ($XML::Twig::index2gi[$first_child->{'gi'}] eq $XML::Twig::index2gi[$prev->{'gi'}])
      )
      { $prev->merge_text( $first_child); }

    # move the rest of the children
    $_->move( before => $elt) foreach (@children);

    # now the elt had no child, delete it
    $elt->delete;

    # now see if we need to merge the last child with the next element
    my $last_child= $children[-1] || $first_child; # if no last child, then it's also the first child
    my $next= $last_child->{next_sibling};
    if(    $next && $next->is_text
        && ($XML::Twig::index2gi[$last_child->{'gi'}] eq $XML::Twig::index2gi[$next->{'gi'}])
      )
      { $last_child->merge_text( $next); }

    # if parsing and have now a PCDATA text, mark so we can normalize later on if need be
    if( $parent->{twig_current} && $last_child->is_text) { $parent->{twig_to_be_normalized}=1; }

    return $elt;
  }
7303
|
|
|
|
|
|
|
|
7304
|
|
|
|
|
|
|
# Before an element is erased, hand over the extra data attached to it
# ($elt->{extra_data} and $elt->{extra_data_before_end_tag}) to a neighbour:
# an element or text target gets it prepended, otherwise it goes before the
# end tag of the parent. Returns the element.
sub _move_extra_data_after_erase
  { my( $elt)= @_;

    # prepend the data to a target; targets that are neither $ELT nor $TEXT
    # are left alone
    my $give_to= sub
      { my( $target, $data)= @_;
        if( $target->is( $ELT))
          { $target->set_extra_data( $data . ($target->extra_data || '')); }
        elsif( $target->is( $TEXT))
          { $target->_unshift_extra_data_in_pcdata( $data, 0); } # TO CHECK
      };

    # extra_data
    if( my $extra_data= $elt->{extra_data})
      { if( my $target= $elt->{first_child} || $elt->{next_sibling})
          { $give_to->( $target, $extra_data); }
        else
          { my $parent= $elt->{parent}; # always exists or the erase cannot be performed
            $parent->_prefix_extra_data_before_end_tag( $extra_data);
          }
      }

    # extra_data_before_end_tag
    if( my $extra_data= $elt->{extra_data_before_end_tag})
      { my $target= $elt->{next_sibling};
        my $parent= $target ? undef : $elt->{parent};
        if( $target)
          { $give_to->( $target, $extra_data); }
        elsif( $parent)
          { $parent->_prefix_extra_data_before_end_tag( $extra_data); }
      }

    return $elt;
  }
7339
|
|
|
|
|
|
|
BEGIN |
7340
|
|
|
|
|
|
|
{ my %method= ( before => \&paste_before, |
7341
|
|
|
|
|
|
|
after => \&paste_after, |
7342
|
|
|
|
|
|
|
first_child => \&paste_first_child, |
7343
|
|
|
|
|
|
|
last_child => \&paste_last_child, |
7344
|
|
|
|
|
|
|
within => \&paste_within, |
7345
|
|
|
|
|
|
|
); |
7346
|
|
|
|
|
|
|
|
7347
|
|
|
|
|
|
|
# paste elt somewhere around ref |
7348
|
|
|
|
|
|
|
# pos can be first_child (default), last_child, before, after or within |
7349
|
|
|
|
|
|
|
sub paste ## no critic (Subroutines::ProhibitNestedSubs);
  { # $elt->paste( [$pos,] $ref, ...): paste elt somewhere around ref
    # $pos is looked up in %method and defaults to 'first_child' when the
    # first argument is a reference; croaks on an element that still has a
    # parent, an unknown position or a target that is not an XML::Twig::Elt
    # returns the pasted element
    my $elt= shift;
    croak "cannot paste an element that belongs to a tree" if( $elt->{parent});

    my $pos;
    if( ref $_[0])
      { $pos= 'first_child';
        croak "wrong argument order in paste, should be $_[1] first" if($_[1]);
      }
    else
      { $pos= shift; }

    my $method= $method{$pos}
      or croak "tried to paste in wrong position '$pos', allowed positions " .
               " are 'first_child', 'last_child', 'before', 'after' and " .
               "'within'";

    my $ref= $_[0];
    unless( ref( $ref) && isa( $ref, 'XML::Twig::Elt'))
      { croak "missing target in paste" unless( defined( $ref));
        croak "wrong target type in paste (not a reference), should be XML::Twig::Elt or a subclass" unless( ref( $ref));
        my $type= ref $ref;
        croak "wrong target type in paste: '$type', should be XML::Twig::Elt or a subclass";
      }

    # check here so error message lists the caller file/line
    if( !$ref->{parent} && ($pos=~ m{^(before|after)$}) && !(exists $elt->{'target'}) && !(exists $elt->{'comment'}))
      { croak "cannot paste $1 root"; }

    $elt->$method( @_);

    # the ids recorded in the pasted element are copied to the twig of the target
    my $ids= $elt->{twig_id_list};
    my $t= $ids && $ref->twig;
    if( $ids && $t)
      { $t->{twig_id_list}||={};
        foreach my $id (keys %$ids)
          { $t->{twig_id_list}->{$id}= $ids->{$id};
            if( $XML::Twig::weakrefs) { weaken( $t->{twig_id_list}->{$id}); }
          }
      }

    return $elt;
  }
7394
|
|
|
|
|
|
|
|
7395
|
|
|
|
|
|
|
|
7396
|
|
|
|
|
|
|
# Insert $elt as the previous sibling of $ref and return it.
# When $ref has no parent: a comment or PI is added before the root of the
# twig (nothing is returned in that case), anything else croaks.
sub paste_before
  { my( $elt, $ref)= @_;

    my $parent= $ref->{parent};
    unless( $parent)
      { # trying to paste before an orphan (root or detached elt)
        my $t= $ref->twig
          or croak "cannot paste before an orphan element";
        unless( (exists $elt->{'comment'}) || (exists $elt->{'target'}))
          { croak "cannot paste before root"; }
        # we can still do this
        $t->_add_cpi_outside_of_root( leading_cpi => $elt);
        return;
      }

    my $prev_sibling= $ref->{prev_sibling};

    $elt->{parent}= $parent;
    if( $XML::Twig::weakrefs) { weaken( $elt->{parent}); }
    if( $parent->{first_child} == $ref) { $parent->{first_child}= $elt; }

    if( $prev_sibling) { $prev_sibling->{next_sibling}= $elt; }
    $elt->{prev_sibling}= $prev_sibling;
    if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling}); }

    $ref->{prev_sibling}= $elt;
    if( $XML::Twig::weakrefs) { weaken( $ref->{prev_sibling}); }
    $elt->{next_sibling}= $ref;

    return $elt;
  }
7425
|
|
|
|
|
|
|
|
7426
|
|
|
|
|
|
|
# Insert $elt as the next sibling of $ref and return it.
# When $ref has no parent: a comment or PI is added after the root of the
# twig (nothing is returned in that case), anything else croaks.
sub paste_after
  { my( $elt, $ref)= @_;

    my $parent= $ref->{parent};
    unless( $parent)
      { # trying to paste after an orphan (root or detached elt)
        my $t= $ref->twig
          or croak "cannot paste after an orphan element";
        unless( (exists $elt->{'comment'}) || (exists $elt->{'target'}))
          { croak "cannot paste after root"; }
        # we can still do this
        $t->_add_cpi_outside_of_root( trailing_cpi => $elt);
        return;
      }

    my $next_sibling= $ref->{next_sibling};

    $elt->{parent}= $parent;
    if( $XML::Twig::weakrefs) { weaken( $elt->{parent}); }
    if( $parent->{last_child}== $ref)
      { delete $parent->{empty};
        $parent->{last_child}= $elt;
        if( $XML::Twig::weakrefs) { weaken( $parent->{last_child}); }
      }

    $ref->{next_sibling}= $elt;
    $elt->{prev_sibling}= $ref;
    if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling}); }

    if( $next_sibling)
      { $next_sibling->{prev_sibling}= $elt;
        if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling}); }
      }
    $elt->{next_sibling}= $next_sibling;

    return $elt;
  }
7456
|
|
|
|
|
|
|
|
7457
|
|
|
|
|
|
|
# Insert $elt as the first child of $ref and return it.
sub paste_first_child
  { my( $elt, $ref)= @_;
    my $old_first= $ref->{first_child};

    $elt->{parent}= $ref;
    if( $XML::Twig::weakrefs) { weaken( $elt->{parent}); }

    $ref->{first_child}= $elt;
    # $ref had no child: $elt is also its last child
    unless( $ref->{last_child})
      { delete $ref->{empty};
        $ref->{last_child}= $elt;
        if( $XML::Twig::weakrefs) { weaken( $ref->{last_child}); }
      }

    $elt->{prev_sibling}= undef;
    if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling}); }

    if( $old_first)
      { $old_first->{prev_sibling}= $elt;
        if( $XML::Twig::weakrefs) { weaken( $old_first->{prev_sibling}); }
      }
    $elt->{next_sibling}= $old_first;

    return $elt;
  }
7473
|
|
|
|
|
|
|
|
7474
|
|
|
|
|
|
|
# Insert $elt as the last child of $ref and return it.
sub paste_last_child
  { my( $elt, $ref)= @_;
    my $old_last= $ref->{last_child};

    $elt->{parent}= $ref;
    if( $XML::Twig::weakrefs) { weaken( $elt->{parent}); }

    delete $ref->{empty};
    $ref->{last_child}= $elt;
    if( $XML::Twig::weakrefs) { weaken( $ref->{last_child}); }
    # $ref had no child: $elt is also its first child
    $ref->{first_child}= $elt unless( $ref->{first_child});

    $elt->{prev_sibling}= $old_last;
    if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling}); }
    if( $old_last) { $old_last->{next_sibling}= $elt; }

    $elt->{next_sibling}= undef;

    return $elt;
  }
7490
|
|
|
|
|
|
|
|
7491
|
|
|
|
|
|
|
# Insert $elt inside a text node, at $offset characters in the text.
# The text node is $ref itself when it is a text element, otherwise the
# result of $ref->next_elt( $TEXT, $ref). That node is split at $offset and
# $elt is pasted before the second half.
# Croaks if no text node is found. Returns $elt.
sub paste_within
  { my( $elt, $ref, $offset)= @_;
    my $text= $ref->is_text ? $ref : $ref->next_elt( $TEXT, $ref);
    # without this check the failure was an obscure "Can't call method" error
    croak "cannot paste within an element that does not contain text" unless( $text);
    my $new= $text->split_at( $offset);
    $elt->paste_before( $new);
    return $elt;
  }
7498
|
|
|
|
|
|
|
} |
7499
|
|
|
|
|
|
|
|
7500
|
|
|
|
|
|
|
# load an element into a structure similar to XML::Simple's |
7501
|
|
|
|
|
|
|
# Load an element into a structure similar to XML::Simple's.
# Arguments after the element are option => value pairs. Option names are
# normalized (an upper-case letter that follows a word character is replaced
# by '_' and its lower-case form); names that are not in the allowed list
# trigger a warning but are kept.
# The options are pre-processed here into the fields used by _simplify
# (key_for_all, remove_key_for_all, prefix_key_for_all, key_for_elt, ...),
# whose result is returned.
sub simplify
  { my $elt= shift;

    # normalize option names
    my %options= @_;
    %options= map { my ($key, $val)= ($_, $options{$_});
                    $key=~ s{(\w)([A-Z])}{$1_\L$2}g;
                    $key => $val
                  } keys %options;

    # check options
    my @allowed_options= qw( keyattr forcearray noattr content_key
                             var var_regexp variables var_attr
                             group_tags forcecontent
                             normalise_space normalize_space
                           );
    my %allowed_options= map { $_ => 1 } @allowed_options;
    foreach my $option (keys %options)
      { carp "invalid option $option\n" unless( $allowed_options{$option}); }

    $options{normalise_space} ||= $options{normalize_space} || 0;

    $options{content_key} ||= 'content';
    if( $options{content_key}=~ m{^-})
      { # need to remove the - and to activate extra folding
        $options{content_key}=~ s{^-}{};
        $options{extra_folding}= 1;
      }
    else
      { $options{extra_folding}= 0; }

    $options{forcearray} ||=0;
    if( isa( $options{forcearray}, 'ARRAY'))
      { my %forcearray_tags= map { $_ => 1 } @{$options{forcearray}};
        $options{forcearray_tags}= \%forcearray_tags;
        $options{forcearray}= 0;
      }

    # a key attribute can be prefixed: '+' keeps the attribute under its own
    # name, '-' keeps it under a name prefixed by '-', no prefix removes it
    $options{keyattr} ||= ['name', 'key', 'id'];
    if( ref $options{keyattr} eq 'ARRAY')
      { foreach my $keyattr (@{$options{keyattr}})
          { my( $prefix, $att)= ($keyattr=~ m{^([+-])?(.*)});
            $prefix ||= '';
            $options{key_for_all}->{$att}= 1;
            # only un-prefixed keys are removed: _simplify skips removed
            # attributes before it looks at prefix_key_for_all, so removing
            # '-' keys too meant they were never stored
            $options{remove_key_for_all}->{$att}=1 unless( $prefix);
            $options{prefix_key_for_all}->{$att}=1 if( $prefix eq '-');
          }
      }
    elsif( ref $options{keyattr} eq 'HASH')
      { while( my( $elt, $keyattr)= each %{$options{keyattr}})
          { my( $prefix, $att)= ($keyattr=~ m{^([+-])?(.*)});
            $prefix ||='';
            $options{key_for_elt}->{$elt}= $att;
            $options{remove_key_for_elt}->{"$elt#$att"}=1 unless( $prefix);
            $options{prefix_key_for_elt}->{"$elt#$att"}=1 if( $prefix eq '-');
          }
      }

    $options{var}||= $options{var_attr}; # for compat with XML::Simple
    if( $options{var}) { $options{var_values}= {}; }
    else               { $options{var}='';         }

    if( $options{variables})
      { $options{var}||= 1;
        $options{var_values}= $options{variables};
      }

    if( $options{var_regexp} and !$options{var})
      { warn "var option not used, var_regexp option ignored\n"; }
    $options{var_regexp} ||= '\$\{?(\w+)\}?';

    return $elt->_simplify( \%options);
  }
7576
|
|
|
|
|
|
|
|
7577
|
|
|
|
|
|
|
# Recursive worker for simplify: build the data for $elt from its children
# and its attributes, according to the pre-processed $options hash.
# Returns a string (text-only element without attributes, unless content is
# forced), a hash reference, or undef when nothing is stored.
sub _simplify
  { my( $elt, $options)= @_;

    my $data;

    my $gi= $XML::Twig::index2gi[$elt->{'gi'}];
    my @children= do { my @kids=();
                       my $child= $elt->{first_child};
                       while( $child) { push @kids, $child; $child= $child->{next_sibling}; }
                       @kids;
                     };
    my %atts= $options->{noattr} || !$elt->{att} ? () : %{$elt->{att}};
    my $nb_atts= keys %atts;

    # number of children and attributes for each name
    my %nb_children;
    foreach (@children)  { $nb_children{$_->tag}++; }
    foreach (keys %atts) { $nb_children{$_}++;      }

    # 'forcecontent' is the option name accepted by simplify; 'force_content',
    # the only name that used to be tested here, is still honored
    my $force_content= $options->{forcecontent} || $options->{force_content};

    # store children
    foreach my $child (@children)
      { if( $child->is_text)
          { # generate with a content key
            my $text= $elt->_text_with_vars( $options);
            if( $options->{normalise_space} >= 2) { $text= _normalize_space( $text); }
            if(    $force_content
                || $nb_atts
                || (scalar @children > 1)
              )
              { $data->{$options->{content_key}}= $text; }
            else
              { $data= $text; }
          }
        else
          { # element with sub-elements
            my $child_gi= $XML::Twig::index2gi[$child->{'gi'}];

            my $child_data= $child->_simplify( $options);

            # first see if we need to simplify further the child data
            # simplify because of grouped tags
            if( my $grouped_tag= $options->{group_tags}->{$child_gi})
              { # check that the child data is a hash with a single field
                unless(    (ref( $child_data) eq 'HASH')
                        && (keys %$child_data == 1)
                        && defined ( my $grouped_child_data= $child_data->{$grouped_tag})
                      )
                  { croak "error in grouped tag $child_gi"; }
                else
                  { $child_data=  $grouped_child_data; }
              }
            # simplify because of extra folding
            if( $options->{extra_folding})
              { if(    (ref( $child_data) eq 'HASH')
                    && (keys %$child_data == 1)
                    && defined( my $content= $child_data->{$options->{content_key}})
                  )
                  { $child_data= $content; }
              }

            if( my $keyatt= $child->_key_attr( $options))
              { # simplify element with key
                my $key= $child->{'att'}->{$keyatt};
                if( $options->{normalise_space} >= 1) { $key= _normalize_space( $key); }
                $data->{$child_gi}->{$key}= $child_data;
              }
            elsif(    $options->{forcearray}
                   || $options->{forcearray_tags}->{$child_gi}
                   || ( $nb_children{$child_gi} > 1)
                 )
              { # simplify element to store in an array
                if( defined $child_data && $child_data ne "" )
                  { $data->{$child_gi} ||= [];
                    push @{$data->{$child_gi}}, $child_data;
                  }
                else
                  { $data->{$child_gi}= [{}]; }
              }
            else
              { # simplify element to store as a hash field
                $data->{$child_gi}= defined $child_data && $child_data ne "" ? $child_data : {};
              }
          }
      }

    # store atts
    # TODO: deal with att that already have an element by that name
    foreach my $att (keys %atts)
      { # do not store if the att is a key that needs to be removed
        if(    $options->{remove_key_for_all}->{$att}
            || $options->{remove_key_for_elt}->{"$gi#$att"}
          )
          { next; }

        my $att_text= $options->{var} ? _replace_vars_in_text( $atts{$att}, $options) : $atts{$att} ;
        if( $options->{normalise_space} >= 2) { $att_text= _normalize_space( $att_text); }

        if(    $options->{prefix_key_for_all}->{$att}
            || $options->{prefix_key_for_elt}->{"$gi#$att"}
          )
          { # prefix the att
            $data->{"-$att"}= $att_text;
          }
        else
          { # normal case
            $data->{$att}= $att_text;
          }
      }

    return $data;
  }
7688
|
|
|
|
|
|
|
|
7689
|
|
|
|
|
|
|
# Return the name of the attribute used as key for the element, or nothing.
# With key_for_all set: the first attribute (in att_names order) listed in it.
# Otherwise, with key_for_elt set: the attribute registered for the tag of
# the element, provided the element has a defined value for it.
sub _key_attr
  { my( $elt, $options)=@_;
    return if( $options->{noattr});

    if( my $key_for_all= $options->{key_for_all})
      { foreach my $att ($elt->att_names)
          { return $att if( $key_for_all->{$att}); }
      }
    elsif( my $key_for_elt= $options->{key_for_elt})
      { my $att= $key_for_elt->{$XML::Twig::index2gi[$elt->{'gi'}]};
        return $att if( $att && defined( $elt->{'att'}->{$att}));
      }

    return;
  }
7704
|
|
|
|
|
|
|
|
7705
|
|
|
|
|
|
|
# Return the text of the element; when the 'var' option is on, variables are
# replaced in the text and the element is given a chance to define one
# (through _store_var).
sub _text_with_vars
  { my( $elt, $options)= @_;
    my $use_vars= $options->{var};
    my $text= $use_vars ? _replace_vars_in_text( $elt->text, $options)
                        : $elt->text;
    $elt->_store_var( $options) if( $use_vars);
    return $text;
  }
7716
|
|
|
|
|
|
|
|
7717
|
|
|
|
|
|
|
|
7718
|
|
|
|
|
|
|
# Return the text with every run of whitespace replaced by a single space
# and without leading or trailing space.
sub _normalize_space
  { my( $text)= @_;
    $text=~ s{\s+}{ }sg;    # collapse first, so at most 1 space is left at each end
    for ($text)
      { s{^\s}{};
        s{\s$}{};
      }
    return $text;
  }
7725
|
|
|
|
|
|
|
|
7726
|
|
|
|
|
|
|
|
7727
|
|
|
|
|
|
|
# Return the number of attributes of the element
sub att_nb
  { my( $elt)= @_;
    my $atts= $elt->{att} or return 0;
    return scalar keys %$atts;
  }
7731
|
|
|
|
|
|
|
|
7732
|
|
|
|
|
|
|
# Returns 1 if the element has no attribute, 0 if it has at least one.
sub has_no_atts {
    my $atts= $_[0]->{att} or return 1;
    my $count= keys %$atts;
    return $count ? 0 : 1;
}
7736
|
|
|
|
|
|
|
|
7737
|
|
|
|
|
|
|
# Returns $text with the variable references it contains replaced by their
# values.
#   $options->{var_regexp} : matches a reference; its first capture must be
#                            the name of the variable
#   $options->{var_values} : hash variable name => value
# A reference to an unknown variable triggers a warning and is left as is.
sub _replace_vars_in_text {
    my( $text, $options)= @_;

    $text=~ s{($options->{var_regexp})}
             { my( $reference, $name)= ( $1, $2);
               my $value= $options->{var_values}->{$name};
               unless( defined $value) {
                   warn "unknown variable $name\n";
                   $value= $reference;
               }
               $value
             }gex;

    return $text;
}
7750
|
|
|
|
|
|
|
|
7751
|
|
|
|
|
|
|
# If the element has the attribute named by the 'var' option, the text of
# the element is stored in $options->{var_values}, under the value of that
# attribute.
sub _store_var {
    my( $elt, $options)= @_;

    my $var_name= $elt->{'att'}->{ $options->{var} };
    if( defined $var_name) {
        $options->{var_values}->{$var_name}= $elt->text;
    }
}
7757
|
|
|
|
|
|
|
|
7758
|
|
|
|
|
|
|
|
7759
|
|
|
|
|
|
|
# split a text element at a given offset
# the text element (the element itself, or its first text child) keeps the
# text before $offset; the rest of the text goes in a new element, created
# with the tag of $elt and pasted after $elt
# returns the new element, or '' if $elt is not text and has no text child
sub split_at {
    my( $elt, $offset)= @_;

    my $text_elt= $elt;
    unless( $elt->is_text) {
        $text_elt= $elt->first_child( $TEXT) or return '';
    }

    my $full_text= $text_elt->text;
    my $head= substr( $full_text, 0, $offset);
    my $tail= substr( $full_text, $offset);

    # the 'empty' flag is dropped and the left part stored directly
    delete $text_elt->{empty};
    $text_elt->{pcdata}= $head;

    my $new_elt= $elt->new( $XML::Twig::index2gi[ $elt->{'gi'} ], $tail);
    $new_elt->paste( after => $elt);

    return $new_elt;
}
7771
|
|
|
|
|
|
|
|
7772
|
|
|
|
|
|
|
|
7773
|
|
|
|
|
|
|
# split an element or its text descendants into several, in place
# all elements (new and untouched) are returned
# the arguments after the element are passed as is to _split
sub split {
    my $elt= shift;

    my @text_chunks= $elt->is_text ? ( $elt) : $elt->descendants( $TEXT);
    my @result     = map { $_->_split( 1, @_) } @text_chunks;

    return @result;
}
7785
|
|
|
|
|
|
|
|
7786
|
|
|
|
|
|
|
# split an element or its text descendants into several, in place
# created elements (those which match the regexp) are returned
# the arguments after the element are passed as is to _split
sub mark {
    my $elt= shift;

    my @text_chunks= $elt->is_text ? ( $elt) : $elt->descendants( $TEXT);
    my @result     = map { $_->_split( 0, @_) } @text_chunks;

    return @result;
}
7798
|
|
|
|
|
|
|
|
7799
|
|
|
|
|
|
|
# split a single text element |
7800
|
|
|
|
|
|
|
# return_all defines what is returned: if it is true
# all elements (new and untouched) are returned
# otherwise only the elements created by matches in the split regexp are returned
7803
|
|
|
|
|
|
|
|
7804
|
|
|
|
|
|
|
|
7805
|
|
|
|
|
|
|
{ |
7806
|
|
|
|
|
|
|
|
7807
|
|
|
|
|
|
|
# splits a single text element on a regexp, in place
#   $elt        : the text element
#   $return_all : if true all the elements (new and untouched) are returned,
#                 otherwise only the elements created for the matches are
#   $regexp     : the regexp used to split the text
#   the other arguments are tags, each optionally followed by a hashref of
#   attributes; when there is none the tag of the parent of $elt is used
# the text before the first match stays in $elt, then elements are inserted
# after it: for each match, one element per capture in the regexp (the tags
# are used in turn, $1, $2... in tags and attributes are replaced by the
# captures) or a single element without text if the regexp has no capture,
# and text elements for the text found between and after the matches
sub _split
  { my $elt= shift;
    my $return_all= shift;
    my $regexp= shift;
    my @tags;

    while( @_)
      { my $tag= shift();
        if( ref $_[0])
          { push @tags, { tag => $tag, atts => shift }; }
        else
          { push @tags, { tag => $tag }; }
      }

    unless( @tags) { @tags= { tag => $elt->{parent}->gi }; }

    my @result;                                 # the returned list of elements
    my $text= $elt->text;
    my $gi= $XML::Twig::index2gi[$elt->{'gi'}];

    # 2 uses: if split matches then the first substring reuses $elt
    #         once a split has occurred then the last match needs to be put in
    #         a new element
    my $previous_match= 0;

    while( my( $pre_match, @matches)= $text=~ /^(.*?)$regexp(.*)$/gcs)
      { $text= pop @matches;    # the last capture is the text after the match
        if( $previous_match)
          { # match, not the first one, create a new text ($gi) element
            _utf8_ify( $pre_match) if( $] < 5.010);
            $elt= $elt->insert_new_elt( after => $gi, $pre_match);
            push @result, $elt if( $return_all);
          }
        else
          { # first match in $elt, re-use $elt for the first sub-string
            _utf8_ify( $pre_match) if( $] < 5.010);
            $elt->set_text( $pre_match);
            $previous_match++;                # store the fact that there was a match
            push @result, $elt if( $return_all);
          }

        # now deal with matches captured in the regexp
        if( @matches)
          { # match, with capture
            my $i=0;
            foreach my $match (@matches)
              { # create new element, text is the match
                _utf8_ify( $match) if( $] < 5.010);
                my $tag  = _repl_match( $tags[$i]->{tag}, @matches) || '#PCDATA';
                my $atts = \%{$tags[$i]->{atts}} || {};
                my %atts= map { _repl_match( $_, @matches) => _repl_match( $atts->{$_}, @matches) } keys %$atts;
                $elt= $elt->insert_new_elt( after => $tag, \%atts, $match);
                push @result, $elt;
                $i= ($i + 1) % @tags;
              }
          }
        else
          { # match, no captures
            my $tag  = $tags[0]->{tag};
            my $atts = \%{$tags[0]->{atts}} || {};
            $elt=  $elt->insert_new_elt( after => $tag, $atts);
            push @result, $elt;
          }
      }

    # test the length, not the truth, of the text: a trailing "0" is text too
    if( $previous_match && defined( $text) && length( $text))
      { # there was at least 1 match, and there is text left after the match
        $elt= $elt->insert_new_elt( after => $gi, $text);
      }

    push @result, $elt if( $return_all);

    return @result; # return all elements
  }
7880
|
|
|
|
|
|
|
|
7881
|
|
|
|
|
|
|
# Returns $val with the references to captures it contains ($1, $2...)
# replaced by the matching value in @matches ($1 is the first one).
sub _repl_match {
    my( $template, @captures)= @_;
    $template=~ s{\$(\d+)}{ $captures[ $1 - 1 ] }gex;
    return $template;
}
7886
|
|
|
|
|
|
|
|
7887
|
|
|
|
|
|
|
# evil hack needed as sometimes the utf8 flag of a string is not set when it
# should be: for perl 5.8.x, and unless the encoding is kept, the flag is
# forced on the argument, which is modified in place
my $encode_is_loaded=0;   # so we only load Encode once
sub _utf8_ify {
    if(     $perl_version >= 5.008
        and $perl_version <  5.010
        and !_keep_encoding()
      ) {
        unless( $encode_is_loaded) {
            require Encode;
            Encode->import;
            $encode_is_loaded++;
        }
        Encode::_utf8_on( $_[0]);   # the flag should be set but is not
    }
}
7896
|
|
|
|
|
|
|
|
7897
|
|
|
|
|
|
|
|
7898
|
|
|
|
|
|
|
} |
7899
|
|
|
|
|
|
|
|
7900
|
|
|
|
|
|
|
{ my %replace_sub; # cache for complex expressions (expression => sub) |
7901
|
|
|
|
|
|
|
|
7902
|
|
|
|
|
|
|
# Replaces the matches of $regexp in the text of the element and of its
# text descendants. Returns $elt.
#   $replace is either a plain string, in which $1..$9 are replaced by the
#   captures, or an expression that includes &elt( ...) or &ent( ...): the
#   text is then split around each match, and the match is replaced by the
#   elements created by the sub built by _install_replace_sub
sub subs_text {
    my( $elt, $regexp, $replace)= @_;

    my $is_string= _is_string( $replace);
    my @parents;    # parents of the texts that were split, normalized at the end

    foreach my $text_elt ( $elt->descendants_or_self( $TEXT)) {
        if( $is_string) {
            my $new_text= $text_elt->text;
            $new_text=~ s{$regexp}{ _replace_var( $replace, $1, $2, $3, $4, $5, $6, $7, $8, $9)}egx;
            $text_elt->set_text( $new_text);
        }
        else {
            no utf8; # = perl 5.6
            my $replace_sub= ( $replace_sub{$replace} ||= _install_replace_sub( $replace));
            my $rest= $text_elt->text;

            while( my( $before, $hit, @captures)= ( $rest=~ m{(.*?)($regexp)}sg)) {
                # isolate the match in a text element of its own
                my $hit_start = length( $before);
                my $match     = $hit_start ? $text_elt->split_at( $hit_start) : $text_elt;
                my $post_match= $match->split_at( length( $hit));
                $replace_sub->( $match, @captures);

                # go to next
                $text_elt= $post_match;
                $rest    = $post_match->text;

                unless( $parents[-1] && $parents[-1]== $text_elt->{parent}) {
                    push @parents, $text_elt->{parent};
                }
            }
        }
    }

    foreach my $parent ( @parents) { $parent->normalize; }

    return $elt;
}
7946
|
|
|
|
|
|
|
|
7947
|
|
|
|
|
|
|
|
7948
|
|
|
|
|
|
|
# Returns 0 if the replacement includes "&elt" or "&ent", 1 otherwise
# (it is then a plain string).
sub _is_string {
    return 0 if $_[0]=~ m{&e[ln]t};
    return 1;
}
7950
|
|
|
|
|
|
|
|
7951
|
|
|
|
|
|
|
# Returns $string with $1..$9 replaced by the matching value in @var
# ($1 is the first one). Only single-digit references are replaced.
sub _replace_var {
    my( $string, @values)= @_;
    my @by_number= ( undef, @values);    # so that $<n> is $by_number[<n>]
    $string=~ s{\$(\d)}{$by_number[$1]}g;
    return $string;
}
7957
|
|
|
|
|
|
|
|
7958
|
|
|
|
|
|
|
# Builds and returns the sub that replaces a match by the result of a
# replacement expression. The sub gets the element holding the match and the
# captures; it creates a new element for each part of the expression
# (&elt( ...) calls, &ent( ...) calls and plain text in between), pastes them
# in order after the match, then deletes the match.
# Croaks if the generated code does not compile.
# NOTE: the expression ends up in a string eval, so it must not come from an
# untrusted source.
sub _install_replace_sub {
    my( $replace_exp)= @_;

    my $sub= q{ my( $match, @var)= @_; my $new; my $last_inserted=$match;};

    foreach my $item ( split m{(&e[ln]t\s*\([^)]*\))}, $replace_exp) {
        next unless length $item;

        my $exp;
        if( $item=~ m{^&elt\s*\(([^)]*)\)}) {
            $exp= $1;
        }
        elsif( $item=~ m{^&ent\s*\(\s*([^\s)]*)\s*\)}) {
            $exp= " '#ENT' => $1";
        }
        else {
            $exp= qq{ '#PCDATA' => "$item"};
        }

        # replace references to matches: $<n> becomes $var[<n-1>]
        $exp=~ s{\$(\d)}{ '$var[' . ( $1 - 1) . ']' }eg;

        $sub .=   qq{ \$new= \$match->new( $exp); }
                . q{ $new->paste( after => $last_inserted); $last_inserted=$new;};
    }

    $sub .= q{ $match->delete; };
    #$sub=~ s/;/;\n/g; warn "subs: $sub";

    my $coderef= eval "sub { $NO_WARNINGS; $sub }";
    croak( "invalid replacement expression $replace_exp: ",$@) if $@;

    return $coderef;
}
7981
|
|
|
|
|
|
|
|
7982
|
|
|
|
|
|
|
} |
7983
|
|
|
|
|
|
|
|
7984
|
|
|
|
|
|
|
|
7985
|
|
|
|
|
|
|
# Merges the text element $e2 into the text element $e1: the text of $e2 is
# appended to the text of $e1, the extra data stored in the text of $e2 is
# pushed to $e1 (offsets shifted by the original length of the text of $e1),
# then $e2 is deleted. Returns $e1.
# Croaks if $e2 is not an XML::Twig::Elt, or unless both elements are text
# elements with the same gi.
sub merge_text {
    my( $e1, $e2)= @_;

    unless( isa( $e2, 'XML::Twig::Elt')) {
        croak "invalid merge: can only merge 2 elements";
    }
    unless( $e1->is_text && $e2->is_text && ( $e1->gi eq $e2->gi)) {
        croak "invalid merge: can only merge 2 text elements";
    }

    my $shift= length( $e1->text);    # measured before the text of $e2 is added

    $e1->set_text( $e1->text . $e2->text);

    if( my $extras= $e2->_extra_data_in_pcdata) {
        foreach my $data ( @$extras) {
            $e1->_push_extra_data_in_pcdata( $data->{text}, $data->{offset} + $shift);
        }
    }

    $e2->delete;

    return $e1;
}
8003
|
|
|
|
|
|
|
|
8004
|
|
|
|
|
|
|
# Merges $e2 into $e1: the children of $e2 are moved at the end of $e1, then
# $e2 is deleted. Returns $e1.
# When the last child of $e1 and the first child of $e2 are both PCDATA, the
# text of the second one is appended to the first one (and that child of
# $e2 is not moved); the extra data before the end tag of $e1 and the extra
# data of $e2 are then stored in the text, at the junction.
sub merge {
    my( $e1, $e2)= @_;
    my @to_move= $e2->_children;

    if(    $e1->_last_child && $e1->_last_child->is_pcdata
        && @to_move         && $to_move[0]->is_pcdata
      ) {
        my $junction= length( $e1->_last_child->{pcdata});
        my $left    = $e1->_last_child;
        my $right   = shift @to_move;
        $left->{pcdata} .= $right->{pcdata};

        my $extra_data= $e1->_extra_data_before_end_tag . $e2->extra_data;

        if( $extra_data) {
            $e1->_del_extra_data_before_end_tag;
            $left->_push_extra_data_in_pcdata( $extra_data, $junction);
        }

        if( my $extras= $right->_extra_data_in_pcdata) {
            foreach my $data ( @$extras) {
                $left->_push_extra_data_in_pcdata( $data->{text}, $data->{offset} + $junction);
            }
        }

        if( my $trailing= $e2->_extra_data_before_end_tag) {
            $e1->_set_extra_data_before_end_tag( $trailing);
        }
    }

    foreach my $child ( @to_move) { $child->move( last_child => $e1); }

    $e2->delete;
    return $e1;
}
8034
|
|
|
|
|
|
|
|
8035
|
|
|
|
|
|
|
|
8036
|
|
|
|
|
|
|
# recursively copy an element and returns the copy (can be huge and long)
# the copy gets the extra data, the asis status and the content of the
# element: text, cdata, pi, comment or entity for those elements, otherwise
# a copy of the attributes and copies of the children
sub copy {
    my( $elt)= @_;
    my $copy= $elt->new( $XML::Twig::index2gi[ $elt->{'gi'} ]);

    $copy->set_extra_data( $elt->extra_data) if $elt->extra_data;
    $copy->_set_extra_data_before_end_tag( $elt->{extra_data_before_end_tag}) if $elt->{extra_data_before_end_tag};

    $copy->set_asis if $elt->is_asis;

    if( exists $elt->{'pcdata'}) {
        delete $copy->{empty};
        $copy->{pcdata}= $elt->{pcdata};
        $copy->_set_extra_data_in_pcdata( $elt->{extra_data_in_pcdata}) if $elt->{extra_data_in_pcdata};
    }
    elsif( exists $elt->{'cdata'}) {
        $copy->{cdata}= $elt->{cdata};
        $copy->_set_extra_data_in_pcdata( $elt->{extra_data_in_pcdata}) if $elt->{extra_data_in_pcdata};
    }
    elsif( exists $elt->{'target'}) {
        $copy->_set_pi( $elt->{target}, $elt->{data});
    }
    elsif( exists $elt->{'comment'}) {
        $copy->{comment}= $elt->{comment};
    }
    elsif( exists $elt->{'ent'}) {
        $copy->{ent}= $elt->{ent};
    }
    else {
        # regular element: the list of children is built by following the links
        my @children;
        for( my $child= $elt->{first_child}; $child; $child= $child->{next_sibling}) {
            push @children, $child;
        }

        if( my $atts= $elt->{att}) {
            my %atts;
            tie %atts, 'Tie::IxHash' if( keep_atts_order());
            %atts= %{$atts}; # we want to do a real copy of the attributes
            $copy->set_atts( \%atts);
        }

        foreach my $child ( @children) {
            $child->copy->paste( 'last_child', $copy);
        }
    }

    # save links to the original location, which can be convenient and is used for namespace resolution
    foreach my $link ( qw(parent prev_sibling next_sibling)) {
        $copy->{former}->{$link}= $elt->{$link};
        weaken( $copy->{former}->{$link}) if $XML::Twig::weakrefs;
    }

    $copy->{empty}= $elt->{'empty'};

    return $copy;
}
8083
|
|
|
|
|
|
|
|
8084
|
|
|
|
|
|
|
|
8085
|
|
|
|
|
|
|
# Cuts the element from its tree and, when weak references are not in use,
# calls its destructor explicitly. Always returns undef.
sub delete {
    my( $elt)= @_;

    $elt->cut;
    unless( $XML::Twig::weakrefs) { $elt->DESTROY; }

    return undef;
}
8091
|
|
|
|
|
|
|
|
8092
|
|
|
|
|
|
|
# destructor used when weak references are not in use (see set_destroy):
# destroys the children of the element, removes the id of the element from
# the twig, then empties the element
#   $elt : the element
#   $t   : the twig, optional (passed in recursive calls)
# does nothing when $XML::Twig::weakrefs is true
sub __destroy
  { my $elt= shift;
    return if( $XML::Twig::weakrefs);
    my $t= shift || $elt->twig; # optional argument, passed in recursive calls

    foreach( @{[$elt->_children]}) { $_->DESTROY( $t); }

    # the id reference needs to be destroyed
    # lots of tests to avoid warnings during the cleanup phase
    $elt->del_id( $t) if( $ID && $t && defined( $elt->{att}) && exists( $elt->{att}->{$ID}));
    if( $elt->{former}) { foreach (keys %{$elt->{former}}) { delete $elt->{former}->{$_}; } delete $elt->{former}; }
    # the loop must be on the keys of the element: a qw( keys %$elt) list
    # only holds the 2 strings 'keys' and '%$elt' and would empty nothing
    foreach (keys %$elt) { delete $elt->{$_}; }
    undef $elt;
  }
8106
|
|
|
|
|
|
|
|
8107
|
|
|
|
|
|
|
BEGIN {
    # installs the destructor of elements: none when weak references are in
    # use ($XML::Twig::weakrefs is true), __destroy otherwise
    sub set_destroy {
        if( $XML::Twig::weakrefs) {
            undef *DESTROY
        }
        else {
            *DESTROY= *__destroy;
        }
    }

    set_destroy();
}
8111
|
|
|
|
|
|
|
|
8112
|
|
|
|
|
|
|
# ignores the element
# the call is delegated to the ignore method of the twig of the element,
# which gets the element and the extra arguments
sub ignore {
    my( $elt, @args)= @_;
    return $elt->twig->ignore( $elt, @args);
}
8118
|
|
|
|
|
|
|
|
8119
|
|
|
|
|
|
|
BEGIN { |
8120
|
|
|
|
|
|
|
# current values of the output options, these are shared by all twigs
# (see global_state and set_global_state)
my $pretty                   = 0;
my $quote                    = '"';
my $INDENT                   = ' ';
my $empty_tag_style          = 0;
my $remove_cdata             = 0;
my $keep_encoding            = 0;
my $expand_external_entities = 0;
my $keep_atts_order          = 0;
my $do_not_escape_amp_in_atts= 0;
my $WRAP                     = '80';
my $REPLACED_ENTS            = qq{&<};

# the pretty print styles, numbered from 1 (0 is no pretty printing)
my( $NSGMLS, $NICE, $INDENTED, $INDENTEDCT, $INDENTEDC, $WRAPPED, $RECORD1, $RECORD2, $INDENTEDA)= ( 1..9);
my %KEEP_TEXT_TAG_ON_ONE_LINE= map { $_ => 1 } ( $INDENTED, $INDENTEDCT, $INDENTEDC, $INDENTEDA, $WRAPPED);
my %WRAPPED                  = map { $_ => 1 } ( $WRAPPED, $INDENTEDA, $INDENTEDC);

# name of the style => number of the style
my %pretty_print_style=
  ( none               => 0,            # no added \n
    nsgmls             => $NSGMLS,      # nsgmls-style, \n in tags
    # below this line styles are UNSAFE (the generated XML can be well-formed but invalid)
    nice               => $NICE,        # \n after open/close tags except when the
                                        # element starts with text
    indented           => $INDENTED,    # nice plus indented
    indented_close_tag => $INDENTEDCT,  # nice plus indented
    indented_c         => $INDENTEDC,   # slightly more compact than indented (closing
                                        # tags are on the same line)
    wrapped            => $WRAPPED,     # text is wrapped at column
    record_c           => $RECORD1,     # for record-like data (compact)
    record             => $RECORD2,     # for record-like data (not so compact)
    indented_a         => $INDENTEDA,   # nice, indented, and with attributes on separate
                                        # lines as the nsgmls style, as well as wrapped
                                        # lines - to make the xml friendly to line-oriented tools
    cvs                => $INDENTEDA,   # alias for indented_a
  );

# name of the empty tag style => number of the style
my( $HTML, $EXPAND)= ( 1..2);
my %empty_tag_style=
  ( normal => 0,
    html   => $HTML,
    xhtml  => $HTML,       # same style as html
    expand => $EXPAND,
  );

# name of the quote style => quote character
my %quote_style=
  ( double => '"',
    single => "'",
    # smart  => "smart",
  );

my $xml_space_preserve; # set when an element includes xml:space="preserve"

my $output_filter;      # filters the entire output (including < and >)
my $output_text_filter; # filters only the text part (tag names, attributes, pcdata)

my $replaced_ents= $REPLACED_ENTS;
8175
|
|
|
|
|
|
|
|
8176
|
|
|
|
|
|
|
|
8177
|
|
|
|
|
|
|
# returns those pesky "global" variables so you can switch between twigs
# the result is a new hashref, that can be given back to set_global_state
sub global_state { ## no critic (Subroutines::ProhibitNestedSubs);
    my %state=
      ( pretty                    => $pretty,
        quote                     => $quote,
        indent                    => $INDENT,
        empty_tag_style           => $empty_tag_style,
        remove_cdata              => $remove_cdata,
        keep_encoding             => $keep_encoding,
        expand_external_entities  => $expand_external_entities,
        output_filter             => $output_filter,
        output_text_filter        => $output_text_filter,
        keep_atts_order           => $keep_atts_order,
        do_not_escape_amp_in_atts => $do_not_escape_amp_in_atts,
        wrap                      => $WRAP,
        replaced_ents             => $replaced_ents,
      );
    return \%state;
}
8195
|
|
|
|
|
|
|
|
8196
|
|
|
|
|
|
|
# restores the global variables
# the argument is a hashref with the same keys as the one returned by
# global_state; every variable is set, from the value found in the hash
sub set_global_state {
    my( $state)= @_;

    $pretty                   = $state->{pretty};
    $quote                    = $state->{quote};
    $INDENT                   = $state->{indent};
    $empty_tag_style          = $state->{empty_tag_style};
    $remove_cdata             = $state->{remove_cdata};
    $keep_encoding            = $state->{keep_encoding};
    $expand_external_entities = $state->{expand_external_entities};
    $output_filter            = $state->{output_filter};
    $output_text_filter       = $state->{output_text_filter};
    $keep_atts_order          = $state->{keep_atts_order};
    $do_not_escape_amp_in_atts= $state->{do_not_escape_amp_in_atts};
    $WRAP                     = $state->{wrap};
    $replaced_ents            = $state->{replaced_ents};
}
8213
|
|
|
|
|
|
|
|
8214
|
|
|
|
|
|
|
# sets global state to defaults
# NOTE(review): indent and wrap are set from the current values of $INDENT
# and $WRAP, so a previous set_indent or set_wrap is not undone here -
# confirm this is intended
sub init_global_state {
    my %defaults=
      ( pretty                    => 0,
        quote                     => '"',
        indent                    => $INDENT,
        empty_tag_style           => 0,
        remove_cdata              => 0,
        keep_encoding             => 0,
        expand_external_entities  => 0,
        output_filter             => undef,
        output_text_filter        => undef,
        keep_atts_order           => undef,
        do_not_escape_amp_in_atts => 0,
        wrap                      => $WRAP,
        replaced_ents             => $REPLACED_ENTS,
      );
    set_global_state( \%defaults);
}
8232
|
|
|
|
|
|
|
|
8233
|
|
|
|
|
|
|
|
8234
|
|
|
|
|
|
|
# set the pretty_print style (in $pretty) and returns the old one
# can be called from outside the package with 2 arguments (elt, style)
# or from inside with only one argument (style)
# the style can be either a string (one of the keys of %pretty_print_style)
# or a number (presumably an old value saved)
# croaks if the style is not valid, or if the style wraps the text and
# Text::Wrap cannot be loaded
sub set_pretty_print
  { my $style= lc( defined $_[1] ? $_[1] : $_[0]); # so we cover both cases
    my $old_pretty= $pretty;
    if( $style=~ /^\d+$/)
      { # a number is valid if it is the value of one of the named styles
        # (the number of names cannot be used as a limit, as some of the
        # names are aliases the limit would be too high)
        croak "invalid pretty print style $style" unless( grep { $_ == $style } values %pretty_print_style);
        $pretty= $style;
      }
    else
      { croak "invalid pretty print style '$style'" unless( exists $pretty_print_style{$style});
        $pretty= $pretty_print_style{$style};
      }
    if( $WRAPPED{$pretty} )
      { XML::Twig::_use( 'Text::Wrap') or croak( "Text::Wrap not available, cannot use style $style"); }
    return $old_pretty;
  }
8254
|
|
|
|
|
|
|
|
8255
|
|
|
|
|
|
|
# Returns the current pretty print style (a number).
sub _pretty_print {
    return $pretty;
}
8256
|
|
|
|
|
|
|
|
8257
|
|
|
|
|
|
|
# set the empty tag style (in $empty_tag_style) and returns the old one
# can be called from outside the package with 2 arguments (elt, style)
# or from inside with only one argument (style)
# the style can be either a string (one of the keys of %empty_tag_style)
# or a number (presumably an old value saved)
# croaks if the style is not valid
sub set_empty_tag_style
  { my $style= lc( defined $_[1] ? $_[1] : $_[0]); # so we cover both cases
    my $old_style= $empty_tag_style;
    if( $style=~ /^\d+$/)
      { # a number is valid if it is the value of one of the named styles
        # (the number of names cannot be used as a limit, as html and xhtml
        # are the same style the limit would be too high)
        croak "invalid empty tag style $style"
          unless( grep { $_ == $style } values %empty_tag_style);
        $empty_tag_style= $style;
      }
    else
      { croak "invalid empty tag style '$style'"
          unless( exists $empty_tag_style{$style});
        $empty_tag_style= $empty_tag_style{$style};
      }
    return $old_style;
  }
8277
|
|
|
|
|
|
|
|
8278
|
|
|
|
|
|
|
# Returns the names of the pretty print styles, sorted by style number then
# by name.
sub _pretty_print_styles {
    my $by_number_then_name= sub { $pretty_print_style{$a} <=> $pretty_print_style{$b} || $a cmp $b };
    return sort $by_number_then_name keys %pretty_print_style;
}
8280
|
|
|
|
|
|
|
|
8281
|
|
|
|
|
|
|
# Sets the quote used around attribute values and returns the old one.
# The style (a key of %quote_style) is the second argument, or the first one
# when there is no (true) second argument. Croaks if the style is not valid.
sub set_quote {
    my $style    = $_[1] || $_[0];
    my $old_quote= $quote;

    unless( exists $quote_style{$style}) {
        croak "invalid quote '$style'";
    }
    $quote= $quote_style{$style};

    return $old_quote;
}
8288
|
|
|
|
|
|
|
|
8289
|
|
|
|
|
|
|
# Sets the remove_cdata option and returns the old value.
# The new value is the second argument if it is defined, the first otherwise.
sub set_remove_cdata {
    my( $previous, $requested)= ( $remove_cdata, defined $_[1] ? $_[1] : $_[0]);
    $remove_cdata= $requested;
    return $previous;
}
8295
|
|
|
|
|
|
|
|
8296
|
|
|
|
|
|
|
|
8297
|
|
|
|
|
|
|
# Sets the string used to indent the output and returns the old one.
# The new value is the second argument if it is defined, the first otherwise.
sub set_indent {
    my( $previous, $requested)= ( $INDENT, defined $_[1] ? $_[1] : $_[0]);
    $INDENT= $requested;
    return $previous;
}
8303
|
|
|
|
|
|
|
|
8304
|
|
|
|
|
|
|
# Sets the wrap option ($WRAP) and returns the old value.
# The new value is the second argument if it is defined, the first otherwise.
sub set_wrap {
    my( $previous, $requested)= ( $WRAP, defined $_[1] ? $_[1] : $_[0]);
    $WRAP= $requested;
    return $previous;
}
8310
|
|
|
|
|
|
|
|
8311
|
|
|
|
|
|
|
|
8312
|
|
|
|
|
|
|
# Sets the keep_encoding option and returns the old value.
# The new value is the second argument if it is defined, the first otherwise.
sub set_keep_encoding {
    my( $previous, $requested)= ( $keep_encoding, defined $_[1] ? $_[1] : $_[0]);
    $keep_encoding= $requested;
    return $previous;
}
8318
|
|
|
|
|
|
|
|
8319
|
|
|
|
|
|
|
# Sets the replaced_ents option and returns the old value.
# The new value is the second argument if it is defined, the first otherwise.
sub set_replaced_ents {
    my( $previous, $requested)= ( $replaced_ents, defined $_[1] ? $_[1] : $_[0]);
    $replaced_ents= $requested;
    return $previous;
}
8325
|
|
|
|
|
|
|
|
8326
|
|
|
|
|
|
|
# Sets the replaced characters to & and < only, returns the old value.
sub do_not_escape_gt {
    my $previous= $replaced_ents;
    $replaced_ents= q{&<}; # & needs to be first
    return $previous;
}
8331
|
|
|
|
|
|
|
|
8332
|
|
|
|
|
|
|
# Sets the replaced characters to &, < and >, returns the old value.
sub escape_gt {
    my $previous= $replaced_ents;
    $replaced_ents= qq{&<>}; # & needs to be first
    return $previous;
}
8337
|
|
|
|
|
|
|
|
8338
|
|
|
|
|
|
|
# Returns the keep_encoding option, so I can use elsewhere in the module.
sub _keep_encoding {
    return $keep_encoding;
}
8339
|
|
|
|
|
|
|
|
8340
|
|
|
|
|
|
|
# Sets the do_not_escape_amp_in_atts option and returns the old value.
# The new value is the second argument if it is defined, the first otherwise.
sub set_do_not_escape_amp_in_atts {
    my( $previous, $requested)= ( $do_not_escape_amp_in_atts, defined $_[1] ? $_[1] : $_[0]);
    $do_not_escape_amp_in_atts= $requested;
    return $previous;
}
8346
|
|
|
|
|
|
|
|
8347
|
|
|
|
|
|
|
# Returns the current output filter (undef if there is none).
sub output_filter {
    return $output_filter;
}
8348
|
|
|
|
|
|
|
# Returns the current output text filter (undef if there is none).
sub output_text_filter {
    return $output_text_filter;
}
8349
|
|
|
|
|
|
|
|
8350
|
|
|
|
|
|
|
# Sets the filter applied to the entire output and returns the old one.
# The filter is either false (no filter), a code reference, 'latin1' or
# a key of %XML::Twig::filter; anything else croaks, and the current filter
# is then left as is.
sub set_output_filter {
    my $requested= defined $_[1] ? $_[1] : $_[0]; # can be called in object/non-object mode

    # if called in object mode with no argument, the filter is undefined
    if( isa( $requested, 'XML::Twig::Elt') || isa( $requested, 'XML::Twig')) { undef $requested; }

    my $previous= $output_filter;

    if( !$requested || isa( $requested, 'CODE')) {
        $output_filter= $requested;
    }
    elsif( $requested eq 'latin1') {
        $output_filter= XML::Twig::latin1();
    }
    elsif( $XML::Twig::filter{$requested}) {
        $output_filter= $XML::Twig::filter{$requested};
    }
    else {
        croak "invalid output filter '$requested'";
    }

    return $previous;
}
8367
|
|
|
|
|
|
|
|
8368
|
|
|
|
|
|
|
# Sets the filter applied to the text part of the output and returns the old
# one.
# The filter is either false (no filter), a code reference, 'latin1' or
# a key of %XML::Twig::filter; anything else croaks, and the current filter
# is then left as is.
sub set_output_text_filter {
    my $requested= defined $_[1] ? $_[1] : $_[0]; # can be called in object/non-object mode

    # if called in object mode with no argument, the filter is undefined
    if( isa( $requested, 'XML::Twig::Elt') || isa( $requested, 'XML::Twig')) { undef $requested; }

    my $previous= $output_text_filter;

    if( !$requested || isa( $requested, 'CODE')) {
        $output_text_filter= $requested;
    }
    elsif( $requested eq 'latin1') {
        $output_text_filter= XML::Twig::latin1();
    }
    elsif( $XML::Twig::filter{$requested}) {
        $output_text_filter= $XML::Twig::filter{$requested};
    }
    else {
        croak "invalid output text filter '$requested'";
    }

    return $previous;
}
8385
|
|
|
|
|
|
|
|
8386
|
|
|
|
|
|
|
# Sets the expand_external_entities option and returns the old value.
# The new value is the second argument if it is defined, the first otherwise.
sub set_expand_external_entities {
    my( $previous, $requested)= ( $expand_external_entities, defined $_[1] ? $_[1] : $_[0]);
    $expand_external_entities= $requested;
    return $previous;
}
8392
|
|
|
|
|
|
|
|
8393
|
|
|
|
|
|
|
# Sets the keep_atts_order option and returns the old value.
# The new value is the second argument if it is defined, the first otherwise.
sub set_keep_atts_order {
    my( $previous, $requested)= ( $keep_atts_order, defined $_[1] ? $_[1] : $_[0]);
    $keep_atts_order= $requested;
    return $previous;
}
8400
|
|
|
|
|
|
|
|
8401
|
|
|
|
|
|
|
# Returns the keep_atts_order option, so I can use elsewhere in the module.
sub keep_atts_order {
    return $keep_atts_order;
}
8402
|
|
|
|
|
|
|
|
8403
|
|
|
|
|
|
|
# tag => 1 for the elements listed here as HTML empty elements,
# filled at compile time
my %html_empty_elt;
BEGIN {
    $html_empty_elt{$_}= 1 foreach qw( base meta link hr br param img area input col);
}
8405
|
|
|
|
|
|
|
|
8406
|
|
|
|
|
|
|
# Returns the start tag of an element as a string, preceded by the element's
# extra data (if any) and by the line break / indentation required by the
# current pretty print style.
#   $elt    - the element
#   $option - optional hashref; when escape_gt is true, '>' is also escaped
#             in attribute values
# Returns nothing for elements whose gi index is below $XML::Twig::SPECIAL_GI.
# For private elements (name starting with '#', except '#default:') the tag
# itself is empty, only extra data and formatting are returned.
sub start_tag
  { my( $elt, $option)= @_;

    return if( $elt->{gi} < $XML::Twig::SPECIAL_GI);

    my $extra_data= $elt->{extra_data} || '';

    my $gi= $XML::Twig::index2gi[$elt->{'gi'}];
    my $att= $elt->{att};

    # restore the prefix used in the original document, if prefixes were mapped
    my $ns_map= $att ? $att->{'#original_gi'} : '';
    if( $ns_map) { $gi= _restore_original_prefix( $ns_map, $gi); }
    $gi=~ s{^#default:}{}; # remove default prefix

    if( $output_text_filter) { $gi= $output_text_filter->( $gi); }

    # get the attribute and their values
    my $att_sep = $pretty==$NSGMLS    ? "\n"
                : $pretty==$INDENTEDA ? "\n" . $INDENT x ($elt->level+1) . ' '
                :                       ' '
                ;

    my $replace_in_att_value= $replaced_ents . "$quote\t\r\n";
    if( $option->{escape_gt} && $replaced_ents !~ m{>}) { $replace_in_att_value.= '>'; }

    my $tag;
    # private attributes (name starting with '#', except '#default:') are not output
    my @att_names= grep { !( $_=~ m{^#(?!default:)} ) } $keep_atts_order ? keys %{$att} : sort keys %{$att};
    if( @att_names)
      { my $atts= join $att_sep, map { my $output_att_name= $ns_map ? _restore_original_prefix( $ns_map, $_) : $_;
                                       if( $output_text_filter)
                                         { $output_att_name= $output_text_filter->( $output_att_name); }
                                       $output_att_name . '=' . $quote . _att_xml_string( $att->{$_}, $replace_in_att_value) . $quote
                                     }
                                     @att_names
                                ;
        # a single attribute stays on the same line as the tag name
        if( $pretty==$INDENTEDA && @att_names == 1) { $att_sep= ' '; }
        $tag= "<$gi$att_sep$atts";
      }
    else
      { $tag= "<$gi"; }

    $tag .= "\n" if($pretty==$NSGMLS);

    # force empty if suitable HTML tag, otherwise use the value from the input tree
    if( ($empty_tag_style eq $HTML) && !$elt->{first_child} && !$elt->{extra_data_before_end_tag} && $html_empty_elt{$gi})
      { $elt->{empty}= 1; }

    # the test is on $elt->{empty}, the same flag end_tag uses, so that both tags stay consistent
    $tag .= (!$elt->{empty} || $elt->{extra_data_before_end_tag}) ? '>'             # element has content
          : (($empty_tag_style eq $HTML) && $html_empty_elt{$gi}) ? ' />'           # html empty element
                                                                                    # cvs-friendly format
          : ( $pretty == $INDENTEDA && @att_names > 1)            ? "\n" . $INDENT x $elt->level . "/>"
          : ( $pretty == $INDENTEDA && @att_names == 1)           ? " />"
          : $empty_tag_style                                      ? "></$gi>"       # $empty_tag_style is $HTML or $EXPAND
          :                                                         '/>'
          ;

    # private elements do not generate a tag
    if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )) { $tag= ''; }

    unless( $pretty) { return defined( $extra_data) ? $extra_data . $tag : $tag; }

    my $prefix='';
    my $return='';   # '' or \n is to be printed before the tag
    my $indent=0;    # number of indents before the tag

    if( $pretty==$RECORD1)
      { my $level= $elt->level;
        $return= "\n" if( $level < 2);
        $indent= 1 if( $level == 1);
      }

    elsif( $pretty==$RECORD2)
      { $return= "\n";
        $indent= $elt->level;
      }

    elsif( $pretty==$NICE)
      { my $parent= $elt->{parent};
        unless( !$parent || $parent->{contains_text})
          { $return= "\n"; }
        $elt->{contains_text}= 1 if( ($parent && $parent->{contains_text})
                                     || $elt->contains_text);
      }

    elsif( $KEEP_TEXT_TAG_ON_ONE_LINE{$pretty})
      { my $parent= $elt->{parent};
        unless( !$parent || $parent->{contains_text})
          { $return= "\n";
            $indent= $elt->level;
          }
        $elt->{contains_text}= 1 if( ($parent && $parent->{contains_text})
                                     || $elt->contains_text);
      }

    if( $return || $indent)
      { # check for elements in which spaces should be kept
        my $t= $elt->twig;
        return $extra_data . $tag if( $xml_space_preserve);
        if( $t && $t->{twig_keep_spaces_in})
          { foreach my $ancestor ($elt->ancestors)
              { return $extra_data . $tag if( $t->{twig_keep_spaces_in}->{$XML::Twig::index2gi[$ancestor->{'gi'}]}) }
          }

        $prefix= $INDENT x $indent;
        if( $extra_data)
          { $extra_data=~ s{\s+$}{};
            $extra_data=~ s{^\s+}{};
            $extra_data= $prefix . $extra_data . $return;
          }
      }

    return $return . $extra_data . $prefix . $tag;
  }
8527
|
|
|
|
|
|
|
|
8528
|
|
|
|
|
|
|
# Returns the end tag of an element as a string, preceded by the data stored
# in extra_data_before_end_tag (if any) and by the line break / indentation
# required by the current pretty print style.
# Returns '' for elements whose gi index is below $XML::Twig::SPECIAL_GI and
# for empty elements that have no extra data before the end tag.
sub end_tag
  { my $elt= shift;
    return '' if(    ($elt->{gi}<$XML::Twig::SPECIAL_GI)
                  || ($elt->{'empty'} && !$elt->{extra_data_before_end_tag})
                );
    my $tag= "<";
    my $gi= $XML::Twig::index2gi[$elt->{'gi'}];

    if( my $map= $elt->{'att'}->{'#original_gi'}) { $gi= _restore_original_prefix( $map, $gi); }
    $gi=~ s{^#default:}{}; # remove default prefix

    # filter the processed name (as start_tag does), so both tags carry the same name
    if( $output_text_filter) { $gi= $output_text_filter->( $gi); }
    $tag .= "/$gi>";

    $tag = ($elt->{extra_data_before_end_tag} || '') . $tag;

    # private elements do not generate a tag
    if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )) { $tag= ''; }

    return $tag unless $pretty;

    my $prefix='';
    my $return=0;    # 1 if a \n is to be printed before the tag
    my $indent=0;    # number of indents before the tag

    if( $pretty==$RECORD1)
      { $return= 1 if( $elt->level == 0);
      }

    elsif( $pretty==$RECORD2)
      { unless( $elt->contains_text)
          { $return= 1 ;
            $indent= $elt->level;
          }
      }

    elsif( $pretty==$NICE)
      { my $parent= $elt->{parent};
        if(    ( !$parent || !$parent->{contains_text})
            && ( !$elt->{contains_text}
                 && ($elt->{has_flushed_child} || $elt->{first_child})
               )
          )
          { $return= 1; }
      }

    elsif( $KEEP_TEXT_TAG_ON_ONE_LINE{$pretty})
      { my $parent= $elt->{parent};
        if(    ( !$parent || !$parent->{contains_text})
            && ( !$elt->{contains_text}
                 && ($elt->{has_flushed_child} || $elt->{first_child})
               )
          )
          { $return= 1;
            $indent= $elt->level;
          }
      }

    if( $return || $indent)
      { # check for elements in which spaces should be kept
        my $t= $elt->twig;
        return $tag if( $xml_space_preserve);
        if( $t && $t->{twig_keep_spaces_in})
          { foreach my $ancestor ($elt, $elt->ancestors)
              { return $tag if( $t->{twig_keep_spaces_in}->{$XML::Twig::index2gi[$ancestor->{'gi'}]}) }
          }

        if( $return) { $prefix= ($pretty== $INDENTEDCT) ? "\n$INDENT" : "\n"; }
        $prefix.= $INDENT x $indent;
      }

    # add a \n at the end of the document (after the root element)
    $tag .= "\n" unless( $elt->{parent});

    return $prefix . $tag;
  }
8603
|
|
|
|
|
|
|
|
8604
|
|
|
|
|
|
|
# Returns $name with its namespace prefix replaced by the one found in $map.
#   $map  - hashref, looked up with the current prefix of $name; the value is
#           used as the replacement prefix ('#default' means: no prefix)
#   $name - element or attribute name
# The name is returned unchanged when $map has no (true) entry for its prefix.
sub _restore_original_prefix
  { my( $map, $name)= @_;
    my $prefix= _ns_prefix( $name);
    if( my $original_prefix= $map->{$prefix})
      { # \Q...\E: the prefix is data, not a pattern
        if( $original_prefix eq '#default')
          { $name=~ s{^\Q$prefix\E:}{}; }
        else
          { $name=~ s{^\Q$prefix\E(?=:)}{$original_prefix}; }
      }
    return $name;
  }
8615
|
|
|
|
|
|
|
|
8616
|
|
|
|
|
|
|
# buffer used to hold the text to print/sprint, to avoid passing it back and forth between methods |
8617
|
|
|
|
|
|
|
my @sprint; |
8618
|
|
|
|
|
|
|
|
8619
|
|
|
|
|
|
|
# $elt is an element to print |
8620
|
|
|
|
|
|
|
# $fh is an optional filehandle to print to |
8621
|
|
|
|
|
|
|
# $pretty is an optional value, if true a \n is printed after the < of the |
8622
|
|
|
|
|
|
|
# opening tag |
8623
|
|
|
|
|
|
|
# Prints the element (the result of sprint called with the remaining
# arguments). If the first argument is a GLOB or an IO::Scalar it is used as
# the output filehandle: it is selected for the duration of the print and the
# previously selected handle is restored afterwards.
sub print {
    my $elt = shift;

    my $fh;
    if( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar')) { $fh= shift; }

    my $old_select= defined $fh ? select $fh : undef;
    print $elt->sprint( @_);
    select $old_select if( defined $old_select);
}
8631
|
|
|
|
|
|
|
|
8632
|
|
|
|
|
|
|
|
8633
|
|
|
|
|
|
|
# those next 2 methods need to be refactored, they are copies of the same methods in XML::Twig |
8634
|
|
|
|
|
|
|
# Prints the element to the file named $filename (created or truncated).
# The file is opened in '>' mode when $keep_encoding is set, in '>:utf8' mode
# otherwise. Remaining arguments are passed to print. Returns the element.
sub print_to_file {
    my $elt      = shift;
    my $filename = shift;

    my $mode = $keep_encoding ? '>' : '>:utf8';  # >= perl 5.8
    my $out_fh;
    open( $out_fh, $mode, $filename) or _croak( "cannot create file $filename: $!");  # >= perl 5.8

    $elt->print( $out_fh, @_);
    close $out_fh;

    return $elt;
}
8644
|
|
|
|
|
|
|
|
8645
|
|
|
|
|
|
|
# probably only works on *nix (at least the chmod bit) |
8646
|
|
|
|
|
|
|
# first print to a temporary file, then rename that file to the desired file name, then change permissions |
8647
|
|
|
|
|
|
|
# to the original file permissions (or to the current umask) |
8648
|
|
|
|
|
|
|
# Prints the element to $filename through a temporary file: the output goes to
# a temporary file created in the same directory, which is then renamed to
# $filename. The resulting file gets the permissions of the file it replaces,
# or the default creation permissions (0666 minus the umask) for a new file.
# Remaining arguments are passed to print_to_file. Returns the element.
# Croaks if File::Temp or File::Basename cannot be loaded, or if the rename
# fails (the temporary file is then removed).
sub safe_print_to_file
  { my( $elt, $filename)= (shift, shift);
    # ~umask() alone would also set the setuid/setgid/sticky and execute bits
    my $perm= -f $filename ? (stat $filename)[2] & 07777 : 0666 & ~umask() ;
    XML::Twig::_use( 'File::Temp') || croak "need File::Temp to use safe_print_to_file\n";
    XML::Twig::_use( 'File::Basename') || croak "need File::Basename to use safe_print_to_file\n";
    my $tmpdir= File::Basename::dirname( $filename);
    my( $fh, $tmpfilename) = File::Temp::tempfile( DIR => $tmpdir);
    close $fh; # print_to_file re-opens the temporary file by name
    $elt->print_to_file( $tmpfilename, @_);
    unless( rename( $tmpfilename, $filename))
      { my $rename_error= $!; # saved: unlink may overwrite $!
        unlink $tmpfilename;
        _croak( "cannot move temporary file to $filename: $rename_error");
      }
    chmod $perm, $filename;
    return $elt;
  }
8660
|
|
|
|
|
|
|
|
8661
|
|
|
|
|
|
|
|
8662
|
|
|
|
|
|
|
# same as print but does not output the start tag if the element |
8663
|
|
|
|
|
|
|
# is marked as flushed |
8664
|
|
|
|
|
|
|
# Flushes the twig up to an element: the first argument if it is an
# XML::Twig::Elt, the element itself otherwise. Remaining arguments are
# passed to the twig's flush_up_to method.
sub flush {
    my $elt = shift;

    my $up_to = $elt;
    if( $_[0] && isa( $_[0], 'XML::Twig::Elt')) { $up_to= shift; }

    $elt->twig->flush_up_to( $up_to, @_);
}
8669
|
|
|
|
|
|
|
# Purges the twig up to an element: the first argument if it is an
# XML::Twig::Elt, the element itself otherwise. Remaining arguments are
# passed to the twig's purge_up_to method.
sub purge {
    my $elt = shift;

    my $up_to = $elt;
    if( $_[0] && isa( $_[0], 'XML::Twig::Elt')) { $up_to= shift; }

    $elt->twig->purge_up_to( $up_to, @_);
}
8674
|
|
|
|
|
|
|
|
8675
|
|
|
|
|
|
|
# Prints the element through __flush, after setting up the output context.
# Arguments, all optional:
#   - a filehandle (GLOB or IO::Scalar), selected during the flush
#   - a pretty print style, set during the flush
# The previous filehandle and pretty print style are restored afterwards.
sub _flush {
    my $elt = shift;

    my $fh;
    if( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar')) { $fh= shift; }
    my $old_select= defined $fh ? select $fh : undef;

    my $style= shift;
    my $old_pretty= defined $style ? set_pretty_print( $style) : undef;

    # spaces are preserved if the element inherits xml:space="preserve"
    if( ($elt->inherit_att( 'xml:space') || '') eq 'preserve') { $xml_space_preserve= 1; }

    $elt->__flush();

    $xml_space_preserve= 0;

    select $old_select if( defined $old_select);
    set_pretty_print( $old_pretty) if( defined $old_pretty);
}
8692
|
|
|
|
|
|
|
|
8693
|
|
|
|
|
|
|
# Prints the parts of the element that have not been flushed yet.
# For a regular element: the start tag (unless the element is marked as
# flushed), all the children, then the end tag (unless it has already been
# flushed). The flushed, end_tag_flushed, has_flushed_child flags are updated.
# For a text or special element: its text, passed through $output_filter if
# there is one; a pcdata or cdata element marks its parent as containing text.
sub __flush {
    my $elt = shift;

    if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) {
        my $preserve= ($elt->{'att'}->{'xml:space'} || '') eq 'preserve';
        $xml_space_preserve++ if $preserve;

        unless( $elt->{'flushed'}) {
            print $elt->start_tag();
        }

        # flush the children (the list is built before any child is flushed)
        my @children;
        for( my $child= $elt->{first_child}; $child; $child= $child->{next_sibling}) {
            push @children, $child;
        }
        foreach my $child (@children) {
            $child->_flush( $pretty);
            $child->{'flushed'}= 1;
        }

        unless( $elt->{end_tag_flushed}) {
            print $elt->end_tag;
            $elt->{end_tag_flushed}= 1;
            $elt->{'flushed'}= 1;
        }

        $xml_space_preserve-- if $preserve;

        # used for pretty printing
        if( my $parent= $elt->{parent}) { $parent->{has_flushed_child}= 1; }
    }
    else { # text or special element
        my $text;
        if( exists $elt->{'pcdata'}) {
            $text= $elt->pcdata_xml_string;
            if( my $parent= $elt->{parent}) { $parent->{contains_text}= 1; }
        }
        elsif( exists $elt->{'cdata'}) {
            $text= $elt->cdata_string;
            if( my $parent= $elt->{parent}) { $parent->{contains_text}= 1; }
        }
        elsif( exists $elt->{'target'})  { $text= $elt->pi_string; }
        elsif( exists $elt->{'comment'}) { $text= $elt->comment_string; }
        elsif( exists $elt->{'ent'})     { $text= $elt->ent_string; }

        print $output_filter ? $output_filter->( $text) : $text;
    }
}
8735
|
|
|
|
|
|
|
|
8736
|
|
|
|
|
|
|
|
8737
|
|
|
|
|
|
|
# Returns the text of the element, XML-escaped, without any tag.
# With the 'no_recurse' option (case-insensitive) the result of xml_text_only
# is returned instead. For a regular element the xml_text of all the children
# is concatenated; pcdata and cdata go through $output_filter if there is one.
sub xml_text {
    my( $elt, @options) = @_;

    if( grep { lc( $_) eq 'no_recurse' } @options) { return $elt->xml_text_only; }

    my $xml_text = '';

    if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) {
        for( my $child= $elt->{first_child} || ''; $child; $child= $child->{next_sibling}) {
            $xml_text .= $child->xml_text;
        }
    }
    elsif( exists $elt->{'pcdata'}) {
        $xml_text .= $output_filter ? $output_filter->($elt->pcdata_xml_string)
                                    : $elt->pcdata_xml_string;
    }
    elsif( exists $elt->{'cdata'}) {
        $xml_text .= $output_filter ? $output_filter->($elt->cdata_string)
                                    : $elt->cdata_string;
    }
    elsif( exists $elt->{'ent'}) {
        $xml_text .= $elt->ent_string;
    }

    return $xml_text;
}
8761
|
|
|
|
|
|
|
|
8762
|
|
|
|
|
|
|
# Returns the concatenated xml_text of the children that are either text
# (is_text) or entities; other children are ignored.
sub xml_text_only {
    my @texts = map  { $_->xml_text }
                grep { $_->is_text || (exists $_->{'ent'}) }
                $_[0]->_children;
    return join '', @texts;
}
8764
|
|
|
|
|
|
|
|
8765
|
|
|
|
|
|
|
# same as print, except that it does not print but rather returns the string |
8766
|
|
|
|
|
|
|
# if the second parameter is set then only the content is returned, not the |
8767
|
|
|
|
|
|
|
# start and end tags of the element (but the tags of the included elements are |
8768
|
|
|
|
|
|
|
# returned) |
8769
|
|
|
|
|
|
|
|
8770
|
|
|
|
|
|
|
# Returns the element as a string.
# Options can be given either as a hashref (PrettyPrint and EmptyTags are
# used, after normalization of the option names) or, the "old" way, as a list
# in which pretty print and empty tag style names are recognized. The
# remaining arguments are passed to _sprint. Styles changed through the
# options are restored before returning.
sub sprint {
    my $elt = shift;
    my( $old_pretty, $old_empty_tag_style);

    if( $_[0]) {
        if( isa( $_[0], 'HASH')) {
            # "proper" way, using a hashref for options
            my %args= XML::Twig::_normalize_args( %{shift()});
            $old_pretty          = set_pretty_print( $args{PrettyPrint})  if( defined $args{PrettyPrint});
            $old_empty_tag_style = set_empty_tag_style( $args{EmptyTags}) if( defined $args{EmptyTags});
        }
        else {
            # "old" way, just using the option name
            my @other_opt;
            foreach my $opt (@_) {
                if( exists $pretty_print_style{$opt}) {
                    $old_pretty = set_pretty_print( $opt);
                }
                elsif( exists $empty_tag_style{$opt}) {
                    $old_empty_tag_style = set_empty_tag_style( $opt);
                }
                else {
                    push @other_opt, $opt;
                }
            }
            @_= @other_opt;
        }
    }

    if( ($elt->inherit_att( 'xml:space') || '') eq 'preserve') { $xml_space_preserve= 1; }

    # _sprint accumulates the pieces of output in @sprint
    @sprint=();
    $elt->_sprint( @_);
    my $sprint= join( '', @sprint);
    if( $output_filter) { $sprint= $output_filter->( $sprint); }

    my $wrap_style= ($pretty== $WRAPPED) || ($pretty==$INDENTEDC);
    if( $wrap_style && !$xml_space_preserve) {
        $sprint= _wrap_text( $sprint);
    }
    $xml_space_preserve= 0;

    if( defined $old_pretty)          { set_pretty_print( $old_pretty); }
    if( defined $old_empty_tag_style) { set_empty_tag_style( $old_empty_tag_style); }

    return $sprint;
}
8810
|
|
|
|
|
|
|
|
8811
|
|
|
|
|
|
|
# Returns $string with each of its lines wrapped by Text::Wrap::wrap;
# continuation lines are indented by the line's own leading whitespace plus
# $INDENT. Every line of the result ends with a \n.
# Returns '' for an empty string.
sub _wrap_text
  { my( $string)= @_;
    my $wrapped= '';
    foreach my $line (split /\n/, $string)
      { my( $initial_indent)= $line=~ m{^(\s*)};
        my $wrapped_line= Text::Wrap::wrap( '', $initial_indent . $INDENT, $line) . "\n";

        # fix glitch with Text::wrap when the first line is long and does not include spaces
        # the first line ends up being too short by 2 chars, but we'll have to live with it!
        $wrapped_line=~ s{^ +\n }{}s; # this prefix needs to be removed

        $wrapped .= $wrapped_line;
      }

    return $wrapped;
  }
8827
|
|
|
|
|
|
|
|
8828
|
|
|
|
|
|
|
|
8829
|
|
|
|
|
|
|
# Appends the string form of the element to @sprint.
#   $no_tag - if true the start and end tags of the element itself are not
#             output (those of its descendants are)
# For a regular element: start tag, children, end tag. For other elements:
# their extra data (if any), then their text; PIs and comments are put on a
# new, indented, line when the pretty print style is at least $INDENTED and
# the parent is not marked as containing text.
sub _sprint {
    my $elt    = shift;
    my $no_tag = shift || 0;

    if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) {
        my $preserve= ($elt->{'att'}->{'xml:space'} || '') eq 'preserve';
        $xml_space_preserve++ if $preserve;

        push @sprint, $elt->start_tag unless( $no_tag);

        # sprint the children
        for( my $child= $elt->{first_child}; $child; $child= $child->{next_sibling}) {
            $child->_sprint;
        }

        push @sprint, $elt->end_tag unless( $no_tag);
        $xml_space_preserve-- if $preserve;
    }
    else {
        # in case there's some comments or PI's piggybacking
        push @sprint, $elt->{extra_data} if( $elt->{extra_data});

        if( exists $elt->{'pcdata'}) {
            push @sprint, $elt->pcdata_xml_string;
        }
        elsif( exists $elt->{'cdata'}) {
            push @sprint, $elt->cdata_string;
        }
        elsif( (exists $elt->{'target'}) || (exists $elt->{'comment'})) {
            if( ($pretty >= $INDENTED) && !$elt->{parent}->{contains_text}) {
                push @sprint, "\n" . $INDENT x $elt->level;
            }
            push @sprint, (exists $elt->{'target'}) ? $elt->pi_string : $elt->comment_string;
        }
        elsif( exists $elt->{'ent'}) {
            push @sprint, $elt->ent_string;
        }
    }

    return;
}
8865
|
|
|
|
|
|
|
|
8866
|
|
|
|
|
|
|
# just a shortcut to $elt->sprint( 1) |
8867
|
|
|
|
|
|
|
# Shortcut for sprint with a true second parameter; an options hashref, if
# given as first argument, is passed on to sprint before it.
sub xml_string {
    my $elt = shift;

    if( isa( $_[0], 'HASH')) { return $elt->sprint( shift(), 1); }
    return $elt->sprint( 1);
}
8871
|
|
|
|
|
|
|
|
8872
|
|
|
|
|
|
|
# Returns the pcdata of the element as it should appear in XML output:
# characters in $replaced_ents are replaced by the matching entity (unless
# $keep_encoding is set or the element is marked asis) and ']]>' is output as
# ']]&gt;'. Extra data recorded in extra_data_in_pcdata is inserted back at
# its offset, without being escaped. The result goes through
# $output_text_filter if there is one.
# Returns '' if the element has no (defined) pcdata.
sub pcdata_xml_string
  { my $elt= shift;
    if( defined( my $string= $elt->{pcdata}) )
      {
        if( ! $elt->{extra_data_in_pcdata})
          {
            $string=~ s/([$replaced_ents])/$XML::Twig::base_ent{$1}/g unless( !$replaced_ents || $keep_encoding || $elt->{asis});
            $string=~ s{\Q]]>}{]]&gt;}g; # ]]> is not allowed in character data
          }
        else
          { _gen_mark( $string); # used by _(un)?protect_extra_data
            # insert from the end so that the remaining offsets stay valid
            foreach my $data (reverse @{$elt->{extra_data_in_pcdata}})
              { if( $keep_encoding || $elt->{asis})
                  { substr( $string, $data->{offset}, 0, $data->{text}); }
                else
                  { substr( $string, $data->{offset}, 0, _protect_extra_data( $data->{text})); }
              }
            unless( $keep_encoding || $elt->{asis})
              {
                $string=~ s{([$replaced_ents])}{$XML::Twig::base_ent{$1}}g ;
                $string=~ s{\Q]]>}{]]&gt;}g; # ]]> is not allowed in character data
                _unprotect_extra_data( $string);
              }
          }
        return $output_text_filter ? $output_text_filter->( $string) : $string;
      }
    else
      { return ''; }
  }
8902
|
|
|
|
|
|
|
|
8903
|
|
|
|
|
|
|
{   # the mark shared by the 3 functions below, (re)generated by _gen_mark
    my $mark;

    # characters protected in extra data, and the name used for each of them
    my( %char2ent, %ent2char);
    BEGIN {
        %char2ent = ( '<' => 'lt', '&' => 'amp', '>' => 'gt');
        %ent2char = reverse %char2ent;
    }

    # generates a mark (a string) that is not found in the string passed as
    # argument, stores it for the (un)protect functions and returns it
    sub _gen_mark {
        my( $string) = @_;
        $mark = "AAAA";
        while( index( $string, $mark) != -1) { $mark++; }
        return $mark;
    }

    # returns a copy of the extra data in which <, & and > are replaced by
    # :<mark>:<name>: so that they are not touched by the escaping of the text
    sub _protect_extra_data {
        my( $extra_data) = @_;
        $extra_data =~ s{([<&>])}{:${mark}:$char2ent{$1}:}g;
        return $extra_data;
    }

    # restores, in place (in its argument), the characters replaced by
    # _protect_extra_data
    sub _unprotect_extra_data {
        $_[0] =~ s{:${mark}:(\w+):}{$ent2char{$1}}g;
    }

}
8928
|
|
|
|
|
|
|
|
8929
|
|
|
|
|
|
|
# Returns the string output for a CDATA element: the content between
# $CDATA_START and $CDATA_END or, when $remove_cdata is set, the content with
# the characters in $replaced_ents replaced by the matching entity.
# Returns '' if the element has no (defined) cdata.
sub cdata_string {
    my $cdata = $_[0]->{cdata};

    return '' unless( defined $cdata);
    return $CDATA_START . $cdata . $CDATA_END unless( $remove_cdata);

    $cdata =~ s/([$replaced_ents])/$XML::Twig::base_ent{$1}/g;
    return $cdata;
}
8938
|
|
|
|
|
|
|
|
8939
|
|
|
|
|
|
|
# Returns the value of an attribute, escaped for use in XML output.
#   $att    - name of the attribute
#   $option - optional hashref; if escape_gt is true, '>' is escaped too
# The characters in $replaced_ents, the quote, \n, \r and \t are escaped (by
# _att_xml_string). Returns '' if the attribute has no defined value.
sub att_xml_string {
    my( $elt, $att, $option) = @_;

    my $to_escape = $replaced_ents . "$quote\n\r\t";
    if( $option && $option->{escape_gt} && ($to_escape !~ m{>})) { $to_escape .= '>'; }

    my $value = $elt->{att}->{$att};
    return '' unless( defined $value);

    return _att_xml_string( $value, $to_escape);
}
8951
|
|
|
|
|
|
|
|
8952
|
|
|
|
|
|
|
# escaped xml string for an attribute value |
8953
|
|
|
|
|
|
|
# escaped xml string for an attribute value
#   $string - the attribute value
#   $escape - the characters to replace by an entity (taken from
#             %XML::Twig::base_ent), used as the content of a character class
# When $keep_encoding is set only the quote is escaped. When
# $do_not_escape_amp_in_atts is set the first character of $escape is left
# alone, and only the & that do not look like the start of an entity or
# character reference are escaped.
# The result goes through $output_text_filter if there is one.
# Returns '' if $string is not defined.
sub _att_xml_string
  { my( $string, $escape)= @_;
    if( !defined( $string)) { return ''; }
    if( $keep_encoding)
      { $string=~ s{$quote}{$XML::Twig::base_ent{$quote}}g;
      }
    else
      {
        if( $do_not_escape_amp_in_atts)
          { $escape=~ s{^.}{}; # seems like the most backward compatible way to remove & from the list
            $string=~ s{([$escape])}{$XML::Twig::base_ent{$1}}g;
            # dodgy: escape & that do not start an entity (&name;) or a character reference (&#NN; or &#xHH;)
            $string=~ s{&(?!(?:\w+|#\d+|#[xX][0-9a-fA-F]+);)}{&amp;}g;
          }
        else
          { $string=~ s{([$escape])}{$XML::Twig::base_ent{$1}}g;
            $string=~ s{\Q]]>}{]]&gt;}g;
          }
      }

    return $output_text_filter ? $output_text_filter->( $string) : $string;
  }
8974
|
|
|
|
|
|
|
|
8975
|
|
|
|
|
|
|
# Returns the string output for an entity element: the value found for the
# entity in the entity list of the twig when $expand_external_entities is set
# and such a (true) value exists, the text stored in the element otherwise.
sub ent_string {
    my $ent = shift;
    my $ent_text = $ent->{ent};

    return $ent_text unless( $expand_external_entities);

    my $twig = $ent->twig;
    return $ent_text unless( $twig);

    my $entity_list = $twig->entity_list;
    return $ent_text unless( $entity_list);

    my $value = $entity_list->{entities}->{$ent->ent_name}->{val};
    return $value ? $value : $ent_text;
}
8988
|
|
|
|
|
|
|
|
8989
|
|
|
|
|
|
|
# returns just the text, no tags, for an element |
8990
|
|
|
|
|
|
|
# returns just the text, no tags, for an element
# Options (case-insensitive):
#   no_recurse - return the result of text_only instead
#   sep        - add a space after the text of each text or special element,
#                and before the text of each child
# The text of a regular element (the concatenated text of its children) goes
# through $output_text_filter if there is one.
sub text {
    my( $elt, @options) = @_;

    if( grep { lc( $_) eq 'no_recurse' } @options) { return $elt->text_only; }
    my $sep = (grep { lc( $_) eq 'sep' } @options) ? ' ' : '';

    # text and special elements: the text is stored in the element
    return $elt->{pcdata}  . $sep if( exists $elt->{'pcdata'});
    return $elt->{cdata}   . $sep if( exists $elt->{'cdata'});
    return $elt->pi_string . $sep if( exists $elt->{'target'});
    return $elt->{comment} . $sep if( exists $elt->{'comment'});
    return $elt->{ent}     . $sep if( exists $elt->{'ent'});

    # regular elements: gather the text of the children
    my $text = '';
    for( my $child= $elt->{first_child} || ''; $child; $child= $child->{next_sibling}) {
        my $child_text = $child->text( @options);
        $text .= $sep . $child_text if( defined $child_text);
    }

    return $output_text_filter ? $output_text_filter->( $text) : $text;
}
9016
|
|
|
|
|
|
|
|
9017
|
|
|
|
|
|
|
# Returns the concatenated text of the children that are either text
# (is_text) or entities; other children are ignored.
sub text_only {
    my @texts = map  { $_->text }
                grep { $_->is_text || (exists $_->{'ent'}) }
                $_[0]->_children;
    return join '', @texts;
}
9019
|
|
|
|
|
|
|
|
9020
|
|
|
|
|
|
|
# Returns the text of the element (as returned by text, called with the same
# arguments) with every run of whitespace replaced by a single space and
# without leading or trailing whitespace.
sub trimmed_text {
    my $elt = shift;

    my $trimmed = $elt->text( @_);
    $trimmed =~ s{\s+}{ }g;    # collapse whitespace, including line breaks
    $trimmed =~ s{\A\s+}{};
    $trimmed =~ s{\s+\z}{};

    return $trimmed;
}
9028
|
|
|
|
|
|
|
|
9029
|
|
|
|
|
|
|
# Trims the text of the element, in place: whitespace is removed from the
# start of the first $TEXT descendant and from the end of the last one, and
# every run of whitespace in any $TEXT descendant is replaced by a single
# space. Returns the element.
# An element without any $TEXT descendant is returned untouched.
sub trim
  { my( $elt)= @_;
    # nothing to trim (and no object to call methods on) without text
    my $pcdata= $elt->first_descendant( $TEXT) or return $elt;
    (my $pcdata_text= $pcdata->text)=~ s{^\s+}{}s;
    $pcdata->set_text( $pcdata_text);
    $pcdata= $elt->last_descendant( $TEXT) or return $elt;
    ($pcdata_text= $pcdata->text)=~ s{\s+$}{};
    $pcdata->set_text( $pcdata_text);
    foreach my $pcdata ($elt->descendants( $TEXT))
      { ($pcdata_text= $pcdata->text)=~ s{\s+}{ }g;
        $pcdata->set_text( $pcdata_text);
      }
    return $elt;
  }
9043
|
|
|
|
|
|
|
|
9044
|
|
|
|
|
|
|
|
9045
|
|
|
|
|
|
|
# remove cdata sections (turns them into regular pcdata) in an element |
9046
|
|
|
|
|
|
|
# Turns the CDATA sections in the element (the element itself and its
# descendants) into regular pcdata. When $keep_encoding is set &, <, " and '
# are replaced by the matching entity in the text.
sub remove_cdata {
    my $elt = shift;

    foreach my $cdata ($elt->descendants_or_self( $CDATA)) {
        my $text = $cdata->{cdata};
        if( $keep_encoding) {
            $text =~ s{([&<"'])}{$XML::Twig::base_ent{$1}}g;
        }

        # the element now holds pcdata: drop the empty flag, store the text
        delete $cdata->{empty};
        $cdata->{pcdata} = $text;

        $cdata->{gi}=$XML::Twig::gi2index{$PCDATA} or $cdata->set_gi( $PCDATA);
        undef $cdata->{cdata};
    }
}
9060
|
|
|
|
|
|
|
|
9061
|
|
|
|
|
|
|
# true if the name (gi) of the element is a private name
sub _is_private {
    my( $elt) = @_;
    return _is_private_name( $elt->gi);
}
9062
|
|
|
|
|
|
|
# true if the name starts with '#', except when it starts with '#default:'
sub _is_private_name {
    my( $name) = @_;
    return $name =~ m{^#(?!default:)};
}
9063
|
|
|
|
|
|
|
|
9064
|
|
|
|
|
|
|
|
9065
|
|
|
|
|
|
|
} # end of block containing package globals ($pretty_print, $quotes, keep_encoding...) |
9066
|
|
|
|
|
|
|
|
9067
|
|
|
|
|
|
|
# merges consecutive #PCDATAs in an element |
9068
|
|
|
|
|
|
|
# Normalizes the text in the element: the $PCDATA descendants with an empty
# text are deleted, and each run of $PCDATA descendants that are direct
# siblings is merged into the first one of the run. Returns the element.
sub normalize {
    my( $elt) = @_;

    my @pcdatas = $elt->descendants( $PCDATA);
    while( my $current= shift @pcdatas) {
        unless( length $current->{pcdata}) {
            $current->delete;
            next;
        }

        # merge the following siblings, as long as they are the next #PCDATA in the list
        while(    @pcdatas
               && $current->{next_sibling}
               && $current->{next_sibling} == $pcdatas[0]
             ) {
            $current->merge_text( shift @pcdatas);
        }
    }

    return $elt;
}
9080
|
|
|
|
|
|
|
|
9081
|
|
|
|
|
|
|
# SAX export methods |
9082
|
|
|
|
|
|
|
# SAX1 export: calls _toSAX with the arguments received, followed by the
# functions that generate the SAX1 data for start and end tags
sub toSAX1 {
    return _toSAX( @_, \&_start_tag_data_SAX1, \&_end_tag_data_SAX1);
}
9084
|
|
|
|
|
|
|
|
9085
|
|
|
|
|
|
|
# SAX2 export: calls _toSAX with the arguments received, followed by the
# functions that generate the SAX2 data for start and end tags
sub toSAX2 {
    return _toSAX( @_, \&_start_tag_data_SAX2, \&_end_tag_data_SAX2);
}
9087
|
|
|
|
|
|
|
|
9088
|
|
|
|
|
|
|
# walks the element and calls the matching methods of a SAX handler
#   $elt            - the element to export
#   $handler        - the SAX handler, only the methods it implements
#                     (checked with can) are called
#   $start_tag_data - code ref, builds the data passed to start_element
#   $end_tag_data   - code ref, builds the data passed to end_element
# for text and special elements the kind of node is decided first, then the
# handler is checked: a node whose callback is missing is just skipped, it is
# never reported as a node of an other kind (a #PCDATA that still has a
# left-over cdata field used to trigger start_cdata/end_cdata when the
# handler had no characters method)
sub _toSAX {
    my( $elt, $handler, $start_tag_data, $end_tag_data)= @_;

    if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) {
        # regular element: start tag, children, end tag
        my $start_data= $start_tag_data->( $elt);
        _start_prefix_mapping( $elt, $handler, $start_data);
        if( $start_data && (my $start_element= $handler->can( 'start_element'))) {
            # no start_element for an element marked as flushed
            unless( $elt->{'flushed'}) { $start_element->( $handler, $start_data); }
        }

        foreach my $child ($elt->_children) {
            $child->_toSAX( $handler, $start_tag_data, $end_tag_data);
        }

        if( (my $end_data= $end_tag_data->( $elt)) && (my $end_element= $handler->can( 'end_element'))) {
            $end_element->( $handler, $end_data);
        }
        _end_prefix_mapping( $elt, $handler);
    }
    elsif( exists $elt->{'pcdata'}) {
        if( my $characters= $handler->can( 'characters')) {
            $characters->( $handler, { Data => $elt->{pcdata} });
        }
    }
    elsif( exists $elt->{'cdata'}) {
        # each of the 3 events is optional for the handler
        if( my $start_cdata= $handler->can( 'start_cdata')) {
            $start_cdata->( $handler);
        }
        if( my $characters= $handler->can( 'characters')) {
            $characters->( $handler, { Data => $elt->{cdata} });
        }
        if( my $end_cdata= $handler->can( 'end_cdata')) {
            $end_cdata->( $handler);
        }
    }
    elsif( exists $elt->{'target'}) {
        if( my $pi= $handler->can( 'processing_instruction')) {
            $pi->( $handler, { Target => $elt->{target}, Data => $elt->{data} });
        }
    }
    elsif( exists $elt->{'comment'}) {
        if( my $comment= $handler->can( 'comment')) {
            $comment->( $handler, { Data => $elt->{comment} });
        }
    }
    elsif( exists $elt->{'ent'}) {
        # skipped_entity if the handler has it, otherwise the entity is
        # sent as text: its string when defined, its name if not
        if( my $skipped_entity= $handler->can( 'skipped_entity')) {
            $skipped_entity->( $handler, { Name => $elt->ent_name });
        }
        elsif( my $characters= $handler->can( 'characters')) {
            if( defined $elt->ent_string) {
                $characters->( $handler, { Data => $elt->ent_string });
            }
            else {
                $characters->( $handler, { Data => $elt->ent_name });
            }
        }
    }
}
9132
|
|
|
|
|
|
|
|
9133
|
|
|
|
|
|
|
# builds the data passed to the start_element method of a SAX1 handler:
#   { Name => <tag name>, Attributes => { <att name> => <value>, ... } }
# returns nothing for an element with a private name ('#...' but not
# '#default:...'), private attributes are left out of the Attributes hash
sub _start_tag_data_SAX1 {
    my( $elt)= @_;

    my $name= $XML::Twig::index2gi[$elt->{'gi'}];
    return if $name =~ m{^#(?!default:)};

    # keys (and not each) is used on purpose: each would carry on from
    # wherever a previous, unfinished, iteration on the attribute hash
    # stopped, and silently miss attributes
    my $atts= $elt->{att} || {};
    my %attributes= map  { $_ => $atts->{$_} }
                    grep { !m{^#(?!default:)} }
                    keys %$atts;

    return { Name => $name, Attributes => \%attributes };
}
9144
|
|
|
|
|
|
|
|
9145
|
|
|
|
|
|
|
# builds the data passed to the end_element method of a SAX1 handler:
# { Name => <tag name> }, or nothing for an element with a private name
sub _end_tag_data_SAX1 {
    my( $elt)= @_;
    my $name= $XML::Twig::index2gi[$elt->{'gi'}];
    return if $name =~ m{^#(?!default:)};
    return { Name => $name };
}
9150
|
|
|
|
|
|
|
|
9151
|
|
|
|
|
|
|
# builds the data passed to the start_element method of a SAX2 handler:
# Name, Prefix, LocalName, NamespaceURI and Attributes
# returns nothing for an element with a private name
# a copy of the data, without the attributes, is stored in the
# twig_elt_SAX2_data field of the element (_end_tag_data_SAX2 returns it)
sub _start_tag_data_SAX2 {
    my( $elt)= @_;

    my $name= $XML::Twig::index2gi[$elt->{'gi'}];
    return if $name =~ m{^#(?!default:)};

    my %tag= ( Name => $name);
    $tag{Prefix}=       $elt->ns_prefix;
    $tag{LocalName}=    $elt->local_name;
    $tag{NamespaceURI}= $elt->namespace;

    # the copy is taken before the attributes are added
    $elt->{twig_elt_SAX2_data}= { %tag };

    $tag{Attributes}= $elt->_atts_to_SAX2;

    return \%tag;
}
9171
|
|
|
|
|
|
|
|
9172
|
|
|
|
|
|
|
# builds the SAX2 version of the attributes of the element: a hash
#   '{<namespace uri>}<local name>' => { Name, Prefix, LocalName, NamespaceURI, Value }
# private attributes ('#...' but not '#default:...') are left out
sub _atts_to_SAX2 {
    my $elt= shift;
    my %sax2_atts;

    foreach my $name (grep { !m{^#(?!default:)} } keys %{$elt->{att}}) {
        my $prefix= _ns_prefix( $name);
        my $local=  _local_name( $name);
        my $uri=    $elt->namespace( $prefix);

        $sax2_atts{ '{' . $uri . '}' . $local }= { Name         => $name,
                                                   Prefix       => $prefix,
                                                   LocalName    => $local,
                                                   NamespaceURI => $uri,
                                                   Value        => $elt->{'att'}->{$name},
                                                 };
    }

    return \%sax2_atts;
}
9190
|
|
|
|
|
|
|
|
9191
|
|
|
|
|
|
|
# calls the start_prefix_mapping method of the handler (if it has one) for
# each namespace declaration found in the SAX2 attributes of $data
# the prefixes are stored in the twig_end_prefix_mapping field of the
# element, where _end_prefix_mapping finds them
sub _start_prefix_mapping {
    my( $elt, $handler, $data)= @_;

    my $callback= $handler->can( 'start_prefix_mapping') or return;

    my @declarations= grep { /^\{[^}]*\}xmlns/ || /^\{$XMLNS_URI\}/ } keys %{$data->{Attributes}};
    return unless @declarations;

    foreach my $key (@declarations) {
        my $declaration= $data->{Attributes}->{$key};

        # xmlns="..." declares the default namespace: empty prefix
        my $prefix= $declaration->{LocalName};
        $prefix= '' if $prefix eq 'xmlns';

        $callback->( $handler, { Prefix => $prefix, NamespaceURI => $declaration->{Value} });

        $elt->{twig_end_prefix_mapping} ||= [];
        push @{$elt->{twig_end_prefix_mapping}}, $prefix;
    }
}
9209
|
|
|
|
|
|
|
|
9210
|
|
|
|
|
|
|
# calls the end_prefix_mapping method of the handler (if it has one) for
# each prefix stored in the twig_end_prefix_mapping field of the element
sub _end_prefix_mapping {
    my( $elt, $handler)= @_;

    my $callback= $handler->can( 'end_prefix_mapping') or return;

    foreach my $declared (@{$elt->{twig_end_prefix_mapping}}) {
        $callback->( $handler, { Prefix => $declared });
    }
}
9217
|
|
|
|
|
|
|
|
9218
|
|
|
|
|
|
|
# the data passed to the end_element method of a SAX2 handler: whatever is
# stored in the twig_elt_SAX2_data field of the element, or nothing for an
# element with a private name
sub _end_tag_data_SAX2 {
    my( $elt)= @_;
    my $name= $XML::Twig::index2gi[$elt->{'gi'}];
    return if $name =~ m{^#(?!default:)};
    return $elt->{twig_elt_SAX2_data};
}
9223
|
|
|
|
|
|
|
|
9224
|
|
|
|
|
|
|
# returns 1 if at least one child of the element is a text element or an
# entity (it has an ent field), 0 otherwise
sub contains_text {
    my $elt= shift;

    for( my $node= $elt->{first_child}; $node; $node= $node->{next_sibling}) {
        return 1 if $node->is_text || exists $node->{'ent'};
    }

    return 0;
}
9233
|
|
|
|
|
|
|
|
9234
|
|
|
|
|
|
|
# sets the text of the element
# - for a #PCDATA the text replaces the pcdata, the text is returned
# - for a #CDATA the text replaces the cdata, the text is returned
# - for any other element the content becomes a single #PCDATA child
#   holding the text, the element is returned
# options:
#  - force_pcdata: when set to a true value forces the text to be in a #PCDATA
#                  even if the original element was a #CDATA
sub set_text {
    my( $elt, $string, %option)= @_;

    my $tag= $XML::Twig::index2gi[$elt->{'gi'}];

    if( $tag eq $PCDATA) {
        delete $elt->{empty};
        return $elt->{pcdata}= $string;
    }

    if( $tag eq $CDATA) {
        unless( $option{force_pcdata}) {
            $elt->{cdata}= $string;
            return $string;
        }

        # the #CDATA becomes a #PCDATA
        $elt->{gi}=$XML::Twig::gi2index{$PCDATA} or $elt->set_gi( $PCDATA);
        $elt->{cdata}= '';
        delete $elt->{empty};
        return $elt->{pcdata}= $string;
    }

    if( $elt->contains_a_single( $PCDATA)) {
        # optimized so we have a slight chance of not losing embedded comments and pi's
        $elt->{first_child}->set_pcdata( $string);
        return $elt;
    }

    # general case: drop the children (the list is built before any of
    # them is deleted) and paste a new text child
    my @former_children= $elt->_children;
    foreach my $former_child (@former_children) {
        $former_child->delete;
    }

    my $text_child= $elt->_new_pcdata( $string);
    $text_child->paste( $elt);

    delete $elt->{empty};

    return $elt;
}
9270
|
|
|
|
|
|
|
|
9271
|
|
|
|
|
|
|
# set the content of an element from a list of strings and elements
# - a hash ref as first argument replaces the attributes of the element
# - #EMPTY as (next) argument marks the element as empty
# - a single string on a #PCDATA, a #CDATA or an element that contains a
#   single #PCDATA just replaces the text
# - otherwise the children are deleted and the arguments become the new
#   content, consecutive strings end up in the same #PCDATA
# returns the element
sub set_content {
    my( $elt, @content)= @_;

    return $elt unless defined $content[0];

    # attributes can be given as a hash (passed by ref)
    if( ref $content[0] eq 'HASH') {
        my $atts= shift @content;
        $elt->del_atts; # usually useless but better safe than sorry
        $elt->set_atts( $atts);
        return $elt unless defined $content[0];
    }

    # check next argument for #EMPTY
    if( !ref( $content[0]) && ($content[0] eq $EMPTY)) {
        $elt->{empty}= 1;
        return $elt;
    }

    # case where we really want to do a set_text, the element is '#PCDATA'
    # or contains a single PCDATA and we only want to add text in it
    my $tag=         $XML::Twig::index2gi[$elt->{'gi'}];
    my $lone_string= (@content == 1) && !ref( $content[0]);

    if( ($tag eq $PCDATA || $elt->contains_a_single( $PCDATA)) && $lone_string) {
        $elt->set_text( $content[0]);
        return $elt;
    }
    if( ($tag eq $CDATA) && $lone_string) {
        $elt->{cdata}= $content[0];
        return $elt;
    }

    # delete the children (the list is built before any of them is deleted)
    my @former_children= $elt->_children;
    foreach my $former_child (@former_children) {
        $former_child->delete;
    }

    delete $elt->{empty} if @content;

    foreach my $item (@content) {
        if( ref( $item) && isa( $item, 'XML::Twig::Elt')) {
            # argument is an element
            $item->paste( 'last_child', $elt);
            next;
        }

        # argument is a string
        my $last= $elt->{last_child};
        if( $last && $last->is_pcdata) {
            # previous child is also pcdata: just concatenate
            delete $last->{empty};
            $last->{pcdata}= $last->{pcdata} . $item;
        }
        else {
            # previous child is not a string: create a new pcdata element
            my $text_child= $elt->_new_pcdata( $item);
            $text_child->paste( 'last_child', $elt);
        }
    }

    return $elt;
}
9329
|
|
|
|
|
|
|
|
9330
|
|
|
|
|
|
|
# inserts an element (whose gi is given) as child of the element
# all children of the element are now children of the new element
# several gi's can be given, each new element is inserted in the previous
# one, a gi can be followed by a hash ref of attributes for the new element
# returns the new (innermost) element
sub insert {
    my( $elt, @args)= @_;

    # first cut the children (the whole list is built before the first cut)
    my @children;
    for( my $child= $elt->{first_child}; $child; $child= $child->{next_sibling}) {
        push @children, $child;
    }
    foreach my $child (@children) {
        $child->cut;
    }

    # insert elements, $elt is always the innermost element so far
    while( my $gi= shift @args) {
        my $new_elt= $elt->new( $gi);

        # add attributes if needed
        $new_elt->set_atts( shift @args) if defined( $args[0]) && isa( $args[0], 'HASH');

        # paste the element
        $new_elt->paste( $elt);
        delete $elt->{empty};
        $elt= $new_elt;
    }

    # paste back the children
    foreach my $child (@children) {
        $child->paste( 'last_child', $elt);
    }

    return $elt;
}
9355
|
|
|
|
|
|
|
|
9356
|
|
|
|
|
|
|
# insert a new element
# $elt->insert_new_elt( $opt_position, $gi, $opt_atts_hash, @opt_content);
# the element is created with the same syntax as new
# position is the same as in paste, first_child by default
# returns the new element
sub insert_new_elt {
    my( $elt, @args)= @_;

    # the first argument is a position only if it is one of the 4 known ones
    my $position= $args[0];
    if( grep { $position eq $_ } qw( before after first_child last_child)) {
        shift @args;
    }
    else {
        $position= 'first_child';
    }

    my $new_elt= $elt->new( @args);
    $new_elt->paste( $position, $elt);

    #if( defined $new_elt->{'att'}->{$ID}) { $new_elt->set_id( $new_elt->{'att'}->{$ID}); }

    return $new_elt;
}
9376
|
|
|
|
|
|
|
|
9377
|
|
|
|
|
|
|
# wraps an element in elements which gi's are given as arguments
# $elt->wrap_in( 'td', 'tr', 'table') wraps the element as a single
# cell in a table for example
# a gi can be followed by a hash ref, the attributes of the new element
# returns the new (outermost) element
sub wrap_in {
    my( $elt, @spec)= @_;

    while( my $gi= shift @spec) {
        my $wrapper= $elt->new( $gi);

        # an element flagged as twig_current hands the flag over to the
        # wrapper, the twig_current field of the twig follows
        if( $elt->{twig_current}) {
            my $twig= $elt->twig;
            $twig->{twig_current}= $wrapper;
            delete $elt->{'twig_current'};
            $wrapper->{'twig_current'}= 1;
        }

        # the wrapper takes the place of the element in its parent
        my $parent= $elt->{parent};
        if( $parent) {
            $wrapper->{parent}= $parent;
            weaken( $wrapper->{parent}) if $XML::Twig::weakrefs;

            $parent->{first_child}= $wrapper if $parent->{first_child} == $elt;

            if( $parent->{last_child} == $elt) {
                delete $parent->{empty};
                $parent->{last_child}= $wrapper;
                weaken( $parent->{last_child}) if $XML::Twig::weakrefs;
            }
        }
        else {
            # wrapping the root
            my $twig= $elt->twig;
            $twig->set_root( $wrapper) if $twig && $twig->root && ($twig->root eq $elt);
        }

        # ... and between its siblings
        my $before= $elt->{prev_sibling};
        if( $before) {
            $wrapper->{prev_sibling}= $before;
            weaken( $wrapper->{prev_sibling}) if $XML::Twig::weakrefs;
            $before->{next_sibling}= $wrapper;
        }

        my $after= $elt->{next_sibling};
        if( $after) {
            $wrapper->{next_sibling}= $after;
            $after->{prev_sibling}= $wrapper;
            weaken( $after->{prev_sibling}) if $XML::Twig::weakrefs;
        }

        # the element is the only child of the wrapper
        $wrapper->{first_child}= $elt;
        delete $wrapper->{empty};
        $wrapper->{last_child}= $elt;
        weaken( $wrapper->{last_child}) if $XML::Twig::weakrefs;

        $elt->{parent}= $wrapper;
        weaken( $elt->{parent}) if $XML::Twig::weakrefs;
        $elt->{prev_sibling}= undef;
        weaken( $elt->{prev_sibling}) if $XML::Twig::weakrefs;
        $elt->{next_sibling}= undef;

        # add the attributes if the next argument is a hash ref
        $wrapper->set_atts( shift @spec) if defined( $spec[0]) && isa( $spec[0], 'HASH');

        # the next wrapper goes around this one
        $elt= $wrapper;
    }

    return $elt;
}
9430
|
|
|
|
|
|
|
|
9431
|
|
|
|
|
|
|
# $elt->replace( $ref): $elt takes the place of $ref in the tree
# $elt is first cut if it has a parent, then it gets the parent and the
# siblings of $ref (or becomes the root if $ref was the root of its twig)
# returns $ref, which is no longer linked to its former parent and siblings
sub replace {
    my( $elt, $ref)= @_;

    $elt->cut if $elt->{parent};

    my $parent= $ref->{parent};
    if( $parent) {
        $elt->{parent}= $parent;
        weaken( $elt->{parent}) if $XML::Twig::weakrefs;

        $parent->{first_child}= $elt if $parent->{first_child} == $ref;

        if( $parent->{last_child} == $ref) {
            delete $parent->{empty};
            $parent->{last_child}= $elt;
            weaken( $parent->{last_child}) if $XML::Twig::weakrefs;
        }
    }
    elsif( $ref->twig && $ref == $ref->twig->root) {
        $ref->twig->set_root( $elt);
    }

    my $before= $ref->{prev_sibling};
    if( $before) {
        $elt->{prev_sibling}= $before;
        weaken( $elt->{prev_sibling}) if $XML::Twig::weakrefs;
        $before->{next_sibling}= $elt;
    }

    my $after= $ref->{next_sibling};
    if( $after) {
        $elt->{next_sibling}= $after;
        $after->{prev_sibling}= $elt;
        weaken( $after->{prev_sibling}) if $XML::Twig::weakrefs;
    }

    # unlink the replaced element
    foreach my $link (qw( parent prev_sibling)) {
        $ref->{$link}= undef;
        weaken( $ref->{$link}) if $XML::Twig::weakrefs;
    }
    $ref->{next_sibling}= undef;

    return $ref;
}
9458
|
|
|
|
|
|
|
|
9459
|
|
|
|
|
|
|
# $ref->replace_with( $elt, @more_elts): $elt replaces $ref, the other
# elements are then pasted after $elt, in the order they were given
# returns $elt
sub replace_with {
    my( $ref, $elt, @followers)= @_;

    $elt->replace( $ref);

    # each one is pasted right after $elt, so the last one has to go first
    for( my $i= $#followers; $i >= 0; $i--) {
        $followers[$i]->paste( after => $elt);
    }

    return $elt;
}
9467
|
|
|
|
|
|
|
|
9468
|
|
|
|
|
|
|
|
9469
|
|
|
|
|
|
|
# move an element, same syntax as paste, except the element is first cut
# returns the element
sub move {
    my( $elt, @where)= @_;
    $elt->cut;
    $elt->paste( @where);
    return $elt;
}
9476
|
|
|
|
|
|
|
|
9477
|
|
|
|
|
|
|
|
9478
|
|
|
|
|
|
|
# adds a prefix to an element, creating a pcdata child if needed
# the prefix is added to the text of the element itself (if it has a pcdata
# field) or of its first child (if it is a pcdata) only when the asis status
# of that text is the same as the one asked for, otherwise a new pcdata is
# pasted before the text element or as first child of the element
# $option: 'asis' to get the new text marked with set_asis
# returns the element
sub prefix {
    my( $elt, $prefix, $option)= @_;
    my $asis= ($option && ($option eq 'asis')) ? 1 : 0;

    # true when the asis status of a node is the one asked for
    my $same_status= sub { my( $node)= @_; return $asis ? $node->{asis} : !$node->{asis}; };

    if( (exists $elt->{'pcdata'}) && $same_status->( $elt)) {
        delete $elt->{empty};
        $elt->{pcdata}= $prefix . $elt->{pcdata};
        return $elt;
    }

    my $first= $elt->{first_child};
    if( $first && $first->is_pcdata && $same_status->( $first)) {
        $first->set_pcdata( $prefix . $first->pcdata);
        return $elt;
    }

    my $new_elt=  $elt->_new_pcdata( $prefix);
    my $position= (exists $elt->{'pcdata'}) ? 'before' : 'first_child';
    $new_elt->paste( $position => $elt);
    $new_elt->set_asis if $asis;

    return $elt;
}
9501
|
|
|
|
|
|
|
|
9502
|
|
|
|
|
|
|
# adds a suffix to an element, creating a pcdata child if needed
# the suffix is added to the text of the element itself (if it has a pcdata
# field) or of its last child (if it is a pcdata) only when the asis status
# of that text is the same as the one asked for, otherwise a new pcdata is
# pasted after the text element or as last child of the element
# $option: 'asis' to get the new text marked with set_asis
# returns the element
sub suffix {
    my( $elt, $suffix, $option)= @_;
    my $asis= ($option && ($option eq 'asis')) ? 1 : 0;

    # true when the asis status of a node is the one asked for
    my $same_status= sub { my( $node)= @_; return $asis ? $node->{asis} : !$node->{asis}; };

    if( (exists $elt->{'pcdata'}) && $same_status->( $elt)) {
        delete $elt->{empty};
        $elt->{pcdata}= $elt->{pcdata} . $suffix;
        return $elt;
    }

    my $last= $elt->{last_child};
    if( $last && $last->is_pcdata && $same_status->( $last)) {
        $last->set_pcdata( $last->pcdata . $suffix);
        return $elt;
    }

    my $new_elt=  $elt->_new_pcdata( $suffix);
    my $position= (exists $elt->{'pcdata'}) ? 'after' : 'last_child';
    $new_elt->paste( $position => $elt);
    $new_elt->set_asis if $asis;

    return $elt;
}
9523
|
|
|
|
|
|
|
|
9524
|
|
|
|
|
|
|
# create a path to an element ('/root/.../gi)
# made of the gi's of the ancestors of the element and of the element itself
sub path {
    my $elt= shift;
    my @names= map { $_->gi } ($elt, $elt->ancestors);   # element first, root last
    return join( '', map { "/$_" } reverse @names);
}
9530
|
|
|
|
|
|
|
|
9531
|
|
|
|
|
|
|
# returns an XPath-like path to the element: one /gi step for the element
# and for each of its ancestors, root first
# a step gets an [index] (1 + the number of previous siblings with the same
# gi) except when the index is 1 and no next sibling has the same gi
sub xpath {
    my $elt= shift;
    my $xpath;

    foreach my $node (reverse $elt->ancestors_or_self) {
        my $gi= $XML::Twig::index2gi[$node->{'gi'}];
        my $step= "/$gi";

        my $position= $node->prev_siblings( $gi) + 1;
        my $is_only_one= ($position == 1) && !$node->next_sibling( $gi);
        $step.= '[' . $position . ']' unless $is_only_one;

        $xpath.= $step;
    }

    return $xpath;
}
9543
|
|
|
|
|
|
|
|
9544
|
|
|
|
|
|
|
# methods used mainly by wrap_children |
9545
|
|
|
|
|
|
|
|
9546
|
|
|
|
|
|
|
# returns a string made of the start tags of the children of the element
# each child first goes through add_id, the start tags are generated with
# the escape_gt option, a child with no start tag adds nothing
# the pretty print style is set to 'none' during the generation, then set
# back to what it was
sub _stringify_struct {
    my( $elt, %opt)= @_;

    my $previous_style= set_pretty_print( 'none');

    my @start_tags;
    foreach my $child ($elt->_children) {
        $child->add_id;
        push @start_tags, $child->start_tag( { escape_gt => 1 }) || '';
    }

    set_pretty_print( $previous_style);

    return join( '', @start_tags);
}
9558
|
|
|
|
|
|
|
|
9559
|
|
|
|
|
|
|
# wrap a series of elements in a new one
# $elt->_wrap_range( $gi, $opt_atts, $range)
#   $gi       - the gi of the new element
#   $opt_atts - optional hash ref, the attributes of the new element
#   $range    - the string with the start tags of the elements to wrap,
#               the elements are found in the twig through the id
#               attribute of each tag
# the first element of the range is wrapped in the new element, the other
# ones are then moved in it
# always returns '' (the sub is used as the replacement in a s///e)
sub _wrap_range {
    my $elt=   shift;
    my $gi=    shift;
    my $atts=  isa( $_[0], 'HASH') ? shift : undef;
    my $range= shift; # the string with the tags to wrap

    my $t= $elt->twig;

    # get the tags to wrap
    my @to_wrap;
    while( $range=~ m{<\w+\s+[^>]*id=("[^"]*"|'[^']*')[^>]*>}g) {
        # $1 is the quoted id, substr removes the quotes
        push @to_wrap, $t->elt_id( substr( $1, 1, -1));
    }

    return '' unless @to_wrap;

    my $first= shift @to_wrap;

    # each wrapper gets its own copy of the attributes, the attributes are
    # optional so the hash ref must not be dereferenced when it is missing
    my %wrapper_atts= $atts ? %$atts : ();
    my $new_elt= $first->wrap_in( $gi, \%wrapper_atts);

    foreach my $next (@to_wrap) {
        $next->move( last_child => $new_elt);
    }

    return '';
}
9582
|
|
|
|
|
|
|
|
9583
|
|
|
|
|
|
|
# wrap children matching a regexp in a new element
# $elt->wrap_children( $regexp, $gi, $opt_atts)
# the regexp is applied to the string made of the start tags of the
# children, each series of children that matches is wrapped in a new $gi
# element, created with the attributes in $opt_atts (a hash ref)
# returns the element
sub wrap_children {
    my( $elt, $regexp, $gi, $atts)= @_;

    $atts= {} unless $atts;

    # stringify the elt structure
    my $structure= $elt->_stringify_struct;

    # in the regexp, replace gi's by the proper regexp
    $regexp=~ s{(<[^>]*>)}{_match_expr( $1)}eg;

    # then do the actual replace, only the side effect (the wrapping) matters
    $structure=~ s{($regexp)}{$elt->_wrap_range( $gi, $atts, $1)}eg;

    return $elt;
}
9595
|
|
|
|
|
|
|
|
9596
|
|
|
|
|
|
|
# turns a tag found in a wrap_children regexp into the regexp (a string)
# that matches the start tags with that gi and those attributes
sub _match_expr {
    my( $tag)= @_;
    my( $gi, %atts)= XML::Twig::_parse_start_tag( $tag);
    return _match_tag( $gi, %atts);
}
9601
|
|
|
|
|
|
|
|
9602
|
|
|
|
|
|
|
|
9603
|
|
|
|
|
|
|
# builds the regexp (a string, wrapped in a non-capturing group) that
# matches a start tag with the given gi and attributes
# the attributes are expected sorted by name in the tag, the values are
# quoted (quotemeta) and can be in single or double quotes
sub _match_tag {
    my( $elt, %atts)= @_;

    my @att_patterns;
    foreach my $name (sort keys %atts) {
        my $value= quotemeta $atts{$name};
        push @att_patterns, qq{[^>]*$name=(?:"$value"|'$value')};
    }

    return '(?:' . join( '', "<$elt\\b", @att_patterns, '[^>]*>') . ')';
}
9613
|
|
|
|
|
|
|
|
9614
|
|
|
|
|
|
|
# $elt->field_to_att( $cond, $opt_att): the text of the first child that
# matches $cond becomes the value of the $opt_att attribute ($cond is used
# as the name of the attribute if $opt_att is not given), the child is cut
# returns the element, or undef if no child matches
sub field_to_att {
    my( $elt, $cond, $att)= @_;

    my $att_name= $att || $cond;

    my $field= $elt->first_child( $cond);
    return undef unless $field;

    $elt->set_att( $att_name => $field->text);
    $field->cut;

    return $elt;
}
9622
|
|
|
|
|
|
|
|
9623
|
|
|
|
|
|
|
# $elt->att_to_field( $att, $opt_tag): a new $opt_tag element ($att is used
# as the tag if $opt_tag is not given) holding the value of the attribute
# is inserted as first child of the element, the attribute is then deleted
# returns the element
sub att_to_field {
    my( $elt, $att, $tag)= @_;

    my $field_tag= $tag || $att;

    $elt->insert_new_elt( first_child => $field_tag, $elt->{'att'}->{$att});
    $elt->del_att( $att);

    return $elt;
}
9630
|
|
|
|
|
|
|
|
9631
|
|
|
|
|
|
|
# sort children methods |
9632
|
|
|
|
|
|
|
|
9633
|
|
|
|
|
|
|
# $elt->sort_children_on_field( $field, %opt): sorts the children of the
# element, the sort key of a child is what its field method returns for
# $field, the options are passed to sort_children
sub sort_children_on_field {
    my( $elt, $field, @sort_options)= @_;
    my $field_of= sub { return $_[0]->field( $field) };
    return $elt->sort_children( $field_of, @sort_options);
}
9639
|
|
|
|
|
|
|
|
9640
|
|
|
|
|
|
|
# $elt->sort_children_on_att( $att, %opt): sorts the children of the
# element, the sort key of a child is the value of its $att attribute,
# the options are passed to sort_children
sub sort_children_on_att {
    my( $elt, $att, @sort_options)= @_;
    my $att_of= sub { return $_[0]->{'att'}->{$att} };
    return $elt->sort_children( $att_of, @sort_options);
}
9646
|
|
|
|
|
|
|
|
9647
|
|
|
|
|
|
|
# $elt->sort_children_on_value( %opt): sorts the children of the element,
# the sort key of a child is what text returns for it, the options are
# passed to sort_children
sub sort_children_on_value {
    my( $elt, @sort_options)= @_;
    return $elt->sort_children( \&text, @sort_options);
}
9653
|
|
|
|
|
|
|
|
9654
|
|
|
|
|
|
|
# $elt->sort_children( $get_key, %opt): sorts the children of the element
#   $get_key - code ref, called with a child, returns its sort key
# options:
#   - type:  'alpha' (default, keys are compared with cmp) or 'numeric'
#            (keys are compared with <=>), anything else croaks
#   - order: 'normal' (default) or 'reverse'
# the children are cut, sorted, then put back with set_content
# returns what set_content returns
sub sort_children {
    my( $elt, $get_key, %opt)= @_;
    $opt{order} ||= 'normal';
    $opt{type}  ||= 'alpha';

    # the type is checked before the children are cut, so a wrong type does
    # not leave the element without its children
    unless( ($opt{type} eq 'numeric') || ($opt{type} eq 'alpha')) {
        croak "wrong sort type '$opt{type}', should be either 'alpha' or 'numeric'";
    }

    # the key of each child is computed only once
    my @keyed= map { [ $get_key->( $_), $_] } $elt->cut_children;

    my @sorted= ($opt{type} eq 'numeric') ? (sort { $a->[0] <=> $b->[0] } @keyed)
                                          : (sort { $a->[0] cmp $b->[0] } @keyed);

    my @children= map { $_->[1] } @sorted;
    @children= reverse @children if( $opt{order} eq 'reverse');

    return $elt->set_content( @children);
}
9677
|
|
|
|
|
|
|
|
9678
|
|
|
|
|
|
|
|
9679
|
|
|
|
|
|
|
# comparison methods |
9680
|
|
|
|
|
|
|
|
9681
|
|
|
|
|
|
|
# Return 1 if the element starts before $other (cmp gives -1), 0 otherwise.
sub before
  { my( $elt, $other)= @_;
    my $is_before= ( $elt->cmp( $other) == -1);
    return $is_before ? 1 : 0;
  }
9685
|
|
|
|
|
|
|
|
9686
|
|
|
|
|
|
|
# Return 1 if the element starts after $other (cmp gives 1), 0 otherwise.
sub after
  { my( $elt, $other)= @_;
    my $is_after= ( $elt->cmp( $other) == 1);
    return $is_after ? 1 : 0;
  }
9690
|
|
|
|
|
|
|
|
9691
|
|
|
|
|
|
|
# "Less than": return 1 if cmp places the element before $other (-1),
# 0 otherwise.
sub lt
  { my( $elt, $other)= @_;
    if( $elt->cmp( $other) == -1) { return 1; }
    else                          { return 0; }
  }
9696
|
|
|
|
|
|
|
|
9697
|
|
|
|
|
|
|
# "Less than or equal": return 0 if cmp places the element after $other (1),
# 1 for any other cmp result.
sub le
  { my( $elt, $other)= @_;
    my $is_after= ( $elt->cmp( $other) == 1);
    return $is_after ? 0 : 1;
  }
9702
|
|
|
|
|
|
|
|
9703
|
|
|
|
|
|
|
# "Greater than": return 1 if cmp places the element after $other (1),
# 0 otherwise.
sub gt
  { my( $elt, $other)= @_;
    if( $elt->cmp( $other) == 1) { return 1; }
    else                         { return 0; }
  }
9708
|
|
|
|
|
|
|
|
9709
|
|
|
|
|
|
|
# "Greater than or equal": return 0 if cmp places the element before $other
# (-1), 1 for any other cmp result.
sub ge
  { my( $elt, $other)= @_;
    my $is_before= ( $elt->cmp( $other) == -1);
    return $is_before ? 0 : 1;
  }
9714
|
|
|
|
|
|
|
|
9715
|
|
|
|
|
|
|
|
9716
|
|
|
|
|
|
|
# Compare the positions of two elements.
# Returns  0     if both arguments are the same element,
#         -1     if the first element starts before the second,
#          1     if the first element starts after the second,
#          undef if the two elements do not share a top-most ancestor
#                (they are not in the same twig).
sub cmp
  { my( $elt, $other)= @_;

    # easy cases: same element, or one element contains the other
    if( $elt == $other)    { return  0; }
    if( $elt->in( $other)) { return  1; }   # contained in $other => starts after it
    if( $other->in( $elt)) { return -1; }   # contains $other     => starts before it

    # ancestors does not include the element itself, so add it to each chain
    my @elt_chain   = ( $elt,   $elt->ancestors);
    my @other_chain = ( $other, $other->ancestors);

    # different top-most ancestors: the elements are not in the same twig
    # (a scalar undef is returned, whatever the calling context)
    return undef unless( $elt_chain[-1] == $other_chain[-1]);

    # walk down from the common root until the 2 chains diverge: the elements
    # reached at that point are siblings
    my( $elt_branch, $other_branch);
    do
      { $elt_branch   = pop @elt_chain;
        $other_branch = pop @other_chain;
      }
    while( $elt_branch == $other_branch);

    # move left and right from both siblings at the same time, until one
    # cursor meets the other sibling or falls off the list of siblings
    my( $elt_left,   $elt_right)   = ( $elt_branch,   $elt_branch);
    my( $other_left, $other_right) = ( $other_branch, $other_branch);
    while( 1)
      { $elt_left= $elt_left->{prev_sibling};
        return -1 unless( $elt_left);
        return  1 if( $elt_left == $other_right);

        $elt_right= $elt_right->{next_sibling};
        return  1 unless( $elt_right);
        return -1 if( $elt_right == $other_left);

        $other_left= $other_left->{prev_sibling};
        return  1 unless( $other_left);
        return -1 if( $other_left == $elt_right);

        $other_right= $other_right->{next_sibling};
        return -1 unless( $other_right);
        return  1 if( $other_right == $elt_left);
      }
  }
9753
|
|
|
|
|
|
|
|
9754
|
|
|
|
|
|
|
# Return a text dump of $elt and, for an element, of all its descendants:
# one line per node, indented according to the node's level.
#   $option : hash ref of options
#     atts       => display the attributes of elements (default 1)
#     extra      => also display the extra data, see _dump_extra_data (default 0)
#     short_text => length passed to _short_text for text content (default 40)
sub _dump
  { my( $elt, $option)= @_;

    my $show_atts  = defined $option->{atts}       ? $option->{atts}       : 1;
    my $show_extra = defined $option->{extra}      ? $option->{extra}      : 0;
    my $short_text = defined $option->{short_text} ? $option->{short_text} : 40;

    my $sp= '| ';
    my $depth     = $elt->level;
    my $indent    = $sp x $depth;
    my $indent_sp = ' ' x $depth;

    my $dump='';
    if( $elt->is_elt)
      { # the element itself: tag name, then its attributes if requested
        $dump .= $indent . '|-' . $XML::Twig::index2gi[$elt->{'gi'}];

        if( $show_atts)
          { my @att_names= $elt->att_names;
            if( @att_names)
              { my @pairs= map { qq{$_="} . $elt->{'att'}->{$_} . qq{"} } @att_names;
                $dump .= ' ' . join( ' ', @pairs);
              }
          }

        $dump .= "\n";
        if( $show_extra) { $dump .= $elt->_dump_extra_data( $indent, $indent_sp, $short_text); }

        # collect the children by following the sibling links, then dump them in order
        my @children;
        for( my $child= $elt->{first_child}; $child; $child= $child->{next_sibling})
          { push @children, $child; }
        $dump .= join( "", map { $_->_dump( $option) } @children);
      }
    else
      { # not an element: the first key found tells which kind of node this is
        if( exists $elt->{'pcdata'})
          { $dump .= "$indent|-PCDATA: '" . _short_text( $elt->{pcdata}, $short_text) . "'\n"; }
        elsif( exists $elt->{'ent'})
          { $dump .= "$indent|-ENTITY: '" . _short_text( $elt->{ent}, $short_text) . "'\n"; }
        elsif( exists $elt->{'cdata'})
          { $dump .= "$indent|-CDATA: '" . _short_text( $elt->{cdata}, $short_text) . "'\n"; }
        elsif( exists $elt->{'comment'})
          { $dump .= "$indent|-COMMENT: '" . _short_text( $elt->comment_string, $short_text) . "'\n"; }
        elsif( exists $elt->{'target'})
          { $dump .= "$indent|-PI: '" . $elt->{target} . "' - '" . _short_text( $elt->{data}, $short_text) . "'\n"; }

        if( $show_extra) { $dump .= $elt->_dump_extra_data( $indent, $indent_sp, $short_text); }
      }

    return $dump;
  }
9793
|
|
|
|
|
|
|
|
9794
|
|
|
|
|
|
|
# Return the lines of a dump describing the extra data attached to $elt:
# the data returned by extra_data ("cpi before"), the entries of
# extra_data_in_pcdata ("cpi offset N") and extra_data_before_end_tag
# ("cpi end"). Returns an empty string when there is none.
#   $indent     : prefix for each line
#   $indent_sp  : string substituted for the newlines found in a line
#   $short_text : length passed to _short_text
sub _dump_extra_data
  { my( $elt, $indent, $indent_sp, $short_text)= @_;

    # all the lines are built the same way: only the label and the text differ
    my $render= sub
      { my( $label, @text_args)= @_;
        my $line= $indent . $label . _short_text( @text_args) . "'";
        $line=~ s{\n}{$indent_sp}g;
        return $line . "\n";
      };

    my $dump='';

    if( $elt->extra_data)
      { $dump .= $render->( "|-- (cpi before) '", $elt->extra_data, $short_text); }

    if( $elt->{extra_data_in_pcdata})
      { foreach my $data ( @{$elt->{extra_data_in_pcdata}})
          { $dump .= $render->( "|-- (cpi offset $data->{offset}) '", $data->{text}, $short_text); }
      }

    if( $elt->{extra_data_before_end_tag})
      { $dump .= $render->( "|-- (cpi end) '", $elt->{extra_data_before_end_tag}, $short_text); }

    return $dump;
  }
9816
|
|
|
|
|
|
|
|
9817
|
|
|
|
|
|
|
|
9818
|
|
|
|
|
|
|
# Shorten $string to at most $length characters for display, by replacing
# its middle part with ' ... '.
#   $string : the text to shorten
#   $length : maximum length of the result; a false (undef, 0) or negative
#             value means "no limit"
# Returns $string unchanged when it is undef, when there is no limit or when
# it already fits in $length characters. When $length is too small to hold
# the ' ... ' marker, the string is simply truncated to $length characters.
sub _short_text
  { my( $string, $length)= @_;

    if( !defined( $string) || !$length || ($length < 0) || (length( $string) <= $length) )
      { return $string; }

    my $marker= ' ... ';
    if( $length <= length( $marker)) { return substr( $string, 0, $length); }

    # split the room left by the marker between the start and the end of the
    # string; sizes are based on the requested length, not on the string's own
    my $kept = $length - length( $marker);
    my $head = int( $kept / 2);
    my $tail = $kept - $head;      # always >= 1 here, so substr( $string, -$tail) never returns the whole string

    return substr( $string, 0, $head) . $marker . substr( $string, -$tail);
  }
9825
|
|
|
|
|
|
|
|
9826
|
|
|
|
|
|
|
|
9827
|
|
|
|
|
|
|
# Join the true values among the arguments with ' && ' (see _join_defined).
sub _and
  { my @conditions= @_;
    return _join_defined( ' && ', @conditions);
  }
9828
|
|
|
|
|
|
|
# Join a list of values, skipping the false ones.
# The first argument is the separator, the remaining ones the values; note
# that values are filtered on truth, so '' and 0 are dropped as well as undef.
sub _join_defined
  { my( $separator, @values)= @_;
    my @kept= grep { $_ } @values;
    return join( $separator, @kept);
  }
9829
|
|
|
|
|
|
|
|
9830
|
|
|
|
|
|
|
1; |
9831
|
|
|
|
|
|
|
__END__ |