line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WebSource::Extract::xslt; |
2
|
1
|
|
|
1
|
|
4157
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
47
|
|
3
|
1
|
|
|
1
|
|
1300
|
use XML::LibXML; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
use XML::LibXSLT; |
5
|
|
|
|
|
|
|
use Carp; |
6
|
|
|
|
|
|
|
use Date::Language; |
7
|
|
|
|
|
|
|
use Date::Format; |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our @ISA = ('WebSource::Module'); |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
WebSource::Extract::xslt - Apply an XSL Stylesheet to the input |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 DESCRIPTION |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
This flavor of the B<Extract> operator applies an XSL stylesheet to the input |
18
|
|
|
|
|
|
|
and returns the transformation result. |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Such an extraction operator should be described as follows : |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
... |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
where the xsl prefix should be associated to the URI http://www.w3.org/1999/XSL/Transform |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 SYNOPSIS |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 METHODS |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=cut |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# new(%params)
#
# Constructor. The named parameters become the object's hash; the parent
# class's _init_ is then run on it.
#
# When a 'wsdnode' parameter is present (presumably the XML::LibXML element
# describing this operator - it is used through setNamespace/findnodes), the
# constructor:
#   * records, for every xsl:stylesheet/xsl:param child, which envelope key
#     feeds that parameter (the 'mapped-from' attribute in the websource
#     namespace, falling back to the parameter's own name) in {xslparams};
#   * copies the first xsl:stylesheet child into a document of its own,
#     registers the extension functions of this package and stores the
#     compiled stylesheet in {xsl};
#   * stores the node's 'format' attribute in {format}.
#
# Croaks when the node holds no xsl:stylesheet, or when no {xsl} entry is
# available once construction is finished.
sub new {
  my ($class, %params) = @_;
  my $self = bless \%params, $class;
  $self->SUPER::_init_;

  my $wsd = $self->{wsdnode};
  if ($wsd) {
    # Bind the xsl prefix on the node so the XPath expressions below resolve.
    $wsd->setNamespace("http://www.w3.org/1999/XSL/Transform","xsl",0);

    # Stylesheet parameter name => envelope key supplying its value.
    my %env_key_for;
    for my $param_node ($wsd->findnodes('xsl:stylesheet/xsl:param')) {
      my $xsl_name = $param_node->getAttribute('name');
      my $env_key  = $param_node->getAttributeNS("http://wwwsource.free.fr/ns/websource","mapped-from")
                     || $xsl_name;
      $self->log(2,"Found parameter : $xsl_name (mapped from $env_key)");
      $env_key_for{$xsl_name} = $env_key;
    }
    $self->{xslparams} = \%env_key_for;

    my ($stylesheet_node) = $wsd->findnodes('xsl:stylesheet');
    croak "No stylesheet found\n" unless defined $stylesheet_node;

    # The stylesheet is compiled from a standalone copy that inherits the
    # XML version and encoding of the description document.
    my $owner     = $wsd->ownerDocument;
    my $sheet_doc = XML::LibXML::Document->new($owner->version,$owner->encoding);
    $sheet_doc->setDocumentElement($stylesheet_node->cloneNode(1));

    my $engine     = XML::LibXSLT->new();
    my $ext_ns     = 'http://wwwsource.free.fr/ns/websource/xslt-ext';
    my @extensions = (
      [ 'reformat-date',  'WebSource::Extract::xslt::reformatDate'  ],
      [ 'string-replace', 'WebSource::Extract::xslt::stringReplace' ],
      [ 'html-lint',      'WebSource::Extract::xslt::htmlLint'      ],
    );
    for my $extension (@extensions) {
      $engine->register_function($ext_ns, @{$extension});
    }

    $self->{xsl}    = $engine->parse_stylesheet($sheet_doc);
    $self->{format} = $wsd->getAttribute("format");
  }

  croak "No XSLT stylesheet given" unless $self->{xsl};
  return $self;
}
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# handle($env)
#
# Applies the compiled stylesheet ({xsl}) to the data carried by the
# envelope $env and returns a new WebSource::Envelope of type
# "object/dom-node" holding the result.
#
# Data that is not an XML::LibXML::Document is first deep-copied into a fresh
# "1.0"/"UTF-8" document. Stylesheet parameters are read from the envelope
# hash through the {xslparams} mapping and quoted with
# XML::LibXSLT::xpath_to_string. Unless {format} equals "document", only the
# document element of the transformation result is returned.
sub handle {
  my ($self, $env) = @_;

  $self->log(5,"Got document ",$env->{baseuri});

  my $input = $env->data;
  unless ($input->isa("XML::LibXML::Document")) {
    $self->log(5,"Creating document from DOM node");
    my $wrapper = XML::LibXML::Document->new("1.0","UTF-8");
    $wrapper->setDocumentElement($input->cloneNode(1));
    $input = $wrapper;
  }
  $self->log(6,"We have : \n".$input->toString(1,'utf-8')."\n");
  $self->log(6,".. encoding: ".$input->ownerDocument->actualEncoding()."\n");

  # Collect one value per declared stylesheet parameter.
  my $env_key_for = $self->{xslparams};
  my %xsl_args;
  for my $xsl_name (keys %{$env_key_for}) {
    my $env_key = $env_key_for->{$xsl_name};
    my $found   = $env->{$env_key};
    $self->log(2,"Found value for $xsl_name (using $env_key) : ",$found);
    $xsl_args{$xsl_name} = $found;
  }

  my @quoted_args = XML::LibXSLT::xpath_to_string(%xsl_args);
  my $output      = $self->{xsl}->transform($input,@quoted_args);
  unless ($self->{format} eq "document") {
    $output = $output->documentElement;
  }
  $self->log(6,"Produced :\n",$output->toString(1,'UTF-8'));

  return WebSource::Envelope->new(type => "object/dom-node", data => $output);
}
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head1 XSLT EXTENSIONS |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
The module implements extra practical XSLT extension functions. |
106
|
|
|
|
|
|
|
These can be used by declaring a prefix for these extensions whose namespace |
107
|
|
|
|
|
|
|
is C<http://wwwsource.free.fr/ns/websource/xslt-ext> and declaring that this prefix is |
108
|
|
|
|
|
|
|
an extension prefix. For example: |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
xmlns:wsx="http://wwwsource.free.fr/ns/websource/xslt-ext" |
112
|
|
|
|
|
|
|
extension-element-prefixes="wsx" |
113
|
|
|
|
|
|
|
> |
114
|
|
|
|
|
|
|
... |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=cut |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head2 reformat-date |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Extension function to reformat dates |
123
|
|
|
|
|
|
|
{http://wwwsource.free.fr/ns/websource/xslt-ext}reformat-date( |
124
|
|
|
|
|
|
|
date, targetTemplate, sourceLanguage? |
125
|
|
|
|
|
|
|
) |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=cut |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# reformatDate($srcdate, $template, @langs)
#
# XSLT extension function: parses $srcdate and re-emits it using the
# Date::Format template $template.
#
#   $srcdate  - the date string to parse
#   $template - a time2str() format template
#   @langs    - optional Date::Language language names, tried in order until
#               one of them parses the date; when none is given (the
#               language argument is documented as optional) English is used
#
# Returns the reformatted date, or the empty string when the date could not
# be parsed with any of the candidate languages.
sub reformatDate {
  my ($srcdate, $template, @langs) = @_;

  # Ignore missing/empty language arguments and fall back to English so that
  # the two-argument form actually parses something.
  my @candidates = grep { defined($_) && length($_) } @langs;
  @candidates = ('English') unless @candidates;

  my $epoch;
  for my $language (@candidates) {
    # A language that cannot be loaded (or a parser that throws) must not
    # abort the remaining candidates, so each attempt is isolated.
    local $@;
    $epoch = eval { Date::Language->new($language)->str2time($srcdate) };
    last if defined $epoch;
  }

  # Test definedness, not truth: epoch 0 is a valid parse result.
  return defined $epoch ? time2str($template, $epoch) : "";
}
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=head2 string-replace |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
Extension function to do a string replacement using a perl regular expression |
148
|
|
|
|
|
|
|
{http://wwwsource.free.fr/ns/websource/xslt-ext}string-replace(regexp, replacement, data) |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=cut |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# stringReplace($regexp, $replace, $data)
#
# XSLT extension function: replaces every match of the Perl regular
# expression $regexp in $data with the literal text $replace and returns the
# resulting string.
#
#   $regexp  - pattern source, interpolated into the match as a regex
#   $replace - replacement text (inserted as-is; capture variables written
#              in it are not expanded)
#   $data    - the string to operate on
#
# Returns the modified copy of $data; an undefined $data yields the empty
# string so the XSLT engine always receives a string.
# Dies if $regexp is not a valid regular expression.
sub stringReplace {
  my ($regexp, $replace, $data) = @_;

  return '' unless defined $data;
  $regexp  = '' unless defined $regexp;
  $replace = '' unless defined $replace;

  # Wrap the pattern in a non-capturing group: a pattern that interpolates
  # to the empty string would otherwise make s/// silently reuse the last
  # successfully matched regex instead of matching the empty string.
  $data =~ s/(?:$regexp)/$replace/g;
  return $data;
}
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=head2 html-lint |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
Extension function html-lint which parses an encoded XML string and returns a cleaned-up version |
161
|
|
|
|
|
|
|
{http://wwwsource.free.fr/ns/websource/xslt-ext}html-lint |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=cut |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# htmlLint($string)
#
# XSLT extension function: parses the markup fragment held in $string with a
# recovering XML::LibXML parser and returns the re-serialised nodes, joined
# with newlines.
#
#   $string - the (possibly malformed) markup fragment to clean up
#
# Returns the serialised fragment, or the empty string when $string is
# undefined/empty or nothing could be recovered from it.
#
# The fragment is wrapped in a synthetic root element before parsing and
# only that element's children are serialised, so plain text and fragments
# with several top-level nodes are accepted.
# NOTE(review): the wrapper tags appear to have been lost from the code this
# was reviewed from (it concatenated two empty strings around $string while
# still returning documentElement->childNodes) - confirm the intended
# wrapper element.
#
# The former unconditional debug dump to /tmp/ws-xslt.log (bareword handle,
# unchecked open, unbounded growth in a shared directory) has been removed.
sub htmlLint {
  my ($string) = @_;
  return '' unless defined($string) && length($string);

  my $wrapped = "<root>" . $string . "</root>";

  # recover => 2: keep parsing through well-formedness errors, silently.
  my $parser = XML::LibXML->new( recover => 2 );
  my $doc    = $parser->load_xml( string => $wrapped );

  my $root = defined $doc ? $doc->documentElement : undef;
  return '' unless defined $root;

  return join("\n", map { $_->toString(1,'utf-8') } $root->childNodes());
}
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=head1 SEE ALSO |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
WebSource |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=cut |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
1; |