line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# Package header: revision bookkeeping and the distribution version.
package WebSource;

# RCS keyword string; the bare revision number is pulled out of it below.
our $REVSTR = '$Revision: 1.13 $';
our ($REVISION) = $REVSTR =~ m/Revision: ([^ ]+)/;

our $VERSION = '2.4.5';
6
|
|
|
|
|
|
|
|
7
|
2
|
|
|
2
|
|
46268
|
use strict; |
|
2
|
|
|
|
|
22
|
|
|
2
|
|
|
|
|
72
|
|
8
|
2
|
|
|
2
|
|
12
|
use Carp; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
208
|
|
9
|
|
|
|
|
|
|
|
10
|
2
|
|
|
2
|
|
13130
|
use LWP::UserAgent; |
|
2
|
|
|
|
|
197491
|
|
|
2
|
|
|
|
|
77
|
|
11
|
2
|
|
|
2
|
|
5121
|
use HTTP::Cookies; |
|
2
|
|
|
|
|
18176
|
|
|
2
|
|
|
|
|
68
|
|
12
|
2
|
|
|
2
|
|
660
|
use WebSource::Parser; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
use WebSource::Envelope; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
use File::Spec; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# XML namespace that description elements are matched against.
our $NameSpace = 'http://wwwsource.free.fr/ns/websource';

# Maps the local name of a description element to the Perl class that
# implements that kind of subtask.
our %ModClass = (
    fetch      => 'WebSource::Fetcher',
    extract    => 'WebSource::Extract',
    filter     => 'WebSource::Filter',
    query      => 'WebSource::Query',
    format     => 'WebSource::Format',
    xmlparser  => 'WebSource::XMLParser',
    cache      => 'WebSource::Cache',
    soap       => 'WebSource::Soap',
    database   => 'WebSource::DB',
    map        => 'WebSource::Map',
    dummy      => 'WebSource::Module',
    file       => 'WebSource::File',
    xmlsender  => 'WebSource::XMLSender',
    'meta-tag' => 'WebSource::MetaTag',
);
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 NAME |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
WebSource - a general data wrapping tool particularly well suited for online data |
39
|
|
|
|
|
|
|
(but what data is not online in some way today ;) )
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head1 DESCRIPTION |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
WebSource gives a general and normalized framework way to access |
44
|
|
|
|
|
|
|
data made available via the web. An access to subparts of the |
45
|
|
|
|
|
|
|
Web is made by defining a task. This task is built by composing |
46
|
|
|
|
|
|
|
query building, extraction, fetching and filtering subtasks. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 SYNOPSIS |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
$source = WebSource->new(wsd => $description); |
51
|
|
|
|
|
|
|
@results = $source->query($query); |
52
|
|
|
|
|
|
|
or |
53
|
|
|
|
|
|
|
$result = $source->set_query($query); |
54
|
|
|
|
|
|
|
while($result = $source->next_result()) { |
55
|
|
|
|
|
|
|
... |
56
|
|
|
|
|
|
|
} |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=head1 ABSTRACT |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
WebSource originally was a generic wrapper around a Web Source. |
61
|
|
|
|
|
|
|
Given an XML description of a source it allows to query the source |
62
|
|
|
|
|
|
|
and retrieve its results. The format of the query and the result
63
|
|
|
|
|
|
|
remain source dependent, however.
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
It is now configurable enough to allow complex tasks on the web, such as
66
|
|
|
|
|
|
|
fetching, extracting and filtering data on the Web. Each complex task is
67
|
|
|
|
|
|
|
described by an XML task description file (WebSource description). This task |
68
|
|
|
|
|
|
|
is decomposed into simple subtasks of different flavors. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
Existing subtask flavors are : |
71
|
|
|
|
|
|
|
- B |
72
|
|
|
|
|
|
|
I an XML::LibXML::Document |
73
|
|
|
|
|
|
|
I |
74
|
|
|
|
|
|
|
Applies an XPath expression to the document and returns the resulting set of nodes
75
|
|
|
|
|
|
|
- B |
76
|
|
|
|
|
|
|
I a URL (or XML::LibXML::Node containing a url) |
77
|
|
|
|
|
|
|
I |
78
|
|
|
|
|
|
|
- B |
79
|
|
|
|
|
|
|
I an XML::Document |
80
|
|
|
|
|
|
|
I |
81
|
|
|
|
|
|
|
- B |
82
|
|
|
|
|
|
|
I anything |
83
|
|
|
|
|
|
|
I |
84
|
|
|
|
|
|
|
- B |
85
|
|
|
|
|
|
|
This type of subtask uses an external perl module as a task. |
86
|
|
|
|
|
|
|
This allows to define highly configurable tasks. |
87
|
|
|
|
|
|
|
I depends on external module |
88
|
|
|
|
|
|
|
I |
89
|
|
|
|
|
|
|
- B |
90
|
|
|
|
|
|
|
I anything |
91
|
|
|
|
|
|
|
I |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=head1 METHODS |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=over 2 |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=item B<< $source = WebSource->new(wsd => $wsd); >> |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
Create a new WebSource object working with the given WebSource description.
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
The following named parameters can be given:
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=over 2 |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=item C<wsd>
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
Use a generic engine with the given source description file |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=item C<max_results>
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
Do not output more than max_results |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=back |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=cut |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# Constructor. Takes a flat list of named parameters:
#   wsd            - location of the WebSource description (required)
#   useragent      - object used for HTTP access; an LWP::UserAgent is
#                    built when none is supplied
#   cookies        - cookie jar handed to the user agent (HTTP::Cookies
#                    by default)
#   maxreqinterval - defaults to 3
#   maxtries       - defaults to 3
#   parser         - XML parser (XML::LibXML by default); XInclude
#                    expansion is always switched on
# Every other key is stored on the object untouched, except result_count,
# which is always reset to 0. Croaks when wsd is missing or false.
# The description is loaded through _init before the object is returned.
sub new {
    my ($class, %args) = @_;
    croak("No WebSource description given") unless $args{wsd};

    $args{useragent} ||= LWP::UserAgent->new(
        agent                 => "WebSource/1.0",
        keep_alive            => 1,
        timeout               => 30,
        requests_redirectable => ['GET', 'HEAD', 'POST'],
        env_proxy             => 1,
    );
    $args{cookies} ||= HTTP::Cookies->new;
    $args{useragent}->cookie_jar($args{cookies});

    $args{$_} ||= 3 for qw(maxreqinterval maxtries);

    $args{parser} ||= XML::LibXML->new;
    $args{parser}->expand_xinclude(1);

    $args{result_count} = 0;

    my $self = bless \%args, $class;
    $self->_init;
    return $self;
}
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# Loads the description named by $self->{wsd}, stores the parsed document
# in $self->{wsddoc} and then resolves its imports via apply_imports.
sub _init {
    my ($self) = @_;
    $self->{wsddoc} = $self->load_wsd($self->{wsd});
    return $self->apply_imports;
}
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# Loads and parses a WebSource description.
#
#   $wsd  - path or URL of the description
#   $base - optional location that $wsd is relative to. When $base is an
#           existing file, its directory is used as the base directory.
#
# If the resolved location is an existing file it is parsed from disk;
# otherwise it is fetched through $self->{useragent} and parsed from the
# response body. Returns the parsed document. Croaks when the download
# fails or the parser yields nothing.
sub load_wsd {
    my ($self, $wsd, $base) = @_;
    my $parser = $self->{parser};

    if ($base) {
        if (-f $base) {
            # Keep only volume + directory of the base file. The original
            # called splitpath() without an argument, which always produced
            # an empty base and resolved $wsd against the current directory.
            my ($volume, $directory) = File::Spec->splitpath($base);
            $base = File::Spec->catpath($volume, $directory, '');
        }
        # An empty base (base file sits in the current directory) falls
        # back to resolving against the current directory.
        # NOTE(review): a URL in $wsd or $base is run through File::Spec
        # here as well, which looks unintended - confirm.
        $wsd = $base
            ? File::Spec->rel2abs($wsd, $base)
            : File::Spec->rel2abs($wsd);
    }

    $self->log(2,"Loading " .$wsd);

    my $doc;
    if (-f $wsd) {
        # The base URI is set only for the duration of the parse.
        $parser->base_uri("file://" . $wsd);
        $doc = $parser->parse_file($wsd);
        $parser->base_uri("");
    } else {
        my $resp = $self->{useragent}->get($wsd);
        $resp->is_success or croak "Couldn't download description $wsd";
        $parser->base_uri($wsd);
        $doc = $parser->parse_string($resp->content);
        $parser->base_uri("");
    }

    $doc or croak "Couldn't parse document $wsd";
    return $doc;
}
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
# Builds the chain of subtask modules described by $self->{wsddoc}.
#
# Steps:
#   1. apply_options is run on the document.
#   2. Every element child of the root that is in the WebSource namespace
#      is inspected: 'options' and 'include' are skipped, 'init' triggers
#      a GET on its "browse" attribute to collect cookies, and any type
#      listed in %ModClass (or 'external') is instantiated as a module.
#   3. "forward-to" and "feedback-to" attributes are used to wire the
#      modules together.
#   4. The first and last created modules are stored in $self->{first}
#      and $self->{last}.
#
# Croaks when a module class has an invalid name or cannot be loaded,
# when no module was created, or when a feedback link is invalid.
sub init {
    my $self = shift;

    $self->apply_options;

    my $root = $self->{wsddoc}->documentElement;
    my ($first, $last);
    my (%modules, %forwards, %feedbacks);

    #
    # Fetch all module descriptions and build the
    # corresponding module
    #
    foreach my $mnode ($root->childNodes) {
        $mnode->nodeType == 1 or next;
        $mnode->namespaceURI eq $NameSpace or next;
        my $type = $mnode->localname;
        my $name = $mnode->getAttribute("name");

        if ($type eq 'options' || $type eq 'include') {
            # do nothing: these are handled separately
            next;
        }

        if ($type eq 'init') {
            my $uri = $mnode->getAttribute("browse");
            my $resp = $self->{useragent}->get($uri);
            $self->{cookies}->extract_cookies($resp);
            next;
        }

        unless ($ModClass{$type} || $type eq 'external') {
            $self->log(1,"Module named '$name' is of an unknown type '$type'");
            next;
        }

        $self->log(5,"Creating subtask of type ",$type);
        my $class;
        if ($type eq 'external') {
            $class = $mnode->getAttribute("module");
            $class or croak("No module declared for external");
        } else {
            my $subtype = $mnode->getAttribute("type");
            $class = $subtype
                ? $ModClass{$type} . "::" . $subtype
                : $ModClass{$type};
        }
        $self->log(5,"Using perl module ",$class);

        # The class name comes from the description file and is passed to
        # a string eval below, so it must look like a plain package name.
        $class =~ /\A[A-Za-z_]\w*(?:::\w+)*\z/
            or croak("Invalid module name '$class'");

        eval "require $class";
        croak("Couldn't load '$class' : $@") if $@;

        # Each module receives a copy of this object's fields plus its
        # own node, name and abort-if-empty flag.
        my %params = %$self;
        $params{abortIfEmpty} = $mnode->hasAttribute("abort-if-empty")
            ? ($mnode->getAttribute("abort-if-empty") eq "yes")
            : 0;
        $modules{$name} = $class->new( %params,
                                       wsdnode => $mnode, name => $name);

        if ($mnode->hasAttribute("forward-to")) {
            $forwards{$name} = $mnode->getAttribute("forward-to");
        }
        if ($mnode->hasAttribute("feedback-to")) {
            $feedbacks{$name} = $mnode->getAttribute("feedback-to");
        }
        $first or $first = $name;
        $last = $name;
    }

    if (!$first) {
        croak("No modules defined in description file");
    }

    #
    # Connect the modules to each other
    #
    foreach my $key (keys(%forwards)) {
        foreach my $other (split(/ /,$forwards{$key})) {
            # Targets that do not name a known module are silently ignored.
            $modules{$other} or next;
            $self->log(5,"Setting $key as producer of $other");
            $modules{$key} or croak("No module named $key defined");
            $modules{$other}->producers($modules{$key});
        }
    }

    #
    # Configure feed back sending
    #
    foreach my $key (keys(%feedbacks)) {
        foreach my $other (split(/ /,$feedbacks{$key})) {
            $modules{$other} or next;
            $self->log(5,"Configuring $key to send feedback to $other");
            $modules{$key} or croak("No module named $key defined");
            $modules{$key}->isa('WebSource::Filter') or
                croak($modules{$key}->{name} . " is not a filter");
            $modules{$other}->can("feedback") or
                croak($modules{$other}->{name} . " doesn't have a feedback method");
            $modules{$key}->listeners($modules{$other});
        }
    }

    #
    # Setup first and last
    #
    $self->{first} = $modules{$first};
    $self->{last} = $modules{$last};
    $self->log(5,"Initial module is $first");
    $self->log(5,"Final module is $last");
}
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
# Forwards a message to $self->{logger}, if one is configured, prefixing
# it with "[WebSource] ". Without a logger the call does nothing.
#   $level - passed through unchanged as the logger's first argument
#   rest   - message parts
sub log {
    my ($self, $level) = splice(@_, 0, 2);
    my $logger = $self->{logger};
    return $logger unless $logger;
    return $logger->log($level, "[WebSource] ", @_);
}
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=item B<< $source->push($item); >> |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
Pass the initial data to the first subtask |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
=cut |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
# Re-runs init, wraps every given item in a WebSource::Envelope of type
# "text/string" and hands the envelopes to the first module's push method.
sub push {
    my ($self, @items) = @_;
    $self->init;

    my @envelopes;
    foreach my $item (@items) {
        CORE::push(@envelopes,
            WebSource::Envelope->new(type => "text/string", data => $item));
    }
    return $self->{first}->push(@envelopes);
}
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=item B<< $source->query($query); >> |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
Build a query envelope from the given named parameters and push it to the first subtask
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=cut |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# Re-runs init, builds a WebSource::Envelope from the given named
# parameters and pushes it to the first module. The envelope type is
# "text/string" when a true "data" value is supplied and "empty" otherwise;
# any "type" passed by the caller is overwritten.
sub query {
    my ($self, @pairs) = @_;
    $self->init;

    my %fields = @pairs;
    $fields{type} = $fields{data} ? "text/string" : "empty";

    my $envelope = WebSource::Envelope->new(%fields);
    return $self->{first}->push($envelope);
}
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
=item B<< $source->set_max_results($count); >> |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
Set the maximum number of results to output to $count |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=cut |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
# Stores $count in $self->{max_results}, the limit consulted by
# next_result.
sub set_max_results {
    my $self  = shift;
    my $count = shift;
    return $self->{max_results} = $count;
}
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=item B<< $source->next_result(); >> |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
Returns the following result for the task |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
=cut |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
# Returns the next result produced by the last module, or undef once
# $self->{max_results} results have been returned (a false max_results
# means "no limit"). Every true result increments $self->{result_count}.
sub next_result {
    my $self = shift;

    # The original compared against $self->{cnt_results}, a key nothing
    # ever sets, so the limit was never enforced. The counter kept by
    # this class is result_count.
    my $limit = $self->{max_results};
    if ($limit && $limit <= $self->{result_count}) {
        # Explicit undef keeps the scalar-style contract callers rely on.
        return undef;
    }

    my $res = $self->{last}->produce;
    $res and ($self->{result_count} += 1);
    return $res;
}
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
=back |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=item B<< $source->parameters; >> |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
Returns a hash of the initial task's parameters
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=cut |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
# Delegates to the parameters method of the first module
# ($self->{first}) and returns whatever it returns.
sub parameters {
    my ($self) = @_;
    my $head = $self->{first};
    return $head->parameters;
}
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=item B<< $source->option_spec; >> |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
Returns the spec of the options translated for Getopt::Mixed |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=cut |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
# Builds one option-spec string per child of /ws:source/ws:options.
#
# The option name is the element name, or the "name" attribute for the
# deprecated <option> element form (a warning is emitted for it). The
# "type" attribute adds a suffix (string "=s", integer "=i", float "=f")
# and a "shortcut" attribute appends " <shortcut>><name>". Elements that
# yield no name are logged and skipped. Returns the list of specs.
sub option_spec {
    my ($self) = @_;
    my $xpc = XML::LibXML::XPathContext->new($self->{wsddoc});
    $xpc->registerNs('ws',$NameSpace);

    my %suffix_for = (
        string  => "=s",
        integer => "=i",
        float   => "=f",
    );

    my @spec;
    foreach my $onode ($xpc->findnodes('/ws:source/ws:options/*')) {
        my $name;
        if ($onode->nodeName() eq "option") {
            warn("Using option element under ws:options is deprecated. Directly use the options name as element name.");
            $name = $onode->getAttribute("name");
        } else {
            $name = $onode->nodeName();
        }
        my $shortcut = $onode->getAttribute("shortcut");
        my $type = $onode->getAttribute("type");

        unless ($name) {
            $self->log(1,"unamed option detected.");
            next;
        }

        my $suffix = defined $type ? $suffix_for{$type} : undef;
        my $str = defined $suffix ? $name . $suffix : $name;
        $str = join("", $str, " ", $shortcut, ">", $name) if $shortcut;

        CORE::push(@spec, $str);
        $self->log(3,"generated option spec '$str'\n");
    }
    return @spec;
}
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=item B<< $source->set_option($opt,$val) >> |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Sets source specific option $opt to value $val |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=cut |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
# Sets option $opt to $val inside the first ws:options element of the
# description document.
#
# If a child element named $opt already exists, its first child's data is
# replaced (or text is appended when it has no children); otherwise a new
# element named $opt holding $val is appended. Croaks when the document
# has no ws:options element.
sub set_option {
    my ($self, $opt, $val) = @_;
    $self->log(2,"Setting option $opt to value $val");

    my $xpc = XML::LibXML::XPathContext->new($self->{wsddoc});
    $xpc->registerNs('ws',$NameSpace);

    my @containers = $xpc->findnodes("//ws:options");
    croak("Setting option while ws:options node is absent") unless @containers;
    my $options = $containers[0];

    my @existing = $options->getChildrenByTagName($opt);
    unless (@existing) {
        my $fresh = $self->{wsddoc}->createElement($opt);
        $fresh->appendText($val);
        return $options->appendChild($fresh);
    }

    my $target = $existing[0];
    return $target->firstChild()->setData($val) if $target->hasChildNodes();
    return $target->appendText($val);
}
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=item B<< $source->apply_imports >> |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
Handles nodes of type C<< <ws:import href="..."/> >> by inserting nodes from the wsd file referenced by href
(imported document) into the current wsd document (target document).
452
|
|
|
|
|
|
|
A node is inserted from the imported document into the target document only if a node with the same |
453
|
|
|
|
|
|
|
name does not exist in the target document. |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=cut |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
# Resolves every ws:import element of $self->{wsddoc}.
#
# For each import, the document named by its "href" attribute is loaded
# (relative to $self->{wsd}) and its top-level elements are inserted in
# front of the import element:
#   - an 'options' element only when the target has no ws:options at all;
#   - any other element only when the target has no element carrying the
#     same "name" attribute.
# The import element itself is then removed.
sub apply_imports {
    my $self = shift;
    my $xpc = XML::LibXML::XPathContext->new($self->{wsddoc});
    $xpc->registerNs('ws',$NameSpace);

    foreach my $import ($xpc->findnodes("//ws:import")) {
        my $parent = $import->parentNode;
        my $href = $import->getAttribute("href");
        $self->log(2,"Processing import of ".$href);
        my $imported = $self->load_wsd($href,$self->{wsd});

        foreach my $el ($imported->documentElement->childNodes) {
            next unless $el->nodeType == 1;

            # Local definitions win over imported ones.
            my $already_defined = $el->localName eq 'options'
                ? $xpc->exists('//ws:options')
                : $xpc->exists('//*[@name="' . $el->getAttribute("name") . '"]');
            next if $already_defined;

            $parent->insertBefore($el,$import);
        }
        $parent->removeChild($import);
    }
}
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=item B<< $source->apply_options >> |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
Handles nodes of type C<< <ws:set-attribute name="aname" value-of="oname"/> >> by adding
an attribute named aname, with the value of the option named oname,
to the parent node. The ws:set-attribute node is then removed.
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
=cut |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
# Resolves every ws:set-attribute element of $self->{wsddoc}.
#
# The element's "value-of" expression is evaluated against the first
# ws:options element; when it yields a non-empty value, that value is set
# on the parent element under the attribute name given by "name". The
# ws:set-attribute element is removed afterwards. An element with an
# empty or missing "value-of" is only logged and left in place.
sub apply_options {
    my ($self) = @_;
    my $doc = $self->{wsddoc};
    my $xpc = XML::LibXML::XPathContext->new($doc);
    $xpc->registerNs('ws',$NameSpace);

    # May be undef: a description is not required to declare options.
    my ($options) = $xpc->findnodes("//ws:options");

    foreach my $sa ($xpc->findnodes("//ws:set-attribute")) {
        my $p = $sa->parentNode;
        my $aname = $sa->getAttribute("name");
        my $oexpr = $sa->getAttribute("value-of");

        if (!defined $oexpr || $oexpr eq "") {
            $self->log(1,"Warning : Empty value-of attribute on ws:set-attribute");
            next;
        }

        # Without a ws:options element there is nothing to evaluate
        # against; treat it as "no value" instead of dying on undef.
        my $oval = $options ? $options->findvalue($oexpr) : "";

        # Test for emptiness rather than truth so that "0" is a valid value.
        if (defined $oval && $oval ne "") {
            $p->setAttribute($aname,$oval);
        } else {
            $self->log(1,"Warning : Expr '$oexpr' has no value");
        }
        $p->removeChild($sa);
    }
    $self->log(6,"After applying options...\n", $doc->toString(1));
}
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
=head1 SEE ALSO |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
ws-query, WebSource::Extract, WebSource::Fetch, WebSource::Filter, etc. |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=cut |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
1; |