File Coverage

blib/lib/Net/OAI/Record/NamespaceFilter.pm
Criterion Covered Total %
statement 149 175 85.1
branch 62 90 68.8
condition 5 11 45.4
subroutine 14 19 73.6
pod 12 14 85.7
total 242 309 78.3


line stmt bran cond sub pod time code
1             package Net::OAI::Record::NamespaceFilter;
2              
3 2     2   2486 use strict;
  2         4  
  2         51  
4 2     2   10 use warnings;
  2         4  
  2         55  
5 2     2   9 use base qw( XML::SAX::Base );
  2         3  
  2         141  
6 2     2   9 use Storable;
  2         3  
  2         109  
7 2     2   10 use Carp qw( carp croak );
  2         4  
  2         3954  
8             our $VERSION = "1.16_12";
9              
10             =head1 NAME
11              
12             Net::OAI::Record::NamespaceFilter - general filter class based on namespace URIs
13              
14             =head1 SYNOPSIS
15              
16             $plug = Net::OAI::Record::NamespaceFilter->new(); # Noop
17              
18             $multihandler = Net::OAI::Record::NamespaceFilter->new(
19             'http://www.openarchives.org/OAI/2.0/oai_dc/' => 'Net::OAI::Record::OAI_DC',
20             'http://www.openarchives.org/OAI/2.0/provenance' => 'MySAX::ProvenanceHandler'
21             );
22              
23             $saxfilter = new SOME_SAX_Filter;
24             ...
25             $filter = Net::OAI::Record::NamespaceFilter->new(
26             '*' => $saxfilter, # '*' for any namespace
27             );
28              
29             $filter = Net::OAI::Record::NamespaceFilter->new(
30             '*' => sub { my $x = "";
31             return XML::SAX::Writer->new(Output => \$x);
32             };
33             );
34              
35              
36              
37             =head1 DESCRIPTION
38              
39             It will forward any element belonging to a namespace from this list
40             to the associated SAX filter and all of the element's children
41             (regardless of their respective namespace) to the same one. It can be used either as a
42             C or C.
43              
44             This SAX filter takes a hashref C as argument, with namespace
45             URIs for keys ('*' for "any namespace") and the values are either
46              
47             =over 4
48              
49             =item undef
50              
51             Matching elements and their subelements are suppressed.
52              
53             If the list of namespaces ist empty or C is connected to
54             the filter, it effectively acts as a plug to Net::OAI::Harvester. This
55             might come handy if you are planning to get to the raw result by other
56             means, e.g. by tapping the user agent or accessing the result's xml()
57             method:
58              
59             $plug = Net::OAI::Record::NamespaceFilter->new();
60             $harvester = Net::OAI::Harvester->new( [
61             baseURL => ...,
62             ] );
63              
64             $tapped_by_ua = "";
65             open ($TAP, ">", \$tapped_by_ua);
66             $harvester->userAgent()->add_handler(response_data => sub {
67             my($response, $ua, $h, $data) = @_;
68             print $TAP $data;
69             });
70              
71             $list = $harvester->listRecords(
72             metadataPrefix => 'a_strange_one',
73             recordHandler => $plug,
74             );
75              
76             print $tapped_by_ua; # complete OAI response
77             print $list->xml(); # should be exactly the same
78              
79              
80             Comment: This is quite an efficient way of not processing the XML content
81             of OAI records received.
82              
83              
84             =item a class name of a SAX filter
85              
86             As usual for any record element of the OAI response a new instance is created.
87              
88             # end_document() of instances of MyWriter returns something meaningful...
89             $consumer = Net::OAI::Record::NamespaceFilter->new('*'=> 'MyWriter');
90              
91             $filter = Net::OAI::Record::NamespaceFilter->new(
92             '*' => $consumer
93             );
94            
95             $list = $harvester->listAllRecords(
96             metadataPrefix => 'oai_dc',
97             recordHandler => $filter,
98             );
99              
100             while( $r = $list->next() ) {
101             next if $r->status() eq "deleted";
102             $xmlstringref = $r->recorddata()->result('*');
103             ...
104             };
105              
106             Note: The handlers are instantiated for each single OAI record in the response
107             and will see one start_document() and end_document() event in any case (this
108             behavior is different from that of handler class names directly specified as
109             C or C for a request: instances from those
110             constructions will never see such events).
111              
112              
113             =item a code reference for an constructor
114              
115             Must return a SAX filter ready to accept a new document.
116              
117             The following example returns a string serialization for each single
118             record:
119              
120             # end_document() events will return \$x
121             $constructor = sub { my $x = "";
122             return XML::SAX::Writer->new(Output => \$x);
123             };
124             $filter = Net::OAI::Record::NamespaceFilter->new(
125             '*' => $constructor
126             );
127            
128             $list = $harvester->listRecords(
129             metadataPrefix => 'oai_dc',
130             recordHandler => $filter,
131             );
132              
133             while( $r = $list->next() ) {
134             $xmlstringref = $r->recorddata()->result('*');
135             ...
136             };
137              
138              
139             Comment: This example shows an approach to insulate the "true contents" of individual
140             response records without having to provide a SAX handler class of one's own (just
141             the addidtional prerequisite of L). But what you get is a
142             serialized XML document which then has to be parsed for further processing ...
143              
144              
145             =item an already instantiated SAX filter
146              
147             As usual in this case no C and C events are
148             forwarded to the filter.
149              
150             open $fh, ">", $some_file;
151             $builder = XML::SAX::Writer->new(Output => $fh);
152             $builder->start_document();
153             $rootEL = { Name => 'collection',
154             LocalName => 'collection',
155             NamespaceURI => "http://www.loc.gov/MARC21/slim",
156             Prefix => "",
157             Attributes => {}
158             };
159             $builder->start_element( $rootEL );
160              
161             # filter for OAI-Namespace in records: forward all
162             $filter = Net::OAI::Record::NamespaceFilter->new(
163             'http://www.loc.gov/MARC21/slim' => $builder);
164              
165             $list = $harvester->listRecords(
166             metadataPrefix => 'a_strange_one',
167             metadataHandler => $filter,
168             );
169             # handle resumption tokens if more than the first
170             # chunk shall be stored into $fh ....
171              
172             $builder->end_element( $rootEL );
173             $builder->end_document();
174             close($fh);
175             # ... process contents of $some_file
176              
177             In this example calling the C method for individual records in
178             the response will probably not be of much use.
179              
180             =back
181              
182             Caution: Depending on the namespaces specified, even a handlers which
183             are freshly instantiated for each OAI record might be fed with more
184             than one top-level XML element.
185              
186              
187             =head1 METHODS
188              
189             =head2 new( [%namespaces] )
190              
191             Creates a Handler suitable as recordHandler or metadataHandler. %namespaces
192             has namespace B for keys and values according to the four types
193             described as above.
194              
195              
196             =cut
197              
198             sub new {
199 4     4 1 2890 my ( $class, %opts ) = @_;
200 4   33     45 my $self = bless { namespaces => {%opts} }, ref( $class ) || $class;
201 4         19 $self->{ _activeStack } = [];
202 4         11 $self->{ _tagStack } = [];
203 4         11 $self->{ _result } = [];
204 4         14 $self->{ _prefixmap } = {};
205 4         31 $self->set_handler( undef );
206 4         52 delete $self->{ _noHandler }; # follows set_handler()
207 4         13 $self->{ _handlers } = {};
208 4         11 $self->{ _performing } = {};
209 4         9 while ( my ($key, $value) = each %{$self->{ namespaces }} ) {
  7         37  
210 3 50       20 if ( ! defined $value ) { # no handler
    100          
    100          
211 0         0 Net::OAI::Harvester::debug( "new(): case 1 for $key" );
212             }
213             elsif ( ! ref($value) ) { # class name
214 1         9 Net::OAI::Harvester::debug( "new(): case 2 for $key: $value");
215 1         6 Net::OAI::Harvester::_verifyHandler( $value );
216             }
217             elsif ( ref($value) eq "CODE" ) { # constructor
218 1         7 Net::OAI::Harvester::debug( "new(): case 3 for $key");
219             # can't verify now
220             }
221             else { # active instance
222 1         7 Net::OAI::Harvester::debug( "new(): case 4 for $key" );
223 1         3 $self->{ _handlers }->{ $key } = $value;
224 1         5 $self->{ _performing }->{ $value }--;
225             }
226             };
227 4         14 return( $self );
228             }
229              
230             =head2 result ( [namespace] )
231              
232             If called with a I, it returns the result of the handler,
233             i.e. what C returned for the record in question.
234             Otherwise it returns a hashref for all the results with the
235             corresponding namespaces as keys.
236              
237             =cut
238              
239             sub result {
240 204     204 1 135621 my ( $self, $ns ) = @_;
241 204 100       461 if ( defined $ns ) {
242 202   100     994 return $self->{ _result }->{$ns} || undef}
243             else {
244 2         7 return $self->{ _result }}
245             }
246              
247             =head1 AUTHOR
248              
249             Thomas Berger
250              
251             =cut
252              
253             ## Storable hooks
254              
255             sub STORABLE_freeze {
256 400     400 0 170129 my ($obj, $cloning) = @_;
257 400 50       1050 return if $cloning;
258 400         201523 return "", $obj->{ _result }; # || undef;
259             }
260              
261             sub STORABLE_thaw {
262 400     400 0 12750 my ($obj, $cloning, $serialized, $listref) = @_;
263 400 50       911 return if $cloning;
264 400         6287 $obj->{ _result } = $listref;
265             #carp "thawed @$listref";
266             }
267              
268              
269             ## SAX handlers
270              
271             sub start_document {
272 0     0 1 0 my ($self, $document) = @_;
273 0         0 carp(<<"XxX");
274             unexpected start_document()
275 0         0 \t_activeStack: @{$self->{ _activeStack }}
276 0         0 \t_tagStack: @{$self->{ _tagStack }}
277             XxX
278 0         0 $self->SUPER::start_document( $document );
279             }
280             sub end_document {
281 0     0 1 0 my ($self, $document) = @_;
282 0         0 carp(<<"XxX");
283             unexpected end_document()
284 0         0 \t_activeStack: @{$self->{ _activeStack }}
285 0         0 \t_tagStack: @{$self->{ _tagStack }}
286             XxX
287 0         0 $self->SUPER::end_document( $document );
288             }
289              
290             sub start_prefix_mapping {
291 2010     2010 1 17767 my ($self, $mapping) = @_;
292 2010 100       6550 $self->SUPER::start_prefix_mapping( $mapping ) unless $self->{ _noHandler };
293 2010 100       9971 return if $self->{ _activeStack }->[0];
294 2007         5323 $self->{ _prefixmap }->{ $mapping->{Prefix} } = $mapping;
295 2007         5429 my $activehdl = $self->get_handler();
296 2007 0 33     15294 croak ("wrong assumption") unless (! defined $activehdl) or $self->{ _performing }->{ $activehdl };
297 2007         2525 my $switched;
298 2007         2306 foreach my $hdl ( keys %{$self->{ _performing }} ) {
  2007         5417  
299 5         15 $self->set_handler( $hdl );
300 5         59 $self->SUPER::start_prefix_mapping( $mapping );
301 5         135 $switched = 1;
302             }
303 2007 100       14332 $self->set_handler( $activehdl ) if $switched;
304             }
305              
306             sub end_prefix_mapping {
307 1206     1206 1 23385 my ($self, $mapping) = @_;
308 1206 100       3942 $self->SUPER::end_prefix_mapping( $mapping ) unless $self->{ _noHandler };
309 1206 100       6230 return if $self->{ _activeStack }->[0];
310 1203 50       4267 croak ( "mapping @{[%$mapping]} already removed" ) unless $self->{ _prefixmap }->{ $mapping->{Prefix} };
  0         0  
311 1203         3253 my $activehdl = $self->get_handler(); # always undef
312 1203 0 33     9192 croak ( "wrong assumption" ) unless (! defined $activehdl) or $self->{ _performing }->{ $activehdl };
313 1203         1431 my $switched;
314 1203         1580 foreach my $hdl ( keys %{$self->{ _performing }} ) {
  1203         3250  
315 3         9 $self->set_handler( $hdl );
316 3         35 $self->SUPER::end_prefix_mapping( $mapping );
317 3         81 $switched = 1;
318             }
319 1203         3140 delete $self->{ _prefixmap }->{ $mapping->{Prefix} };
320 1203 100       7308 $self->set_handler( $activehdl ) if $switched;
321             }
322              
323             sub start_element {
324 10668     10668 1 74166 my ( $self, $element ) = @_;
325             # Net::OAI::Harvester::debug(<<"XxX");
326             #\t((( $element->{ Name } (((
327             #\t\t_activeStack: @{$self->{ _activeStack }}
328             #\t\t_tagStack: @{$self->{ _tagStack }}
329             #XxX
330 10668 100       25635 if ( $self->{ _activeStack }->[0] ) { # handler already set up
331             }
332             else {
333 6116 100       14402 unless ( $self->{ _tagStack }->[0] ) { # should be the start of a new record
334 402         964 $self->{ _result } = {};
335             # start_document here for all defined handlers?
336 402         1703 my $activehdl = $self->get_handler(); # always undef
337 402 50       3266 croak( "handler $activehdl already active" ) if defined $activehdl;
338 402         548 my $switched;
339              
340 402         733 while ( my ($key, $value) = each %{$self->{ namespaces }} ) {
  604         9446  
341 202         482 $self->{ _result }->{ $key } = undef;
342 202         344 my $hdl;
343 202 50       1044 if ( ! defined $value ) { # no handler
    100          
    100          
344             # Net::OAI::Harvester::debug( "start_element(): case 1 for $key" );
345             }
346             elsif ( ! ref($value) ) { # class name
347             # Net::OAI::Harvester::debug( "start_element(): case 2 for $key" );
348 1         9 $hdl = $value->new();
349             }
350             elsif ( ref($value) eq "CODE" ) { # constructor
351             # Net::OAI::Harvester::debug( "start_element(): case 3 for $key" );
352 200         834 $hdl = &$value();
353 200         42341 Net::OAI::Harvester::_verifyHandler( $hdl );
354             }
355             else { # always active instance
356             # Net::OAI::Harvester::debug( ""start_element(): case 4 for $key. Handler is $value" );
357 1         2 $switched = 1;
358 1         4 $self->set_handler( $value );
359             # Those mapping evends *have* already been forwarded... => Bugfix for XML::SAX::Writer?
360 1         9 foreach my $mapping ( values %{$self->{ _prefixmap }} ) {
  1         4  
361             # Net::OAI::Harvester::debug( "bugfix supply of deferred @{[%$mapping]}" );
362 4         129 $self->SUPER::start_prefix_mapping( $mapping )}
363 1         33 next;
364             }
365              
366 201         731 $self->{ _handlers }->{ $key } = $hdl;
367 201 50       521 next unless defined $hdl;
368 201 50       931 next if $self->{ _performing }->{ $hdl }++;
369 201         275 $switched = 1;
370 201         660 $self->set_handler( $hdl );
371 201         2774 $self->SUPER::start_document({});
372 201         66035 foreach my $mapping ( values %{$self->{ _prefixmap }} ) {
  201         676  
373 802         26959 $self->SUPER::start_prefix_mapping( $mapping )}
374             }
375 402 100       1409 $self->set_handler( $activehdl ) if $switched;
376             };
377              
378 6116 100       30992 if ( exists $self->{ namespaces }->{$element->{ NamespaceURI }} ) {
    100          
379 201 50       790 if ( defined (my $hdl = $self->{ _handlers }->{$element->{ NamespaceURI }}) ) {
380 201         543 $self->set_handler( $hdl );
381 201         2064 $self->{ _noHandler } = 0;
382             };
383             }
384             elsif ( exists $self->{ namespaces }->{'*'} ) {
385 1 50       6 if ( defined (my $hdl = $self->{ _handlers }->{'*'}) ) {
386 1         3 $self->set_handler( $hdl );
387 1         12 $self->{ _noHandler } = 0;
388             };
389             }
390             else {
391 5914         6861 push (@{$self->{ _tagStack }}, $element->{ Name });
  5914         15280  
392 5914         21935 return;
393             };
394             };
395              
396 4754         6288 push (@{$self->{ _activeStack }}, $element->{ Name });
  4754         12309  
397 4754 50       11152 return if $self->{ _noHandler };
398 4754         12870 $self->SUPER::start_element( $element );
399             }
400              
401             sub end_element {
402 10668     10668 1 73809 my ( $self, $element ) = @_;
403             # Net::OAI::Harvester::debug(<<"XxX");
404             #\t))) $element->{ Name } )))
405             #\t\t_activeStack: @{$self->{ _activeStack }}
406             #\t\t_tagStack: @{$self->{ _tagStack }}
407             #XxX
408 10668 100       33599 if ( $self->{ _activeStack }->[0] ) {
    50          
409 4754 50       9943 unless ( $self->{ _noHandler } ) {
410 4754         11866 $self->SUPER::end_element( $element );
411             };
412 4754         521754 pop (@{$self->{ _activeStack }});
  4754         9078  
413 4754 100       20073 return if $self->{ _activeStack }->[0];
414 202 50       688 unless ( $self->{ _noHandler } ) {
415 202         770 $self->set_handler(undef);
416 202         4077 $self->{ _noHandler } = 1;
417             }
418             }
419             elsif ( $self->{ _tagStack }->[0] ) {
420 5914         7262 pop (@{$self->{ _tagStack }});
  5914         12340  
421             }
422 6116 100       27085 return if $self->{ _tagStack }->[0];
423 402         1282 my $activehdl = $self->get_handler(); # always undef
424 402 50       3423 croak ( "handler $activehdl still active" ) if defined $activehdl;
425 402         675 my $switched;
426 402         711 while ( my ($key, $value) = each %{$self->{ namespaces }} ) {
  604         2512  
427 202 50       937 if ( ! defined $value ) {
    50          
428             # Net::OAI::Harvester::debug( "end_element(): case 1 for $key" );
429 0         0 $self->{ _result }->{ $key } = "";
430             }
431             elsif ( my $hdl = $self->{ _handlers }->{ $key } ) {
432 202 50       1411 if ( ! $self->{ _performing }->{ $hdl } ) {
    100          
433 0         0 carp "already(?) inactive handler $hdl for $key";
434 0         0 delete $self->{ _handlers }->{ $key };
435 0         0 next;
436             }
437             elsif ( $self->{ _performing }->{ $hdl } < 0 ) { # always active handler
438             # Net::OAI::Harvester::debug( "end_element(): case 4 for $key" );
439 1         2 $self->{ _result }->{ $key } = undef;
440 1         4 next;
441             };
442             # Net::OAI::Harvester::debug( "end_element(): case 2/3 for $key" );
443 201         394 delete $self->{ _handlers }->{ $key };
444 201         640 delete $self->{ _performing }->{ $hdl };
445 201         314 $switched = 1;
446 201         656 $self->set_handler( $hdl );
447             # revoke some stored namespace mappings, too?
448 201         2569 my $result = $self->SUPER::end_document({});
449 201         15924 $self->{ _result }->{ $key } = $result;
450             }
451             else {
452 0         0 croak("Assertion failed: $key not listed as _handler");
453             };
454             };
455 402 100       1923 $self->set_handler( $activehdl ) if $switched;
456             }
457              
458             sub characters {
459 23260     23260 1 155499 my ( $self, $characters ) = @_;
460 23260 100       52950 return if $self->{ _noHandler };
461 22860         58197 return $self->SUPER::characters( $characters );
462             }
463              
464             sub ignorable_whitespace {
465 0     0 1   my ( $self, $characters ) = @_;
466 0 0         return if $self->{ _noHandler };
467 0           return $self->SUPER::ignorable_whitespace( $characters );
468             }
469              
470             sub comment {
471 0     0 1   my ( $self, $comment ) = @_;
472 0 0         return if $self->{ _noHandler };
473 0           return $self->SUPER::comment( $comment );
474             }
475              
476             sub processing_instruction {
477 0     0 1   my ( $self, $pi ) = @_;
478 0 0         return if $self->{ _noHandler };
479 0           return $self->SUPER::processing_instruction( $pi );
480             }
481              
482             1;
483