|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package PICA::XMLParser;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $PICA::XMLParser::VERSION = '0.585';  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ABSTRACT: Parse PICA+ XML  | 
| 
6
 | 
13
 | 
 
 | 
 
 | 
  
13
  
 | 
 
 | 
75
 | 
 use strict;  | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
    | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
561
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
8
 | 
13
 | 
 
 | 
 
 | 
  
13
  
 | 
 
 | 
78
 | 
 use base qw(XML::SAX::Base Exporter);  | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
    | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20708
 | 
    | 
| 
9
 | 
13
 | 
 
 | 
 
 | 
  
13
  
 | 
 
 | 
330611
 | 
 use Carp qw(croak);  | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
    | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1163
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @EXPORT_OK = qw(parsefile parsedata);  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 require PICA::Field;  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
15
 | 
13
 | 
 
 | 
 
 | 
  
13
  
 | 
 
 | 
173
 | 
 use PICA::Field;  | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
    | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
482
 | 
    | 
| 
16
 | 
13
 | 
 
 | 
 
 | 
  
13
  
 | 
 
 | 
74
 | 
 use PICA::Record;  | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
    | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
550
 | 
    | 
| 
17
 | 
13
 | 
 
 | 
 
 | 
  
13
  
 | 
 
 | 
13923
 | 
 use XML::SAX::ParserFactory 1.01;  | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46019
 | 
    | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
414
 | 
    | 
| 
18
 | 
13
 | 
 
 | 
 
 | 
  
13
  
 | 
 
 | 
97
 | 
 use Carp qw(croak);  | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
119
 | 
    | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23085
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
22
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
2039
 | 
     my ($class, %params) = @_;  | 
| 
23
 | 
3
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
19
 | 
     $class = ref $class || $class;  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
25
 | 
3
 | 
  
 50
  
 | 
  
 50
  
 | 
 
 | 
 
 | 
116
 | 
     my $self = {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         record => {},  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         fields => {},  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         read_records => [],  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         tag => "",  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         occurrence => "",  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         subfield_code => "",  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         subfield_value => "",  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         limit  => ($params{Limit} || 0) * 1,  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         offset  => ($params{Offset} || 0) * 1,  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Handlers  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         field_handler  => $params{Field} ? $params{Field} : undef,  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         record_handler => $params{Record} ? $params{Record} : undef,  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         collection_handler => $params{Collection} ? $params{Collection} : undef,  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         proceed => $params{Proceed} ? $params{Proceed} : 0,  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         read_counter => 0,  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
48
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     bless $self, $class;  | 
| 
49
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     return $self;  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parsedata {  | 
| 
54
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
6
 | 
     my $self = shift;  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
56
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     if ( ref($self) eq "PICA::XMLParser" ) { # called as a method  | 
| 
57
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
       my $data = shift;  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
59
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
       if ( ! $self->{proceed} ) {  | 
| 
60
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
           $self->{read_counter} = 0;  | 
| 
61
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
           $self->{read_records} = [];  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
64
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
10
 | 
       if( UNIVERSAL::isa( $data, 'PICA::Record' ) ) {  | 
| 
65
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         foreach ( $data->fields ) {  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # TODO: we could improve performance here  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # TODO: merge this into PICA::Parser  | 
| 
68
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $self->_parseline( $_->string );  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
72
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
       my $parser = XML::SAX::ParserFactory->new(  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         RequiredFeatures => { 'http://xml.org/sax/features/namespaces' => 1 }  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       )->parser( Handler => $self );  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
76
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       if (ref($data) eq 'ARRAY') {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $data = join('',@{$data})  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } elsif (ref($data) eq 'CODE') {  | 
| 
79
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           my $code = $data;  | 
| 
80
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           $data = "";  | 
| 
81
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           my $chunk = &$code();  | 
| 
82
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
           while(defined $chunk) {  | 
| 
83
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
               $data .= $chunk;  | 
| 
84
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
               $chunk = &$code();  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
88
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $parser->parse_string($data);  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
90
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $self;  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else { # called as function  | 
| 
93
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         my $data = ($self eq 'PICA::XMLParser') ? shift : $self;  | 
| 
94
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         croak("Missing argument to parsedata") unless defined $data;  | 
| 
95
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         PICA::XMLParser->new( @_ )->parsedata( $data );  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parsefile {  | 
| 
101
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
5
 | 
     my $self = shift;  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
103
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     if ( ref($self) eq "PICA::XMLParser" ) { # called as a method  | 
| 
104
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         my $file = shift;  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
106
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
28
 | 
         if ( ! $self->{proceed} ) {  | 
| 
107
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
             $self->{read_counter} = 0;  | 
| 
108
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
             $self->{read_records} = [];  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
111
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
         $self->{filename} = $file if ref(\$file) eq 'SCALAR';  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
113
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
         my $parser = XML::SAX::ParserFactory->new(  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           RequiredFeatures => { 'http://xml.org/sax/features/namespaces' => 1 }  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         )->parser( Handler => $self );  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
117
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
         if (ref($file) eq 'GLOB' or eval { $file->isa("IO::Handle") }) {  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
118
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $parser->parse_file($file);  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } else {  | 
| 
120
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $parser->parse_uri($file);  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
123
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self;  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else { # called as a function         | 
| 
126
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $file = ($self eq 'PICA::XMLParser') ? shift : $self;  | 
| 
127
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         croak("Missing argument to parsefile") unless defined $file;  | 
| 
128
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         PICA::XMLParser->new( @_ )->parsefile( $file );  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub records {  | 
| 
134
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
    my $self = shift;   | 
| 
135
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    return @{ $self->{read_records} };  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub counter {  | 
| 
140
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
    my $self = shift;   | 
| 
141
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    return $self->{read_counter};  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub finished {  | 
| 
146
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
     my $self = shift;   | 
| 
147
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
     return $self->{limit} && $self->counter() >= $self->{limit};  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub start_document {  | 
| 
152
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
     my ($self, $doc) = @_;  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
154
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->{subfield_code} = "";  | 
| 
155
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->{tag} = "";  | 
| 
156
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->{occurrence} = "";  | 
| 
157
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->{record} = ();  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub end_document {  | 
| 
162
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
     my ($self, $doc) = @_;  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub start_element {  | 
| 
167
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
     my ($self, $el) = @_;  | 
| 
168
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $name = $el->{LocalName};  | 
| 
169
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my %attrs = map { $_->{LocalName} => $_->{Value} } values %{ $el->{Attributes} };  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
171
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $ns = $el->{NamespaceURI};  | 
| 
172
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
     $name = '{'.$ns.'}:'.$name if $ns and $ns ne $PICA::Record::XMLNAMESPACE;  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
174
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
     if ($name eq "subfield") {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
176
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $code = $attrs{"code"};  | 
| 
177
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if (defined $code) {  | 
| 
178
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             if ($code =~ $PICA::Field::SUBFIELD_CODE_REGEXP) {  | 
| 
179
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $self->{subfield_code} = $code;  | 
| 
180
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $self->{subfield_value} = "";  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             } else {  | 
| 
182
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                croak "Invalid subfield code '$code'"; # . $self->_getPosition($parser));  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } else {  | 
| 
185
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             croak "Missing attribute 'code'"; # . $self->_getPosition($parser));  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($name eq "field" or $name eq "datafield") {  | 
| 
188
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $tag = $attrs{tag};  | 
| 
189
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if (defined $tag) {  | 
| 
190
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             if (!($tag =~ $PICA::Field::FIELD_TAG_REGEXP)) {  | 
| 
191
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 croak "Invalid field tag '$tag'"; # . $self->_getPosition($parser));  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } else {  | 
| 
194
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             croak "Missing attribute 'tag'"; # . $self->_getPosition($parser));  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
196
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $occurrence = $attrs{occurrence};  | 
| 
197
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
         if ($occurrence && !($occurrence =~ $PICA::Field::FIELD_OCCURRENCE_REGEXP)) {  | 
| 
198
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             croak "Invalid occurrence '$occurrence'"; # . $self->_getPosition($parser));  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
201
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->{tag} = $tag;  | 
| 
202
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->{occurrence} = $occurrence ? $occurrence : undef;  | 
| 
203
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->{subfields} = ();  | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($name eq "record") {  | 
| 
206
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->{fields} = [];  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($name eq "collection") {  | 
| 
208
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->{records} = [];  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
210
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         croak "Unknown element '$name'"; # . $self->_getPosition($parser));  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub end_element {  | 
| 
216
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
     my ($self, $el) = @_;  | 
| 
217
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $name = $el->{LocalName};  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # TODO: $el->{NamespaceURI}  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
220
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
     if ($name eq "subfield") {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
221
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         push (@{$self->{subfields}}, ($self->{subfield_code}, $self->{subfield_value}));  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($name eq "field" or $name eq "datafield") {  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #        return if $self->{tag} eq ''; # ignore  | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #        croak ("Field " . $self->{tag} . " is empty" . $self->_getPosition($parser)) unless $self->{subfields};  | 
| 
227
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         croak ("Field " . $self->{tag} . " is empty") unless $self->{subfields};  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
229
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $field = bless {  | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             _tag => $self->{tag},  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             _occurrence => $self->{occurrence},  | 
| 
232
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             _subfields => [@{$self->{subfields}}]  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }, 'PICA::Field'; # TODO: use constructor instead  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
235
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if ($self->{field_handler}) {  | 
| 
236
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $field = $self->{field_handler}( $field );  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
239
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if (UNIVERSAL::isa($field,"PICA::Field")) {  | 
| 
240
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             push (@{$self->{fields}}, $field);  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($name eq "record") {  | 
| 
243
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return if $self->finished();  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
245
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->{read_counter}++;  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
247
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
         if (! ($self->{offset} && $self->{read_counter} < $self->{offset}) ) {  | 
| 
248
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             my $record =  PICA::Record->new( @{$self->{fields}} );  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
250
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             if ($self->{record_handler}) {  | 
| 
251
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $record = $self->{record_handler}( $record );  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
253
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             if ($record) {  | 
| 
254
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 push @{ $self->{read_records} }, $record;  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($name eq "collection") {  | 
| 
259
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->{collection_handler}( $self->records() )  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             if $self->{collection_handler};  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
262
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         croak("Unknown element '$name'"); # . $self->_getPosition($parser));  | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub characters {  | 
| 
268
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
     my ($self, $string) = @_;  | 
| 
269
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ($string) = values %$string;  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # all character data outside of subfield content will be ignored without warning  | 
| 
272
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (defined $self->{subfield_code}) {  | 
| 
273
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $string =~ s/[\n\r]+/ /g; # remove newlines  | 
| 
274
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->{subfield_value} .= $string;  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _getPosition {  | 
| 
280
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
     my ($self, $parser) = @_;  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
282
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if ($self->{filename}) {  | 
| 
283
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return " in " . $self->{filename} . ", line " . $parser->current_line();  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
285
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return " in line " . $parser->current_line();  | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |