line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# This file is part of TBX-XCS |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# This software is copyright (c) 2013 by Alan K. Melby. |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# This is free software; you can redistribute it and/or modify it under |
7
|
|
|
|
|
|
|
# the same terms as the Perl 5 programming language system itself. |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
package TBX::XCS; |
10
|
6
|
|
|
6
|
|
111723
|
use strict; |
|
6
|
|
|
|
|
15
|
|
|
6
|
|
|
|
|
203
|
|
11
|
6
|
|
|
6
|
|
32
|
use warnings; |
|
6
|
|
|
|
|
11
|
|
|
6
|
|
|
|
|
151
|
|
12
|
6
|
|
|
6
|
|
7813
|
use XML::Twig; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
use feature 'say'; |
14
|
|
|
|
|
|
|
use JSON; |
15
|
|
|
|
|
|
|
use Carp; |
16
|
|
|
|
|
|
|
#carp from calling package, not from here |
17
|
|
|
|
|
|
|
our @CARP_NOT = qw(TBX::XCS TBX::XCS::JSON); |
18
|
|
|
|
|
|
|
use Data::Dumper; |
19
|
|
|
|
|
|
|
our $VERSION = '0.05'; # VERSION |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# ABSTRACT: Extract data from an XCS file |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
#default: read XCS file and dump data to STDOUT |
25
|
|
|
|
|
|
|
__PACKAGE__->new()->_run(@ARGV) unless caller; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
sub new { |
29
|
|
|
|
|
|
|
my ($class, @args) = @_; |
30
|
|
|
|
|
|
|
my $self = bless {}, $class; |
31
|
|
|
|
|
|
|
if(@args){ |
32
|
|
|
|
|
|
|
$self->parse(@args); |
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
return $self; |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
sub parse { |
39
|
|
|
|
|
|
|
my ($self, %args) = @_; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
$self->_init; |
42
|
|
|
|
|
|
|
if(exists $args{file}){ |
43
|
|
|
|
|
|
|
if(not -e $args{file}){ |
44
|
|
|
|
|
|
|
croak "file does not exist: $args{file}"; |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
$self->{twig}->parsefile( $args{file} ); |
47
|
|
|
|
|
|
|
}elsif(exists $args{string}){ |
48
|
|
|
|
|
|
|
$self->{twig}->parse( ${$args{string}} ); |
49
|
|
|
|
|
|
|
}else{ |
50
|
|
|
|
|
|
|
croak 'Need to specify either a file or a string pointer with XCS contents'; |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
$self->{data}->{constraints} = $self->{twig}->{xcs_constraints}; |
53
|
|
|
|
|
|
|
$self->{data}->{name} = $self->{twig}->{xcs_name}; |
54
|
|
|
|
|
|
|
$self->{data}->{title} = $self->{twig}->{xcs_title}; |
55
|
|
|
|
|
|
|
return; |
56
|
|
|
|
|
|
|
} |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
sub _init { |
59
|
|
|
|
|
|
|
my ($self) = @_; |
60
|
|
|
|
|
|
|
$self->{twig}->{xcs_constraints} = {}; |
61
|
|
|
|
|
|
|
$self->{twig} = _init_twig(); |
62
|
|
|
|
|
|
|
return; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub _run { |
66
|
|
|
|
|
|
|
my ($self, $file) = @_; |
67
|
|
|
|
|
|
|
$self->parse(file => $file); |
68
|
|
|
|
|
|
|
print Dumper $self->{data}->{constraints}; |
69
|
|
|
|
|
|
|
return; |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
sub get_languages { |
74
|
|
|
|
|
|
|
my ($self) = @_; |
75
|
|
|
|
|
|
|
return $self->{data}->{constraints}->{languages}; |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
sub get_ref_objects { |
80
|
|
|
|
|
|
|
my ($self) = @_; |
81
|
|
|
|
|
|
|
return $self->{data}->{constraints}->{refObjects} ; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
sub get_data_cats { |
86
|
|
|
|
|
|
|
my ($self) = @_; |
87
|
|
|
|
|
|
|
return $self->{data}->{constraints}->{datCatSet}; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
sub get_title { |
92
|
|
|
|
|
|
|
my ($self) = @_; |
93
|
|
|
|
|
|
|
return $self->{data}->{title}; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub get_name { |
98
|
|
|
|
|
|
|
my ($self) = @_; |
99
|
|
|
|
|
|
|
return $self->{data}->{name}; |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
my @meta_data_cats = qw( |
103
|
|
|
|
|
|
|
adminNote |
104
|
|
|
|
|
|
|
admin |
105
|
|
|
|
|
|
|
descrip |
106
|
|
|
|
|
|
|
descripNote |
107
|
|
|
|
|
|
|
hi |
108
|
|
|
|
|
|
|
ref |
109
|
|
|
|
|
|
|
termNote |
110
|
|
|
|
|
|
|
transac |
111
|
|
|
|
|
|
|
transacNote |
112
|
|
|
|
|
|
|
xref |
113
|
|
|
|
|
|
|
termCompList |
114
|
|
|
|
|
|
|
); |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# these are taken from the core structure DTD |
117
|
|
|
|
|
|
|
# the types are listed on pg 12 of TBX_spec_OSCAR.pdf |
118
|
|
|
|
|
|
|
# TODO: maybe they should be extracted |
119
|
|
|
|
|
|
|
my %default_datatype = ( |
120
|
|
|
|
|
|
|
adminNote => 'plainText', |
121
|
|
|
|
|
|
|
admin => 'noteText', |
122
|
|
|
|
|
|
|
descripNote => 'plainText', |
123
|
|
|
|
|
|
|
descrip => 'noteText', |
124
|
|
|
|
|
|
|
hi => 'plainText', |
125
|
|
|
|
|
|
|
ref => 'plainText', |
126
|
|
|
|
|
|
|
#I don't think XCS will ever mess with this one in a complicated way |
127
|
|
|
|
|
|
|
#TODO: maybe change this to be shown as 'termCompList' type |
128
|
|
|
|
|
|
|
#TODO: how will we allow users to subset this? |
129
|
|
|
|
|
|
|
# termCompList=> 'auxInfo, (termComp | termCompGrp)+', |
130
|
|
|
|
|
|
|
termNote => 'noteText', |
131
|
|
|
|
|
|
|
transacNote => 'plainText', |
132
|
|
|
|
|
|
|
transac => 'plainText', |
133
|
|
|
|
|
|
|
xref => 'plainText', |
134
|
|
|
|
|
|
|
); |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
my $allowed_datatypes = do{ |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
#what datatypes can become what other datatypes? |
139
|
|
|
|
|
|
|
my %datatype_heirarchy = ( |
140
|
|
|
|
|
|
|
noteText => { |
141
|
|
|
|
|
|
|
'noteText' => 1, |
142
|
|
|
|
|
|
|
'basicText' => 1, |
143
|
|
|
|
|
|
|
'plainText' => 1, |
144
|
|
|
|
|
|
|
'picklist' => 1, |
145
|
|
|
|
|
|
|
}, |
146
|
|
|
|
|
|
|
basicText => { |
147
|
|
|
|
|
|
|
'basicText' => 1, |
148
|
|
|
|
|
|
|
'plainText' => 1, |
149
|
|
|
|
|
|
|
'picklist' => 1, |
150
|
|
|
|
|
|
|
}, |
151
|
|
|
|
|
|
|
plainText => { |
152
|
|
|
|
|
|
|
'plainText' => 1, |
153
|
|
|
|
|
|
|
'picklist' => 1, |
154
|
|
|
|
|
|
|
}, |
155
|
|
|
|
|
|
|
); |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
my $allowed_datatypes = {}; |
158
|
|
|
|
|
|
|
for my $category (keys %default_datatype){ |
159
|
|
|
|
|
|
|
$allowed_datatypes->{$category} = |
160
|
|
|
|
|
|
|
$datatype_heirarchy{ $default_datatype{$category} }; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
$allowed_datatypes; |
163
|
|
|
|
|
|
|
}; |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
#return an XML::Twig object which will extract data from an XCS file |
166
|
|
|
|
|
|
|
sub _init_twig { |
167
|
|
|
|
|
|
|
return XML::Twig->new( |
168
|
|
|
|
|
|
|
pretty_print => 'indented', |
169
|
|
|
|
|
|
|
# keep_original_prefix => 1, #maybe; this may be bad because the JS code doesn't process namespaces yet |
170
|
|
|
|
|
|
|
output_encoding => 'UTF-8', |
171
|
|
|
|
|
|
|
do_not_chain_handlers => 1, #can be important when things get complicated |
172
|
|
|
|
|
|
|
keep_spaces => 0, |
173
|
|
|
|
|
|
|
TwigHandlers => { |
174
|
|
|
|
|
|
|
TBXXCS => sub {$_[0]->{xcs_name} = $_->att('name')}, |
175
|
|
|
|
|
|
|
title => sub {$_[0]->{xcs_title} = $_->text}, |
176
|
|
|
|
|
|
|
header => sub {}, |
177
|
|
|
|
|
|
|
#TODO: add handlers for these |
178
|
|
|
|
|
|
|
datCatDoc => sub {}, |
179
|
|
|
|
|
|
|
datCatMap => sub {}, |
180
|
|
|
|
|
|
|
datCatDisplay => sub {}, |
181
|
|
|
|
|
|
|
datCatNote => sub {}, |
182
|
|
|
|
|
|
|
datCatToken => sub {}, |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
languages => \&_languages, |
185
|
|
|
|
|
|
|
langCode => sub {}, |
186
|
|
|
|
|
|
|
langInfo => sub {}, |
187
|
|
|
|
|
|
|
langName => sub {}, |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
refObjectDefSet => \&_refObjectDefSet, |
190
|
|
|
|
|
|
|
refObjectDef => sub {}, |
191
|
|
|
|
|
|
|
refObjectType => sub {}, |
192
|
|
|
|
|
|
|
itemSpecSet => sub {}, |
193
|
|
|
|
|
|
|
itemSpec => sub {}, |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
adminNoteSpec => \&_dataCat, |
196
|
|
|
|
|
|
|
adminSpec => \&_dataCat, |
197
|
|
|
|
|
|
|
descripNoteSpec => \&_dataCat, |
198
|
|
|
|
|
|
|
descripSpec => \&_dataCat, |
199
|
|
|
|
|
|
|
hiSpec => \&_dataCat, |
200
|
|
|
|
|
|
|
refSpec => \&_dataCat, |
201
|
|
|
|
|
|
|
termCompListSpec=> \&_dataCat, |
202
|
|
|
|
|
|
|
termNoteSpec => \&_dataCat, |
203
|
|
|
|
|
|
|
transacNoteSpec => \&_dataCat, |
204
|
|
|
|
|
|
|
transacSpec => \&_dataCat, |
205
|
|
|
|
|
|
|
xrefSpec => \&_dataCat, |
206
|
|
|
|
|
|
|
contents => sub {}, |
207
|
|
|
|
|
|
|
levels => sub {}, |
208
|
|
|
|
|
|
|
datCatSet => sub {}, |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
'_default_' => sub {croak 'unknown tag: ' . $_->tag}, |
211
|
|
|
|
|
|
|
}, |
212
|
|
|
|
|
|
|
); |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
###HANDLERS |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
#the languages allowed to be used in the document |
218
|
|
|
|
|
|
|
sub _languages { |
219
|
|
|
|
|
|
|
my ($twig, $el) = @_; |
220
|
|
|
|
|
|
|
my %languages; |
221
|
|
|
|
|
|
|
#make list of allowed languages and store it on the twig |
222
|
|
|
|
|
|
|
foreach my $language($el->children('langInfo')){ |
223
|
|
|
|
|
|
|
$languages{$language->first_child('langCode')->text} = |
224
|
|
|
|
|
|
|
$language->first_child('langName')->text; |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
$twig->{xcs_constraints}->{languages} = \%languages; |
227
|
|
|
|
|
|
|
return; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
#the reference objects that can be contained in the tag |
231
|
|
|
|
|
|
|
sub _refObjectDefSet { |
232
|
|
|
|
|
|
|
my ($twig, $el) = @_; |
233
|
|
|
|
|
|
|
my %defSet; |
234
|
|
|
|
|
|
|
#make list of allowed reference object types and store it on the twig |
235
|
|
|
|
|
|
|
foreach my $def ($el->children('refObjectDef')){ |
236
|
|
|
|
|
|
|
$defSet{$def->first_child('refObjectType')->text} = |
237
|
|
|
|
|
|
|
[ |
238
|
|
|
|
|
|
|
map {$_->text} |
239
|
|
|
|
|
|
|
$def->first_child('itemSpecSet')->children('itemSpec') |
240
|
|
|
|
|
|
|
]; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
$twig->{xcs_constraints}->{refObjects} = \%defSet; |
244
|
|
|
|
|
|
|
return; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
# all children of dataCatset |
248
|
|
|
|
|
|
|
sub _dataCat { |
249
|
|
|
|
|
|
|
my ($twig, $el) = @_; |
250
|
|
|
|
|
|
|
(my $type = $el->tag) =~ s/Spec$//; |
251
|
|
|
|
|
|
|
_check_meta_cat($type); |
252
|
|
|
|
|
|
|
my $data = {}; |
253
|
|
|
|
|
|
|
$data->{name} = $el->att('name'); |
254
|
|
|
|
|
|
|
if( my $datCatId = $el->att('datcatId') ){ |
255
|
|
|
|
|
|
|
$data->{datCatId} = $datCatId; |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
#If the data-category does not take a picklist, |
258
|
|
|
|
|
|
|
#if its data type is the same as that defined for the meta data element in the core-structure DTD, |
259
|
|
|
|
|
|
|
#if its meta data element does not take a target attribute, and |
260
|
|
|
|
|
|
|
#if it does not apply to term components, |
261
|
|
|
|
|
|
|
#this element will be empty and have no attributes specified. |
262
|
|
|
|
|
|
|
my $contents = $el->first_child('contents') |
263
|
|
|
|
|
|
|
or croak 'No contents element in ' . $el->tag . '[@name=' . $el->att('name') . ']'; |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
#check restrictions on datatypes |
266
|
|
|
|
|
|
|
my $datatype = $contents->att('datatype'); |
267
|
|
|
|
|
|
|
if($datatype){ |
268
|
|
|
|
|
|
|
if($type eq 'termCompList'){ |
269
|
|
|
|
|
|
|
carp 'Ignoring datatype value in termCompList contents element'; |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
else{ |
272
|
|
|
|
|
|
|
_check_datatype($type, $datatype); |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
}else{ |
275
|
|
|
|
|
|
|
$datatype = $default_datatype{$type}; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
#ignore datatypes for termCompList |
278
|
|
|
|
|
|
|
if($type ne 'termCompList'){ |
279
|
|
|
|
|
|
|
$data->{datatype} = $datatype; |
280
|
|
|
|
|
|
|
if($datatype eq 'picklist'){ |
281
|
|
|
|
|
|
|
$data->{choices} = [split ' ', $contents->text]; |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
if ($contents->att('forTermComp')){ |
285
|
|
|
|
|
|
|
$data->{forTermComp} = $contents->att('forTermComp');; |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
if ($contents->att('targetType')){ |
289
|
|
|
|
|
|
|
$data->{targetType} = $contents->att('targetType'); |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
#levels can be specified for descrip data categories |
293
|
|
|
|
|
|
|
if($type eq 'descrip'){ |
294
|
|
|
|
|
|
|
if(my $levels = $el->first_child('levels')->text){ |
295
|
|
|
|
|
|
|
$data->{levels} = [split ' ', $levels]; |
296
|
|
|
|
|
|
|
_check_levels($data); |
297
|
|
|
|
|
|
|
}else{ |
298
|
|
|
|
|
|
|
#todo: not sure if this is the right behavior for an empty |
299
|
|
|
|
|
|
|
$data->{levels} = [qw(langSet termEntry term)] |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
#also, check page 10 of the OSCAR PDF for elements that can occur at multiple levels |
303
|
|
|
|
|
|
|
push @{ $twig->{xcs_constraints}->{datCatSet}->{$type} }, $data; |
304
|
|
|
|
|
|
|
return; |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
sub _check_meta_cat { |
308
|
|
|
|
|
|
|
my ($meta_cat) = @_; |
309
|
|
|
|
|
|
|
if(! grep {$_ eq $meta_cat} @meta_data_cats ){ |
310
|
|
|
|
|
|
|
croak "unknown meta data category: $meta_cat"; |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
return; |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
sub _get_default_datatype { |
316
|
|
|
|
|
|
|
my ($meta_cat) = @_; |
317
|
|
|
|
|
|
|
return $default_datatype{$meta_cat}; |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
sub _check_datatype { |
321
|
|
|
|
|
|
|
my ($meta_cat, $datatype) = @_; |
322
|
|
|
|
|
|
|
if(! exists $allowed_datatypes->{$meta_cat}->{$datatype} ){ |
323
|
|
|
|
|
|
|
croak "Can't set datatype of $meta_cat to $datatype. Must be " . |
324
|
|
|
|
|
|
|
join (' or ', |
325
|
|
|
|
|
|
|
sort keys %{ $allowed_datatypes->{$meta_cat} } ) . '.'; |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
return; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
#verify the contents of |
331
|
|
|
|
|
|
|
sub _check_levels { |
332
|
|
|
|
|
|
|
my ($data) = @_; |
333
|
|
|
|
|
|
|
my @invalid = |
334
|
|
|
|
|
|
|
grep { $_ !~ /^(?:term|termEntry|langSet)$/ } @{$data->{levels}}; |
335
|
|
|
|
|
|
|
if(@invalid){ |
336
|
|
|
|
|
|
|
croak "Bad levels in descrip[\@name=$data->{name}]. " . |
337
|
|
|
|
|
|
|
' may only include term, termEntry, and langSet'; |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
return; |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
1; |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
__END__ |