| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# This file is part of TBX-Min |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# This software is copyright (c) 2016 by Alan Melby. |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# This is free software; you can redistribute it and/or modify it under |
|
7
|
|
|
|
|
|
|
# the same terms as the Perl 5 programming language system itself. |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
package TBX::Min; |
|
10
|
8
|
|
|
8
|
|
174512
|
use strict; |
|
|
8
|
|
|
|
|
15
|
|
|
|
8
|
|
|
|
|
194
|
|
|
11
|
8
|
|
|
8
|
|
28
|
use warnings; |
|
|
8
|
|
|
|
|
7
|
|
|
|
8
|
|
|
|
|
266
|
|
|
12
|
|
|
|
|
|
|
our $VERSION = '0.08'; # VERSION |
|
13
|
|
|
|
|
|
|
# ABSTRACT: Read, write and edit TBX-Min files |
|
14
|
8
|
|
|
8
|
|
3690
|
use subs qw(date_created directionality entries); |
|
|
8
|
|
|
|
|
136
|
|
|
|
8
|
|
|
|
|
28
|
|
|
15
|
8
|
|
|
|
|
38
|
use Class::Tiny qw( |
|
16
|
|
|
|
|
|
|
id |
|
17
|
|
|
|
|
|
|
description |
|
18
|
|
|
|
|
|
|
creator |
|
19
|
|
|
|
|
|
|
license |
|
20
|
|
|
|
|
|
|
source_lang |
|
21
|
|
|
|
|
|
|
target_lang |
|
22
|
|
|
|
|
|
|
date_created |
|
23
|
|
|
|
|
|
|
directionality |
|
24
|
|
|
|
|
|
|
entries |
|
25
|
8
|
|
|
8
|
|
3725
|
); |
|
|
8
|
|
|
|
|
17868
|
|
|
26
|
8
|
|
|
8
|
|
19507
|
use XML::Twig; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
use autodie; |
|
28
|
|
|
|
|
|
|
use Path::Tiny; |
|
29
|
|
|
|
|
|
|
use Carp; |
|
30
|
|
|
|
|
|
|
use Import::Into; |
|
31
|
|
|
|
|
|
|
use DateTime::Format::ISO8601; |
|
32
|
|
|
|
|
|
|
use Try::Tiny; |
|
33
|
|
|
|
|
|
|
use TBX::Min::TermEntry; |
|
34
|
|
|
|
|
|
|
use TBX::Min::LangSet; |
|
35
|
|
|
|
|
|
|
use TBX::Min::TIG; |
|
36
|
|
|
|
|
|
|
use TBX::Min::NoteGrp; |
|
37
|
|
|
|
|
|
|
use TBX::Min::Note; |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# Use Import::Into to export subclasses into caller |
|
40
|
|
|
|
|
|
|
sub import { |
|
41
|
|
|
|
|
|
|
my $target = caller; |
|
42
|
|
|
|
|
|
|
TBX::Min::TermEntry->import::into($target); |
|
43
|
|
|
|
|
|
|
TBX::Min::LangSet->import::into($target); |
|
44
|
|
|
|
|
|
|
TBX::Min::TIG->import::into($target); |
|
45
|
|
|
|
|
|
|
TBX::Min::NoteGrp->import::into($target); |
|
46
|
|
|
|
|
|
|
TBX::Min::Note->import::into($target); |
|
47
|
|
|
|
|
|
|
return; |
|
48
|
|
|
|
|
|
|
} |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
sub new_from_xml { |
|
51
|
|
|
|
|
|
|
my ($class, $data) = @_; |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
if(!$data){ |
|
54
|
|
|
|
|
|
|
croak 'missing required data argument'; |
|
55
|
|
|
|
|
|
|
} |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
my $fh = _get_handle($data); |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# build a twig out of the input document |
|
60
|
|
|
|
|
|
|
my $twig = XML::Twig->new( |
|
61
|
|
|
|
|
|
|
output_encoding => 'UTF-8', |
|
62
|
|
|
|
|
|
|
# do_not_chain_handlers => 1, #can be important when things get complicated |
|
63
|
|
|
|
|
|
|
keep_spaces => 0, |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# these store new entries, langSets and tigs |
|
66
|
|
|
|
|
|
|
start_tag_handlers => { |
|
67
|
|
|
|
|
|
|
termEntry => \&_termEntryStart, |
|
68
|
|
|
|
|
|
|
langSet => \&_langStart, #langset |
|
69
|
|
|
|
|
|
|
tig => \&_termGrpStart, #tig |
|
70
|
|
|
|
|
|
|
noteGrp => \&_noteGrpStart, #note group |
|
71
|
|
|
|
|
|
|
note => \&_noteStart, #note |
|
72
|
|
|
|
|
|
|
}, |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
TwigHandlers => { |
|
75
|
|
|
|
|
|
|
TBX => \&_check_dialect, |
|
76
|
|
|
|
|
|
|
# header attributes become attributes of the TBX::Min object |
|
77
|
|
|
|
|
|
|
id => \&_headerAtt, |
|
78
|
|
|
|
|
|
|
description => \&_headerAtt, |
|
79
|
|
|
|
|
|
|
dateCreated => \&_date_created, |
|
80
|
|
|
|
|
|
|
creator => \&_headerAtt, |
|
81
|
|
|
|
|
|
|
license => \&_headerAtt, |
|
82
|
|
|
|
|
|
|
directionality => \&_directionality, |
|
83
|
|
|
|
|
|
|
languages => \&_languages, |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# becomes part of the current TBX::Min::TermEntry object |
|
86
|
|
|
|
|
|
|
subjectField => sub { |
|
87
|
|
|
|
|
|
|
shift->{tbx_min_entries}->[-1]->subject_field($_->text)}, |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
# these become attributes of the current TBX::Min::TIG object |
|
90
|
|
|
|
|
|
|
term => sub {shift->{tbx_min_current_term_grp}->term($_->text)}, |
|
91
|
|
|
|
|
|
|
partOfSpeech => sub { |
|
92
|
|
|
|
|
|
|
shift->{tbx_min_current_term_grp}->part_of_speech($_->text)}, |
|
93
|
|
|
|
|
|
|
customer => sub { |
|
94
|
|
|
|
|
|
|
shift->{tbx_min_current_term_grp}->customer($_->text)}, |
|
95
|
|
|
|
|
|
|
termStatus => sub { |
|
96
|
|
|
|
|
|
|
shift->{tbx_min_current_term_grp}->status($_->text)}, |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# these become attributes of the current TBX::Min::Note object |
|
99
|
|
|
|
|
|
|
noteKey => sub {shift->{tbx_min_current_note}->noteKey($_->text)}, |
|
100
|
|
|
|
|
|
|
noteValue => sub {shift->{tbx_min_current_note}->noteValue($_->text)}, |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# delete the termEntry twig when finished to free up memory |
|
103
|
|
|
|
|
|
|
termEntry => sub {$_[0]->purge}, |
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
); |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# use handlers to process individual tags, then grab the result |
|
108
|
|
|
|
|
|
|
$twig->parse($fh); |
|
109
|
|
|
|
|
|
|
my $self = $twig->{tbx_min_att}; |
|
110
|
|
|
|
|
|
|
$self->{entries} = $twig->{tbx_min_entries} || []; |
|
111
|
|
|
|
|
|
|
bless $self, $class; |
|
112
|
|
|
|
|
|
|
return $self; |
|
113
|
|
|
|
|
|
|
} |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
sub _get_handle { |
|
116
|
|
|
|
|
|
|
my ($data) = @_; |
|
117
|
|
|
|
|
|
|
my $fh; |
|
118
|
|
|
|
|
|
|
if((ref $data) eq 'SCALAR'){ |
|
119
|
|
|
|
|
|
|
open $fh, '<', $data; ## no critic(RequireBriefOpen) |
|
120
|
|
|
|
|
|
|
}else{ |
|
121
|
|
|
|
|
|
|
$fh = path($data)->filehandle('<'); |
|
122
|
|
|
|
|
|
|
} |
|
123
|
|
|
|
|
|
|
return $fh; |
|
124
|
|
|
|
|
|
|
} |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
my %valid = map {+($_=>1)} Class::Tiny->get_all_attributes_for(__PACKAGE__); |
|
128
|
|
|
|
|
|
|
sub new { |
|
129
|
|
|
|
|
|
|
my ($class, $args) = @_; |
|
130
|
|
|
|
|
|
|
my $self; |
|
131
|
|
|
|
|
|
|
if((ref $args) eq 'HASH'){ |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# validate arguments |
|
134
|
|
|
|
|
|
|
if(my @invalids = grep {!$valid{$_}} sort keys %$args){ |
|
135
|
|
|
|
|
|
|
croak 'Invalid attributes for class: ' . |
|
136
|
|
|
|
|
|
|
join ' ', @invalids |
|
137
|
|
|
|
|
|
|
} |
|
138
|
|
|
|
|
|
|
if($args->{entries} && ref $args->{entries} ne 'ARRAY'){ |
|
139
|
|
|
|
|
|
|
croak q{Attribute 'entries' should be an array reference}; |
|
140
|
|
|
|
|
|
|
} |
|
141
|
|
|
|
|
|
|
if(exists $args->{directionality}){ |
|
142
|
|
|
|
|
|
|
_validate_dir($args->{directionality}); |
|
143
|
|
|
|
|
|
|
} |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# validate datetime and store object, not string |
|
146
|
|
|
|
|
|
|
if(my $dt_string = $args->{date_created}){ |
|
147
|
|
|
|
|
|
|
$args->{date_created} = _parse_datetime($dt_string); |
|
148
|
|
|
|
|
|
|
} |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
$self = $args; |
|
151
|
|
|
|
|
|
|
}else{ |
|
152
|
|
|
|
|
|
|
$self = {}; |
|
153
|
|
|
|
|
|
|
} |
|
154
|
|
|
|
|
|
|
$self->{entries} ||= []; |
|
155
|
|
|
|
|
|
|
return bless $self, $class; |
|
156
|
|
|
|
|
|
|
} |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
sub date_created { |
|
159
|
|
|
|
|
|
|
my ($self, $date_created) = @_; |
|
160
|
|
|
|
|
|
|
if($date_created) { |
|
161
|
|
|
|
|
|
|
return $self->{date_created} = |
|
162
|
|
|
|
|
|
|
_parse_datetime($date_created); |
|
163
|
|
|
|
|
|
|
} |
|
164
|
|
|
|
|
|
|
if(my $dt = $self->{date_created}){ |
|
165
|
|
|
|
|
|
|
return $dt->iso8601; |
|
166
|
|
|
|
|
|
|
} |
|
167
|
|
|
|
|
|
|
return; |
|
168
|
|
|
|
|
|
|
} |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
sub _parse_datetime { |
|
171
|
|
|
|
|
|
|
my ($dt_string) = @_; |
|
172
|
|
|
|
|
|
|
my $dt; |
|
173
|
|
|
|
|
|
|
try{ |
|
174
|
|
|
|
|
|
|
$dt = DateTime::Format::ISO8601->parse_datetime($dt_string); |
|
175
|
|
|
|
|
|
|
}catch{ |
|
176
|
|
|
|
|
|
|
croak 'date is not in ISO8601 format'; |
|
177
|
|
|
|
|
|
|
}; |
|
178
|
|
|
|
|
|
|
return $dt; |
|
179
|
|
|
|
|
|
|
} |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub directionality { |
|
182
|
|
|
|
|
|
|
my ($self, $directionality) = @_; |
|
183
|
|
|
|
|
|
|
if(defined $directionality) { |
|
184
|
|
|
|
|
|
|
_validate_dir($directionality); |
|
185
|
|
|
|
|
|
|
return $self->{directionality} = $directionality; |
|
186
|
|
|
|
|
|
|
} |
|
187
|
|
|
|
|
|
|
return $self->{directionality}; |
|
188
|
|
|
|
|
|
|
} |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
sub _validate_dir { |
|
191
|
|
|
|
|
|
|
my ($dir) = @_; |
|
192
|
|
|
|
|
|
|
if($dir ne 'bidirectional' and $dir ne 'monodirectional'){ |
|
193
|
|
|
|
|
|
|
croak "Illegal directionality '$dir'"; |
|
194
|
|
|
|
|
|
|
} |
|
195
|
|
|
|
|
|
|
return; |
|
196
|
|
|
|
|
|
|
} |
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
sub entries { ## no critic(RequireArgUnpacking) |
|
199
|
|
|
|
|
|
|
my ($self) = @_; |
|
200
|
|
|
|
|
|
|
if (@_ > 1){ |
|
201
|
|
|
|
|
|
|
croak 'extra argument found (entries is a getter only)'; |
|
202
|
|
|
|
|
|
|
} |
|
203
|
|
|
|
|
|
|
return $self->{entries}; |
|
204
|
|
|
|
|
|
|
} |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub add_entry { |
|
207
|
|
|
|
|
|
|
my ($self, $termEntry) = @_; |
|
208
|
|
|
|
|
|
|
if( !$termEntry || !$termEntry->isa('TBX::Min::TermEntry') ){ |
|
209
|
|
|
|
|
|
|
croak 'argument to add_entry should be a TBx::Min::TermEntry'; |
|
210
|
|
|
|
|
|
|
} |
|
211
|
|
|
|
|
|
|
push @{$self->{entries}}, $termEntry; |
|
212
|
|
|
|
|
|
|
return; |
|
213
|
|
|
|
|
|
|
} |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
sub as_xml { |
|
216
|
|
|
|
|
|
|
my ($self) = @_; |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
# construct the whole document using XML::Twig::El's |
|
219
|
|
|
|
|
|
|
my $root = XML::Twig::Elt->new(TBX => {dialect => 'TBX-Min'}); |
|
220
|
|
|
|
|
|
|
my $header = XML::Twig::Elt->new('header')->paste($root); |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
# each of these header elements is a simple element with text |
|
223
|
|
|
|
|
|
|
for my $header_att ( |
|
224
|
|
|
|
|
|
|
qw(id creator license directionality description)){ |
|
225
|
|
|
|
|
|
|
next unless $self->{$header_att}; |
|
226
|
|
|
|
|
|
|
XML::Twig::Elt->new($header_att, |
|
227
|
|
|
|
|
|
|
$self->{$header_att})->paste(last_child => $header); |
|
228
|
|
|
|
|
|
|
} |
|
229
|
|
|
|
|
|
|
if($self->source_lang || $self->target_lang){ |
|
230
|
|
|
|
|
|
|
my @atts; |
|
231
|
|
|
|
|
|
|
push @atts, (source => $self->source_lang) if $self->source_lang; |
|
232
|
|
|
|
|
|
|
push @atts, (target => $self->target_lang) if $self->target_lang; |
|
233
|
|
|
|
|
|
|
XML::Twig::Elt->new(languages => {@atts})->paste( |
|
234
|
|
|
|
|
|
|
last_child => $header) |
|
235
|
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
if(my $dt = $self->{date_created}){ |
|
237
|
|
|
|
|
|
|
XML::Twig::Elt->new(dateCreated => $dt->iso8601)->paste( |
|
238
|
|
|
|
|
|
|
last_child => $header); |
|
239
|
|
|
|
|
|
|
} |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
my $body = XML::Twig::Elt->new('body')->paste(last_child => $root); |
|
242
|
|
|
|
|
|
|
for my $termEntry (@{$self->entries}){ |
|
243
|
|
|
|
|
|
|
my $entry_el = XML::Twig::Elt->new( |
|
244
|
|
|
|
|
|
|
termEntry => {$termEntry->id ? (id => $termEntry->id) : ()})-> |
|
245
|
|
|
|
|
|
|
paste(last_child => $body); |
|
246
|
|
|
|
|
|
|
XML::Twig::Elt->new( '#COMMENT', 'terminological entry')-> |
|
247
|
|
|
|
|
|
|
paste(last_child => $entry_el); |
|
248
|
|
|
|
|
|
|
if(my $sf = $termEntry->subject_field){ |
|
249
|
|
|
|
|
|
|
XML::Twig::Elt->new(subjectField => $sf)->paste( |
|
250
|
|
|
|
|
|
|
last_child => $entry_el); |
|
251
|
|
|
|
|
|
|
} |
|
252
|
|
|
|
|
|
|
for my $langGrp (@{$termEntry->lang_groups}){ |
|
253
|
|
|
|
|
|
|
my $lang_el = XML::Twig::Elt->new(langSet => |
|
254
|
|
|
|
|
|
|
{$langGrp->code ? ('xml:lang' => $langGrp->code) : ()} |
|
255
|
|
|
|
|
|
|
)->paste(last_child => $entry_el); |
|
256
|
|
|
|
|
|
|
for my $termGrp (@{$langGrp->term_groups}){ |
|
257
|
|
|
|
|
|
|
my $term_el = XML::Twig::Elt->new('tig')->paste( |
|
258
|
|
|
|
|
|
|
last_child => $lang_el); |
|
259
|
|
|
|
|
|
|
XML::Twig::Elt->new( '#COMMENT', 'terminological information group')-> |
|
260
|
|
|
|
|
|
|
paste(last_child => $term_el); |
|
261
|
|
|
|
|
|
|
if (my $term = $termGrp->term){ |
|
262
|
|
|
|
|
|
|
XML::Twig::Elt->new(term => $term)->paste( |
|
263
|
|
|
|
|
|
|
last_child => $term_el); |
|
264
|
|
|
|
|
|
|
} |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
if (my $customer = $termGrp->customer){ |
|
267
|
|
|
|
|
|
|
XML::Twig::Elt->new(customer => $customer)->paste( |
|
268
|
|
|
|
|
|
|
last_child => $term_el); |
|
269
|
|
|
|
|
|
|
} |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
for my $noteGrp (@{$termGrp->note_groups}){ |
|
272
|
|
|
|
|
|
|
my $note_grp_el = XML::Twig::Elt->new('noteGrp')->paste( |
|
273
|
|
|
|
|
|
|
last_child => $term_el); |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
for my $note (@{$noteGrp->notes}){ |
|
276
|
|
|
|
|
|
|
my $note_el = XML::Twig::Elt->new('note')->paste( |
|
277
|
|
|
|
|
|
|
last_child => $note_grp_el); |
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
if (my $noteKey = $note->noteKey){ |
|
280
|
|
|
|
|
|
|
XML::Twig::Elt->new(noteKey => $noteKey)->paste( |
|
281
|
|
|
|
|
|
|
last_child => $note_el); |
|
282
|
|
|
|
|
|
|
} |
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
if (my $noteValue = $note->noteValue){ |
|
285
|
|
|
|
|
|
|
XML::Twig::Elt->new(noteValue => $noteValue)->paste( |
|
286
|
|
|
|
|
|
|
last_child => $note_el); |
|
287
|
|
|
|
|
|
|
} |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
} |
|
290
|
|
|
|
|
|
|
} |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
if (my $status = $termGrp->status){ |
|
293
|
|
|
|
|
|
|
XML::Twig::Elt->new(termStatus => $status )->paste( |
|
294
|
|
|
|
|
|
|
last_child => $term_el); |
|
295
|
|
|
|
|
|
|
} |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
if (my $pos = $termGrp->part_of_speech){ |
|
298
|
|
|
|
|
|
|
XML::Twig::Elt->new(partOfSpeech => $pos)->paste( |
|
299
|
|
|
|
|
|
|
last_child => $term_el); |
|
300
|
|
|
|
|
|
|
} |
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
} # end tig |
|
303
|
|
|
|
|
|
|
} # end langSet |
|
304
|
|
|
|
|
|
|
} # end termEntry |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
# return pretty-printed string |
|
307
|
|
|
|
|
|
|
XML::Twig->set_pretty_print('indented'); |
|
308
|
|
|
|
|
|
|
my $TBXmin = \$root->sprint; |
|
309
|
|
|
|
|
|
|
$$TBXmin =~ s/>[\s\t\n]* |