line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Data::Validate::Sanctions::Fetcher; |
2
|
|
|
|
|
|
|
|
3
|
8
|
|
|
8
|
|
588
|
use strict; |
|
8
|
|
|
|
|
24
|
|
|
8
|
|
|
|
|
224
|
|
4
|
8
|
|
|
8
|
|
40
|
use warnings; |
|
8
|
|
|
|
|
23
|
|
|
8
|
|
|
|
|
203
|
|
5
|
|
|
|
|
|
|
|
6
|
8
|
|
|
8
|
|
4773
|
use DateTime::Format::Strptime; |
|
8
|
|
|
|
|
6046199
|
|
|
8
|
|
|
|
|
42
|
|
7
|
8
|
|
|
8
|
|
6325
|
use Date::Utility; |
|
8
|
|
|
|
|
5960908
|
|
|
8
|
|
|
|
|
452
|
|
8
|
8
|
|
|
8
|
|
7158
|
use IO::Uncompress::Unzip qw(unzip $UnzipError); |
|
8
|
|
|
|
|
445011
|
|
|
8
|
|
|
|
|
1029
|
|
9
|
8
|
|
|
8
|
|
82
|
use List::Util qw(uniq any); |
|
8
|
|
|
|
|
24
|
|
|
8
|
|
|
|
|
570
|
|
10
|
8
|
|
|
8
|
|
4717
|
use Mojo::UserAgent; |
|
8
|
|
|
|
|
2918587
|
|
|
8
|
|
|
|
|
93
|
|
11
|
8
|
|
|
8
|
|
6479
|
use Text::CSV; |
|
8
|
|
|
|
|
123947
|
|
|
8
|
|
|
|
|
405
|
|
12
|
8
|
|
|
8
|
|
3389
|
use Text::Trim qw(trim); |
|
8
|
|
|
|
|
4239
|
|
|
8
|
|
|
|
|
559
|
|
13
|
8
|
|
|
8
|
|
76
|
use Syntax::Keyword::Try; |
|
8
|
|
|
|
|
21
|
|
|
8
|
|
|
|
|
98
|
|
14
|
8
|
|
|
8
|
|
4228
|
use XML::Fast; |
|
8
|
|
|
|
|
7518
|
|
|
8
|
|
|
|
|
463
|
|
15
|
8
|
|
|
8
|
|
3436
|
use Locale::Country; |
|
8
|
|
|
|
|
351984
|
|
|
8
|
|
|
|
|
30805
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
our $VERSION = '0.16'; # VERSION |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head2 config |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
Creates a hash-ref of sanction source configurations, including their url, description and parser callback. |
22
|
|
|
|
|
|
|
It accepts the following list of named args: |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=over 4 |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=item B<eu_token>: required if B<eu_url> is empty |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
The token required for accessing EU sanctions (usually added as an arg to URL). |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=item B<eu_url>: required if B<eu_token> is empty |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
EU Sanctions full url, token included. |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=item B<ofac_sdn_url>: optional |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
OFAC-SDN download url. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=item B<ofac_consolidated_url>: optional |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
OFAC Consolidated download url. |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=item B<hmt_url>: optional |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
HMT Sanctions download url. |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=back |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=cut |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
sub config {
    my %args = @_;

    # Explicit arguments take precedence over the environment.
    my $eu_token = $args{eu_token} // $ENV{EU_SANCTIONS_TOKEN};
    my $eu_url   = $args{eu_url} || $ENV{EU_SANCTIONS_URL};

    if ($eu_token) {
        # A complete url, when available, wins over one derived from the token.
        $eu_url ||= "https://webgate.ec.europa.eu/fsd/fsf/public/files/xmlFullSanctionsList_1_1/content?token=$eu_token";
    } elsif (!$eu_url) {
        warn 'EU Sanctions will fail whithout eu_token or eu_url';
    }

    my %ofac_sdn = (
        description => 'TREASURY.GOV: Specially Designated Nationals List with a.k.a included',
        # The zipped download is used by default: the plain xml file is 7mb+.
        url    => $args{ofac_sdn_url} || 'https://www.treasury.gov/ofac/downloads/sdn_xml.zip',
        parser => \&_ofac_xml_zip,
    );

    my %ofac_consolidated = (
        description => 'TREASURY.GOV: Consolidated Sanctions List Data Files',
        url         => $args{ofac_consolidated_url} || 'https://www.treasury.gov/ofac/downloads/consolidated/consolidated.xml',
        parser      => \&_ofac_xml,
    );

    my %hmt = (
        description => 'GOV.UK: Financial sanctions: consolidated list of targets',
        url         => $args{hmt_url} || 'https://ofsistorage.blob.core.windows.net/publishlive/ConList.csv',
        parser      => \&_hmt_csv,
    );

    my %eu = (
        description => 'EUROPA.EU: Consolidated list of persons, groups and entities subject to EU financial sanctions',
        url         => $eu_url,
        parser      => \&_eu_xml,
    );

    return {
        'OFAC-SDN'          => \%ofac_sdn,
        'OFAC-Consolidated' => \%ofac_consolidated,
        'HMT-Sanctions'     => \%hmt,
        'EU-Sanctions'      => \%eu,
    };
}
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# |
88
|
|
|
|
|
|
|
# Parsers - returns timestamp of last update and arrayref of names |
89
|
|
|
|
|
|
|
# |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# Joins the given name parts with single spaces and strips leading and
# trailing whitespace from the combined string.
sub _process_name {
    my @parts = @_;

    my $name = join(' ', @parts);
    $name =~ s/^\s+//;
    $name =~ s/\s+$//;

    return $name;
}
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Parser for the zipped OFAC xml download: inflates the archive in memory
# and hands the resulting xml text to _ofac_xml. Dies if unzipping fails.
sub _ofac_xml_zip {
    my ($zipped) = @_;

    my $xml;
    my $unzipped_ok = unzip(\$zipped => \$xml);
    die "unzip failed: $UnzipError\n" unless $unzipped_ok;

    return _ofac_xml($xml);
}
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# Converts a date string to epoch seconds using Date::Utility.
# Dates written as D/M/YYYY or D-M-YYYY are first rearranged to YYYY-M-D.
# Returns undef when Date::Utility cannot handle the value.
sub _date_to_epoch {
    my ($date) = @_;

    if ($date =~ m/^(\d{1,2})[\/\-](\d{1,2})[\/\-](\d{4})$/) {
        my ($day, $month, $year) = ($1, $2, $3);
        $date = "$year-$month-$day";
    }

    # Any exception raised during conversion is swallowed on purpose;
    # callers treat an undefined result as "not a parsable date".
    my $epoch = eval { Date::Utility->new($date)->epoch };

    return $epoch;
}
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=head2 get_country_code |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
If the arg is a country code, it's returned in lower case; otherwise the arg is converted to country code. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=cut |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub get_country_code {
    my ($input) = @_;

    my $value = trim($input);

    # Already a known country code: hand it back lower-cased.
    return lc($value) if code2country($value);

    # Otherwise treat it as a country name; unknown names give an empty string.
    return lc(country2code($value) // '');
}
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head2 _process_sanction_entry |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
Processes an entry retrieved from sanction resources and saves it into the specified key-value dataset. |
128
|
|
|
|
|
|
|
An entry may have multiple names (aliases), each of which will be taken as a key in the dataset with the same values/info. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
It takes following list of args: |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=over 4 |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=item - dataset: An array ref to which the processed entry (a hash ref of its fields) is appended |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item - data: a hash of entry data that may contain: |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=over 4 |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=item * names: an array of names/aliases |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=item * date_of_birth: an array of dates of birth |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
Dates of birth are not of standardized format in some data sources; so they are processed in three steps: |
145
|
|
|
|
|
|
|
1- first, an attempt is made to convert them into epoch, saved as B<dob_epoch>; |
146
|
|
|
|
|
|
|
2- otherwise to extract year (or an array of years) of birth, saved as B<dob_year>; and |
147
|
|
|
|
|
|
|
3- finally, they are saved as raw text in B<dob_text>. |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=item * place_of_birth: an array of country names or codes |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=item * residence: an array of country names or codes |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=item * nationality: an array of country names or codes |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=item * citizen: an array of country names or codes |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=item * postal_code: an array of postal/zip codes |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=item * national_id: an array of national ID numbers |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=item * passport_no: an array of passport numbers |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=back |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=back |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=cut |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# Normalizes one sanction entry and appends it to $dataset (an array ref).
#
# Arguments:
#   $dataset - array ref that receives the processed entry as a hash ref
#   %data    - entry fields, each an array ref (names, date_of_birth,
#              place_of_birth, residence, nationality, citizen, postal_code,
#              national_id, passport_no)
#
# Processing steps:
#   - date_of_birth is replaced by dob_epoch / dob_year / dob_text;
#   - country fields are converted to lower-case country codes;
#   - commas are removed from names;
#   - values are de-duplicated, empty values and empty fields are dropped.
#
# The entry is stored only if at least one name is left. Returns $dataset.
sub _process_sanction_entry {
    my ($dataset, %data) = @_;

    # Work on a copy so the caller's list is not altered while normalizing.
    my @dob_list = @{$data{date_of_birth} // []};
    my (@dob_epoch, @dob_year, @dob_text);

    for my $dob (@dob_list) {
        $dob = trim($dob);
        next unless $dob;

        $dob =~ s/[ \/]/-/g;

        # Dates whose day or month is zero are reduced to their year.
        if ($dob =~ m/^(\d{1,2})-(\d{1,2})-(\d{4})$/) {
            $dob = $3 if $1 == 0 or $2 == 0;
        } elsif ($dob =~ m/^(\d{4})-(\d{1,2})-(\d{1,2})$/) {
            $dob = $1 if $2 == 0 or $3 == 0;
        }

        # "Mon-YYYY" carries no usable day, keep the year only.
        $dob = $1 if $dob =~ m/^[A-Z][a-z]{2}-(\d{4})$/;

        if ($dob =~ m/^\d{4}$/) {
            push @dob_year, $dob;
        } elsif ($dob =~ m/(\d{4}).*to.*(\d{4})$/) {
            # A year range is expanded to every year it covers.
            push @dob_year, ($1 .. $2);
        } else {
            my $epoch = _date_to_epoch($dob);
            if (defined $epoch) {
                push @dob_epoch, $epoch;
            } else {
                push @dob_text, $dob;
            }
        }
    }

    delete $data{date_of_birth};
    $data{dob_epoch} = \@dob_epoch;
    $data{dob_year}  = \@dob_year;
    $data{dob_text}  = \@dob_text;

    # convert all country names to iso codes, dropping the unrecognized ones
    for my $field (qw/place_of_birth residence nationality citizen/) {
        $data{$field} = [grep { $_ } map { get_country_code($_) } @{$data{$field} // []}];
    }

    # remove commas
    $data{names} = [map { trim($_) =~ s/,//gr } @{$data{names} // []}];

    for my $field (keys %data) {
        # make values unique
        my @values = uniq @{$data{$field} // []};

        # remove empty values; dob_epoch is exempt because an epoch of 0 is acceptable
        @values = grep { $_ } @values unless $field eq 'dob_epoch';

        # remove fields with empty list
        if (@values) {
            $data{$field} = \@values;
        } else {
            delete $data{$field};
        }
    }

    push $dataset->@*, \%data if $data{names};

    return $dataset;
}
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# Parser for OFAC xml lists (SDN and Consolidated).
#
# Takes the raw xml text, keeps entries whose sdnType is 'Individual' and
# feeds each of them to _process_sanction_entry.
#
# Returns a hash ref: { updated => <publish date as epoch>, content => <array ref of entries> }.
# Dies with "Corrupt data. Release date is invalid" when the publish date is
# missing or cannot be converted.
sub _ofac_xml {
    my $raw_data = shift;

    my $ref = xml2hash($raw_data, array => ['aka'])->{sdnList};

    # "publshInformation" is a typo in the ofac xml tags themselves.
    # Publish_Date is matched as three slash-separated numbers and rearranged
    # to "$3-$1-$2" before conversion.
    my $publish_date  = $ref->{publshInformation}{Publish_Date} // '';
    my $publish_epoch = $publish_date =~ m/(\d{1,2})\/(\d{1,2})\/(\d{4})/ ? _date_to_epoch("$3-$1-$2") : undef;
    die "Corrupt data. Release date is invalid\n" unless defined $publish_epoch;

    # Collects $attribute from every $child node found under $parent.
    # A single child is parsed as a hash rather than an array, so both shapes are handled.
    my $parse_list_node = sub {
        my ($entry, $parent, $child, $attribute) = @_;

        my $node = $entry->{$parent}->{$child} // [];
        $node = [$node] if (ref $node eq 'HASH');

        return map { $_->{$attribute} // () } @$node;
    };

    my $dataset = [];

    # A list holding exactly one sdnEntry is parsed as a hash as well.
    my $sdn_entries = $ref->{sdnEntry} // [];
    $sdn_entries = [$sdn_entries] if ref $sdn_entries eq 'HASH';

    foreach my $entry (@$sdn_entries) {
        next unless ($entry->{sdnType} // '') eq 'Individual';

        # The main name plus every alias whose category is 'strong'
        # (nodes without a category count as strong).
        my @names;
        for ($entry, @{$entry->{akaList}{aka} // []}) {
            my $category = $_->{category} // 'strong';
            push @names, _process_name($_->{firstName} // '', $_->{lastName} // '') if $category eq 'strong';
        }

        my @dob_list    = $parse_list_node->($entry, 'dateOfBirthList', 'dateOfBirthItem', 'dateOfBirth');
        my @citizen     = $parse_list_node->($entry, 'citizenshipList', 'citizenship',     'country');
        my @residence   = $parse_list_node->($entry, 'addressList',     'address',         'country');
        my @postal_code = $parse_list_node->($entry, 'addressList',     'address',         'postalCode');
        my @nationality = $parse_list_node->($entry, 'nationalityList', 'nationality',     'country');

        # Only the part after the last comma of a place of birth is kept.
        my @place_of_birth = $parse_list_node->($entry, 'placeOfBirthList', 'placeOfBirthItem', 'placeOfBirth');
        @place_of_birth = map { my @parts = split ',', $_; $parts[-1] } @place_of_birth;

        my $id_list = $entry->{idList}->{id} // [];
        $id_list = [$id_list] if ref $id_list eq 'HASH';
        my @passport_no = map { ($_->{idType} // '') eq 'Passport'       ? $_->{idNumber} : () } @$id_list;
        my @national_id = map { ($_->{idType} // '') =~ m/National ID/ ? $_->{idNumber} : () } @$id_list;

        _process_sanction_entry(
            $dataset,
            names          => \@names,
            date_of_birth  => \@dob_list,
            place_of_birth => \@place_of_birth,
            residence      => \@residence,
            nationality    => \@nationality,
            citizen        => \@citizen,
            postal_code    => \@postal_code,
            national_id    => \@national_id,
            passport_no    => \@passport_no,
        );
    }

    return {
        updated => $publish_epoch,
        content => $dataset,
    };
}
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
# Parser for the HMT consolidated list (csv).
#
# Layout as read here: the first line carries the release date in its second
# field, the second line holds the column titles, the remaining lines are
# records. Only records whose 'Group Type' is "Individual" are kept.
#
# Returns a hash ref: { updated => <release date as epoch>, content => <array ref of entries> }.
# Dies when the release date is missing or invalid.
sub _hmt_csv {
    my ($raw_data) = @_;

    my $csv = Text::CSV->new({binary => 1}) or die "Cannot use CSV: " . Text::CSV->error_diag() . "\n";

    my ($info_line, $header_line, @records) = split("\n", $raw_data);

    my @info = $csv->parse(trim($info_line)) ? $csv->fields() : ();
    die "Currupt data. Release date was not found\n" unless @info && _date_to_epoch($info[1]);

    my $publish_epoch = _date_to_epoch($info[1]);
    die "Currupt data. Release date is invalid\n" unless defined $publish_epoch;

    # Map each column title to its position.
    $csv->parse(trim($header_line));
    my @titles = $csv->fields();
    my %column;
    for my $index (0 .. $#titles) {
        $column{trim($titles[$index])} = $index;
    }

    my $dataset = [];

    foreach my $record (@records) {
        next unless $csv->parse(trim($record));

        # A trailing parenthesized remark is dropped from every field.
        my @row = map { trim($_ =~ s/\([^(]*\)$//r) } $csv->fields();

        next unless $row[$column{'Group Type'}] eq "Individual";

        # The first six columns together make up the name.
        my $name = _process_name(@row[0 .. 5]);
        next if $name =~ /^\s*$/;

        my %entry = (
            names          => [$name],
            date_of_birth  => [$row[$column{'DOB'}]],
            place_of_birth => [$row[$column{'Country of Birth'}]],
            residence      => [$row[$column{'Country'}]],
            # nationality is saved as an adjective (Iranian, American, etc); let's ignore it.
            nationality => [''],
            postal_code => [$row[$column{'Post/Zip Code'}]],
            national_id => [$row[$column{'National Identification Number'}]],
        );

        # Fields to be added in the new file format (https://redmine.deriv.cloud/issues/51922)
        # They are read only when the corresponding column is present.
        if (defined $column{'Name Non-Latin Script'}) {
            my $non_latin_alias = $row[$column{'Name Non-Latin Script'}];
            push @{$entry{names}}, $non_latin_alias if $non_latin_alias;
        }
        if (defined $column{'Passport Number'}) {
            my $passport_no = $row[$column{'Passport Number'}];
            $entry{passport_no} = [$passport_no] if $passport_no;
        }

        _process_sanction_entry($dataset, %entry);
    }

    return {
        updated => $publish_epoch,
        content => $dataset,
    };
}
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
# Parser for the EU consolidated sanctions list (xml).
#
# Keeps entities whose subjectType code is 'person' and feeds each of them
# to _process_sanction_entry.
#
# Returns a hash ref: { updated => <generation date as epoch>, content => <array ref of entries> }.
# Dies when the generation date is missing or invalid.
sub _eu_xml {
    my ($raw_data) = @_;

    my $ref     = xml2hash($raw_data, array => ['nameAlias', 'birthdate'])->{export};
    my $dataset = [];

    # Picks the non-empty ISO2 country codes out of a list of nodes.
    my $country_codes = sub {
        return map { $_->{'-countryIso2Code'} || () } @_;
    };

    foreach my $entry (@{$ref->{sanctionEntity}}) {
        next unless $entry->{subjectType}->{'-code'} eq 'person';

        # Make sure these children are always array refs:
        # missing ones become empty lists, single hashes are wrapped.
        for my $key (qw/birthdate citizenship address identification/) {
            my $node = $entry->{$key} // [];
            $entry->{$key} = ref $node eq 'HASH' ? [$node] : $node;
        }

        # Prefer the whole name; fall back to "first last" when it is empty.
        my @names;
        for my $alias (@{$entry->{nameAlias} // []}) {
            my $name = $alias->{'-wholeName'} || join(' ', ($alias->{'-firstName'} // '', $alias->{'-lastName'} // ''));
            push @names, $name unless $name eq ' ';
        }

        # A full birthdate wins over a bare year.
        my @dob_list;
        foreach my $dob ($entry->{birthdate}->@*) {
            my $value = $dob->{'-birthdate'} || $dob->{'-year'};
            push @dob_list, $value if $value;
        }

        my @identifications = $entry->{identification}->@*;

        my @place_of_birth = $country_codes->($entry->{birthdate}->@*);
        my @citizen        = $country_codes->($entry->{citizenship}->@*);
        my @residence      = $country_codes->($entry->{address}->@*);
        my @nationality    = $country_codes->(@identifications);
        my @postal_code    = map { $_->{'-zipCode'} || $_->{'-poBox'} || () } $entry->{address}->@*;
        my @national_id    = map { $_->{'-number'} || () } grep { $_->{'-identificationTypeCode'} eq 'id' } @identifications;
        my @passport_no    = map { $_->{'-number'} || () } grep { $_->{'-identificationTypeCode'} eq 'passport' } @identifications;

        _process_sanction_entry(
            $dataset,
            names          => \@names,
            date_of_birth  => \@dob_list,
            place_of_birth => \@place_of_birth,
            residence      => \@residence,
            nationality    => \@nationality,
            citizen        => \@citizen,
            postal_code    => \@postal_code,
            national_id    => \@national_id,
            passport_no    => \@passport_no,
        );
    }

    # Only the part before the 'T' of generationDate is used.
    my ($date_part) = split('T', $ref->{'-generationDate'} // '');
    my $publish_epoch = _date_to_epoch($date_part // '');

    die "Corrupt data. Release date is invalid\n" unless $publish_epoch;

    return {
        updated => $publish_epoch,
        content => $dataset,
    };
}
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=head2 run |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
Fetches latest version of lists, and returns combined hash of successfully downloaded ones |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=cut |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
sub run {
    my %args = @_;

    my $result = {};

    my $config  = config(%args);
    my $retries = $args{retries} // 3;

    foreach my $id (sort keys %$config) {
        my $source = $config->{$id};

        # A failure of one source is recorded under its id and does not
        # prevent the remaining sources from being processed.
        try {
            die "Url is empty for $id\n" unless $source->{url};

            my $raw_data;

            if ($source->{url} =~ m/^file:\/\/(.*)$/) {
                # Copy the captured path and pass it explicitly, so the file
                # reader does not have to rely on this scope's $1.
                my $path = $1;
                $raw_data = _entries_from_file($id, $path);
            } else {
                $raw_data = _entries_from_remote_src({
                    id      => $id,
                    source  => $source->{url},
                    retries => $retries
                });
            }

            my $data = $source->{parser}->($raw_data);

            # Results are kept only when the parser reports an update time above 1.
            if ($data->{updated} > 1) {
                $result->{$id} = $data;
                my $count = $data->{content}->@*;
                print "Source $id: $count entries fetched \n" if $args{verbose};
            }
        } catch ($e) {
            $result->{$id}->{error} = $e;
        }
    }

    return $result;
}
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
=head2 _entries_from_file |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
Get the sanction entries from a file locally |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=cut |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
# Reads the whole content of a local sanction file.
#
# Arguments:
#   $id   - source id, used in the error message only
#   $path - (optional) path of the file to read; when omitted, the first
#           capture group of the caller's last successful match ($1) is
#           used, which is how this sub was originally driven
#
# Returns the file content as one string. Dies if the file cannot be opened.
sub _entries_from_file {
    my ($id, $path) = @_;

    # Backward compatibility for callers that pass the id only.
    $path //= $1 // '';

    open my $fh, '<', $path or die "Can't open $id file $path $!\n";
    my $entries = do { local $/; <$fh> };
    close $fh;

    return $entries;
}
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=head2 _entries_from_remote_src |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
Gets the sanction entries from a remote source; includes a retry mechanism. |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=cut |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
sub _entries_from_remote_src {
    my ($args) = @_;

    my ($id, $src_url, $retries) = @$args{qw/ id source retries /};
    $retries //= 3;

    my $ua = Mojo::UserAgent->new;
    $ua->connect_timeout(15);
    $ua->inactivity_timeout(60);

    my $entries;
    my $error_log = 'Unknown Error';

    # Try up to $retries times; the first successful download ends the loop,
    # otherwise the most recent error is remembered for the final message.
    for (my $attempt = 0; $attempt < $retries; $attempt++) {
        try {
            my $result = $ua->get($src_url)->result;

            die "File not downloaded for $id\n" if $result->is_error;
            $entries = $result->body;

            last;
        } catch ($e) {
            $error_log = $e;
        }
    }

    die "An error occurred while fetching data from '$src_url' due to $error_log\n" unless defined $entries;

    return $entries;
}
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
1; |