line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
use strict; |
3
|
5
|
|
|
5
|
|
383
|
use warnings; |
|
5
|
|
|
|
|
13
|
|
|
5
|
|
|
|
|
118
|
|
4
|
5
|
|
|
5
|
|
18
|
|
|
5
|
|
|
|
|
7
|
|
|
5
|
|
|
|
|
104
|
|
5
|
|
|
|
|
|
|
use DateTime::Format::Strptime; |
6
|
5
|
|
|
5
|
|
2051
|
use Date::Utility; |
|
5
|
|
|
|
|
2666995
|
|
|
5
|
|
|
|
|
20
|
|
7
|
5
|
|
|
5
|
|
2546
|
use IO::Uncompress::Unzip qw(unzip $UnzipError); |
|
5
|
|
|
|
|
2611259
|
|
|
5
|
|
|
|
|
211
|
|
8
|
5
|
|
|
5
|
|
2931
|
use List::Util qw(uniq any); |
|
5
|
|
|
|
|
197059
|
|
|
5
|
|
|
|
|
426
|
|
9
|
5
|
|
|
5
|
|
34
|
use Mojo::UserAgent; |
|
5
|
|
|
|
|
9
|
|
|
5
|
|
|
|
|
233
|
|
10
|
5
|
|
|
5
|
|
2071
|
use Text::CSV; |
|
5
|
|
|
|
|
1316891
|
|
|
5
|
|
|
|
|
37
|
|
11
|
5
|
|
|
5
|
|
2846
|
use Text::Trim qw(trim); |
|
5
|
|
|
|
|
53946
|
|
|
5
|
|
|
|
|
184
|
|
12
|
5
|
|
|
5
|
|
1614
|
use Syntax::Keyword::Try; |
|
5
|
|
|
|
|
1956
|
|
|
5
|
|
|
|
|
243
|
|
13
|
5
|
|
|
5
|
|
1706
|
use XML::Fast; |
|
5
|
|
|
|
|
7009
|
|
|
5
|
|
|
|
|
22
|
|
14
|
5
|
|
|
5
|
|
1773
|
use Locale::Country; |
|
5
|
|
|
|
|
3256
|
|
|
5
|
|
|
|
|
217
|
|
15
|
5
|
|
|
5
|
|
1536
|
|
|
5
|
|
|
|
|
153106
|
|
|
5
|
|
|
|
|
13025
|
|
16
|
|
|
|
|
|
|
our $VERSION = '0.10'; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head2 config |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Creates a hash-ref of sanction source configurations, including their url, description and parser callback. |
21
|
|
|
|
|
|
|
It accepts the following list of named args: |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=over 4 |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=item B<-eu_token>: required if B<eu_url> is empty |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
The token required for accessing EU sanctions (usually added as an arg to URL). |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=item B<eu_url>: required if B<eu_token> is empty |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
EU Sanctions full url, token included. |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=item B<ofac_sdn_url>: optional |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
OFAC-SDN download url. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=item B<ofac_consolidated_url>: optional |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
OFAC Consolidated download url. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=item B<hmt_url>: optional |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
HMT Sanctions download url. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=back |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=cut |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
my %args = @_; |
50
|
|
|
|
|
|
|
|
51
|
8
|
|
|
8
|
1
|
19
|
my $eu_token = $args{eu_token} // $ENV{EU_SANCTIONS_TOKEN}; |
52
|
|
|
|
|
|
|
my $eu_url = $args{eu_url} || $ENV{EU_SANCTIONS_URL}; |
53
|
8
|
|
66
|
|
|
39
|
|
54
|
8
|
|
66
|
|
|
24
|
warn 'EU Sanctions will fail whithout eu_token or eu_url' unless $eu_token or $eu_url; |
55
|
|
|
|
|
|
|
|
56
|
8
|
100
|
100
|
|
|
34
|
if ($eu_token) { |
57
|
|
|
|
|
|
|
$eu_url ||= "https://webgate.ec.europa.eu/fsd/fsf/public/files/xmlFullSanctionsList_1_1/content?token=$eu_token"; |
58
|
8
|
100
|
|
|
|
56
|
} |
59
|
2
|
|
66
|
|
|
8
|
|
60
|
|
|
|
|
|
|
return { |
61
|
|
|
|
|
|
|
'OFAC-SDN' => { |
62
|
|
|
|
|
|
|
description => 'TREASURY.GOV: Specially Designated Nationals List with a.k.a included', |
63
|
|
|
|
|
|
|
url => $args{ofac_sdn_url} |
64
|
|
|
|
|
|
|
|| 'https://www.treasury.gov/ofac/downloads/sdn_xml.zip', #let's be polite and use zippped version of this 7mb+ file |
65
|
|
|
|
|
|
|
parser => \&_ofac_xml_zip, |
66
|
|
|
|
|
|
|
}, |
67
|
|
|
|
|
|
|
'OFAC-Consolidated' => { |
68
|
|
|
|
|
|
|
description => 'TREASURY.GOV: Consolidated Sanctions List Data Files', |
69
|
|
|
|
|
|
|
url => $args{ofac_consolidated_url} || 'https://www.treasury.gov/ofac/downloads/consolidated/consolidated.xml', |
70
|
|
|
|
|
|
|
parser => \&_ofac_xml, |
71
|
|
|
|
|
|
|
}, |
72
|
|
|
|
|
|
|
'HMT-Sanctions' => { |
73
|
|
|
|
|
|
|
description => 'GOV.UK: Financial sanctions: consolidated list of targets', |
74
|
|
|
|
|
|
|
url => $args{hmt_url} || 'https://ofsistorage.blob.core.windows.net/publishlive/ConList.csv', |
75
|
|
|
|
|
|
|
parser => \&_hmt_csv, |
76
|
8
|
|
100
|
|
|
102
|
}, |
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
77
|
|
|
|
|
|
|
'EU-Sanctions' => { |
78
|
|
|
|
|
|
|
description => 'EUROPA.EU: Consolidated list of persons, groups and entities subject to EU financial sanctions', |
79
|
|
|
|
|
|
|
url => $eu_url, |
80
|
|
|
|
|
|
|
parser => \&_eu_xml, |
81
|
|
|
|
|
|
|
}, |
82
|
|
|
|
|
|
|
}; |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# |
86
|
|
|
|
|
|
|
# Parsers - returns timestamp of last update and arrayref of names |
87
|
|
|
|
|
|
|
# |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
my $r = join ' ', @_; |
90
|
|
|
|
|
|
|
$r =~ s/^\s+|\s+$//g; |
91
|
|
|
|
|
|
|
return $r; |
92
|
13001
|
|
|
13001
|
|
27198
|
} |
93
|
13001
|
|
|
|
|
56860
|
|
94
|
13001
|
|
|
|
|
25257
|
my $raw_data = shift; |
95
|
|
|
|
|
|
|
my $output; |
96
|
|
|
|
|
|
|
unzip \$raw_data => \$output or die "unzip failed: $UnzipError\n"; |
97
|
|
|
|
|
|
|
return _ofac_xml($output); |
98
|
7
|
|
|
7
|
|
44
|
} |
99
|
7
|
|
|
|
|
11
|
|
100
|
7
|
50
|
|
|
|
36
|
my $date = shift; |
101
|
7
|
|
|
|
|
132002
|
|
102
|
|
|
|
|
|
|
$date = "$3-$2-$1" if $date =~ m/^(\d{1,2})[\/\-](\d{1,2})[\/\-](\d{4})$/; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
my $result = eval { Date::Utility->new($date)->epoch; }; |
105
|
4951
|
|
|
4951
|
|
7386
|
return $result; |
106
|
|
|
|
|
|
|
} |
107
|
4951
|
100
|
|
|
|
13281
|
|
108
|
|
|
|
|
|
|
=head2 get_country_code |
109
|
4951
|
|
|
|
|
7341
|
|
|
4951
|
|
|
|
|
15388
|
|
110
|
4951
|
|
|
|
|
3708058
|
If the arg is a country code, it's returned in lower case; otherwise the arg is converted to country code. |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=cut |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
my $value = trim shift; |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
return lc(code2country($value) ? $value : country2code($value) // ''); |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=head2 _process_sanction_entry |
120
|
10860
|
|
|
10860
|
1
|
21733
|
|
121
|
|
|
|
|
|
|
Processes an entry retrieved from sanction resources and saves it into the specified key-value dataset. |
122
|
10860
|
100
|
100
|
|
|
168485
|
An entry may have multiple names (aliases), each of which will be taken as a key in the dataset with the same values/info. |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
It takes following list of args: |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=over 4 |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=item - dataset: A hash ref of form [ name => info ] in which the entry will be saved |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=item - data: a hash of entry data that may contain: |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=over 4 |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=item * name: an array of names/aliases |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item * date_of_birth: an array of dates of birth |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Dates of birth are not of standardized format in some data sources; so they are processed in three steps: |
139
|
|
|
|
|
|
|
1- first, an attempt is made to convert them into epoch, saved as B<dob_epoch>; |
140
|
|
|
|
|
|
|
2- otherwise to extract year (or an array of years) of birth, saved as B<dob_year>; and |
141
|
|
|
|
|
|
|
3- finally, they are saved as raw text in B<dob_text>. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=item * place_of_birth: an array of country names or codes |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=item * residence: an array of country names or codes |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=item * nationality: an array of country names or codes |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=item * citizen: an array of country names or codes |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=item * postal_code: an array of postal/zip codes |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=item * national_id: an array of national ID numbers |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=item * passport_no: an array of passport numbers |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=back |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=back |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=cut |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
my ($dataset, %data) = @_; |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
my @dob_list = $data{date_of_birth}->@*; |
166
|
|
|
|
|
|
|
my (@dob_epoch, @dob_year, @dob_text); |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
for my $dob (@dob_list) { |
169
|
|
|
|
|
|
|
$dob = trim($dob); |
170
|
5697
|
|
|
5697
|
|
28207
|
next unless $dob; |
171
|
|
|
|
|
|
|
|
172
|
5697
|
|
|
|
|
12204
|
$dob =~ s/[ \/]/-/g; |
173
|
5697
|
|
|
|
|
9547
|
#dobs with month = day = 0 are converted to year. |
174
|
|
|
|
|
|
|
if ($dob =~ m/^(\d{1,2})-(\d{1,2})-(\d{4})$/) { |
175
|
5697
|
|
|
|
|
8554
|
$dob = $3 if $1 == 0 or $2 == 0; |
176
|
6327
|
|
|
|
|
15137
|
} elsif ($dob =~ m/^(\d{4})-(\d0{1,2})-(\d{1,2})$/) { |
177
|
6327
|
50
|
|
|
|
99139
|
$dob = $1 if $2 == 0 or $3 == 0; |
178
|
|
|
|
|
|
|
} |
179
|
6327
|
|
|
|
|
25647
|
$dob = $1 if $dob =~ m/^[A-Z][a-z]{2}-(\d{4})$/; |
180
|
|
|
|
|
|
|
|
181
|
6327
|
100
|
|
|
|
27018
|
if ($dob =~ m/^\d{4}$/) { |
|
|
50
|
|
|
|
|
|
182
|
161
|
100
|
100
|
|
|
614
|
push @dob_year, $dob; |
183
|
|
|
|
|
|
|
} elsif ($dob =~ m/(\d{4}).*to.*(\d{4})$/) { |
184
|
0
|
0
|
0
|
|
|
0
|
push @dob_year, ($1 .. $2); |
185
|
|
|
|
|
|
|
} else { |
186
|
6327
|
100
|
|
|
|
12481
|
my $epoch = _date_to_epoch($dob); |
187
|
|
|
|
|
|
|
(defined $epoch) ? push(@dob_epoch, $epoch) : push(@dob_text, $dob); |
188
|
6327
|
100
|
|
|
|
15952
|
} |
|
|
100
|
|
|
|
|
|
189
|
1223
|
|
|
|
|
2317
|
} |
190
|
|
|
|
|
|
|
delete $data{date_of_birth}; |
191
|
185
|
|
|
|
|
1004
|
$data{dob_epoch} = \@dob_epoch; |
192
|
|
|
|
|
|
|
$data{dob_year} = \@dob_year; |
193
|
4919
|
|
|
|
|
9135
|
$data{dob_text} = \@dob_text; |
194
|
4919
|
100
|
|
|
|
14755
|
|
195
|
|
|
|
|
|
|
# convert all country names to iso codes |
196
|
|
|
|
|
|
|
for my $field (qw/place_of_birth residence nationality citizen/) { |
197
|
5697
|
|
|
|
|
10360
|
$data{$field} = [map { get_country_code($_) } $data{$field}->@*]; |
198
|
5697
|
|
|
|
|
10043
|
$data{$field} = [grep { $_ } $data{$field}->@*]; |
199
|
5697
|
|
|
|
|
9069
|
} |
200
|
5697
|
|
|
|
|
9434
|
|
201
|
|
|
|
|
|
|
# remove commas |
202
|
|
|
|
|
|
|
$data{names} = [map { trim($_) =~ s/,//gr } $data{names}->@*]; |
203
|
5697
|
|
|
|
|
9496
|
|
204
|
22788
|
|
|
|
|
38254
|
# make values unique |
|
10804
|
|
|
|
|
141022
|
|
205
|
22788
|
|
|
|
|
545799
|
%data = map { $_ => [uniq $data{$_}->@*] } keys %data; |
|
10804
|
|
|
|
|
25293
|
|
206
|
|
|
|
|
|
|
# remove empty values |
207
|
|
|
|
|
|
|
for (keys %data) { |
208
|
|
|
|
|
|
|
# dob = 0 is acceptable |
209
|
5697
|
|
|
|
|
9996
|
next if $_ eq 'dob_epoch'; |
|
13096
|
|
|
|
|
113999
|
|
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
$data{$_} = [grep { $_ } $data{$_}->@*]; |
212
|
5697
|
|
|
|
|
101287
|
} |
|
62506
|
|
|
|
|
160702
|
|
213
|
|
|
|
|
|
|
# remove fields with empty list |
214
|
5697
|
|
|
|
|
18426
|
%data = %data{grep { $data{$_}->@* } keys %data}; |
215
|
|
|
|
|
|
|
|
216
|
62506
|
100
|
|
|
|
97417
|
push $dataset->@*, \%data if $data{names}; |
217
|
|
|
|
|
|
|
|
218
|
56809
|
|
|
|
|
81411
|
return $dataset; |
|
26094
|
|
|
|
|
46919
|
|
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
5697
|
|
|
|
|
13873
|
my $raw_data = shift; |
|
62506
|
|
|
|
|
86637
|
|
222
|
|
|
|
|
|
|
|
223
|
5697
|
50
|
|
|
|
16820
|
my $ref = xml2hash($raw_data, array => ['aka'])->{sdnList}; |
224
|
|
|
|
|
|
|
|
225
|
5697
|
|
|
|
|
25398
|
my $publish_epoch = |
226
|
|
|
|
|
|
|
$ref->{publshInformation}{Publish_Date} =~ m/(\d{1,2})\/(\d{1,2})\/(\d{4})/ |
227
|
|
|
|
|
|
|
? _date_to_epoch("$3-$1-$2") |
228
|
|
|
|
|
|
|
: undef; # publshInformation is a typo in ofac xml tags |
229
|
14
|
|
|
14
|
|
14213
|
die 'Publication date is invalid' unless defined $publish_epoch; |
230
|
|
|
|
|
|
|
|
231
|
14
|
|
|
|
|
88
|
my $parse_list_node = sub { |
232
|
|
|
|
|
|
|
my ($entry, $parent, $child, $attribute) = @_; |
233
|
|
|
|
|
|
|
|
234
|
14
|
50
|
|
|
|
466672
|
my $node = $entry->{$parent}->{$child} // []; |
235
|
|
|
|
|
|
|
$node = [$node] if (ref $node eq 'HASH'); |
236
|
|
|
|
|
|
|
|
237
|
14
|
50
|
|
|
|
42
|
return map { $_->{$attribute} // () } @$node; |
238
|
|
|
|
|
|
|
}; |
239
|
|
|
|
|
|
|
|
240
|
33048
|
|
|
33048
|
|
53433
|
my $dataset = []; |
241
|
|
|
|
|
|
|
|
242
|
33048
|
|
100
|
|
|
97239
|
foreach my $entry (@{$ref->{sdnEntry}}) { |
243
|
33048
|
100
|
|
|
|
64122
|
next unless $entry->{sdnType} eq 'Individual'; |
244
|
|
|
|
|
|
|
|
245
|
33048
|
|
66
|
|
|
49562
|
my @names; |
|
22294
|
|
|
|
|
69634
|
|
246
|
14
|
|
|
|
|
72
|
for ($entry, @{$entry->{akaList}{aka} // []}) { |
247
|
|
|
|
|
|
|
my $category = $_->{category} // 'strong'; |
248
|
14
|
|
|
|
|
23
|
push @names, _process_name($_->{firstName} // '', $_->{lastName} // '') if $category eq 'strong'; |
249
|
|
|
|
|
|
|
} |
250
|
14
|
|
|
|
|
21
|
|
|
14
|
|
|
|
|
37
|
|
251
|
11034
|
100
|
|
|
|
30908
|
# my @dob_list; |
252
|
|
|
|
|
|
|
# my $dobs = $entry->{dateOfBirthList}{dateOfBirthItem}; |
253
|
5508
|
|
|
|
|
6998
|
# # In one of the xml files, some of the clients have more than one date of birth |
254
|
5508
|
|
100
|
|
|
7195
|
# # Hence, $dob can be either an array or a hashref |
|
5508
|
|
|
|
|
19433
|
|
255
|
14600
|
|
100
|
|
|
39912
|
# foreach my $dob (map { $_->{dateOfBirth} || () } (ref($dobs) eq 'ARRAY' ? @$dobs : $dobs)) { |
256
|
14600
|
100
|
100
|
|
|
47527
|
# push @dob_list, $dob; |
|
|
|
50
|
|
|
|
|
257
|
|
|
|
|
|
|
# } |
258
|
|
|
|
|
|
|
my @dob_list = $parse_list_node->($entry, 'dateOfBirthList', 'dateOfBirthItem', 'dateOfBirth'); |
259
|
|
|
|
|
|
|
my @citizen = $parse_list_node->($entry, 'citizenshipList', 'citizenship', 'country'); |
260
|
|
|
|
|
|
|
my @residence = $parse_list_node->($entry, 'addressList', 'address', 'country'); |
261
|
|
|
|
|
|
|
my @postal_code = $parse_list_node->($entry, 'addressList', 'address', 'postalCode'); |
262
|
|
|
|
|
|
|
my @nationality = $parse_list_node->($entry, 'naationalityList', 'nationality', 'country'); |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
my @place_of_birth = $parse_list_node->($entry, 'placeOfBirthList', 'placeOfBirthItem', 'placeOfBirth'); |
265
|
|
|
|
|
|
|
@place_of_birth = map { my @parts = split ',', $_; $parts[-1] } @place_of_birth; |
266
|
5508
|
|
|
|
|
11228
|
|
267
|
5508
|
|
|
|
|
9571
|
my $id_list = $entry->{idList}->{id} // []; |
268
|
5508
|
|
|
|
|
8690
|
$id_list = [$id_list] if ref $id_list eq 'HASH'; |
269
|
5508
|
|
|
|
|
9544
|
my @passport_no = map { $_->{idType} eq 'Passport' ? $_->{idNumber} : () } @$id_list; |
270
|
5508
|
|
|
|
|
9363
|
my @national_id = map { $_->{idType} =~ 'National ID' ? $_->{idNumber} : () } @$id_list; |
271
|
|
|
|
|
|
|
|
272
|
5508
|
|
|
|
|
8529
|
_process_sanction_entry( |
273
|
5508
|
|
|
|
|
7986
|
$dataset, |
|
3470
|
|
|
|
|
11339
|
|
|
3470
|
|
|
|
|
8278
|
|
274
|
|
|
|
|
|
|
names => \@names, |
275
|
5508
|
|
100
|
|
|
16584
|
date_of_birth => \@dob_list, |
276
|
5508
|
100
|
|
|
|
10695
|
place_of_birth => \@place_of_birth, |
277
|
5508
|
100
|
|
|
|
9025
|
residence => \@residence, |
|
8178
|
|
|
|
|
21021
|
|
278
|
5508
|
100
|
|
|
|
7930
|
nationality => \@nationality, |
|
8178
|
|
|
|
|
15706
|
|
279
|
|
|
|
|
|
|
citizen => \@citizen, |
280
|
5508
|
|
|
|
|
13108
|
postal_code => \@postal_code, |
281
|
|
|
|
|
|
|
national_id => \@national_id, |
282
|
|
|
|
|
|
|
passport_no => \@passport_no, |
283
|
|
|
|
|
|
|
); |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
return { |
287
|
|
|
|
|
|
|
updated => $publish_epoch, |
288
|
|
|
|
|
|
|
content => $dataset, |
289
|
|
|
|
|
|
|
}; |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
my $raw_data = shift; |
293
|
|
|
|
|
|
|
my $dataset = []; |
294
|
|
|
|
|
|
|
|
295
|
14
|
|
|
|
|
107555
|
my $csv = Text::CSV->new({binary => 1}) or die "Cannot use CSV: " . Text::CSV->error_diag(); |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
my @lines = split("\n", $raw_data); |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
my $parsed = $csv->parse(trim(shift @lines)); |
300
|
|
|
|
|
|
|
my @info = $parsed ? $csv->fields() : (); |
301
|
7
|
|
|
7
|
|
10
|
die 'Publication date was not found' unless @info && _date_to_epoch($info[1]); |
302
|
7
|
|
|
|
|
13
|
|
303
|
|
|
|
|
|
|
my $publish_epoch = _date_to_epoch($info[1]); |
304
|
7
|
50
|
|
|
|
53
|
die 'Publication date is invalid' unless defined $publish_epoch; |
305
|
|
|
|
|
|
|
|
306
|
7
|
|
|
|
|
915
|
$parsed = $csv->parse(trim(shift @lines)); |
307
|
|
|
|
|
|
|
my @row = $csv->fields(); |
308
|
7
|
|
|
|
|
22
|
my %column = map { trim($row[$_]) => $_ } (0 .. @row - 1); |
309
|
7
|
50
|
|
|
|
332
|
|
310
|
7
|
50
|
33
|
|
|
99
|
foreach my $line (@lines) { |
311
|
|
|
|
|
|
|
$line = trim($line); |
312
|
7
|
|
|
|
|
19
|
|
313
|
7
|
50
|
|
|
|
17
|
$parsed = $csv->parse($line); |
314
|
|
|
|
|
|
|
next unless $parsed; |
315
|
7
|
|
|
|
|
21
|
|
316
|
7
|
|
|
|
|
307
|
my @row = $csv->fields(); |
317
|
7
|
|
|
|
|
69
|
|
|
252
|
|
|
|
|
1943
|
|
318
|
|
|
|
|
|
|
@row = map { trim($_ =~ s/\([^(]*\)$//r) } @row; |
319
|
7
|
|
|
|
|
182
|
|
320
|
161
|
|
|
|
|
308
|
($row[$column{'Group Type'}] eq "Individual") or next; |
321
|
|
|
|
|
|
|
my $name = _process_name @row[0 .. 5]; |
322
|
161
|
|
|
|
|
2545
|
|
323
|
161
|
50
|
|
|
|
4185
|
next if $name =~ /^\s*$/; |
324
|
|
|
|
|
|
|
|
325
|
161
|
|
|
|
|
326
|
my $date_of_birth = $row[$column{'DOB'}]; |
326
|
|
|
|
|
|
|
my $place_of_birth = $row[$column{'Country of Birth'}]; |
327
|
161
|
|
|
|
|
1344
|
# nationality is saved as an adjective (Iranian, American, etc); let's ignore it. |
|
5796
|
|
|
|
|
43962
|
|
328
|
|
|
|
|
|
|
my $nationality = ''; |
329
|
161
|
50
|
|
|
|
1731
|
my $residence = $row[$column{'Country'}]; |
330
|
161
|
|
|
|
|
297
|
my $postal_code = $row[$column{'Post/Zip Code'}]; |
331
|
|
|
|
|
|
|
my $national_id = $row[$column{'National Identification Number'}]; |
332
|
161
|
50
|
|
|
|
340
|
|
333
|
|
|
|
|
|
|
# Fields to be added in the new file format (https://redmine.deriv.cloud/issues/51922) |
334
|
161
|
|
|
|
|
227
|
# We can read these fields normally after the data is released in the new format |
335
|
161
|
|
|
|
|
198
|
my ($passport_no, $non_latin_alias); |
336
|
|
|
|
|
|
|
$passport_no = $row[$column{'Passport Number'}] if defined $column{'Passport Number'}; |
337
|
161
|
|
|
|
|
176
|
$non_latin_alias = $row[$column{'Name Non-Latin Script'}] if defined $column{'Name Non-Latin Script'}; |
338
|
161
|
|
|
|
|
195
|
|
339
|
161
|
|
|
|
|
203
|
_process_sanction_entry( |
340
|
161
|
|
|
|
|
187
|
$dataset, |
341
|
|
|
|
|
|
|
names => [$name, $non_latin_alias ? $non_latin_alias : ()], |
342
|
|
|
|
|
|
|
date_of_birth => [$date_of_birth], |
343
|
|
|
|
|
|
|
place_of_birth => [$place_of_birth], |
344
|
161
|
|
|
|
|
210
|
residence => [$residence], |
345
|
161
|
50
|
|
|
|
307
|
nationality => [$nationality], |
346
|
161
|
50
|
|
|
|
263
|
postal_code => [$postal_code], |
347
|
|
|
|
|
|
|
national_id => [$national_id], |
348
|
161
|
100
|
|
|
|
583
|
$passport_no ? (passport_no => [$passport_no]) : (), |
|
|
50
|
|
|
|
|
|
349
|
|
|
|
|
|
|
); |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
return { |
353
|
|
|
|
|
|
|
updated => $publish_epoch, |
354
|
|
|
|
|
|
|
content => $dataset, |
355
|
|
|
|
|
|
|
}; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
my $raw_data = shift; |
359
|
|
|
|
|
|
|
my $ref = xml2hash($raw_data, array => ['nameAlias', 'birthdate'])->{export}; |
360
|
|
|
|
|
|
|
my $dataset = []; |
361
|
|
|
|
|
|
|
|
362
|
7
|
|
|
|
|
164
|
foreach my $entry (@{$ref->{sanctionEntity}}) { |
363
|
|
|
|
|
|
|
next unless $entry->{subjectType}->{'-code'} eq 'person'; |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
for (qw/birthdate citizenship address identification/) { |
366
|
|
|
|
|
|
|
$entry->{$_} //= []; |
367
|
|
|
|
|
|
|
$entry->{$_} = [$entry->{$_}] if ref $entry->{$_} eq 'HASH'; |
368
|
4
|
|
|
4
|
|
8
|
} |
369
|
4
|
|
|
|
|
20
|
|
370
|
4
|
|
|
|
|
1709
|
my @names; |
371
|
|
|
|
|
|
|
for (@{$entry->{nameAlias} // []}) { |
372
|
4
|
|
|
|
|
6
|
my $name = $_->{'-wholeName'}; |
|
4
|
|
|
|
|
12
|
|
373
|
28
|
50
|
|
|
|
73
|
$name = join ' ', ($_->{'-firstName'} // '', $_->{'-lastName'} // '') unless $name; |
374
|
|
|
|
|
|
|
push @names, $name if $name ne ' '; |
375
|
28
|
|
|
|
|
41
|
} |
376
|
112
|
|
100
|
|
|
261
|
|
377
|
112
|
100
|
|
|
|
216
|
my @dob_list; |
378
|
|
|
|
|
|
|
foreach my $dob ($entry->{birthdate}->@*) { |
379
|
|
|
|
|
|
|
push @dob_list, $dob->{'-birthdate'} if $dob->{'-birthdate'}; |
380
|
28
|
|
|
|
|
33
|
push @dob_list, $dob->{'-year'} if not $dob->{'-birthdate'} and $dob->{'-year'}; |
381
|
28
|
|
50
|
|
|
27
|
} |
|
28
|
|
|
|
|
74
|
|
382
|
60
|
|
|
|
|
85
|
|
383
|
60
|
50
|
0
|
|
|
77
|
my @place_of_birth = map { $_->{'-countryIso2Code'} || () } $entry->{birthdate}->@*; |
|
|
|
0
|
|
|
|
|
384
|
60
|
50
|
|
|
|
114
|
my @citizen = map { $_->{'-countryIso2Code'} || () } $entry->{citizenship}->@*; |
385
|
|
|
|
|
|
|
my @residence = map { $_->{'-countryIso2Code'} || () } $entry->{address}->@*; |
386
|
|
|
|
|
|
|
my @postal_code = map { $_->{'-zipCode'} || $_->{'-poBox'} || () } $entry->{address}->@*; |
387
|
28
|
|
|
|
|
30
|
my @nationality = map { $_->{'-countryIso2Code'} || () } $entry->{identification}->@*; |
388
|
28
|
|
|
|
|
40
|
my @national_id = map { $_->{'-identificationTypeCode'} eq 'id' ? $_->{'-number'} || () : () } $entry->{identification}->@*; |
389
|
36
|
100
|
|
|
|
63
|
my @passport_no = map { $_->{'-identificationTypeCode'} eq 'passport' ? $_->{'-number'} || () : () } $entry->{identification}->@*; |
390
|
36
|
50
|
66
|
|
|
87
|
|
391
|
|
|
|
|
|
|
_process_sanction_entry( |
392
|
|
|
|
|
|
|
$dataset, |
393
|
28
|
50
|
|
|
|
43
|
names => \@names, |
|
36
|
|
|
|
|
84
|
|
394
|
28
|
50
|
|
|
|
38
|
date_of_birth => \@dob_list, |
|
12
|
|
|
|
|
28
|
|
395
|
28
|
50
|
|
|
|
37
|
place_of_birth => \@place_of_birth, |
|
8
|
|
|
|
|
20
|
|
396
|
28
|
50
|
33
|
|
|
34
|
residence => \@residence, |
|
8
|
|
|
|
|
32
|
|
397
|
28
|
50
|
|
|
|
40
|
nationality => \@nationality, |
|
12
|
|
|
|
|
26
|
|
398
|
28
|
100
|
33
|
|
|
39
|
citizen => \@citizen, |
|
12
|
|
|
|
|
28
|
|
399
|
28
|
100
|
33
|
|
|
35
|
postal_code => \@postal_code, |
|
12
|
|
|
|
|
29
|
|
400
|
|
|
|
|
|
|
national_id => \@national_id, |
401
|
28
|
|
|
|
|
65
|
passport_no => \@passport_no, |
402
|
|
|
|
|
|
|
); |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
my @date_parts = split('T', $ref->{'-generationDate'} // ''); |
406
|
|
|
|
|
|
|
my $publish_epoch = _date_to_epoch($date_parts[0] // ''); |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
die 'Publication date is invalid' unless $publish_epoch; |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
return { |
411
|
|
|
|
|
|
|
updated => $publish_epoch, |
412
|
|
|
|
|
|
|
content => $dataset, |
413
|
|
|
|
|
|
|
}; |
414
|
|
|
|
|
|
|
} |
415
|
4
|
|
50
|
|
|
26
|
|
416
|
4
|
|
50
|
|
|
21
|
=head2 run |
417
|
|
|
|
|
|
|
|
418
|
4
|
50
|
|
|
|
11
|
Fetches latest version of lists, and returns combined hash of successfully downloaded ones |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
=cut |
421
|
4
|
|
|
|
|
224
|
|
422
|
|
|
|
|
|
|
my %args = @_; |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
my $result = {}; |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
my $config = config(%args); |
427
|
|
|
|
|
|
|
my $retries = $args{retries} // 3; |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
foreach my $id (sort keys %$config) { |
430
|
|
|
|
|
|
|
my $source = $config->{$id}; |
431
|
|
|
|
|
|
|
try { |
432
|
|
|
|
|
|
|
die "Url is empty for $id" unless $source->{url}; |
433
|
8
|
|
|
8
|
1
|
26939
|
|
434
|
|
|
|
|
|
|
my $raw_data; |
435
|
8
|
|
|
|
|
16
|
|
436
|
|
|
|
|
|
|
if ($source->{url} =~ m/^file:\/\/(.*)$/) { |
437
|
8
|
|
|
|
|
24
|
$raw_data = _entries_from_file($id); |
438
|
8
|
|
50
|
|
|
30
|
} else { |
439
|
|
|
|
|
|
|
$raw_data = _entries_from_remote_src({ |
440
|
8
|
|
|
|
|
33
|
id => $id, |
441
|
32
|
|
|
|
|
295
|
source => $source->{url}, |
442
|
|
|
|
|
|
|
retries => $retries |
443
|
|
|
|
|
|
|
}); |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
my $data = $source->{parser}->($raw_data); |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
if ($data->{updated} > 1) { |
449
|
|
|
|
|
|
|
$result->{$id} = $data; |
450
|
|
|
|
|
|
|
my $count = $data->{content}->@*; |
451
|
|
|
|
|
|
|
print "Source $id: $count entries fetched \n" if $args{verbose}; |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
} catch { |
454
|
|
|
|
|
|
|
warn "$id list update failed because: $@"; |
455
|
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
return $result; |
459
|
|
|
|
|
|
|
} |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=head2 _entries_from_file |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Get the sanction entries from a file locally |
464
|
32
|
|
|
|
|
65
|
|
465
|
|
|
|
|
|
|
=cut |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
my ($id) = @_; |
468
|
|
|
|
|
|
|
|
469
|
8
|
|
|
|
|
202
|
my $entries; |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
open my $fh, '<', "$1" or die "Can't open $id file $1 $!"; |
472
|
|
|
|
|
|
|
$entries = do { local $/; <$fh> }; |
473
|
|
|
|
|
|
|
close $fh; |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
return $entries; |
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=head2 _entries_from_remote_src |
479
|
23
|
|
|
23
|
|
36
|
|
480
|
|
|
|
|
|
|
Get the sanction entries from a remote source includes retry mechanism |
481
|
23
|
|
|
|
|
25
|
|
482
|
|
|
|
|
|
|
=cut |
483
|
23
|
50
|
|
|
|
802
|
|
484
|
23
|
|
|
|
|
51
|
my ($args) = @_; |
|
23
|
|
|
|
|
71
|
|
|
23
|
|
|
|
|
1039
|
|
485
|
23
|
|
|
|
|
187
|
|
486
|
|
|
|
|
|
|
my ($id, $src_url, $retries) = @{$args}{qw/ id source retries /}; |
487
|
23
|
|
|
|
|
113
|
$retries //= 3; |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
my $entries; |
490
|
|
|
|
|
|
|
my $error_log = 'Unknown Error'; |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
my $ua = Mojo::UserAgent->new; |
493
|
|
|
|
|
|
|
$ua->connect_timeout(15); |
494
|
|
|
|
|
|
|
$ua->inactivity_timeout(60); |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
my $retry_counter = 0; |
497
|
8
|
|
|
8
|
|
17
|
while ($retry_counter < $retries) { |
498
|
|
|
|
|
|
|
$retry_counter++; |
499
|
8
|
|
|
|
|
12
|
|
|
8
|
|
|
|
|
19
|
|
500
|
8
|
|
50
|
|
|
21
|
try { |
501
|
|
|
|
|
|
|
my $resp = $ua->get($src_url); |
502
|
8
|
|
|
|
|
10
|
|
503
|
8
|
|
|
|
|
12
|
die "File not downloaded for $id" if $resp->result->is_error; |
504
|
|
|
|
|
|
|
$entries = $resp->result->body; |
505
|
8
|
|
|
|
|
93
|
|
506
|
8
|
|
|
|
|
67
|
last; |
507
|
8
|
|
|
|
|
68
|
} catch { |
508
|
|
|
|
|
|
|
$error_log = $@; |
509
|
8
|
|
|
|
|
33
|
} |
510
|
8
|
|
|
|
|
22
|
} |
511
|
20
|
|
|
|
|
23
|
|
512
|
|
|
|
|
|
|
return $entries // die "An error occurred while fetching data from '$src_url' due to $error_log"; |
513
|
|
|
|
|
|
|
} |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
1; |