line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Data::Validate::Sanctions; |
2
|
|
|
|
|
|
|
|
3
|
7
|
|
|
7
|
|
174569
|
use strict; |
|
7
|
|
|
|
|
47
|
|
|
7
|
|
|
|
|
205
|
|
4
|
7
|
|
|
7
|
|
36
|
use warnings; |
|
7
|
|
|
|
|
9
|
|
|
7
|
|
|
|
|
443
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
require Exporter; |
7
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
8
|
|
|
|
|
|
|
our @EXPORT_OK = qw/is_sanctioned set_sanction_file get_sanction_file/; |
9
|
|
|
|
|
|
|
|
10
|
7
|
|
|
7
|
|
40
|
use Carp; |
|
7
|
|
|
|
|
14
|
|
|
7
|
|
|
|
|
570
|
|
11
|
7
|
|
|
7
|
|
3362
|
use Data::Validate::Sanctions::Fetcher; |
|
7
|
|
|
|
|
29
|
|
|
7
|
|
|
|
|
394
|
|
12
|
7
|
|
|
7
|
|
2440
|
use Data::Validate::Sanctions::Redis; |
|
7
|
|
|
|
|
21
|
|
|
7
|
|
|
|
|
227
|
|
13
|
7
|
|
|
7
|
|
57
|
use File::stat; |
|
7
|
|
|
|
|
21
|
|
|
7
|
|
|
|
|
83
|
|
14
|
7
|
|
|
7
|
|
503
|
use File::ShareDir; |
|
7
|
|
|
|
|
14
|
|
|
7
|
|
|
|
|
286
|
|
15
|
7
|
|
|
7
|
|
831
|
use YAML::XS qw/DumpFile LoadFile/; |
|
7
|
|
|
|
|
5756
|
|
|
7
|
|
|
|
|
357
|
|
16
|
7
|
|
|
7
|
|
48
|
use Scalar::Util qw(blessed); |
|
7
|
|
|
|
|
24
|
|
|
7
|
|
|
|
|
280
|
|
17
|
7
|
|
|
7
|
|
53
|
use Date::Utility; |
|
7
|
|
|
|
|
22
|
|
|
7
|
|
|
|
|
147
|
|
18
|
7
|
|
|
7
|
|
3386
|
use Data::Compare; |
|
7
|
|
|
|
|
82537
|
|
|
7
|
|
|
|
|
59
|
|
19
|
7
|
|
|
7
|
|
26435
|
use List::Util qw(any uniq max min); |
|
7
|
|
|
|
|
17
|
|
|
7
|
|
|
|
|
549
|
|
20
|
7
|
|
|
7
|
|
52
|
use Locale::Country; |
|
7
|
|
|
|
|
18
|
|
|
7
|
|
|
|
|
905
|
|
21
|
7
|
|
|
7
|
|
55
|
use Text::Trim qw(trim); |
|
7
|
|
|
|
|
18
|
|
|
7
|
|
|
|
|
326
|
|
22
|
7
|
|
|
7
|
|
42
|
use Clone qw(clone); |
|
7
|
|
|
|
|
20
|
|
|
7
|
|
|
|
|
530
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
our $VERSION = '0.16'; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
my $sanction_file; |
27
|
|
|
|
|
|
|
my $instance; |
28
|
|
|
|
|
|
|
|
29
|
7
|
|
|
7
|
|
52
|
use constant IGNORE_OPERATION_INTERVAL => 8 * 60; # 8 minutes |
|
7
|
|
|
|
|
21
|
|
|
7
|
|
|
|
|
17066
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# for OO |
32
|
|
|
|
|
|
|
sub new { ## no critic (RequireArgUnpacking) |
33
|
5
|
|
|
5
|
1
|
24
|
my ($class, %args) = @_; |
34
|
|
|
|
|
|
|
|
35
|
5
|
|
50
|
|
|
28
|
my $storage = delete $args{storage} // ''; |
36
|
|
|
|
|
|
|
|
37
|
5
|
50
|
|
|
|
21
|
return Data::Validate::Sanctions::Redis->new(%args) if $storage eq 'redis'; |
38
|
|
|
|
|
|
|
|
39
|
5
|
|
|
|
|
14
|
my $self = {}; |
40
|
|
|
|
|
|
|
|
41
|
5
|
|
33
|
|
|
24
|
$self->{sanction_file} = $args{sanction_file} // _default_sanction_file(); |
42
|
|
|
|
|
|
|
|
43
|
5
|
|
|
|
|
25
|
$self->{args} = {%args}; |
44
|
|
|
|
|
|
|
|
45
|
5
|
|
|
|
|
14
|
$self->{last_modification} = 0; |
46
|
5
|
|
|
|
|
16
|
$self->{last_index} = 0; |
47
|
5
|
|
|
|
|
14
|
$self->{last_data_load} = 0; |
48
|
|
|
|
|
|
|
|
49
|
5
|
|
33
|
|
|
35
|
return bless $self, ref($class) || $class; |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
sub update_data { |
53
|
0
|
|
|
0
|
1
|
0
|
my ($self, %args) = @_; |
54
|
|
|
|
|
|
|
|
55
|
0
|
|
|
|
|
0
|
$self->_load_data(); |
56
|
|
|
|
|
|
|
|
57
|
0
|
|
|
|
|
0
|
my $new_data = Data::Validate::Sanctions::Fetcher::run($self->{args}->%*, %args); |
58
|
0
|
|
|
|
|
0
|
my $updated = 0; |
59
|
0
|
|
|
|
|
0
|
foreach my $k (keys %$new_data) { |
60
|
0
|
|
0
|
|
|
0
|
$self->{_data}->{$k} //= {}; |
61
|
0
|
|
0
|
|
|
0
|
$self->{_data}->{$k}->{updated} //= 0; |
62
|
0
|
|
0
|
|
|
0
|
$self->{_data}->{$k}->{content} //= []; |
63
|
|
|
|
|
|
|
|
64
|
0
|
0
|
0
|
|
|
0
|
if (!$new_data->{$k}->{error} && $self->{_data}->{$k}->{error}) { |
65
|
0
|
|
|
|
|
0
|
delete $self->{_data}->{$k}->{error}; |
66
|
0
|
|
|
|
|
0
|
$updated = 1; |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
|
69
|
0
|
0
|
0
|
|
|
0
|
if ($new_data->{$k}->{error}) { |
|
|
0
|
|
|
|
|
|
70
|
0
|
|
|
|
|
0
|
warn "$k list update failed because: $new_data->{$k}->{error}"; |
71
|
0
|
|
|
|
|
0
|
$self->{_data}->{$k}->{error} = $new_data->{$k}->{error}; |
72
|
0
|
|
|
|
|
0
|
$updated = 1; |
73
|
|
|
|
|
|
|
} elsif ($self->{_data}{$k}->{updated} != $new_data->{$k}->{updated} |
74
|
|
|
|
|
|
|
|| scalar $self->{_data}{$k}->{content}->@* != scalar $new_data->{$k}->{content}->@*) |
75
|
|
|
|
|
|
|
{ |
76
|
0
|
0
|
|
|
|
0
|
print "Source $k is updated with new data \n" if $args{verbose}; |
77
|
0
|
|
|
|
|
0
|
$self->{_data}->{$k} = $new_data->{$k}; |
78
|
0
|
|
|
|
|
0
|
$updated = 1; |
79
|
|
|
|
|
|
|
} else { |
80
|
0
|
0
|
|
|
|
0
|
print "Source $k is not changed \n" if $args{verbose}; |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
0
|
0
|
|
|
|
0
|
if ($updated) { |
85
|
0
|
|
|
|
|
0
|
$self->_save_data(); |
86
|
0
|
|
|
|
|
0
|
$self->_index_data(); |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
|
89
|
0
|
|
|
|
|
0
|
return; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
sub last_updated { |
93
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
94
|
0
|
|
|
|
|
0
|
my $list = shift; |
95
|
|
|
|
|
|
|
|
96
|
0
|
0
|
|
|
|
0
|
if ($list) { |
97
|
0
|
|
|
|
|
0
|
return $self->{_data}->{$list}->{updated}; |
98
|
|
|
|
|
|
|
} else { |
99
|
0
|
|
|
|
|
0
|
$self->_load_data(); |
100
|
0
|
|
0
|
|
|
0
|
return max(map { $_->{updated} // 0 } values %{$self->{_data}}); |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
sub set_sanction_file { ## no critic (RequireArgUnpacking) |
105
|
4
|
|
100
|
4
|
1
|
2411
|
$sanction_file = shift // die "sanction_file is needed"; |
106
|
3
|
|
|
|
|
192402
|
undef $instance; |
107
|
3
|
|
|
|
|
58
|
return; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub get_sanction_file { |
111
|
7
|
|
66
|
7
|
1
|
694
|
$sanction_file //= _default_sanction_file(); |
112
|
7
|
50
|
|
|
|
66
|
return $instance ? $instance->{sanction_file} : $sanction_file; |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head2 is_sanctioned |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Checks if the input profile info matches a sanctioned entity. |
118
|
|
|
|
|
|
|
The arguments are the same as those of B<get_sanctioned_info>. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
It returns 1 if a match is found, otherwise 0. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=cut |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub is_sanctioned { ## no critic (RequireArgUnpacking) |
125
|
14
|
|
|
14
|
1
|
4704
|
return (get_sanctioned_info(@_))->{matched}; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub data { |
129
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
130
|
|
|
|
|
|
|
|
131
|
0
|
0
|
|
|
|
0
|
$self->_load_data() unless $self->{_data}; |
132
|
|
|
|
|
|
|
|
133
|
0
|
|
|
|
|
0
|
return $self->{_data}; |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=head2 _match_other_fields |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Matches fields possibly available in addition to name and date of birth. |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Returns a a hash-ref reporting the matched fields if it succeeeds; otherwise returns false (undef). |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=cut |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
sub _match_other_fields { |
145
|
19
|
|
|
19
|
|
40
|
my ($self, $entry, $args) = @_; |
146
|
|
|
|
|
|
|
|
147
|
19
|
|
|
|
|
63
|
my @optional_fields = qw/place_of_birth residence nationality citizen postal_code national_id passport_no/; |
148
|
|
|
|
|
|
|
|
149
|
19
|
|
|
|
|
31
|
my $matched_args = {}; |
150
|
19
|
|
|
|
|
35
|
for my $field (@optional_fields) { |
151
|
133
|
0
|
33
|
|
|
251
|
next unless ($args->{$field} && $entry->{$field} && $entry->{$field}->@*); |
|
|
|
0
|
|
|
|
|
152
|
|
|
|
|
|
|
|
153
|
0
|
0
|
|
0
|
|
0
|
return undef unless any { $args->{$field} eq $_ } $entry->{$field}->@*; |
|
0
|
|
|
|
|
0
|
|
154
|
0
|
|
|
|
|
0
|
$matched_args->{$field} = $args->{$field}; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
19
|
|
|
|
|
41
|
return $matched_args; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=head2 get_sanctioned_info |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
Tries to find a match a sanction entry matching the input profile args. |
163
|
|
|
|
|
|
|
It takes arguments in two forms. In the new API, it takes a hashref containing the following named arguments: |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=over 4 |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=item * first_name: first name |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=item * last_name: last name |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=item * date_of_birth: (optional) date of birth as a string or epoch |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=item * place_of_birth: (optional) place of birth as a country name or code |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=item * residence: (optional) name or code of the country of residence |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=item * nationality: (optional) name or code of the country of nationality |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=item * citizen: (optional) name or code of the country of citizenship |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=item * postal_code: (optional) postal/zip code |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=item * national_id: (optional) national ID number |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=item * passport_no: (oiptonal) passort number |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=back |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
For backward compatibility it also supports the old API, taking the following args: |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=over 4 |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=item * first_name: first name |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=item * last_name: last name |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=item * date_of_birth: (optional) date of birth as a string or epoch |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=back |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
It returns a hash-ref containg the following data: |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=over 4 |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=item - matched: 1 if a match was found; 0 otherwise |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=item - list: the source for the matched entry, |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=item - matched_args: a name-value hash-ref of the similar arguments, |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=item - comment: additional comments if necessary, |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=back |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=cut |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
sub get_sanctioned_info { ## no critic (RequireArgUnpacking) |
218
|
14
|
50
|
|
14
|
1
|
68
|
my $self = blessed($_[0]) ? shift : $instance; |
219
|
14
|
100
|
|
|
|
54
|
unless ($self) { |
220
|
5
|
|
|
|
|
32
|
$instance = __PACKAGE__->new(sanction_file => get_sanction_file()); |
221
|
5
|
|
|
|
|
16
|
$self = $instance; |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# It's the old interface |
225
|
14
|
|
|
|
|
42
|
my ($first_name, $last_name, $date_of_birth) = @_; |
226
|
14
|
|
|
|
|
30
|
my $args = {}; |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# in the new interface we accept fields in a hashref |
229
|
14
|
50
|
|
|
|
50
|
if (ref $_[0] eq 'HASH') { |
230
|
0
|
|
|
|
|
0
|
($args) = @_; |
231
|
0
|
|
|
|
|
0
|
($first_name, $last_name, $date_of_birth) = $args->@{qw/first_name last_name date_of_birth/}; |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
# convert country names to iso codes |
235
|
14
|
|
|
|
|
37
|
for my $field (qw/place_of_birth residence nationality citizen/) { |
236
|
56
|
|
|
|
|
89
|
my $value = $args->{$field}; |
237
|
56
|
50
|
|
|
|
123
|
next unless $value; |
238
|
|
|
|
|
|
|
|
239
|
0
|
|
|
|
|
0
|
$args->{$field} = Data::Validate::Sanctions::Fetcher::get_country_code($value); |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
|
242
|
14
|
|
|
|
|
55
|
$self->_load_data(); |
243
|
|
|
|
|
|
|
|
244
|
14
|
|
66
|
|
|
82
|
my $client_full_name = join(' ', $first_name, $last_name || ()); |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# Split into tokens after cleaning |
247
|
14
|
|
|
|
|
40
|
my @client_name_tokens = _clean_names($client_full_name); |
248
|
|
|
|
|
|
|
|
249
|
14
|
|
|
|
|
28
|
my @match_with_dob_text; |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
# only pick the sanctioned names which have common token with the client tokens |
252
|
|
|
|
|
|
|
# and deduplicate the list |
253
|
14
|
|
|
|
|
31
|
my $filtered_sanctioned_names = {}; |
254
|
14
|
|
|
|
|
29
|
foreach my $token (@client_name_tokens) { |
255
|
29
|
|
|
|
|
44
|
foreach my $name (keys %{$self->{_token_sanctioned_names}->{$token}}) { |
|
29
|
|
|
|
|
196
|
|
256
|
313
|
|
|
|
|
636
|
$filtered_sanctioned_names->{$name} = 1; |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
|
260
|
14
|
|
|
|
|
27
|
foreach my $sanctioned_name (keys %{$filtered_sanctioned_names}) { |
|
14
|
|
|
|
|
73
|
|
261
|
165
|
|
|
|
|
394
|
my $sanctioned_name_tokens = $self->{_sanctioned_name_tokens}->{$sanctioned_name}; |
262
|
165
|
100
|
|
|
|
292
|
next unless _name_matches(\@client_name_tokens, $sanctioned_name_tokens); |
263
|
|
|
|
|
|
|
|
264
|
16
|
|
|
|
|
60
|
for my $entry ($self->{_index}->{$sanctioned_name}->@*) { |
265
|
19
|
|
|
|
|
52
|
my $matched_args = $self->_match_other_fields($entry, $args); |
266
|
19
|
50
|
|
|
|
45
|
next unless $matched_args; |
267
|
19
|
|
|
|
|
43
|
$matched_args->{name} = $sanctioned_name; |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
# dob is matched only if it's included in lookup args |
270
|
19
|
100
|
|
|
|
59
|
return _possible_match($entry->{source}, \%$matched_args) unless defined $date_of_birth; |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# 1- Some entries in sanction list can have more than one date of birth |
273
|
|
|
|
|
|
|
# 2- first epoch is compared, then year |
274
|
14
|
|
|
|
|
55
|
my $client_dob_date = Date::Utility->new($date_of_birth); |
275
|
14
|
|
|
|
|
1073
|
$args->{dob_epoch} = $client_dob_date->epoch; |
276
|
14
|
|
|
|
|
393
|
$args->{dob_year} = $client_dob_date->year; |
277
|
|
|
|
|
|
|
|
278
|
14
|
|
|
|
|
370
|
for my $dob_field (qw/dob_epoch dob_year/) { |
279
|
27
|
|
100
|
|
|
92
|
$entry->{$dob_field} //= []; |
280
|
27
|
|
|
14
|
|
128
|
my $checked_dob = any { $_ eq $args->{$dob_field} } $entry->{$dob_field}->@*; |
|
14
|
|
|
|
|
40
|
|
281
|
|
|
|
|
|
|
|
282
|
27
|
100
|
|
|
|
108
|
return _possible_match($entry->{source}, {%$matched_args, $dob_field => $args->{$dob_field}}) if $checked_dob; |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
# Saving names with dob_text for later check. |
286
|
12
|
100
|
100
|
|
|
49
|
my $has_no_epoch_or_year = ($entry->{dob_epoch}->@* || $entry->{dob_year}->@*) ? 0 : 1; |
287
|
12
|
50
|
50
|
|
|
18
|
my $has_dob_text = @{$entry->{dob_text} // []} ? 1 : 0; |
|
12
|
|
|
|
|
51
|
|
288
|
12
|
100
|
66
|
|
|
55
|
if ($has_dob_text || $has_no_epoch_or_year) { |
289
|
1
|
|
|
|
|
7
|
push @match_with_dob_text, |
290
|
|
|
|
|
|
|
{ |
291
|
|
|
|
|
|
|
name => $sanctioned_name, |
292
|
|
|
|
|
|
|
entry => $entry, |
293
|
|
|
|
|
|
|
matched_args => $matched_args, |
294
|
|
|
|
|
|
|
}; |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
# Return a possible match if the name matches and no date of birth is present in sanctions |
300
|
7
|
|
|
|
|
25
|
for my $match (@match_with_dob_text) { |
301
|
|
|
|
|
|
|
# We match only in case we have full match for the name |
302
|
|
|
|
|
|
|
# in other case we may get to many false positive |
303
|
1
|
|
|
|
|
10
|
my ($sanction_name, $client_name) = map { uc(s/[^[:alpha:]\s]//gr) } ($match->{name}, $client_full_name); |
|
2
|
|
|
|
|
10
|
|
304
|
|
|
|
|
|
|
|
305
|
1
|
50
|
|
|
|
5
|
next unless $sanction_name eq $client_name; |
306
|
|
|
|
|
|
|
|
307
|
1
|
|
50
|
|
|
10
|
my $dob_text = $match->{entry}->{dob_text} // []; |
308
|
|
|
|
|
|
|
|
309
|
1
|
|
|
|
|
2
|
my $comment; |
310
|
1
|
50
|
|
|
|
5
|
if (@$dob_text) { |
311
|
0
|
|
|
|
|
0
|
$comment = 'dob raw text: ' . join q{, } => @$dob_text; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
1
|
|
|
|
|
6
|
return _possible_match($match->{entry}->{source}, $match->{matched_args}, $comment); |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
# Return if no possible match, regardless if date of birth is provided or not |
318
|
6
|
|
|
|
|
91
|
return {matched => 0}; |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
sub _load_data { |
322
|
14
|
|
|
14
|
|
32
|
my $self = shift; |
323
|
14
|
|
|
|
|
49
|
my $sanction_file = $self->{sanction_file}; |
324
|
14
|
|
50
|
|
|
42
|
$self->{last_modification} //= 0; |
325
|
14
|
|
50
|
|
|
48
|
$self->{last_index} //= 0; |
326
|
14
|
|
50
|
|
|
42
|
$self->{last_data_load} //= 0; |
327
|
14
|
|
100
|
|
|
54
|
$self->{_data} //= {}; |
328
|
14
|
|
100
|
|
|
55
|
$self->{_sanctioned_name_tokens} //= {}; |
329
|
14
|
|
100
|
|
|
58
|
$self->{_token_sanctioned_names} //= {}; |
330
|
|
|
|
|
|
|
|
331
|
14
|
100
|
66
|
|
|
108
|
return $self->{_data} if $self->{_data} and $self->{last_data_load} + $self->IGNORE_OPERATION_INTERVAL > time; |
332
|
|
|
|
|
|
|
|
333
|
5
|
50
|
|
|
|
116
|
if (-e $sanction_file) { |
334
|
5
|
|
|
|
|
41
|
my $file_modify_time = stat($sanction_file)->mtime; |
335
|
5
|
0
|
33
|
|
|
1070
|
return $self->{_data} if $file_modify_time <= $self->{last_modification} && $self->{_data}; |
336
|
5
|
|
|
|
|
16
|
$self->{last_modification} = $file_modify_time; |
337
|
5
|
|
|
|
|
25
|
$self->{_data} = LoadFile($sanction_file); |
338
|
5
|
|
|
|
|
222989
|
$self->{last_data_load} = time; |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
5
|
|
|
|
|
765
|
$self->_index_data(); |
342
|
|
|
|
|
|
|
|
343
|
5
|
|
|
|
|
9721
|
foreach my $sanctioned_name (keys $self->{_index}->%*) { |
344
|
23102
|
|
|
|
|
38195
|
my @tokens = _clean_names($sanctioned_name); |
345
|
23102
|
|
|
|
|
61801
|
$self->{_sanctioned_name_tokens}->{$sanctioned_name} = \@tokens; |
346
|
23102
|
|
|
|
|
36871
|
foreach my $token (@tokens) { |
347
|
65499
|
|
|
|
|
227619
|
$self->{_token_sanctioned_names}->{$token}->{$sanctioned_name} = 1; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
|
351
|
5
|
|
|
|
|
2791
|
return $self->{_data}; |
352
|
|
|
|
|
|
|
} |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=head2 _index_data |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
Indexes data by name. Each name may have multiple matching entries. |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=cut |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
sub _index_data { |
361
|
5
|
|
|
5
|
|
285
|
my $self = shift; |
362
|
|
|
|
|
|
|
|
363
|
5
|
|
50
|
|
|
112
|
$self->{_data} //= {}; |
364
|
5
|
|
|
|
|
21
|
$self->{_index} = {}; |
365
|
5
|
|
|
|
|
73
|
for my $source (keys $self->{_data}->%*) { |
366
|
8
|
|
100
|
|
|
397447
|
my @content = clone($self->{_data}->{$source}->{content} // [])->@*; |
367
|
|
|
|
|
|
|
|
368
|
8
|
|
|
|
|
4687
|
for my $entry (@content) { |
369
|
15668
|
|
|
|
|
36759
|
$entry->{source} = $source; |
370
|
15668
|
|
|
|
|
30415
|
for my $name ($entry->{names}->@*) { |
371
|
26569
|
|
|
|
|
43514
|
$name = ucfirst($name); |
372
|
26569
|
|
100
|
|
|
82118
|
my $entry_list = $self->{_index}->{$name} // []; |
373
|
26569
|
|
|
|
|
45603
|
push @$entry_list, $entry; |
374
|
26569
|
|
|
|
|
72183
|
$self->{_index}->{$name} = $entry_list; |
375
|
|
|
|
|
|
|
} |
376
|
|
|
|
|
|
|
} |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
5
|
|
|
|
|
19
|
$self->{last_index} = time; |
380
|
|
|
|
|
|
|
|
381
|
5
|
|
|
|
|
13
|
return; |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
sub _save_data { |
385
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
386
|
|
|
|
|
|
|
|
387
|
0
|
|
|
|
|
0
|
my $sanction_file = $self->{sanction_file}; |
388
|
0
|
|
|
|
|
0
|
my $new_sanction_file = $sanction_file . ".tmp"; |
389
|
|
|
|
|
|
|
|
390
|
0
|
|
|
|
|
0
|
DumpFile($new_sanction_file, $self->{_data}); |
391
|
|
|
|
|
|
|
|
392
|
0
|
0
|
|
|
|
0
|
rename $new_sanction_file, $sanction_file or die "Can't rename $new_sanction_file to $sanction_file, please check it\n"; |
393
|
0
|
|
|
|
|
0
|
$self->{last_modification} = stat($sanction_file)->mtime; |
394
|
0
|
|
|
|
|
0
|
return; |
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
sub _default_sanction_file { |
398
|
2
|
|
33
|
2
|
|
15
|
return $ENV{SANCTION_FILE} // File::ShareDir::dist_file('Data-Validate-Sanctions', 'sanctions.yml'); |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
sub _possible_match { |
402
|
8
|
|
|
8
|
|
23
|
my ($list, $matched_args, $comment) = @_; |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
return +{ |
405
|
8
|
|
|
|
|
138
|
matched => 1, |
406
|
|
|
|
|
|
|
list => $list, |
407
|
|
|
|
|
|
|
matched_args => $matched_args, |
408
|
|
|
|
|
|
|
comment => $comment, |
409
|
|
|
|
|
|
|
}; |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
sub _clean_names { |
413
|
23116
|
|
|
23116
|
|
43643
|
my ($full_name) = @_; |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
# Remove non-alphabets |
416
|
23116
|
|
|
|
|
93228
|
my @cleaned_full_name = split " ", uc($full_name =~ s/[^[:alpha:]\s]//gr); |
417
|
|
|
|
|
|
|
|
418
|
23116
|
|
|
|
|
79256
|
return @cleaned_full_name; |
419
|
|
|
|
|
|
|
} |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
sub _name_matches { |
422
|
165
|
|
|
165
|
|
311
|
my ($small_tokens_list, $bigger_tokens_list) = @_; |
423
|
|
|
|
|
|
|
|
424
|
165
|
|
|
|
|
232
|
my $name_matches_count = 0; |
425
|
|
|
|
|
|
|
|
426
|
165
|
|
|
|
|
271
|
foreach my $token (@$small_tokens_list) { |
427
|
482
|
100
|
|
1155
|
|
1281
|
$name_matches_count++ if any { $_ eq $token } @$bigger_tokens_list; |
|
1155
|
|
|
|
|
2220
|
|
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
|
430
|
165
|
|
|
|
|
360
|
my $small_tokens_size = min(scalar(@$small_tokens_list), scalar(@$bigger_tokens_list)); |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
# - If more than one word matches, return it as possible match |
433
|
|
|
|
|
|
|
# - Some sanctioned individuals have only one name (ex. Hamza); this should be returned as well |
434
|
165
|
100
|
66
|
|
|
625
|
return 1 if ($name_matches_count > 1) || ($name_matches_count == 1 && $small_tokens_size == 1); |
|
|
|
100
|
|
|
|
|
435
|
|
|
|
|
|
|
|
436
|
149
|
|
|
|
|
386
|
return 0; |
437
|
|
|
|
|
|
|
} |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
sub export_data { |
440
|
0
|
|
|
0
|
1
|
|
my ($self, $path) = @_; |
441
|
|
|
|
|
|
|
|
442
|
0
|
|
|
|
|
|
return DumpFile($path, $self->{_data}); |
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
1; |
446
|
|
|
|
|
|
|
__END__ |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=encoding utf-8 |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=head1 NAME |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
Data::Validate::Sanctions - Validate a name against sanctions lists |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=head1 SYNOPSIS |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
# as exported function |
457
|
|
|
|
|
|
|
use Data::Validate::Sanctions qw/is_sanctioned get_sanction_file set_sanction_file/; |
458
|
|
|
|
|
|
|
set_sanction_file('/var/storage/sanction.csv'); |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
my ($first_name, $last_name) = ("First", "Last Name"); |
461
|
|
|
|
|
|
|
print 'BAD' if is_sanctioned($first_name, $last_name); |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
# as OO |
464
|
|
|
|
|
|
|
use Data::Validate::Sanctions; |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
#You can also set sanction_file in the new method. |
467
|
|
|
|
|
|
|
my $validator = Data::Validate::Sanctions->new(sanction_file => '/var/storage/sanction.csv'); |
468
|
|
|
|
|
|
|
print 'BAD' if $validator->is_sanctioned("$last_name $first_name"); |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=head1 DESCRIPTION |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
Data::Validate::Sanctions is a simple validitor to validate a name against sanctions lists. |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
The list is from: |
475
|
|
|
|
|
|
|
- L<https://www.treasury.gov/ofac/downloads/sdn.csv>, |
476
|
|
|
|
|
|
|
- L<https://www.treasury.gov/ofac/downloads/consolidated/cons_prim.csv> |
477
|
|
|
|
|
|
|
- L<https://ofsistorage.blob.core.windows.net/publishlive/ConList.csv> |
478
|
|
|
|
|
|
|
- L<https://webgate.ec.europa.eu/fsd/fsf/public/files/xmlFullSanctionsList_1_1/content?token=$eu_token> |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
run F<update_sanctions_csv> to update the bundled csv. |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
The path of list can be set by function L</set_sanction_file> or by method L</new>. If not set, then environment variable $ENV{SANCTION_FILE} will be checked, at last |
483
|
|
|
|
|
|
|
the default file in this package will be used. |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=head1 METHODS |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
=head2 is_sanctioned |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
is_sanctioned($last_name, $first_name); |
490
|
|
|
|
|
|
|
is_sanctioned($first_name, $last_name); |
491
|
|
|
|
|
|
|
is_sanctioned("$last_name $first_name"); |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
when one string is passed, please be sure last_name is before first_name. |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
or you can pass first_name, last_name (last_name, first_name), we'll check both "$last_name $first_name" and "$first_name $last_name". |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
retrun 1 if match is found and 0 if match is not found. |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
It will remove all non-alpha chars and compare with the list we have. |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=head2 get_sanctioned_info |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
my $result =get_sanctioned_info($last_name, $first_name, $date_of_birth); |
504
|
|
|
|
|
|
|
print 'match: ', $result->{matched_args}->{name}, ' on list ', $result->{list} if $result->{matched}; |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
return hashref with keys: |
507
|
|
|
|
|
|
|
B<matched> 1 or 0, depends if name has matched |
508
|
|
|
|
|
|
|
B<list> name of list matched (present only if matched) |
509
|
|
|
|
|
|
|
B<matched_args> The list of arguments matched (name, date of birth, residence, etc.) |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
It will remove all non-alpha chars and compare with the list we have. |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
=head2 update_data |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
Fetches latest versions of sanction lists, and updates corresponding sections of stored file, if needed |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=head2 last_updated |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
Returns timestamp of when the latest list was updated. |
520
|
|
|
|
|
|
|
If argument is provided - return timestamp of when that list was updated. |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=head2 new |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
Create the object, and set sanction_file |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
my $validator = Data::Validate::Sanctions->new(sanction_file => '/var/storage/sanction.csv'); |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=head2 get_sanction_file |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
get sanction_file which is used by L</is_sanctioned> (procedure-oriented) |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=head2 set_sanction_file |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
set sanction_file which is used by L</is_sanctioned> (procedure-oriented) |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
=head2 _name_matches |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
Pass in the client's name and sanctioned individual's name to see if they are similar or not |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
=head2 export_data |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
Exports the sanction lists to a local file in YAML format. |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=head2 data |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
Gets the sanction list content with lazy loading. |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
=head1 AUTHOR |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
Binary.com E<lt>fayland@binary.comE<gt> |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
=head1 COPYRIGHT |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
Copyright 2014- Binary.com |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
=head1 LICENSE |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
560
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
=head1 SEE ALSO |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
L<Data::OFAC> |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
=cut |