line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package UR::DataSource::File; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# NOTE! This module is deprecated. Use UR::DataSource::Filesystem instead. |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
# A data source implementation for text files where the fields |
6
|
|
|
|
|
|
|
# are delimited by commas (or anything else really). Usually, |
7
|
|
|
|
|
|
|
# the lines in the file will be sorted by one or more columns, |
8
|
|
|
|
|
|
|
# but it isn't strictly necessary |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# For now, it's structured around files where the record is delimited by |
11
|
|
|
|
|
|
|
# newlines, and the fields are delimited by qr(\s*,\s*). Those are |
12
|
|
|
|
|
|
|
# overridable in concrete data sources by specifying record_seperator() and |
13
|
|
|
|
|
|
|
# delimiter(). |
14
|
|
|
|
|
|
|
# FIXME - work out a way to support record-oriented data as well as line-oriented data |
15
|
|
|
|
|
|
|
|
16
|
19
|
|
|
19
|
|
535
|
use UR; |
|
19
|
|
|
|
|
26
|
|
|
19
|
|
|
|
|
123
|
|
17
|
19
|
|
|
19
|
|
69
|
use strict; |
|
19
|
|
|
|
|
26
|
|
|
19
|
|
|
|
|
328
|
|
18
|
19
|
|
|
19
|
|
63
|
use warnings; |
|
19
|
|
|
|
|
24
|
|
|
19
|
|
|
|
|
851
|
|
19
|
|
|
|
|
|
|
our $VERSION = "0.46"; # UR $VERSION; |
20
|
|
|
|
|
|
|
|
21
|
19
|
|
|
19
|
|
72
|
use Fcntl qw(:DEFAULT :flock); |
|
19
|
|
|
|
|
20
|
|
|
19
|
|
|
|
|
7662
|
|
22
|
19
|
|
|
19
|
|
94
|
use Errno qw(EINTR EAGAIN EOPNOTSUPP); |
|
19
|
|
|
|
|
27
|
|
|
19
|
|
|
|
|
2308
|
|
23
|
19
|
|
|
19
|
|
77
|
use File::Temp; |
|
19
|
|
|
|
|
30
|
|
|
19
|
|
|
|
|
1116
|
|
24
|
19
|
|
|
19
|
|
79
|
use File::Basename; |
|
19
|
|
|
|
|
31
|
|
|
19
|
|
|
|
|
847
|
|
25
|
19
|
|
|
19
|
|
73
|
use IO::File qw(); |
|
19
|
|
|
|
|
22
|
|
|
19
|
|
|
|
|
24865
|
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
our @CARP_NOT = qw( UR::Context UR::DataSource::FileMux UR::Object::Type ); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
class UR::DataSource::File { |
30
|
|
|
|
|
|
|
is => ['UR::DataSource'], |
31
|
|
|
|
|
|
|
has => [ |
32
|
|
|
|
|
|
|
delimiter => { is => 'String', default_value => '\s*,\s*', doc => 'Delimiter between columns on the same line' }, |
33
|
|
|
|
|
|
|
record_separator => { is => 'String', default_value => "\n", doc => 'Delimiter between lines in the file' }, |
34
|
|
|
|
|
|
|
column_order => { is => 'ARRAY', doc => 'Names of the columns in the file, in order' }, |
35
|
|
|
|
|
|
|
skip_first_line => { is => 'Integer', default_value => 0, doc => 'Number of lines at the start of the file to skip' }, |
36
|
|
|
|
|
|
|
handle_class => { is => 'String', default_value => 'IO::File', doc => 'Class to use for new file handles' }, |
37
|
|
|
|
|
|
|
quick_disconnect => { is => 'Boolean', default_value => 1, doc => 'Do not hold the file handle open between requests' }, |
38
|
|
|
|
|
|
|
], |
39
|
|
|
|
|
|
|
has_optional => [ |
40
|
|
|
|
|
|
|
server => { is => 'String', doc => 'pathname to the data file' }, |
41
|
|
|
|
|
|
|
file_list => { is => 'ARRAY', doc => 'list of pathnames of equivalent files' }, |
42
|
|
|
|
|
|
|
sort_order => { is => 'ARRAY', doc => 'Names of the columns by which the data file is sorted' }, |
43
|
|
|
|
|
|
|
constant_values => { is => 'ARRAY', doc => 'Property names which are not in the data file(s), but are part of the objects loaded from the data source' }, |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# REMOVE |
47
|
|
|
|
|
|
|
#file_cache_index => { is => 'Integer', doc => 'index into the file cache where the next read will be placed' }, |
48
|
|
|
|
|
|
|
_open_query_count => { is => 'Integer', doc => 'number of queries currently using this data source, used internally' }, |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
], |
51
|
|
|
|
|
|
|
doc => 'A data source for line-oriented files', |
52
|
|
|
|
|
|
|
}; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
|
55
|
10
|
|
|
10
|
0
|
23
|
sub can_savepoint { 0;} # Doesn't support savepoints |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
sub create_default_handle { |
58
|
375
|
|
|
375
|
0
|
416
|
my $self = shift; |
59
|
|
|
|
|
|
|
|
60
|
375
|
50
|
|
|
|
1113
|
if ($ENV{'UR_DBI_MONITOR_SQL'}) { |
61
|
0
|
|
|
|
|
0
|
my $time = time(); |
62
|
0
|
|
|
|
|
0
|
UR::DBI->sql_fh->printf("\nFILE OPEN AT %d [%s]\n",$time, scalar(localtime($time))); |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
375
|
|
|
|
|
1459
|
my $filename = $self->server; |
66
|
375
|
100
|
|
|
|
11748
|
unless (-e $filename) { |
67
|
|
|
|
|
|
|
# file doesn't exist |
68
|
2
|
|
|
|
|
6
|
$filename = '/dev/null'; |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
|
71
|
375
|
|
|
|
|
1174
|
my $handle_class = $self->handle_class; |
72
|
375
|
|
|
|
|
2584
|
my $fh = $handle_class->new($filename); |
73
|
375
|
50
|
|
|
|
27407
|
unless($fh) { |
74
|
0
|
|
|
|
|
0
|
$self->error_message("Can't open ".$self->server." for reading: $!"); |
75
|
0
|
|
|
|
|
0
|
return; |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
|
78
|
375
|
50
|
|
|
|
1027
|
if ($ENV{'UR_DBI_MONITOR_SQL'}) { |
79
|
0
|
|
|
|
|
0
|
UR::DBI->sql_fh->printf("FILE: opened %s fileno %d\n\n",$self->server, $fh->fileno); |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
375
|
|
|
|
|
1426
|
$self->is_connected(1); |
83
|
375
|
|
|
|
|
793
|
return $fh; |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub disconnect { |
87
|
2
|
|
|
2
|
0
|
6
|
my $self = shift; |
88
|
|
|
|
|
|
|
|
89
|
2
|
50
|
|
|
|
4
|
if ($self->has_default_handle) { |
90
|
2
|
|
|
|
|
6
|
my $fh = $self->get_default_handle; |
91
|
2
|
|
|
|
|
26
|
flock($fh,LOCK_UN); |
92
|
2
|
|
|
|
|
14
|
$fh->close(); |
93
|
2
|
|
|
|
|
192
|
$self->__invalidate_get_default_handle__; |
94
|
2
|
|
|
|
|
8
|
$self->is_connected(0); |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
sub _file_position { |
99
|
1222
|
|
|
1222
|
|
1005
|
my $self = shift; |
100
|
1222
|
|
|
|
|
2479
|
my $fh = $self->get_default_handle; |
101
|
1222
|
50
|
|
|
|
3391
|
return $fh ? $fh->tell() : undef; |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
sub prepare_for_fork { |
105
|
2
|
|
|
2
|
0
|
4
|
my $self = shift; |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# make sure this is clear before we fork |
108
|
2
|
|
|
|
|
6
|
$self->{'_fh_position'} = undef; |
109
|
2
|
50
|
|
|
|
8
|
if ($self->has_default_handle) { |
110
|
2
|
|
|
|
|
10
|
$self->{'_fh_position'} = $self->_file_position(); |
111
|
2
|
50
|
|
|
|
24
|
UR::DBI->sql_fh->printf("FILE: preparing to fork; closing file %s and noting position at %s\n",$self->server, $self->{'_fh_position'}) if $ENV{'UR_DBI_MONITOR_SQL'}; |
112
|
|
|
|
|
|
|
} |
113
|
2
|
|
|
|
|
36
|
$self->disconnect_default_handle; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub finish_up_after_fork { |
117
|
2
|
|
|
2
|
0
|
19
|
my $self = shift; |
118
|
2
|
50
|
|
|
|
29
|
if (defined $self->{'_fh_position'}) { |
119
|
2
|
50
|
|
|
|
27
|
UR::DBI->sql_fh->printf("FILE: resetting after fork; reopening file %s and fast-forwarding to %s\n",$self->server, $self->{'_fh_position'}) if $ENV{'UR_DBI_MONITOR_SQL'}; |
120
|
2
|
|
|
|
|
30
|
my $fh = $self->get_default_handle; |
121
|
2
|
|
|
|
|
24
|
$fh->seek($self->{'_fh_position'},0); |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub _regex { |
126
|
376
|
|
|
376
|
|
386
|
my $self = shift; |
127
|
|
|
|
|
|
|
|
128
|
376
|
100
|
|
|
|
778
|
unless ($self->{'_regex'}) { |
129
|
26
|
|
|
|
|
96
|
my $delimiter = $self->delimiter; |
130
|
26
|
|
|
|
|
49
|
my $r = eval { qr($delimiter) }; |
|
26
|
|
|
|
|
464
|
|
131
|
26
|
50
|
33
|
|
|
158
|
if ($@ || !$r) { |
132
|
0
|
|
|
|
|
0
|
$self->error_message("Unable to interepret delimiter '".$self->delimiter.": $@"); |
133
|
0
|
|
|
|
|
0
|
return; |
134
|
|
|
|
|
|
|
} |
135
|
26
|
|
|
|
|
68
|
$self->{'_regex'} = $r; |
136
|
|
|
|
|
|
|
} |
137
|
376
|
|
|
|
|
514
|
return $self->{'_regex'}; |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# We're overriding server() so everyone else can have a single way of getting |
141
|
|
|
|
|
|
|
# the file's pathname instead of having to know about both server and file_list |
142
|
|
|
|
|
|
|
sub server { |
143
|
45
|
|
|
45
|
1
|
51
|
my $self = shift; |
144
|
|
|
|
|
|
|
|
145
|
45
|
100
|
|
|
|
95
|
unless ($self->{'_cached_server'}) { |
146
|
16
|
100
|
|
|
|
65
|
if ($self->__server()) { |
|
|
50
|
|
|
|
|
|
147
|
15
|
|
|
|
|
39
|
$self->{'_cached_server'} = $self->__server(); |
148
|
|
|
|
|
|
|
} elsif ($self->file_list) { |
149
|
1
|
|
|
|
|
2
|
my $files = $self->file_list; |
150
|
1
|
|
|
|
|
2
|
my $count = scalar(@$files); |
151
|
1
|
|
|
|
|
3
|
my $idx = $$ % $count; |
152
|
1
|
|
|
|
|
3
|
$self->{'_cached_server'} = $files->[$idx]; |
153
|
|
|
|
|
|
|
} else { |
154
|
0
|
|
|
|
|
0
|
die "Data source ",$self->id," didn't specify either server or file_list"; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
} |
157
|
45
|
|
|
|
|
81
|
return $self->{'_cached_server'}; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
# Should be divisible by 3 |
162
|
|
|
|
|
|
|
our $MAX_CACHE_SIZE = 99; |
163
|
|
|
|
|
|
|
# The offset cache is an arrayref containing three pieces of data: |
164
|
|
|
|
|
|
|
# 0: If this cache slot is being used by a loading iterator |
165
|
|
|
|
|
|
|
# 1: concatenated data from the sorted columns for comparison with where you are in the file |
166
|
|
|
|
|
|
|
# 2: the seek position that line came from |
167
|
|
|
|
|
|
|
sub _offset_cache { |
168
|
1116
|
|
|
1116
|
|
974
|
my $self = shift; |
169
|
|
|
|
|
|
|
|
170
|
1116
|
100
|
|
|
|
1896
|
unless ($self->{'_offset_cache'}) { |
171
|
25
|
|
|
|
|
58
|
$self->{'_offset_cache'} = []; |
172
|
|
|
|
|
|
|
} |
173
|
1116
|
|
|
|
|
1181
|
return $self->{'_offset_cache'}; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
our %iterator_data_source; |
177
|
|
|
|
|
|
|
our %iterator_cache_slot_refs; |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
sub _allocate_offset_cache_slot { |
180
|
372
|
|
|
372
|
|
381
|
my $self = shift; |
181
|
|
|
|
|
|
|
|
182
|
372
|
|
|
|
|
620
|
my $cache = $self->_offset_cache(); |
183
|
372
|
|
|
|
|
454
|
my $next = scalar(@$cache); |
184
|
|
|
|
|
|
|
#print STDERR "_allocate_offset_cache_slot ".$self->server." current size is $next "; |
185
|
372
|
100
|
|
|
|
724
|
if ($next > $MAX_CACHE_SIZE) { |
186
|
|
|
|
|
|
|
#print STDERR "searching... \n"; |
187
|
282
|
|
|
|
|
493
|
my $last_offset_cache_slot = $self->{'_last_offset_cache_slot'}; |
188
|
282
|
100
|
|
|
|
552
|
if ($last_offset_cache_slot >= $MAX_CACHE_SIZE) { |
189
|
9
|
|
|
|
|
15
|
$next = 0; |
190
|
|
|
|
|
|
|
} else { |
191
|
273
|
|
|
|
|
385
|
$next = $last_offset_cache_slot + 3; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
# Search for an unused slot |
194
|
282
|
|
33
|
|
|
869
|
while ($cache->[$next] and $next != $last_offset_cache_slot) { |
195
|
0
|
|
|
|
|
0
|
$next += 3; |
196
|
0
|
0
|
|
|
|
0
|
$next = 0 if ($next > $MAX_CACHE_SIZE); |
197
|
|
|
|
|
|
|
} |
198
|
282
|
50
|
33
|
|
|
1565
|
if ($next > $MAX_CACHE_SIZE or $next eq $last_offset_cache_slot) { |
199
|
|
|
|
|
|
|
#print STDERR scalar(keys(%iterator_data_source))." items in iterator_data_source ".scalar(keys(%iterator_cache_slot))." in iterator_cache_slot\n"; |
200
|
0
|
|
|
|
|
0
|
Carp::carp("Unable to find an open file offset cache slot because there are too many outstanding loading iterators. Temporarily expanding the cache..."); |
201
|
|
|
|
|
|
|
# We'll let it go ahead and expand the list |
202
|
0
|
|
|
|
|
0
|
$next = $MAX_CACHE_SIZE; |
203
|
0
|
|
|
|
|
0
|
$MAX_CACHE_SIZE += 3; |
204
|
|
|
|
|
|
|
} |
205
|
|
|
|
|
|
|
} |
206
|
372
|
|
|
|
|
423
|
$cache->[$next] = 1; |
207
|
372
|
|
|
|
|
500
|
$cache->[$next+1] = undef; |
208
|
372
|
|
|
|
|
489
|
$cache->[$next+2] = undef; |
209
|
|
|
|
|
|
|
|
210
|
372
|
|
|
|
|
436
|
$self->{'_last_offset_cache_slot'} = $next; |
211
|
|
|
|
|
|
|
#print STDERR "using slot $next current size ".scalar(@$cache)."\n"; |
212
|
372
|
|
|
|
|
550
|
return $next; |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
sub _free_offset_cache_slot { |
217
|
372
|
|
|
372
|
|
492
|
my($self, $cache_slot) = @_; |
218
|
|
|
|
|
|
|
|
219
|
372
|
|
|
|
|
671
|
my $cache = $self->_offset_cache(); |
220
|
372
|
50
|
|
|
|
767
|
unless ($cache_slot < scalar(@$cache)) { |
221
|
0
|
|
|
|
|
0
|
$self->warning_message("Freeing offset cache slot past the end. Current size ".scalar(@$cache).", requested $cache_slot"); |
222
|
0
|
|
|
|
|
0
|
return; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
372
|
50
|
|
|
|
804
|
unless (defined $cache->[$cache_slot]) { |
226
|
0
|
|
|
|
|
0
|
$self->warning_message("Freeing unused offset cache slot $cache_slot"); |
227
|
0
|
|
|
|
|
0
|
return; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
372
|
50
|
66
|
|
|
1018
|
if ($cache->[$cache_slot+1] and scalar(@{$cache->[$cache_slot+1]}) == 0) { |
|
169
|
|
|
|
|
489
|
|
231
|
|
|
|
|
|
|
# There's no data in here. Must have happened when the reader went all the |
232
|
|
|
|
|
|
|
# way to the end of the file and found nothing. Remove this entry completely |
233
|
|
|
|
|
|
|
# because it's not helpful |
234
|
0
|
|
|
|
|
0
|
splice(@$cache, $cache_slot,3); |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
} else { |
237
|
|
|
|
|
|
|
# There is data in here, mark it as a free slot |
238
|
372
|
|
|
|
|
422
|
$cache->[$cache_slot] = 0; |
239
|
|
|
|
|
|
|
} |
240
|
372
|
|
|
|
|
524
|
return 1; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
sub _invalidate_cache { |
246
|
4
|
|
|
4
|
|
6
|
my $self = shift; |
247
|
|
|
|
|
|
|
|
248
|
4
|
|
|
|
|
9
|
$self->{'_offset_cache'} = []; |
249
|
|
|
|
|
|
|
|
250
|
4
|
|
|
|
|
15
|
return 1; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
sub _generate_loading_templates_arrayref { |
254
|
49
|
|
|
49
|
|
85
|
my($self,$old_sql_cols) = @_; |
255
|
|
|
|
|
|
|
|
256
|
49
|
|
|
|
|
215
|
my $columns_in_file = $self->column_order; |
257
|
49
|
|
|
|
|
149
|
my %column_to_position_map; |
258
|
49
|
|
|
|
|
152
|
for (my $i = 0; $i < @$columns_in_file; $i++) { |
259
|
136
|
|
|
|
|
330
|
$column_to_position_map{$columns_in_file->[$i]} = $i; |
260
|
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# strip out columns that don't exist in the file |
263
|
49
|
|
|
|
|
57
|
my $sql_cols; |
264
|
49
|
|
|
|
|
102
|
foreach my $column_data ( @$old_sql_cols ) { |
265
|
212
|
|
|
|
|
342
|
my $propertys_column_name = $column_data->[1]->column_name; |
266
|
212
|
100
|
66
|
|
|
567
|
next unless ($propertys_column_name and exists($column_to_position_map{$propertys_column_name})); |
267
|
|
|
|
|
|
|
|
268
|
136
|
|
|
|
|
185
|
push @$sql_cols, $column_data; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
49
|
50
|
|
|
|
136
|
unless ($sql_cols) { |
272
|
0
|
|
|
|
|
0
|
$self->error_message("Couldn't determine column information for data source " . $self->id); |
273
|
0
|
|
|
|
|
0
|
return; |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
# reorder the requested columns to be in the same order as the file |
277
|
|
|
|
|
|
|
my @sql_cols_with_column_name = |
278
|
49
|
|
|
|
|
87
|
map{ [ $column_to_position_map{ $_->[1]->column_name }, $_ ] } |
|
136
|
|
|
|
|
254
|
|
279
|
|
|
|
|
|
|
@$sql_cols; |
280
|
|
|
|
|
|
|
my @sorted_sql_cols = |
281
|
136
|
|
|
|
|
182
|
map { $_->[1] } |
282
|
49
|
|
|
|
|
227
|
sort { $a->[0] <=> $b->[0] } |
|
125
|
|
|
|
|
205
|
|
283
|
|
|
|
|
|
|
@sql_cols_with_column_name; |
284
|
49
|
|
|
|
|
89
|
$sql_cols = \@sorted_sql_cols; |
285
|
49
|
|
|
|
|
317
|
my $templates = $self->SUPER::_generate_loading_templates_arrayref($sql_cols); |
286
|
|
|
|
|
|
|
|
287
|
49
|
100
|
|
|
|
241
|
if (my $constant_values = $self->constant_values) { |
288
|
|
|
|
|
|
|
# Find the first unused index in the loading template |
289
|
12
|
|
|
|
|
37
|
my $next_template_slot = -1; |
290
|
12
|
|
|
|
|
23
|
foreach my $tmpl ( @$templates ) { |
291
|
12
|
|
|
|
|
11
|
foreach my $col ( @{$tmpl->{'column_positions'}} ) { |
|
12
|
|
|
|
|
19
|
|
292
|
36
|
50
|
|
|
|
49
|
if ($col >= $next_template_slot) { |
293
|
36
|
|
|
|
|
39
|
$next_template_slot = $col + 1; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
} |
297
|
12
|
50
|
|
|
|
30
|
if ($next_template_slot == -1) { |
298
|
0
|
|
|
|
|
0
|
die "Couldn't determine last column in loading template for data source" . $self->id; |
299
|
|
|
|
|
|
|
} |
300
|
|
|
|
|
|
|
|
301
|
12
|
|
|
|
|
19
|
foreach my $prop ( @$constant_values ) { |
302
|
18
|
|
|
|
|
16
|
push @{$templates->[0]->{'column_positions'}}, $next_template_slot++; |
|
18
|
|
|
|
|
29
|
|
303
|
18
|
|
|
|
|
15
|
push @{$templates->[0]->{'property_names'}}, $prop; |
|
18
|
|
|
|
|
30
|
|
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
|
307
|
49
|
|
|
|
|
220
|
return $templates; |
308
|
|
|
|
|
|
|
} |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
sub _things_in_list_are_numeric { |
311
|
260
|
|
|
260
|
|
258
|
my $self = shift; |
312
|
|
|
|
|
|
|
|
313
|
260
|
|
|
|
|
241
|
foreach ( @{$_[0]} ) { |
|
260
|
|
|
|
|
493
|
|
314
|
276
|
100
|
|
|
|
1125
|
return 0 if (! Scalar::Util::looks_like_number($_)); |
315
|
|
|
|
|
|
|
} |
316
|
213
|
|
|
|
|
715
|
return 1; |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
# Construct a closure to perform a test on the $index-th column of |
320
|
|
|
|
|
|
|
# @$$next_candidate_row. The closures return 0 is the test is successful, |
321
|
|
|
|
|
|
|
# -1 if unsuccessful but the file's value was less than $value, and 1 |
322
|
|
|
|
|
|
|
# if unsuccessful and greater. The iterator that churns throug the file |
323
|
|
|
|
|
|
|
# knows that if it's comparing an ID/sorted column, and the comparator |
324
|
|
|
|
|
|
|
# returns 1 then we've gone past the point where we can expect to ever |
325
|
|
|
|
|
|
|
# find another successful match and we should stop looking |
326
|
|
|
|
|
|
|
my $ALWAYS_FALSE = sub { -1 }; |
327
|
|
|
|
|
|
|
sub _comparator_for_operator_and_property { |
328
|
359
|
|
|
359
|
|
618
|
my($self,$property,$next_candidate_row, $index, $operator,$value) = @_; |
329
|
|
|
|
|
|
|
|
330
|
19
|
|
|
19
|
|
96
|
no warnings 'uninitialized'; # we're handling ''/undef/null specially below where it matters |
|
19
|
|
|
|
|
23
|
|
|
19
|
|
|
|
|
39244
|
|
331
|
|
|
|
|
|
|
|
332
|
359
|
100
|
100
|
|
|
2629
|
if ($operator eq 'between') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
333
|
44
|
100
|
100
|
|
|
163
|
if ($value->[0] eq '' or $value->[1] eq '') { |
334
|
28
|
|
|
|
|
40
|
return $ALWAYS_FALSE; |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
|
337
|
16
|
50
|
33
|
|
|
38
|
if ($property->is_numeric and $self->_things_in_list_are_numeric($value)) { |
338
|
16
|
50
|
|
|
|
31
|
if ($value->[0] > $value->[1]) { |
339
|
|
|
|
|
|
|
# Will never be true |
340
|
0
|
|
|
|
|
0
|
Carp::carp "'between' comparison will never be true with values ".$value->[0]," and ".$value->[1]; |
341
|
0
|
|
|
|
|
0
|
return $ALWAYS_FALSE; |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
# numeric 'between' comparison |
345
|
|
|
|
|
|
|
return sub { |
346
|
32
|
50
|
|
32
|
|
67
|
return -1 if ($$next_candidate_row->[$index] eq ''); |
347
|
0
|
0
|
|
|
|
0
|
if ($$next_candidate_row->[$index] < $value->[0]) { |
|
|
0
|
|
|
|
|
|
348
|
0
|
|
|
|
|
0
|
return -1; |
349
|
|
|
|
|
|
|
} elsif ($$next_candidate_row->[$index] > $value->[1]) { |
350
|
0
|
|
|
|
|
0
|
return 1; |
351
|
|
|
|
|
|
|
} else { |
352
|
0
|
|
|
|
|
0
|
return 0; |
353
|
|
|
|
|
|
|
} |
354
|
16
|
|
|
|
|
77
|
}; |
355
|
|
|
|
|
|
|
} else { |
356
|
0
|
0
|
|
|
|
0
|
if ($value->[0] gt $value->[1]) { |
357
|
0
|
|
|
|
|
0
|
Carp::carp "'between' comparison will never be true with values ".$value->[0]," and ".$value->[1]; |
358
|
0
|
|
|
|
|
0
|
return $ALWAYS_FALSE; |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# A string 'between' comparison |
362
|
|
|
|
|
|
|
return sub { |
363
|
0
|
0
|
|
0
|
|
0
|
return -1 if ($$next_candidate_row->[$index] eq ''); |
364
|
0
|
0
|
|
|
|
0
|
if ($$next_candidate_row->[$index] lt $value->[0]) { |
|
|
0
|
|
|
|
|
|
365
|
0
|
|
|
|
|
0
|
return -1; |
366
|
|
|
|
|
|
|
} elsif ($$next_candidate_row->[$index] gt $value->[1]) { |
367
|
0
|
|
|
|
|
0
|
return 1; |
368
|
|
|
|
|
|
|
} else { |
369
|
0
|
|
|
|
|
0
|
return 0; |
370
|
|
|
|
|
|
|
} |
371
|
0
|
|
|
|
|
0
|
}; |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
} elsif ($operator eq 'in') { |
375
|
20
|
100
|
|
|
|
50
|
if (! @$value) { |
376
|
8
|
|
|
|
|
14
|
return $ALWAYS_FALSE; |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
12
|
100
|
66
|
|
|
51
|
if ($property->is_numeric and $self->_things_in_list_are_numeric($value)) { |
380
|
|
|
|
|
|
|
# Numeric 'in' comparison returns undef if we're within the range of the list |
381
|
|
|
|
|
|
|
# but don't actually match any of the items in the list |
382
|
8
|
|
|
|
|
27
|
@$value = sort { $a <=> $b } @$value; # sort the values first |
|
0
|
|
|
|
|
0
|
|
383
|
|
|
|
|
|
|
return sub { |
384
|
16
|
50
|
|
16
|
|
40
|
return -1 if ($$next_candidate_row->[$index] eq ''); |
385
|
0
|
0
|
|
|
|
0
|
if ($$next_candidate_row->[$index] < $value->[0]) { |
|
|
0
|
|
|
|
|
|
386
|
0
|
|
|
|
|
0
|
return -1; |
387
|
|
|
|
|
|
|
} elsif ($$next_candidate_row->[$index] > $value->[-1]) { |
388
|
0
|
|
|
|
|
0
|
return 1; |
389
|
|
|
|
|
|
|
} else { |
390
|
0
|
|
|
|
|
0
|
foreach ( @$value ) { |
391
|
0
|
0
|
|
|
|
0
|
return 0 if $$next_candidate_row->[$index] == $_; |
392
|
|
|
|
|
|
|
} |
393
|
0
|
|
|
|
|
0
|
return -1; |
394
|
|
|
|
|
|
|
} |
395
|
8
|
|
|
|
|
51
|
}; |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
} else { |
398
|
|
|
|
|
|
|
# A string 'in' comparison |
399
|
4
|
|
|
|
|
13
|
@$value = sort { $a cmp $b } @$value; |
|
0
|
|
|
|
|
0
|
|
400
|
|
|
|
|
|
|
return sub { |
401
|
8
|
50
|
|
8
|
|
29
|
if ($$next_candidate_row->[$index] lt $value->[0]) { |
|
|
50
|
|
|
|
|
|
402
|
0
|
|
|
|
|
0
|
return -1; |
403
|
|
|
|
|
|
|
} elsif ($$next_candidate_row->[$index] gt $value->[-1]) { |
404
|
0
|
|
|
|
|
0
|
return 1; |
405
|
|
|
|
|
|
|
} else { |
406
|
8
|
|
|
|
|
18
|
foreach ( @$value ) { |
407
|
8
|
50
|
|
|
|
19
|
return 0 if $$next_candidate_row->[$index] eq $_; |
408
|
|
|
|
|
|
|
} |
409
|
0
|
|
|
|
|
0
|
return -1; |
410
|
|
|
|
|
|
|
} |
411
|
4
|
|
|
|
|
26
|
}; |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
} |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
} elsif ($operator eq 'not in') { |
416
|
14
|
100
|
|
|
|
41
|
if (! @$value) { |
417
|
4
|
|
|
|
|
6
|
return $ALWAYS_FALSE; |
418
|
|
|
|
|
|
|
} |
419
|
|
|
|
|
|
|
|
420
|
10
|
100
|
100
|
|
|
38
|
if ($property->is_numeric and $self->_things_in_list_are_numeric($value)) { |
421
|
|
|
|
|
|
|
return sub { |
422
|
8
|
50
|
|
8
|
|
18
|
return -1 if ($$next_candidate_row->[$index] eq ''); |
423
|
0
|
|
|
|
|
0
|
foreach ( @$value ) { |
424
|
0
|
0
|
|
|
|
0
|
return -1 if $$next_candidate_row->[$index] == $_; |
425
|
|
|
|
|
|
|
} |
426
|
0
|
|
|
|
|
0
|
return 0; |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
|
429
|
4
|
|
|
|
|
21
|
} else { |
430
|
|
|
|
|
|
|
return sub { |
431
|
16
|
|
|
16
|
|
25
|
foreach ( @$value ) { |
432
|
22
|
100
|
|
|
|
53
|
return -1 if $$next_candidate_row->[$index] eq $_; |
433
|
|
|
|
|
|
|
} |
434
|
4
|
|
|
|
|
6
|
return 0; |
435
|
|
|
|
|
|
|
} |
436
|
6
|
|
|
|
|
37
|
} |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
} elsif ($operator eq 'like') { |
439
|
|
|
|
|
|
|
# 'like' is always a string comparison. In addition, we can't know if we're ahead |
440
|
|
|
|
|
|
|
# or behind in the file's ID columns, so the only two return values are 0 and 1 |
441
|
|
|
|
|
|
|
|
442
|
18
|
100
|
|
|
|
60
|
return $ALWAYS_FALSE if ($value eq ''); # property like NULL is always false |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
# Convert SQL-type wildcards to Perl-type wildcards |
445
|
|
|
|
|
|
|
# Convert a % to a *, and _ to ., unless they're preceeded by \ to escape them. |
446
|
|
|
|
|
|
|
# Not that this isn't precisely correct, as \\% should really mean a literal \ |
447
|
|
|
|
|
|
|
# followed by a wildcard, but we can't be correct in all cases without including |
448
|
|
|
|
|
|
|
# a real parser. This will catch most cases. |
449
|
|
|
|
|
|
|
|
450
|
14
|
|
|
|
|
82
|
$value =~ s/(?
|
451
|
14
|
|
|
|
|
29
|
$value =~ s/(?
|
452
|
14
|
|
|
|
|
109
|
my $regex = qr($value); |
453
|
|
|
|
|
|
|
return sub { |
454
|
32
|
100
|
|
32
|
|
86
|
return -1 if ($$next_candidate_row->[$index] eq ''); |
455
|
8
|
100
|
|
|
|
32
|
if ($$next_candidate_row->[$index] =~ $regex) { |
456
|
2
|
|
|
|
|
6
|
return 0; |
457
|
|
|
|
|
|
|
} else { |
458
|
6
|
|
|
|
|
8
|
return 1; |
459
|
|
|
|
|
|
|
} |
460
|
14
|
|
|
|
|
92
|
}; |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
} elsif ($operator eq 'not like') { |
463
|
16
|
100
|
|
|
|
53
|
return $ALWAYS_FALSE if ($value eq ''); # property like NULL is always false |
464
|
12
|
|
|
|
|
69
|
$value =~ s/(?
|
465
|
12
|
|
|
|
|
29
|
$value =~ s/(?
|
466
|
12
|
|
|
|
|
90
|
my $regex = qr($value); |
467
|
|
|
|
|
|
|
return sub { |
468
|
24
|
50
|
|
24
|
|
72
|
return -1 if ($$next_candidate_row->[$index] eq ''); |
469
|
0
|
0
|
|
|
|
0
|
if ($$next_candidate_row->[$index] =~ $regex) { |
470
|
0
|
|
|
|
|
0
|
return 1; |
471
|
|
|
|
|
|
|
} else { |
472
|
0
|
|
|
|
|
0
|
return 0; |
473
|
|
|
|
|
|
|
} |
474
|
12
|
|
|
|
|
82
|
}; |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
# FIXME - should we only be testing the numericness of the property? |
478
|
|
|
|
|
|
|
} elsif ($property->is_numeric and $self->_things_in_list_are_numeric([$value])) { |
479
|
|
|
|
|
|
|
# Basic numeric comparisons |
480
|
185
|
100
|
33
|
|
|
614
|
if ($operator eq '=') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
481
|
|
|
|
|
|
|
return sub { |
482
|
1470
|
100
|
|
1470
|
|
2178
|
return -1 if ($$next_candidate_row->[$index] eq ''); # null always != a number |
483
|
1446
|
|
|
|
|
1873
|
return $$next_candidate_row->[$index] <=> $value; |
484
|
130
|
|
|
|
|
769
|
}; |
485
|
|
|
|
|
|
|
} elsif ($operator eq '<') { |
486
|
|
|
|
|
|
|
return sub { |
487
|
24
|
50
|
|
24
|
|
47
|
return -1 if ($$next_candidate_row->[$index] eq ''); # null always != a number |
488
|
0
|
0
|
|
|
|
0
|
$$next_candidate_row->[$index] < $value ? 0 : 1; |
489
|
12
|
|
|
|
|
56
|
}; |
490
|
|
|
|
|
|
|
} elsif ($operator eq '<=') { |
491
|
|
|
|
|
|
|
return sub { |
492
|
24
|
50
|
|
24
|
|
65
|
return -1 if ($$next_candidate_row->[$index] eq ''); # null always != a number |
493
|
0
|
0
|
|
|
|
0
|
$$next_candidate_row->[$index] <= $value ? 0 : 1; |
494
|
12
|
|
|
|
|
78
|
}; |
495
|
|
|
|
|
|
|
} elsif ($operator eq '>') { |
496
|
|
|
|
|
|
|
return sub { |
497
|
24
|
50
|
|
24
|
|
70
|
return -1 if ($$next_candidate_row->[$index] eq ''); # null always != a number |
498
|
0
|
0
|
|
|
|
0
|
$$next_candidate_row->[$index] > $value ? 0 : -1; |
499
|
12
|
|
|
|
|
92
|
}; |
500
|
|
|
|
|
|
|
} elsif ($operator eq '>=') { |
501
|
|
|
|
|
|
|
return sub { |
502
|
24
|
50
|
|
24
|
|
58
|
return -1 if ($$next_candidate_row->[$index] eq ''); # null always != a number |
503
|
0
|
0
|
|
|
|
0
|
$$next_candidate_row->[$index] >= $value ? 0 : -1; |
504
|
12
|
|
|
|
|
77
|
}; |
505
|
|
|
|
|
|
|
} elsif ($operator eq 'true') { |
506
|
|
|
|
|
|
|
return sub { |
507
|
0
|
0
|
|
0
|
|
0
|
$$next_candidate_row->[$index] ? 0 : -1; |
508
|
0
|
|
|
|
|
0
|
}; |
509
|
|
|
|
|
|
|
} elsif ($operator eq 'false') { |
510
|
|
|
|
|
|
|
return sub { |
511
|
2
|
50
|
|
2
|
|
5
|
$$next_candidate_row->[$index] ? -1 : 0; |
512
|
1
|
|
|
|
|
6
|
}; |
513
|
|
|
|
|
|
|
} elsif ($operator eq '!=' or $operator eq 'ne') { |
514
|
|
|
|
|
|
|
return sub { |
515
|
12
|
50
|
|
12
|
|
27
|
return 0 if ($$next_candidate_row->[$index] eq ''); # null always != a number |
516
|
0
|
0
|
|
|
|
0
|
$$next_candidate_row->[$index] != $value ? 0 : -1; |
517
|
|
|
|
|
|
|
} |
518
|
6
|
|
|
|
|
29
|
} |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
} else { |
521
|
|
|
|
|
|
|
# Basic string comparisons |
522
|
62
|
100
|
0
|
|
|
254
|
if ($operator eq '=') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
523
|
|
|
|
|
|
|
return sub { |
524
|
120
|
50
|
50
|
120
|
|
505
|
return -1 if ($$next_candidate_row->[$index] eq '' xor $value eq ''); |
525
|
120
|
|
|
|
|
168
|
return $$next_candidate_row->[$index] cmp $value; |
526
|
25
|
|
|
|
|
168
|
}; |
527
|
|
|
|
|
|
|
} elsif ($operator eq '<') { |
528
|
|
|
|
|
|
|
return sub { |
529
|
16
|
50
|
|
16
|
|
30
|
$$next_candidate_row->[$index] lt $value ? 0 : 1; |
530
|
8
|
|
|
|
|
38
|
}; |
531
|
|
|
|
|
|
|
} elsif ($operator eq '<=') { |
532
|
|
|
|
|
|
|
return sub { |
533
|
16
|
50
|
33
|
16
|
|
46
|
return -1 if ($$next_candidate_row->[$index] eq '' or $value eq ''); |
534
|
0
|
0
|
|
|
|
0
|
$$next_candidate_row->[$index] le $value ? 0 : 1; |
535
|
8
|
|
|
|
|
44
|
}; |
536
|
|
|
|
|
|
|
} elsif ($operator eq '>') { |
537
|
|
|
|
|
|
|
return sub { |
538
|
16
|
50
|
|
16
|
|
45
|
$$next_candidate_row->[$index] gt $value ? 0 : -1; |
539
|
8
|
|
|
|
|
54
|
}; |
540
|
|
|
|
|
|
|
} elsif ($operator eq '>=') { |
541
|
|
|
|
|
|
|
return sub { |
542
|
16
|
50
|
33
|
16
|
|
74
|
return -1 if ($$next_candidate_row->[$index] eq '' or $value eq ''); |
543
|
0
|
0
|
|
|
|
0
|
$$next_candidate_row->[$index] ge $value ? 0 : -1; |
544
|
8
|
|
|
|
|
46
|
}; |
545
|
|
|
|
|
|
|
} elsif ($operator eq 'true') { |
546
|
|
|
|
|
|
|
return sub { |
547
|
8
|
50
|
|
8
|
|
17
|
$$next_candidate_row->[$index] ? 0 : -1; |
548
|
4
|
|
|
|
|
29
|
}; |
549
|
|
|
|
|
|
|
} elsif ($operator eq 'false') { |
550
|
|
|
|
|
|
|
return sub { |
551
|
2
|
50
|
|
2
|
|
6
|
$$next_candidate_row->[$index] ? -1 : 0; |
552
|
1
|
|
|
|
|
5
|
}; |
553
|
|
|
|
|
|
|
} elsif ($operator eq '!=' or $operator eq 'ne') { |
554
|
|
|
|
|
|
|
return sub { |
555
|
0
|
0
|
|
0
|
|
0
|
$$next_candidate_row->[$index] ne $value ? 0 : -1; |
556
|
|
|
|
|
|
|
} |
557
|
0
|
|
|
|
|
0
|
} |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
my $iterator_serial = 0; |
562
|
|
|
|
|
|
|
sub create_iterator_closure_for_rule { |
563
|
372
|
|
|
372
|
1
|
381
|
my($self,$rule) = @_; |
564
|
|
|
|
|
|
|
|
565
|
372
|
|
|
|
|
817
|
my $class_name = $rule->subject_class_name; |
566
|
372
|
|
|
|
|
1074
|
my $class_meta = $class_name->__meta__; |
567
|
372
|
|
|
|
|
707
|
my $rule_template = $rule->template; |
568
|
|
|
|
|
|
|
|
569
|
372
|
|
|
|
|
1293
|
my $csv_column_order_names = $self->column_order; |
570
|
372
|
|
|
|
|
1280
|
my $csv_column_count = scalar @$csv_column_order_names; |
571
|
|
|
|
|
|
|
|
572
|
372
|
|
|
|
|
1083
|
my $operators_for_properties = $rule_template->operators_for_properties(); |
573
|
372
|
|
|
|
|
814
|
my $values_for_properties = $rule->legacy_params_hash; |
574
|
372
|
|
|
|
|
1053
|
foreach ( values %$values_for_properties ) { |
575
|
1506
|
50
|
66
|
|
|
2826
|
if (ref eq 'HASH' and exists $_->{'value'}) { |
576
|
188
|
|
|
|
|
339
|
$_ = $_->{'value'}; |
577
|
|
|
|
|
|
|
} |
578
|
|
|
|
|
|
|
} |
579
|
|
|
|
|
|
|
|
580
|
372
|
|
|
|
|
968
|
my $sort_order_names = $self->sort_order; |
581
|
372
|
|
|
|
|
1069
|
my %sort_column_names = map { $_ => 1 } @$sort_order_names; |
|
371
|
|
|
|
|
953
|
|
582
|
372
|
|
|
|
|
525
|
my @non_sort_column_names = grep { ! exists($sort_column_names{$_}) } @$csv_column_order_names; |
|
1096
|
|
|
|
|
1712
|
|
583
|
|
|
|
|
|
|
|
584
|
372
|
|
|
|
|
397
|
my %column_name_to_index_map; |
585
|
372
|
|
|
|
|
882
|
for (my $i = 0; $i < @$csv_column_order_names; $i++) { |
586
|
1096
|
|
|
|
|
1957
|
$column_name_to_index_map{$csv_column_order_names->[$i]} = $i; |
587
|
|
|
|
|
|
|
} |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
# Index in the split-file-data for each sorted column in order |
590
|
372
|
|
|
|
|
460
|
my @sort_order_column_indexes = map { $column_name_to_index_map{$_} } @$sort_order_names; |
|
371
|
|
|
|
|
769
|
|
591
|
|
|
|
|
|
|
|
592
|
372
|
|
|
|
|
406
|
my(%property_meta_for_column_name); |
593
|
372
|
|
|
|
|
489
|
foreach my $column_name ( @$csv_column_order_names ) { |
594
|
1096
|
|
|
|
|
3445
|
my $prop = UR::Object::Property->get(class_name => $class_name, column_name => $column_name); |
595
|
1096
|
|
|
|
|
1007
|
our %WARNED_ABOUT_COLUMN; |
596
|
1096
|
50
|
33
|
|
|
2348
|
unless ( $prop or $WARNED_ABOUT_COLUMN{$class_name . '::' . $column_name}++) { |
597
|
0
|
|
|
|
|
0
|
$self->warning_message("Couldn't find a property in class $class_name that goes with column $column_name"); |
598
|
0
|
|
|
|
|
0
|
next; |
599
|
|
|
|
|
|
|
} |
600
|
1096
|
|
|
|
|
1906
|
$property_meta_for_column_name{$column_name} = $prop; |
601
|
|
|
|
|
|
|
} |
602
|
|
|
|
|
|
|
|
603
|
372
|
|
|
|
|
398
|
my @rule_columns_in_order; # The order we should perform rule matches on - value is the index in @next_file_row to test |
604
|
|
|
|
|
|
|
my @comparison_for_column; # closures to call to perform the match - same order as @rule_columns_in_order |
605
|
372
|
|
|
|
|
393
|
my $last_sort_column_in_rule = -1; # Last index in @rule_columns_in_order that applies when trying "the shortcut" |
606
|
372
|
|
|
|
|
348
|
my $looking_for_sort_columns = 1; |
607
|
|
|
|
|
|
|
|
608
|
372
|
|
|
|
|
405
|
my $next_candidate_row; # This will be filled in by the closure below |
609
|
372
|
|
|
|
|
525
|
foreach my $column_name ( @$sort_order_names, @non_sort_column_names ) { |
610
|
1096
|
|
|
|
|
1215
|
my $property_meta = $property_meta_for_column_name{$column_name}; |
611
|
1096
|
50
|
|
|
|
1596
|
unless ($property_meta) { |
612
|
0
|
|
|
|
|
0
|
Carp::croak("Class $class_name has no property connected to column named '$column_name' in data source ".$self->id); |
613
|
|
|
|
|
|
|
} |
614
|
1096
|
|
|
|
|
2240
|
my $property_name = $property_meta->property_name; |
615
|
1096
|
100
|
66
|
|
|
2660
|
if (! $operators_for_properties->{$property_name}) { |
|
|
100
|
|
|
|
|
|
616
|
737
|
|
|
|
|
623
|
$looking_for_sort_columns = 0; |
617
|
737
|
|
|
|
|
996
|
next; |
618
|
|
|
|
|
|
|
} elsif ($looking_for_sort_columns && $sort_column_names{$column_name}) { |
619
|
129
|
|
|
|
|
165
|
$last_sort_column_in_rule++; |
620
|
|
|
|
|
|
|
} else { |
621
|
|
|
|
|
|
|
# There's been a gap in the ID column list in the rule, stop looking for |
622
|
|
|
|
|
|
|
# further ID columns |
623
|
230
|
|
|
|
|
235
|
$looking_for_sort_columns = 0; |
624
|
|
|
|
|
|
|
} |
625
|
|
|
|
|
|
|
|
626
|
359
|
|
|
|
|
590
|
push @rule_columns_in_order, $column_name_to_index_map{$column_name}; |
627
|
|
|
|
|
|
|
|
628
|
359
|
|
|
|
|
482
|
my $operator = $operators_for_properties->{$property_name}; |
629
|
359
|
|
|
|
|
436
|
my $rule_value = $values_for_properties->{$property_name}; |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
my $comparison_function = $self->_comparator_for_operator_and_property($property_meta, |
632
|
|
|
|
|
|
|
\$next_candidate_row, |
633
|
359
|
|
|
|
|
1275
|
$column_name_to_index_map{$column_name}, |
634
|
|
|
|
|
|
|
$operator, |
635
|
|
|
|
|
|
|
$rule_value); |
636
|
359
|
50
|
|
|
|
898
|
unless ($comparison_function) { |
637
|
0
|
|
|
|
|
0
|
Carp::croak("Unknown operator '$operator' in file data source filter"); |
638
|
|
|
|
|
|
|
} |
639
|
359
|
|
|
|
|
583
|
push @comparison_for_column, $comparison_function; |
640
|
|
|
|
|
|
|
} |
641
|
|
|
|
|
|
|
|
642
|
372
|
|
|
|
|
942
|
my $split_regex = $self->_regex(); |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
# FIXME - another performance boost might be to do some kind of binary search |
645
|
|
|
|
|
|
|
# against the file to set the initial/next position? |
646
|
372
|
|
|
|
|
385
|
my $file_pos = 0; |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
# search in the offset cache for something helpful |
649
|
372
|
|
|
|
|
761
|
my $offset_cache = $self->_offset_cache(); |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
# If the rule doesn't touch the sorted columns, then we can't use the offset cache for help :( |
652
|
372
|
100
|
|
|
|
820
|
if ($last_sort_column_in_rule >= 0) { |
653
|
|
|
|
|
|
|
# Starting at index 1 because we're interested in the file and seek data, not if it's in use |
654
|
|
|
|
|
|
|
# offset 0 is the in-use flag, offset 1 is a ref to the file data and offset 2 is the file seek pos |
655
|
|
|
|
|
|
|
SEARCH_CACHE: |
656
|
129
|
|
|
|
|
378
|
for (my $i = 1; $i < @$offset_cache; $i+=3) { |
657
|
3343
|
100
|
66
|
|
|
7703
|
next unless (defined($offset_cache->[$i]) && defined($offset_cache->[$i+1])); |
658
|
|
|
|
|
|
|
|
659
|
1205
|
|
|
|
|
993
|
$next_candidate_row = $offset_cache->[$i]; |
660
|
1205
|
|
|
|
|
744
|
my $matched = 0; |
661
|
|
|
|
|
|
|
COMPARE_VALUES: |
662
|
1205
|
|
|
|
|
1551
|
for (my $c = 0; $c <= $last_sort_column_in_rule; $c++) { |
663
|
1205
|
|
|
|
|
1138
|
my $comparison = $comparison_for_column[$c]->(); |
664
|
|
|
|
|
|
|
|
665
|
1205
|
100
|
|
|
|
2609
|
next SEARCH_CACHE if $comparison > 0; |
666
|
15
|
100
|
|
|
|
38
|
if ($comparison < 0) { |
667
|
10
|
|
|
|
|
12
|
$matched = 1; |
668
|
10
|
|
|
|
|
16
|
last COMPARE_VALUES; |
669
|
|
|
|
|
|
|
} |
670
|
|
|
|
|
|
|
} |
671
|
|
|
|
|
|
|
# If we made it this far, then the file data in this slot is earlier in the file |
672
|
|
|
|
|
|
|
# than the data we're looking for. So, if the seek pos data is later than what |
673
|
|
|
|
|
|
|
# we've found yet, use it instead |
674
|
15
|
100
|
66
|
|
|
74
|
if ($matched and $offset_cache->[$i+1] > $file_pos) { |
675
|
10
|
|
|
|
|
26
|
$file_pos = $offset_cache->[$i+1]; |
676
|
|
|
|
|
|
|
} |
677
|
|
|
|
|
|
|
} |
678
|
|
|
|
|
|
|
} |
679
|
|
|
|
|
|
|
|
680
|
372
|
|
|
|
|
388
|
my($monitor_start_time,$monitor_printed_first_fetch); |
681
|
372
|
50
|
|
|
|
951
|
if ($ENV{'UR_DBI_MONITOR_SQL'}) { |
682
|
0
|
|
|
|
|
0
|
$monitor_start_time = Time::HiRes::time(); |
683
|
0
|
|
|
|
|
0
|
$monitor_printed_first_fetch = 0; |
684
|
0
|
|
|
|
|
0
|
my @filters_list; |
685
|
0
|
|
|
|
|
0
|
for (my $i = 0; $i < @rule_columns_in_order; $i++) { |
686
|
0
|
|
|
|
|
0
|
my $column = $rule_columns_in_order[$i]; |
687
|
0
|
|
|
|
|
0
|
my $column_name = $csv_column_order_names->[$column]; |
688
|
0
|
0
|
|
|
|
0
|
my $is_sorted = $i <= $last_sort_column_in_rule ? ' (sorted)' : ''; |
689
|
0
|
|
0
|
|
|
0
|
my $operator = $operators_for_properties->{$column_name} || '='; |
690
|
0
|
|
|
|
|
0
|
my $rule_value = $values_for_properties->{$column_name}; |
691
|
0
|
0
|
|
|
|
0
|
if (ref $rule_value eq 'ARRAY') { |
692
|
0
|
|
|
|
|
0
|
$rule_value = '[' . join(',', @$rule_value) . ']'; |
693
|
|
|
|
|
|
|
} |
694
|
0
|
|
|
|
|
0
|
my $filter_string = $column_name . " $operator $rule_value" . $is_sorted; |
695
|
0
|
|
|
|
|
0
|
push @filters_list, $filter_string; |
696
|
|
|
|
|
|
|
} |
697
|
0
|
|
|
|
|
0
|
my $filter_list = join("\n\t", @filters_list); |
698
|
0
|
|
|
|
|
0
|
UR::DBI->sql_fh->printf("\nFILE: %s\nFILTERS %s\n\n", $self->server, $filter_list); |
699
|
|
|
|
|
|
|
} |
700
|
|
|
|
|
|
|
|
701
|
372
|
|
100
|
|
|
1001
|
$self->{'_last_read_serial'} ||= ''; |
702
|
|
|
|
|
|
|
|
703
|
372
|
|
|
|
|
1046
|
my $record_separator = $self->record_separator; |
704
|
372
|
|
|
|
|
788
|
my $cache_slot = $self->_allocate_offset_cache_slot(); |
705
|
372
|
|
|
|
|
389
|
my $cache_insert_counter = 100; # a "breadcrumb" will be left in the offset cache after this many lines are read |
706
|
|
|
|
|
|
|
|
707
|
372
|
|
|
|
|
311
|
my $lines_read = 0; |
708
|
372
|
|
|
|
|
272
|
my $printed_first_match = 0; |
709
|
372
|
|
|
|
|
343
|
my $lines_matched = 0; |
710
|
|
|
|
|
|
|
|
711
|
372
|
|
|
|
|
338
|
my $fh; # File handle we'll be reading from |
712
|
372
|
|
|
|
|
344
|
my $this_iterator_serial = $iterator_serial++; |
713
|
|
|
|
|
|
|
my $iterator = sub { |
714
|
|
|
|
|
|
|
|
715
|
594
|
100
|
|
594
|
|
1065
|
unless (ref($fh)) { |
716
|
372
|
|
|
|
|
1129
|
$fh = $self->get_default_handle(); |
717
|
|
|
|
|
|
|
# Lock the file for reading... For more fine-grained locking we could move this to |
718
|
|
|
|
|
|
|
# after READ_LINE_FROM_FILE: but that would slow down read operations a bit. If |
719
|
|
|
|
|
|
|
# there ends up being a problem with lock contention, go ahead and move it before $line = <$fh>; |
720
|
|
|
|
|
|
|
#flock($fh,LOCK_SH); |
721
|
|
|
|
|
|
|
} |
722
|
|
|
|
|
|
|
|
723
|
594
|
50
|
33
|
|
|
1623
|
if ($monitor_start_time && ! $monitor_printed_first_fetch) { |
724
|
0
|
|
|
|
|
0
|
UR::DBI->sql_fh->printf("FILE: FIRST FETCH TIME: %.4f s\n", Time::HiRes::time() - $monitor_start_time); |
725
|
0
|
|
|
|
|
0
|
$monitor_printed_first_fetch = 1; |
726
|
|
|
|
|
|
|
} |
727
|
|
|
|
|
|
|
|
728
|
594
|
100
|
|
|
|
1779
|
if ($self->{'_last_read_serial'} ne $this_iterator_serial) { |
729
|
374
|
50
|
|
|
|
929
|
UR::DBI->sql_fh->printf("FILE: Resetting file position to $file_pos\n") if $ENV{'UR_DBI_MONITOR_SQL'}; |
730
|
|
|
|
|
|
|
# The last read was from a different request, reset the position |
731
|
374
|
|
|
|
|
1510
|
$fh->seek($file_pos,0); |
732
|
374
|
100
|
|
|
|
3121
|
if ($file_pos == 0) { |
733
|
365
|
|
|
|
|
1001
|
my $skip = $self->skip_first_line; |
734
|
365
|
|
|
|
|
1004
|
while ($skip-- > 0) { |
735
|
0
|
|
|
|
|
0
|
scalar(<$fh>); |
736
|
|
|
|
|
|
|
} |
737
|
|
|
|
|
|
|
} |
738
|
374
|
|
|
|
|
792
|
$file_pos = $self->_file_position(); |
739
|
|
|
|
|
|
|
|
740
|
374
|
|
|
|
|
2025
|
$self->{'_last_read_serial'} = $this_iterator_serial; |
741
|
|
|
|
|
|
|
} |
742
|
|
|
|
|
|
|
|
743
|
594
|
|
|
|
|
1777
|
local $/; # Make sure some wise guy hasn't changed this out from under us |
744
|
594
|
|
|
|
|
846
|
$/ = $record_separator; |
745
|
|
|
|
|
|
|
|
746
|
594
|
|
|
|
|
737
|
my $line; |
747
|
|
|
|
|
|
|
READ_LINE_FROM_FILE: |
748
|
594
|
|
|
|
|
1307
|
until($line) { |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
# Hack for OSX 10.5. |
751
|
|
|
|
|
|
|
# At EOF, the getline below will return undef. Most builds of Perl |
752
|
|
|
|
|
|
|
# will also set $! to 0 at EOF so you can distinguish between the cases |
753
|
|
|
|
|
|
|
# of EOF (which may have actually happened a while ago because of buffering) |
754
|
|
|
|
|
|
|
# and an actual read error. OSX 10.5's Perl does not, and so $! |
755
|
|
|
|
|
|
|
# retains whatever value it had after the last failed syscall, likely |
756
|
|
|
|
|
|
|
# a stat() while looking for a Perl module. This should have no effect |
757
|
|
|
|
|
|
|
# other platforms where you can't trust $! at arbitrary points in time |
758
|
|
|
|
|
|
|
# anyway |
759
|
1095
|
|
|
|
|
1696
|
$! = 0; |
760
|
1095
|
|
|
|
|
8610
|
$line = <$fh>; |
761
|
|
|
|
|
|
|
|
762
|
1095
|
100
|
|
|
|
1844
|
unless (defined $line) { |
763
|
249
|
50
|
|
|
|
707
|
if ($!) { |
764
|
0
|
0
|
0
|
|
|
0
|
redo READ_LINE_FROM_FILE if ($! == EAGAIN or $! == EINTR); |
765
|
0
|
|
|
|
|
0
|
my $pathname = $self->server(); |
766
|
0
|
|
|
|
|
0
|
Carp::confess("getline() failed for DataSource $self pathname $pathname boolexpr $rule: $!"); |
767
|
|
|
|
|
|
|
} |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
# at EOF. Close up shop and return |
770
|
|
|
|
|
|
|
#flock($fh,LOCK_UN); |
771
|
249
|
|
|
|
|
260
|
$fh = undef; |
772
|
|
|
|
|
|
|
|
773
|
249
|
50
|
|
|
|
486
|
if ($monitor_start_time) { |
774
|
0
|
|
|
|
|
0
|
UR::DBI->sql_fh->printf("FILE: at EOF\nFILE: $lines_read lines read for this request. $lines_matched matches\nFILE: TOTAL EXECUTE-FETCH TIME: %.4f s\n", Time::HiRes::time() - $monitor_start_time); |
775
|
|
|
|
|
|
|
} |
776
|
|
|
|
|
|
|
|
777
|
249
|
|
|
|
|
892
|
return; |
778
|
|
|
|
|
|
|
} |
779
|
|
|
|
|
|
|
|
780
|
846
|
|
|
|
|
720
|
$lines_read++; |
781
|
846
|
|
|
|
|
810
|
my $last_read_size = length($line); |
782
|
846
|
|
|
|
|
974
|
chomp $line; |
783
|
|
|
|
|
|
|
# FIXME - to support record-oriented files, we need some replacement for this... |
784
|
846
|
|
|
|
|
4124
|
$next_candidate_row = [ split($split_regex, $line, $csv_column_count) ]; |
785
|
846
|
|
|
|
|
1321
|
$#{$a} = $csv_column_count-1; |
|
846
|
|
|
|
|
1209
|
|
786
|
|
|
|
|
|
|
|
787
|
846
|
|
|
|
|
1339
|
$file_pos = $self->_file_position(); |
788
|
846
|
|
|
|
|
3095
|
my $file_pos_before_read = $file_pos - $last_read_size; |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
# Every so many lines read, leave a breadcrumb about what we've seen |
791
|
846
|
50
|
|
|
|
1404
|
unless ($lines_read % $cache_insert_counter) { |
792
|
0
|
|
|
|
|
0
|
$offset_cache->[$cache_slot+1] = $next_candidate_row; |
793
|
0
|
|
|
|
|
0
|
$offset_cache->[$cache_slot+2] = $file_pos_before_read; |
794
|
0
|
|
|
|
|
0
|
$self->_free_offset_cache_slot($cache_slot); |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
# get a new slot |
797
|
0
|
|
|
|
|
0
|
$cache_slot = $self->_allocate_offset_cache_slot(); |
798
|
0
|
|
|
|
|
0
|
$offset_cache->[$cache_slot+1] = $next_candidate_row; |
799
|
0
|
|
|
|
|
0
|
$offset_cache->[$cache_slot+2] = $file_pos_before_read; |
800
|
|
|
|
|
|
|
|
801
|
0
|
|
|
|
|
0
|
$cache_insert_counter <<= 2; # Double the insert counter |
802
|
|
|
|
|
|
|
} |
803
|
|
|
|
|
|
|
|
804
|
846
|
|
|
|
|
1554
|
for (my $i = 0; $i < @rule_columns_in_order; $i++) { |
805
|
801
|
|
|
|
|
1198
|
my $comparison = $comparison_for_column[$i]->(); |
806
|
|
|
|
|
|
|
|
807
|
801
|
100
|
100
|
|
|
2580
|
if ($comparison > 0 and $i <= $last_sort_column_in_rule) { |
|
|
100
|
|
|
|
|
|
808
|
|
|
|
|
|
|
# We've gone past the last thing that could possibly match |
809
|
|
|
|
|
|
|
|
810
|
121
|
50
|
|
|
|
239
|
if ($monitor_start_time) { |
811
|
0
|
|
|
|
|
0
|
UR::DBI->sql_fh->printf("FILE: $lines_read lines read for this request. $lines_matched matches\nFILE: TOTAL EXECUTE-FETCH TIME: %.4f s\n", Time::HiRes::time() - $monitor_start_time); |
812
|
|
|
|
|
|
|
} |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
#flock($fh,LOCK_UN); |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
# Save the info from the last row we read |
817
|
121
|
|
|
|
|
224
|
$offset_cache->[$cache_slot+1] = $next_candidate_row; |
818
|
121
|
|
|
|
|
248
|
$offset_cache->[$cache_slot+2] = $file_pos_before_read; |
819
|
121
|
|
|
|
|
489
|
return; |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
} elsif ($comparison) { |
822
|
|
|
|
|
|
|
# comparison didn't match, read another line from the file |
823
|
501
|
|
|
|
|
710
|
redo READ_LINE_FROM_FILE; |
824
|
|
|
|
|
|
|
} |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
# That comparison worked... stay in the for() loop for other comparisons |
827
|
|
|
|
|
|
|
} |
828
|
|
|
|
|
|
|
# All the comparisons return '0', meaning they passed |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
# Now see if the offset cache file data is different than the row we just read |
831
|
|
|
|
|
|
|
COMPARE_TO_CACHE: |
832
|
224
|
|
|
|
|
425
|
foreach my $column ( @sort_order_column_indexes) { |
833
|
19
|
|
|
19
|
|
103
|
no warnings 'uninitialized'; |
|
19
|
|
|
|
|
26
|
|
|
19
|
|
|
|
|
26062
|
|
834
|
223
|
50
|
|
|
|
719
|
if ($offset_cache->[$cache_slot+1]->[$column] ne $next_candidate_row->[$column]) { |
835
|
|
|
|
|
|
|
# They're different. Update the offset cache data |
836
|
223
|
|
|
|
|
304
|
$offset_cache->[$cache_slot+1] = $next_candidate_row; |
837
|
223
|
|
|
|
|
315
|
$offset_cache->[$cache_slot+2] = $file_pos_before_read; |
838
|
223
|
|
|
|
|
309
|
last COMPARE_TO_CACHE; |
839
|
|
|
|
|
|
|
} |
840
|
|
|
|
|
|
|
} |
841
|
|
|
|
|
|
|
|
842
|
224
|
50
|
33
|
|
|
1031
|
if (! $printed_first_match and $monitor_start_time) { |
843
|
0
|
|
|
|
|
0
|
UR::DBI->sql_fh->printf("FILE: First match after reading $lines_read lines\n"); |
844
|
0
|
|
|
|
|
0
|
$printed_first_match=1; |
845
|
|
|
|
|
|
|
} |
846
|
224
|
|
|
|
|
229
|
$lines_matched++; |
847
|
|
|
|
|
|
|
|
848
|
224
|
|
|
|
|
910
|
return $next_candidate_row; |
849
|
|
|
|
|
|
|
} |
850
|
372
|
|
|
|
|
2408
|
}; # end sub $iterator |
851
|
|
|
|
|
|
|
|
852
|
372
|
|
|
|
|
2380
|
Sub::Name::subname('UR::DataSource::File::__datasource_iterator(closure)__', $iterator); |
853
|
|
|
|
|
|
|
|
854
|
372
|
|
100
|
|
|
1026
|
my $count = $self->_open_query_count() || 0; |
855
|
372
|
|
|
|
|
890
|
$self->_open_query_count($count+1); |
856
|
372
|
|
|
|
|
1171
|
bless $iterator, 'UR::DataSource::File::Tracker'; |
857
|
372
|
|
|
|
|
1017
|
$iterator_data_source{$iterator} = $self; |
858
|
372
|
|
|
|
|
646
|
$iterator_cache_slot_refs{$iterator} = \$cache_slot; |
859
|
|
|
|
|
|
|
|
860
|
372
|
|
|
|
|
2815
|
return $iterator; |
861
|
|
|
|
|
|
|
} |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
sub UR::DataSource::File::Tracker::DESTROY { |
865
|
372
|
|
|
372
|
|
348
|
my $iterator = shift; |
866
|
372
|
|
|
|
|
893
|
my $ds = delete $iterator_data_source{$iterator}; |
867
|
372
|
50
|
|
|
|
781
|
return unless $ds; # The data source may have gone out of scope first during global destruction |
868
|
|
|
|
|
|
|
|
869
|
372
|
|
|
|
|
672
|
my $cache_slot_ref = delete $iterator_cache_slot_refs{$iterator}; |
870
|
372
|
50
|
33
|
|
|
1480
|
if (defined($cache_slot_ref) and defined($$cache_slot_ref)) { |
871
|
|
|
|
|
|
|
# Mark this slot unused |
872
|
|
|
|
|
|
|
#print STDERR "Freeing cache slot $cache_slot\n"; |
873
|
|
|
|
|
|
|
#$ds->_offset_cache->[$$cache_slot_ref] = 0; |
874
|
372
|
|
|
|
|
1065
|
$ds->_free_offset_cache_slot($$cache_slot_ref); |
875
|
|
|
|
|
|
|
} |
876
|
|
|
|
|
|
|
|
877
|
372
|
|
|
|
|
938
|
my $count = $ds->_open_query_count(); |
878
|
372
|
|
|
|
|
783
|
$ds->_open_query_count(--$count); |
879
|
|
|
|
|
|
|
|
880
|
372
|
100
|
|
|
|
938
|
return unless ($ds->quick_disconnect); |
881
|
368
|
100
|
66
|
|
|
1469
|
if ($count == 0 && $ds->has_default_handle) { |
882
|
|
|
|
|
|
|
# All open queries have supposedly been fulfilled. Close the |
883
|
|
|
|
|
|
|
# file handle and undef it so get_default_handle() will re-open if necessary |
884
|
366
|
|
|
|
|
692
|
my $fh = $ds->get_default_handle; |
885
|
|
|
|
|
|
|
|
886
|
366
|
50
|
|
|
|
844
|
UR::DBI->sql_fh->printf("FILE: CLOSING fileno ".fileno($fh)."\n") if ($ENV{'UR_DBI_MONITOR_SQL'}); |
887
|
|
|
|
|
|
|
#flock($fh,LOCK_UN); |
888
|
366
|
|
|
|
|
1114
|
$fh->close(); |
889
|
366
|
|
|
|
|
13968
|
$ds->__invalidate_get_default_handle__; |
890
|
|
|
|
|
|
|
} |
891
|
|
|
|
|
|
|
} |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
# Names of creation params that we should force to be listrefs |
894
|
|
|
|
|
|
|
our %creation_param_is_list = map { $_ => 1 } qw( column_order file_list sort_order constant_values ); |
895
|
|
|
|
|
|
|
sub create_from_inline_class_data { |
896
|
3
|
|
|
3
|
1
|
5
|
my($class, $class_data, $ds_data) = @_; |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
# User didn't specify columns in the file. Assumme every property is a column, and in the same order |
899
|
3
|
100
|
|
|
|
11
|
unless (exists $ds_data->{'column_order'}) { |
900
|
1
|
|
|
|
|
299
|
Carp::croak "data_source has no column_order specified"; |
901
|
|
|
|
|
|
|
} |
902
|
|
|
|
|
|
|
|
903
|
2
|
|
33
|
|
|
14
|
$ds_data->{'server'} ||= $ds_data->{'path'} || $ds_data->{'file'}; |
|
|
|
66
|
|
|
|
|
904
|
|
|
|
|
|
|
|
905
|
2
|
|
|
|
|
2
|
my %ds_creation_params; |
906
|
2
|
|
|
|
|
5
|
foreach my $param ( qw( delimiter record_separator column_order skip_first_line server file_list sort_order constant_values ) ) { |
907
|
16
|
100
|
|
|
|
27
|
if (exists $ds_data->{$param}) { |
908
|
7
|
50
|
66
|
|
|
28
|
if ($creation_param_is_list{$param} and ref($ds_data->{$param}) ne 'ARRAY') { |
909
|
0
|
|
|
|
|
0
|
$ds_creation_params{$param} = \( $ds_data->{$param} ); |
910
|
|
|
|
|
|
|
} else { |
911
|
7
|
|
|
|
|
10
|
$ds_creation_params{$param} = $ds_data->{$param}; |
912
|
|
|
|
|
|
|
} |
913
|
|
|
|
|
|
|
} |
914
|
|
|
|
|
|
|
} |
915
|
|
|
|
|
|
|
|
916
|
2
|
|
|
|
|
15
|
my($namespace, $class_name) = ($class_data->{'class_name'} =~ m/^(\w+?)::(.*)/); |
917
|
2
|
|
|
|
|
5
|
my $ds_id = "${namespace}::DataSource::${class_name}"; |
918
|
2
|
|
|
|
|
4
|
my $ds_type = delete $ds_data->{'is'}; |
919
|
2
|
|
|
|
|
13
|
my $ds = $ds_type->create( %ds_creation_params, id => $ds_id ); |
920
|
2
|
|
|
|
|
9
|
return $ds; |
921
|
|
|
|
|
|
|
} |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
# The string used to join fields of a row together |
925
|
|
|
|
|
|
|
# |
926
|
|
|
|
|
|
|
# Since the 'delimiter' property is interpreted as a regex in the reading |
927
|
|
|
|
|
|
|
# code, we'll try to be smart about making a real string from that. |
928
|
|
|
|
|
|
|
# |
929
|
|
|
|
|
|
|
# subclasses can override this to provide a different implementation |
930
|
|
|
|
|
|
|
sub join_pattern { |
931
|
4
|
|
|
4
|
0
|
6
|
my $self = shift; |
932
|
|
|
|
|
|
|
|
933
|
4
|
|
|
|
|
11
|
my $join_pattern = $self->delimiter; |
934
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
# make some common substitutions... |
936
|
4
|
50
|
|
|
|
11
|
if ($join_pattern eq '\s*,\s*') { |
937
|
|
|
|
|
|
|
# The default... |
938
|
0
|
|
|
|
|
0
|
return ', '; |
939
|
|
|
|
|
|
|
} |
940
|
|
|
|
|
|
|
|
941
|
4
|
|
|
|
|
9
|
$join_pattern =~ s/\\s*//g; # Turn 0-or-more whitespaces to nothing |
942
|
4
|
|
|
|
|
4
|
$join_pattern =~ s/\\t/\t/; # tab |
943
|
4
|
|
|
|
|
6
|
$join_pattern =~ s/\\s/ /; # whitespace |
944
|
|
|
|
|
|
|
|
945
|
4
|
|
|
|
|
5
|
return $join_pattern; |
946
|
|
|
|
|
|
|
} |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
sub _sync_database { |
951
|
4
|
|
|
4
|
|
5
|
my $self = shift; |
952
|
4
|
|
|
|
|
8
|
my %params = @_; |
953
|
|
|
|
|
|
|
|
954
|
4
|
50
|
|
|
|
11
|
unless (ref($self)) { |
955
|
0
|
0
|
|
|
|
0
|
if ($self->isa("UR::Singleton")) { |
956
|
0
|
|
|
|
|
0
|
$self = $self->_singleton_object; |
957
|
|
|
|
|
|
|
} |
958
|
|
|
|
|
|
|
else { |
959
|
0
|
|
|
|
|
0
|
die "Called as a class-method on a non-singleton datasource!"; |
960
|
|
|
|
|
|
|
} |
961
|
|
|
|
|
|
|
} |
962
|
|
|
|
|
|
|
|
963
|
4
|
|
|
|
|
15
|
my $read_fh = $self->get_default_handle(); |
964
|
4
|
50
|
|
|
|
9
|
unless ($read_fh) { |
965
|
0
|
|
|
|
|
0
|
Carp::croak($self->class . ": Can't _sync_database(): Can't open file " . $self->server . " for reading: $!"); |
966
|
|
|
|
|
|
|
} |
967
|
|
|
|
|
|
|
|
968
|
4
|
|
|
|
|
10
|
my $original_data_file = $self->server; |
969
|
4
|
|
|
|
|
152
|
my $original_data_dir = File::Basename::dirname($original_data_file); |
970
|
4
|
|
|
|
|
7
|
my $use_quick_rename; |
971
|
4
|
50
|
|
|
|
46
|
unless (-d $original_data_dir){ |
972
|
0
|
|
|
|
|
0
|
File::Path::mkpath($original_data_dir); |
973
|
|
|
|
|
|
|
} |
974
|
4
|
50
|
|
|
|
29
|
if (-w $original_data_dir) { |
|
|
0
|
|
|
|
|
|
975
|
4
|
|
|
|
|
5
|
$use_quick_rename = 1; # We can write to the data dir |
976
|
|
|
|
|
|
|
} elsif (! -w $original_data_file) { |
977
|
0
|
|
|
|
|
0
|
$self->error_message("Neither the directory nor the file for $original_data_file are writable - cannot sync_database"); |
978
|
0
|
|
|
|
|
0
|
return; |
979
|
|
|
|
|
|
|
} |
980
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
|
982
|
4
|
|
|
|
|
12
|
my $split_regex = $self->_regex(); |
983
|
4
|
|
|
|
|
24
|
my $join_pattern = $self->join_pattern; |
984
|
4
|
|
|
|
|
11
|
my $record_separator = $self->record_separator; |
985
|
4
|
|
|
|
|
13
|
local $/; # Make sure some wise guy hasn't changed this out from under us |
986
|
4
|
|
|
|
|
6
|
$/ = $record_separator; |
987
|
|
|
|
|
|
|
|
988
|
4
|
|
|
|
|
9
|
my $csv_column_order_names = $self->column_order; |
989
|
4
|
|
|
|
|
9
|
my $csv_column_count = scalar(@$csv_column_order_names); |
990
|
4
|
|
|
|
|
3
|
my %column_name_to_index_map; |
991
|
4
|
|
|
|
|
14
|
for (my $i = 0; $i < @$csv_column_order_names; $i++) { |
992
|
12
|
|
|
|
|
27
|
$column_name_to_index_map{$csv_column_order_names->[$i]} = $i; |
993
|
|
|
|
|
|
|
} |
994
|
|
|
|
|
|
|
|
995
|
4
|
|
|
|
|
7
|
my $changed_objects = delete $params{changed_objects}; |
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
# We're going to assumme all the passed-in objects are of the same class *gulp* |
999
|
4
|
|
|
|
|
14
|
my $class_name = $changed_objects->[0]->class; |
1000
|
4
|
|
|
|
|
29
|
my $class_meta = UR::Object::Type->get(class_name => $class_name); |
1001
|
15
|
|
|
|
|
22
|
my %column_name_to_property_meta = map { $_->column_name => $_ } |
1002
|
4
|
|
|
|
|
35
|
grep { $_->column_name } |
|
19
|
|
|
|
|
29
|
|
1003
|
|
|
|
|
|
|
$class_meta->all_property_metas; |
1004
|
4
|
|
|
|
|
9
|
my @property_names_in_column_order; |
1005
|
4
|
|
|
|
|
9
|
foreach my $column_name ( @$csv_column_order_names ) { |
1006
|
12
|
|
|
|
|
13
|
my $prop_meta = $column_name_to_property_meta{$column_name}; |
1007
|
12
|
50
|
|
|
|
20
|
unless ($prop_meta) { |
1008
|
0
|
|
|
|
|
0
|
die "Data source " . $self->class . " id " . $self->id . |
1009
|
|
|
|
|
|
|
" could not resolve a $class_name property for the data source's column named $column_name"; |
1010
|
|
|
|
|
|
|
} |
1011
|
|
|
|
|
|
|
|
1012
|
12
|
|
|
|
|
19
|
push @property_names_in_column_order, $prop_meta->property_name; |
1013
|
|
|
|
|
|
|
} |
1014
|
|
|
|
|
|
|
|
1015
|
4
|
|
|
|
|
6
|
my $insert = []; |
1016
|
4
|
|
|
|
|
6
|
my $update = {}; |
1017
|
4
|
|
|
|
|
5
|
my $delete = {}; |
1018
|
4
|
|
|
|
|
7
|
foreach my $obj ( @$changed_objects ) { |
1019
|
16
|
100
|
|
|
|
50
|
if ($obj->isa('UR::Object::Ghost')) { |
|
|
100
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
# This should be removed from the file |
1021
|
4
|
|
|
|
|
6
|
my $original = $obj->{'db_committed'}; |
1022
|
4
|
|
|
|
|
4
|
my $line = join($join_pattern, @{$original}{@property_names_in_column_order}) . $record_separator; |
|
4
|
|
|
|
|
12
|
|
1023
|
4
|
|
|
|
|
7
|
$delete->{$line} = $obj; |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
} elsif ($obj->{'db_committed'}) { |
1026
|
|
|
|
|
|
|
# This object is changed since it was read in the file |
1027
|
4
|
|
|
|
|
6
|
my $original = $obj->{'db_committed'}; |
1028
|
4
|
|
|
|
|
4
|
my $original_line = join($join_pattern, @{$original}{@property_names_in_column_order}) . $record_separator; |
|
4
|
|
|
|
|
13
|
|
1029
|
4
|
|
|
|
|
6
|
my $changed_line = join($join_pattern, @{$obj}{@property_names_in_column_order}) . $record_separator; |
|
4
|
|
|
|
|
7
|
|
1030
|
4
|
|
|
|
|
9
|
$update->{$original_line} = $changed_line; |
1031
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
} else { |
1033
|
|
|
|
|
|
|
# This object is new and should be added to the file |
1034
|
8
|
|
|
|
|
15
|
push @$insert, [ @{$obj}{@property_names_in_column_order} ]; |
|
8
|
|
|
|
|
17
|
|
1035
|
|
|
|
|
|
|
} |
1036
|
|
|
|
|
|
|
} |
1037
|
|
|
|
|
|
|
|
1038
|
4
|
|
|
|
|
11
|
my $sort_order_names = $self->sort_order; |
1039
|
4
|
|
|
|
|
14
|
foreach my $sort_column_name ( @$sort_order_names ) { |
1040
|
4
|
50
|
|
|
|
12
|
unless (exists $column_name_to_index_map{$sort_column_name}) { |
1041
|
0
|
|
|
|
|
0
|
Carp::croak("Column name '$sort_column_name' appears in the sort_order list, but not in the column_order list for data source ".$self->id); |
1042
|
|
|
|
|
|
|
} |
1043
|
|
|
|
|
|
|
} |
1044
|
4
|
|
|
|
|
6
|
my $file_is_sorted = scalar(@$sort_order_names); |
1045
|
4
|
|
|
|
|
7
|
my %column_sorts_numerically = map { $_->column_name => $_->is_numeric } |
|
15
|
|
|
|
|
28
|
|
1046
|
|
|
|
|
|
|
values %column_name_to_property_meta; |
1047
|
|
|
|
|
|
|
my $row_sort_sub = sub ($$) { |
1048
|
20
|
|
|
20
|
|
15
|
my $comparison; |
1049
|
|
|
|
|
|
|
|
1050
|
20
|
|
|
|
|
18
|
foreach my $column_name ( @$sort_order_names ) { |
1051
|
20
|
|
|
|
|
21
|
my $i = $column_name_to_index_map{$column_name}; |
1052
|
20
|
50
|
|
|
|
24
|
if ($column_sorts_numerically{$column_name}) { |
1053
|
20
|
|
|
|
|
25
|
$comparison = $_[0]->[$i] <=> $_[1]->[$i]; |
1054
|
|
|
|
|
|
|
} else { |
1055
|
0
|
|
|
|
|
0
|
$comparison = $_[0]->[$i] cmp $_[1]->[$i]; |
1056
|
|
|
|
|
|
|
} |
1057
|
20
|
50
|
|
|
|
39
|
return $comparison if $comparison != 0; |
1058
|
|
|
|
|
|
|
} |
1059
|
0
|
|
|
|
|
0
|
return 0; |
1060
|
4
|
|
|
|
|
21
|
}; |
1061
|
4
|
50
|
33
|
|
|
25
|
if ($sort_order_names && $file_is_sorted && scalar(@$insert)) { |
|
|
|
50
|
|
|
|
|
1062
|
|
|
|
|
|
|
# the inserted things should be sorted the same way as the file |
1063
|
4
|
|
|
|
|
9
|
my @sorted = sort $row_sort_sub @$insert; |
1064
|
4
|
|
|
|
|
6
|
$insert = \@sorted; |
1065
|
|
|
|
|
|
|
} |
1066
|
|
|
|
|
|
|
|
1067
|
4
|
|
|
|
|
6
|
my $write_fh; |
1068
|
|
|
|
|
|
|
my $temp_file_name; |
1069
|
4
|
50
|
|
|
|
7
|
if ($use_quick_rename) { |
1070
|
4
|
|
|
|
|
28
|
$temp_file_name = sprintf("%s/.%d.%d" , $original_data_dir, time(), $$); |
1071
|
4
|
|
|
|
|
21
|
$write_fh = IO::File->new($temp_file_name, O_WRONLY|O_CREAT); |
1072
|
|
|
|
|
|
|
} else { |
1073
|
0
|
|
|
|
|
0
|
$write_fh = File::Temp->new(UNLINK => 1); |
1074
|
0
|
0
|
|
|
|
0
|
$temp_file_name = $write_fh->filename if ($write_fh); |
1075
|
|
|
|
|
|
|
} |
1076
|
4
|
50
|
|
|
|
504
|
unless ($write_fh) { |
1077
|
0
|
|
|
|
|
0
|
Carp::croak "Can't create temporary file for writing: $!"; |
1078
|
|
|
|
|
|
|
} |
1079
|
|
|
|
|
|
|
|
1080
|
4
|
|
|
|
|
6
|
my $monitor_start_time; |
1081
|
4
|
50
|
|
|
|
18
|
if ($ENV{'UR_DBI_MONITOR_SQL'}) { |
1082
|
0
|
|
|
|
|
0
|
$monitor_start_time = Time::HiRes::time(); |
1083
|
0
|
|
|
|
|
0
|
my $time = time(); |
1084
|
0
|
|
|
|
|
0
|
UR::DBI->sql_fh->printf("\nFILE: SYNC_DATABASE AT %d [%s]. Started transaction for %s to temp file %s\n", |
1085
|
|
|
|
|
|
|
$time, scalar(localtime($time)), $original_data_file, $temp_file_name); |
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
} |
1088
|
|
|
|
|
|
|
|
1089
|
4
|
50
|
|
|
|
29
|
unless (flock($read_fh,LOCK_SH)) { |
1090
|
0
|
0
|
|
|
|
0
|
unless ($! == EOPNOTSUPP ) { |
1091
|
0
|
|
|
|
|
0
|
Carp::croak($self->class(). ": Can't get exclusive lock for file ".$self->server.": $!"); |
1092
|
|
|
|
|
|
|
} |
1093
|
|
|
|
|
|
|
} |
1094
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
# write headers to the new file |
1096
|
4
|
|
|
|
|
17
|
for (my $i = 0; $i < $self->skip_first_line; $i++) { |
1097
|
0
|
|
|
|
|
0
|
my $line = <$read_fh>; |
1098
|
0
|
|
|
|
|
0
|
$write_fh->print($line); |
1099
|
|
|
|
|
|
|
} |
1100
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
|
1102
|
4
|
|
|
|
|
6
|
my $line; |
1103
|
|
|
|
|
|
|
READ_A_LINE: |
1104
|
4
|
|
|
|
|
5
|
while(1) { |
1105
|
20
|
100
|
|
|
|
26
|
unless ($line) { |
1106
|
15
|
|
|
|
|
90
|
$line = <$read_fh>; |
1107
|
15
|
100
|
|
|
|
25
|
last unless defined $line; |
1108
|
|
|
|
|
|
|
} |
1109
|
|
|
|
|
|
|
|
1110
|
16
|
100
|
50
|
|
|
38
|
if ($file_is_sorted && scalar(@$insert)) { |
1111
|
|
|
|
|
|
|
# there are sorted things waiting to insert |
1112
|
15
|
|
|
|
|
16
|
my $chomped = $line; |
1113
|
15
|
|
|
|
|
14
|
chomp $chomped; |
1114
|
15
|
|
|
|
|
51
|
my $row = [ split($split_regex, $chomped, $csv_column_count) ]; |
1115
|
15
|
|
|
|
|
23
|
my $comparison = $row_sort_sub->($row, $insert->[0]); |
1116
|
15
|
100
|
|
|
|
29
|
if ($comparison > 0) { |
1117
|
|
|
|
|
|
|
# write the object's data |
1118
|
19
|
|
|
19
|
|
104
|
no warnings 'uninitialized'; # Some of the object's data may be undef |
|
19
|
|
|
|
|
32
|
|
|
19
|
|
|
|
|
3899
|
|
1119
|
5
|
|
|
|
|
7
|
my $new_row = shift @$insert; |
1120
|
5
|
|
|
|
|
12
|
my $new_line = join($join_pattern, @$new_row) . $record_separator; |
1121
|
|
|
|
|
|
|
|
1122
|
5
|
50
|
|
|
|
10
|
if ($ENV{'UR_DBI_MONITOR_SQL'}) { |
1123
|
0
|
|
|
|
|
0
|
UR::DBI->sql_fh->print("INSERT >>$new_line<<\n"); |
1124
|
|
|
|
|
|
|
} |
1125
|
|
|
|
|
|
|
|
1126
|
5
|
|
|
|
|
13
|
$write_fh->print($new_line); |
1127
|
|
|
|
|
|
|
# Don't undef the last line read, meaning it could still be written to the output... |
1128
|
5
|
|
|
|
|
35
|
next READ_A_LINE; |
1129
|
|
|
|
|
|
|
} |
1130
|
|
|
|
|
|
|
} |
1131
|
|
|
|
|
|
|
|
1132
|
11
|
100
|
|
|
|
32
|
if (my $obj = delete $delete->{$line}) { |
|
|
100
|
|
|
|
|
|
1133
|
2
|
50
|
|
|
|
8
|
if ($ENV{'UR_DBI_MONITOR_SQL'}) { |
1134
|
0
|
|
|
|
|
0
|
UR::DBI->sql_fh->print("DELETE >>$line<<\n"); |
1135
|
|
|
|
|
|
|
} |
1136
|
2
|
|
|
|
|
2
|
$line = undef; |
1137
|
2
|
|
|
|
|
4
|
next; |
1138
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
} elsif (my $changed = delete $update->{$line}) { |
1140
|
4
|
50
|
|
|
|
11
|
if ($ENV{'UR_DBI_MONITOR_SQL'}) { |
1141
|
0
|
|
|
|
|
0
|
UR::DBI->sql_fh->print("UPDATE replace >>$line<< with >>$changed<<\n"); |
1142
|
|
|
|
|
|
|
} |
1143
|
4
|
|
|
|
|
8
|
$write_fh->print($changed); |
1144
|
4
|
|
|
|
|
15
|
$line = undef; |
1145
|
4
|
|
|
|
|
5
|
next; |
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
} else { |
1148
|
|
|
|
|
|
|
# This line from the file was unchanged in the app |
1149
|
5
|
|
|
|
|
10
|
$write_fh->print($line); |
1150
|
5
|
|
|
|
|
24
|
$line = undef; |
1151
|
|
|
|
|
|
|
} |
1152
|
|
|
|
|
|
|
} |
1153
|
|
|
|
|
|
|
|
1154
|
4
|
50
|
|
|
|
12
|
if (keys %$delete) { |
1155
|
0
|
|
|
|
|
0
|
$self->warning_message("There were ",scalar(keys %$delete)," deleted $class_name objects that did not match data in the file"); |
1156
|
|
|
|
|
|
|
} |
1157
|
4
|
50
|
|
|
|
14
|
if (keys %$update) { |
1158
|
0
|
|
|
|
|
0
|
$self->warning_message("There were ",scalar(keys %$update)," updated $class_name objects that did not match data in the file"); |
1159
|
|
|
|
|
|
|
} |
1160
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
# finish out by writing the rest of the new data |
1162
|
4
|
|
|
|
|
7
|
foreach my $new_row ( @$insert ) { |
1163
|
19
|
|
|
19
|
|
89
|
no warnings 'uninitialized'; # Some of the object's data may be undef |
|
19
|
|
|
|
|
32
|
|
|
19
|
|
|
|
|
8317
|
|
1164
|
3
|
|
|
|
|
11
|
my $new_line = join($join_pattern, @$new_row) . $record_separator; |
1165
|
3
|
50
|
|
|
|
9
|
if ($ENV{'UR_DBI_MONITOR_SQL'}) { |
1166
|
0
|
|
|
|
|
0
|
UR::DBI->sql_fh->print("INSERT >>$new_line<<\n"); |
1167
|
|
|
|
|
|
|
} |
1168
|
3
|
|
|
|
|
7
|
$write_fh->print($new_line); |
1169
|
|
|
|
|
|
|
} |
1170
|
4
|
|
|
|
|
28
|
$write_fh->close(); |
1171
|
|
|
|
|
|
|
|
1172
|
4
|
50
|
|
|
|
253
|
if ($use_quick_rename) { |
1173
|
4
|
50
|
|
|
|
13
|
if ($ENV{'UR_DBI_MONITOR_SQL'}) { |
1174
|
0
|
|
|
|
|
0
|
UR::DBI->sql_fh->print("FILE: COMMIT rename $temp_file_name over $original_data_file\n"); |
1175
|
|
|
|
|
|
|
} |
1176
|
|
|
|
|
|
|
|
1177
|
4
|
50
|
|
|
|
195
|
unless(rename($temp_file_name, $original_data_file)) { |
1178
|
0
|
|
|
|
|
0
|
$self->error_message("Can't rename the temp file over the original file: $!"); |
1179
|
0
|
|
|
|
|
0
|
return; |
1180
|
|
|
|
|
|
|
} |
1181
|
|
|
|
|
|
|
} else { |
1182
|
|
|
|
|
|
|
# We have to copy the data from the temp file to the original file |
1183
|
|
|
|
|
|
|
|
1184
|
0
|
0
|
|
|
|
0
|
if ($ENV{'UR_DBI_MONITOR_SQL'}) { |
1185
|
0
|
|
|
|
|
0
|
UR::DBI->sql_fh->print("FILE: COMMIT write over $original_data_file in place\n"); |
1186
|
|
|
|
|
|
|
} |
1187
|
0
|
|
|
|
|
0
|
my $new_write_fh = IO::File->new($original_data_file, O_WRONLY|O_TRUNC); |
1188
|
0
|
0
|
|
|
|
0
|
unless ($new_write_fh) { |
1189
|
0
|
|
|
|
|
0
|
$self->error_message("Can't open $original_data_file for writing: $!"); |
1190
|
0
|
|
|
|
|
0
|
return; |
1191
|
|
|
|
|
|
|
} |
1192
|
|
|
|
|
|
|
|
1193
|
0
|
|
|
|
|
0
|
my $temp_file_fh = IO::File->new($temp_file_name); |
1194
|
0
|
0
|
|
|
|
0
|
unless ($temp_file_fh) { |
1195
|
0
|
|
|
|
|
0
|
$self->error_message("Can't open $temp_file_name for reading: $!"); |
1196
|
0
|
|
|
|
|
0
|
return; |
1197
|
|
|
|
|
|
|
} |
1198
|
|
|
|
|
|
|
|
1199
|
0
|
|
|
|
|
0
|
while(<$temp_file_fh>) { |
1200
|
0
|
|
|
|
|
0
|
$new_write_fh->print($_); |
1201
|
|
|
|
|
|
|
} |
1202
|
|
|
|
|
|
|
|
1203
|
0
|
|
|
|
|
0
|
$new_write_fh->close(); |
1204
|
|
|
|
|
|
|
} |
1205
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
# Because of the rename/copy process during syncing, the previously opened filehandle may |
1207
|
|
|
|
|
|
|
# not be valid anymore. get_default_handle will reopen the file next time it's needed |
1208
|
4
|
|
|
|
|
27
|
$self->_invalidate_cache(); |
1209
|
4
|
|
|
|
|
33
|
$self->__invalidate_get_default_handle__; |
1210
|
|
|
|
|
|
|
|
1211
|
4
|
50
|
|
|
|
9
|
if ($ENV{'UR_DBI_MONITOR_SQL'}) { |
1212
|
0
|
|
|
|
|
0
|
UR::DBI->sql_fh->printf("FILE: TOTAL COMMIT TIME: %.4f s\n", Time::HiRes::time() - $monitor_start_time); |
1213
|
|
|
|
|
|
|
} |
1214
|
|
|
|
|
|
|
|
1215
|
4
|
|
|
|
|
32
|
flock($read_fh, LOCK_UN); |
1216
|
4
|
|
|
|
|
11
|
$read_fh->close(); |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
# FIXME - this is ugly... With RDBMS-type data sources, they will call $dbh->commit() which |
1219
|
|
|
|
|
|
|
# gets to UR::DBI->commit(), which calls _set_object_saved_committed for them. Since we're |
1220
|
|
|
|
|
|
|
# not using DBI we have to do this 2-part thing ourselves. In the future, we might break |
1221
|
|
|
|
|
|
|
# out things so the saving to the temp file goes in _sync_database(), and moving the temp |
1222
|
|
|
|
|
|
|
# file over the original goes in commit() |
1223
|
4
|
50
|
|
|
|
2092
|
unless ($self->_set_specified_objects_saved_uncommitted($changed_objects)) { |
1224
|
0
|
|
|
|
|
0
|
Carp::croak("Error setting objects to a saved state after sync_database. Exiting."); |
1225
|
0
|
|
|
|
|
0
|
return; |
1226
|
|
|
|
|
|
|
} |
1227
|
|
|
|
|
|
|
|
1228
|
4
|
|
|
|
|
47
|
$self->_set_specified_objects_saved_committed($changed_objects); |
1229
|
4
|
|
|
|
|
73
|
return 1; |
1230
|
|
|
|
|
|
|
} |
1231
|
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
sub initializer_should_create_column_name_for_class_properties { |
1235
|
28
|
|
|
28
|
0
|
353
|
1; |
1236
|
|
|
|
|
|
|
} |
1237
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
1; |
1240
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
=pod |
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
=head1 NAME |
1244
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
UR::DataSource::File - Parent class for file-based data sources |
1246
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
=head1 DEPRECATED |
1248
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
This module is deprecated. Use UR::DataSource::Filesystem instead. |
1250
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
=head1 SYNOPSIS |
1252
|
|
|
|
|
|
|
|
1253
|
|
|
|
|
|
|
package MyNamespace::DataSource::MyFile; |
1254
|
|
|
|
|
|
|
class MyNamespace::DataSource::MyFile { |
1255
|
|
|
|
|
|
|
is => ['UR::DataSource::File', 'UR::Singleton'], |
1256
|
|
|
|
|
|
|
}; |
1257
|
|
|
|
|
|
|
sub server { '/path/to/file' } |
1258
|
|
|
|
|
|
|
sub delimiter { "\t" } |
1259
|
|
|
|
|
|
|
sub column_order { ['thing_id', 'thing_name', 'thing_color' ] } |
1260
|
|
|
|
|
|
|
sub sort_order { ['thing_id'] } |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
package main; |
1263
|
|
|
|
|
|
|
class MyNamespace::Thing { |
1264
|
|
|
|
|
|
|
id_by => 'thing_id', |
1265
|
|
|
|
|
|
|
has => [ 'thing_id', 'thing_name', 'thing_color' ], |
1266
|
|
|
|
|
|
|
data_source => 'MyNamespace::DataSource::MyFile', |
1267
|
|
|
|
|
|
|
} |
1268
|
|
|
|
|
|
|
my @objs = MyNamespace::Thing->get(thing_name => 'Bob'); |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
=head1 DESCRIPTION |
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
Classes which wish to retrieve their data from a regular file can use a UR::DataSource::File-based |
1273
|
|
|
|
|
|
|
data source. The modules implementing these data sources live under the DataSource subdirectory |
1274
|
|
|
|
|
|
|
of the application's Namespace, by convention. Besides defining a class for your data source |
1275
|
|
|
|
|
|
|
inheriting from UR::DataSource::File, it should have the following methods, either as properties |
1276
|
|
|
|
|
|
|
or functions in the package. |
1277
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
=head2 Configuration |
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
These methods determine the configuration for your data source. |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
=over 4 |
1283
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
=item server() |
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
server() should return a string representing the pathname of the file where the data is stored. |
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
=item file_list() |
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
The file_list() method should return a listref of pathnames to one or more identical files |
1291
|
|
|
|
|
|
|
where data is stored. Use file_list() instead of server() when you want to load-balance several NFS |
1292
|
|
|
|
|
|
|
servers, for example. |
1293
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
You must have either server() or file_list() in your module, but not both. The existence of server() |
1295
|
|
|
|
|
|
|
takes precedence over file_list(). |
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
=item delimiter() |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
delimiter() should return a string representing how the fields in each record are split into |
1300
|
|
|
|
|
|
|
columns. This string is interpreted as a regex internally. The default delimiter is "\s*,\s*" |
1301
|
|
|
|
|
|
|
meaning that the file is separated by commas. |
1302
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
=item record_separator() |
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
record_separator() should return a string that gets stored in $/ before getline() is called on the |
1306
|
|
|
|
|
|
|
file's filehandle. The default record_separator() is "\n" meaning that the file's records are |
1307
|
|
|
|
|
|
|
separated by newlines. |
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
=item skip_first_line() |
1310
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
skip_first_line() should return a boolean value. If true, the first line of the file is ignored, for |
1312
|
|
|
|
|
|
|
example if the first line defines the columns in the file. |
1313
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
=item column_order() |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
column_order() should return a listref of column names in the file. column_order is required; there |
1317
|
|
|
|
|
|
|
is no default. |
1318
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
=item sort_order() |
1320
|
|
|
|
|
|
|
|
1321
|
|
|
|
|
|
|
If the data file is sorted in some way, sort_order() should return a listref of column names (which must |
1322
|
|
|
|
|
|
|
exist in column_order()) by which the file is sorted. This gives the system a hint about how the file |
1323
|
|
|
|
|
|
|
is structured, and is able to make shortcuts when reading the file to speed up data access. The default |
1324
|
|
|
|
|
|
|
is to assumme the file is not sorted. |
1325
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
=back |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
=head1 INHERITANCE |
1329
|
|
|
|
|
|
|
|
1330
|
|
|
|
|
|
|
UR::DataSource |
1331
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
=head1 SEE ALSO |
1333
|
|
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
UR, UR::DataSource |
1335
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
=cut |