| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package UR::DataSource::File; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | # NOTE!  This module is deprecated.  Use UR::DataSource::Filesystem instead. | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | # A data source implementation for text files where the fields | 
| 6 |  |  |  |  |  |  | # are delimited by commas (or anything else really).  Usually, | 
| 7 |  |  |  |  |  |  | # the lines in the file will be sorted by one or more columns, | 
| 8 |  |  |  |  |  |  | # but it isn't strictly necessary | 
| 9 |  |  |  |  |  |  | # | 
| 10 |  |  |  |  |  |  | # For now, it's structured around files where the record is delimited by | 
| 11 |  |  |  |  |  |  | # newlines, and the fields are delimited by qr(\s*,\s*).  Those are | 
| 12 |  |  |  |  |  |  | # overridable in concrete data sources by specifying record_seperator() and | 
| 13 |  |  |  |  |  |  | # delimiter(). | 
| 14 |  |  |  |  |  |  | # FIXME - work out a way to support record-oriented data as well as line-oriented data | 
| 15 |  |  |  |  |  |  |  | 
| 16 | 19 |  |  | 19 |  | 535 | use UR; | 
|  | 19 |  |  |  |  | 26 |  | 
|  | 19 |  |  |  |  | 123 |  | 
| 17 | 19 |  |  | 19 |  | 69 | use strict; | 
|  | 19 |  |  |  |  | 26 |  | 
|  | 19 |  |  |  |  | 328 |  | 
| 18 | 19 |  |  | 19 |  | 63 | use warnings; | 
|  | 19 |  |  |  |  | 24 |  | 
|  | 19 |  |  |  |  | 851 |  | 
| 19 |  |  |  |  |  |  | our $VERSION = "0.46"; # UR $VERSION; | 
| 20 |  |  |  |  |  |  |  | 
| 21 | 19 |  |  | 19 |  | 72 | use Fcntl qw(:DEFAULT :flock); | 
|  | 19 |  |  |  |  | 20 |  | 
|  | 19 |  |  |  |  | 7662 |  | 
| 22 | 19 |  |  | 19 |  | 94 | use Errno qw(EINTR EAGAIN EOPNOTSUPP); | 
|  | 19 |  |  |  |  | 27 |  | 
|  | 19 |  |  |  |  | 2308 |  | 
| 23 | 19 |  |  | 19 |  | 77 | use File::Temp; | 
|  | 19 |  |  |  |  | 30 |  | 
|  | 19 |  |  |  |  | 1116 |  | 
| 24 | 19 |  |  | 19 |  | 79 | use File::Basename; | 
|  | 19 |  |  |  |  | 31 |  | 
|  | 19 |  |  |  |  | 847 |  | 
| 25 | 19 |  |  | 19 |  | 73 | use IO::File qw(); | 
|  | 19 |  |  |  |  | 22 |  | 
|  | 19 |  |  |  |  | 24865 |  | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | our @CARP_NOT = qw( UR::Context UR::DataSource::FileMux UR::Object::Type ); | 
| 28 |  |  |  |  |  |  |  | 
| 29 |  |  |  |  |  |  | class UR::DataSource::File { | 
| 30 |  |  |  |  |  |  | is => ['UR::DataSource'], | 
| 31 |  |  |  |  |  |  | has => [ | 
| 32 |  |  |  |  |  |  | delimiter             => { is => 'String', default_value => '\s*,\s*', doc => 'Delimiter between columns on the same line' }, | 
| 33 |  |  |  |  |  |  | record_separator      => { is => 'String', default_value => "\n", doc => 'Delimiter between lines in the file' }, | 
| 34 |  |  |  |  |  |  | column_order          => { is => 'ARRAY',  doc => 'Names of the columns in the file, in order' }, | 
| 35 |  |  |  |  |  |  | skip_first_line       => { is => 'Integer', default_value => 0, doc => 'Number of lines at the start of the file to skip' }, | 
| 36 |  |  |  |  |  |  | handle_class          => { is => 'String', default_value => 'IO::File', doc => 'Class to use for new file handles' }, | 
| 37 |  |  |  |  |  |  | quick_disconnect      => { is => 'Boolean', default_value => 1, doc => 'Do not hold the file handle open between requests' }, | 
| 38 |  |  |  |  |  |  | ], | 
| 39 |  |  |  |  |  |  | has_optional => [ | 
| 40 |  |  |  |  |  |  | server                => { is => 'String', doc => 'pathname to the data file' }, | 
| 41 |  |  |  |  |  |  | file_list             => { is => 'ARRAY',  doc => 'list of pathnames of equivalent files' }, | 
| 42 |  |  |  |  |  |  | sort_order            => { is => 'ARRAY',  doc => 'Names of the columns by which the data file is sorted' }, | 
| 43 |  |  |  |  |  |  | constant_values       => { is => 'ARRAY',  doc => 'Property names which are not in the data file(s), but are part of the objects loaded from the data source' }, | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | # REMOVE | 
| 47 |  |  |  |  |  |  | #file_cache_index      => { is => 'Integer', doc => 'index into the file cache where the next read will be placed' }, | 
| 48 |  |  |  |  |  |  | _open_query_count      => { is => 'Integer', doc => 'number of queries currently using this data source, used internally' }, | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | ], | 
| 51 |  |  |  |  |  |  | doc => 'A data source for line-oriented files', | 
| 52 |  |  |  |  |  |  | }; | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  |  | 
| 55 | 10 |  |  | 10 | 0 | 23 | sub can_savepoint { 0;}  # Doesn't support savepoints | 
| 56 |  |  |  |  |  |  |  | 
| 57 |  |  |  |  |  |  | sub create_default_handle { | 
| 58 | 375 |  |  | 375 | 0 | 416 | my $self = shift; | 
| 59 |  |  |  |  |  |  |  | 
| 60 | 375 | 50 |  |  |  | 1113 | if ($ENV{'UR_DBI_MONITOR_SQL'}) { | 
| 61 | 0 |  |  |  |  | 0 | my $time = time(); | 
| 62 | 0 |  |  |  |  | 0 | UR::DBI->sql_fh->printf("\nFILE OPEN AT %d [%s]\n",$time, scalar(localtime($time))); | 
| 63 |  |  |  |  |  |  | } | 
| 64 |  |  |  |  |  |  |  | 
| 65 | 375 |  |  |  |  | 1459 | my $filename = $self->server; | 
| 66 | 375 | 100 |  |  |  | 11748 | unless (-e $filename) { | 
| 67 |  |  |  |  |  |  | # file doesn't exist | 
| 68 | 2 |  |  |  |  | 6 | $filename = '/dev/null'; | 
| 69 |  |  |  |  |  |  | } | 
| 70 |  |  |  |  |  |  |  | 
| 71 | 375 |  |  |  |  | 1174 | my $handle_class = $self->handle_class; | 
| 72 | 375 |  |  |  |  | 2584 | my $fh = $handle_class->new($filename); | 
| 73 | 375 | 50 |  |  |  | 27407 | unless($fh) { | 
| 74 | 0 |  |  |  |  | 0 | $self->error_message("Can't open ".$self->server." for reading: $!"); | 
| 75 | 0 |  |  |  |  | 0 | return; | 
| 76 |  |  |  |  |  |  | } | 
| 77 |  |  |  |  |  |  |  | 
| 78 | 375 | 50 |  |  |  | 1027 | if ($ENV{'UR_DBI_MONITOR_SQL'}) { | 
| 79 | 0 |  |  |  |  | 0 | UR::DBI->sql_fh->printf("FILE: opened %s fileno %d\n\n",$self->server, $fh->fileno); | 
| 80 |  |  |  |  |  |  | } | 
| 81 |  |  |  |  |  |  |  | 
| 82 | 375 |  |  |  |  | 1426 | $self->is_connected(1); | 
| 83 | 375 |  |  |  |  | 793 | return $fh; | 
| 84 |  |  |  |  |  |  | } | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | sub disconnect { | 
| 87 | 2 |  |  | 2 | 0 | 6 | my $self = shift; | 
| 88 |  |  |  |  |  |  |  | 
| 89 | 2 | 50 |  |  |  | 4 | if ($self->has_default_handle) { | 
| 90 | 2 |  |  |  |  | 6 | my $fh = $self->get_default_handle; | 
| 91 | 2 |  |  |  |  | 26 | flock($fh,LOCK_UN); | 
| 92 | 2 |  |  |  |  | 14 | $fh->close(); | 
| 93 | 2 |  |  |  |  | 192 | $self->__invalidate_get_default_handle__; | 
| 94 | 2 |  |  |  |  | 8 | $self->is_connected(0); | 
| 95 |  |  |  |  |  |  | } | 
| 96 |  |  |  |  |  |  | } | 
| 97 |  |  |  |  |  |  |  | 
| 98 |  |  |  |  |  |  | sub _file_position { | 
| 99 | 1222 |  |  | 1222 |  | 1005 | my $self = shift; | 
| 100 | 1222 |  |  |  |  | 2479 | my $fh = $self->get_default_handle; | 
| 101 | 1222 | 50 |  |  |  | 3391 | return $fh ? $fh->tell() : undef; | 
| 102 |  |  |  |  |  |  | } | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | sub prepare_for_fork { | 
| 105 | 2 |  |  | 2 | 0 | 4 | my $self = shift; | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | # make sure this is clear before we fork | 
| 108 | 2 |  |  |  |  | 6 | $self->{'_fh_position'} = undef; | 
| 109 | 2 | 50 |  |  |  | 8 | if ($self->has_default_handle) { | 
| 110 | 2 |  |  |  |  | 10 | $self->{'_fh_position'} = $self->_file_position(); | 
| 111 | 2 | 50 |  |  |  | 24 | UR::DBI->sql_fh->printf("FILE: preparing to fork; closing file %s and noting position at %s\n",$self->server, $self->{'_fh_position'}) if $ENV{'UR_DBI_MONITOR_SQL'}; | 
| 112 |  |  |  |  |  |  | } | 
| 113 | 2 |  |  |  |  | 36 | $self->disconnect_default_handle; | 
| 114 |  |  |  |  |  |  | } | 
| 115 |  |  |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | sub finish_up_after_fork { | 
| 117 | 2 |  |  | 2 | 0 | 19 | my $self = shift; | 
| 118 | 2 | 50 |  |  |  | 29 | if (defined $self->{'_fh_position'}) { | 
| 119 | 2 | 50 |  |  |  | 27 | UR::DBI->sql_fh->printf("FILE: resetting after fork; reopening file %s and fast-forwarding to %s\n",$self->server, $self->{'_fh_position'}) if $ENV{'UR_DBI_MONITOR_SQL'}; | 
| 120 | 2 |  |  |  |  | 30 | my $fh = $self->get_default_handle; | 
| 121 | 2 |  |  |  |  | 24 | $fh->seek($self->{'_fh_position'},0); | 
| 122 |  |  |  |  |  |  | } | 
| 123 |  |  |  |  |  |  | } | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | sub _regex { | 
| 126 | 376 |  |  | 376 |  | 386 | my $self = shift; | 
| 127 |  |  |  |  |  |  |  | 
| 128 | 376 | 100 |  |  |  | 778 | unless ($self->{'_regex'}) { | 
| 129 | 26 |  |  |  |  | 96 | my $delimiter = $self->delimiter; | 
| 130 | 26 |  |  |  |  | 49 | my $r = eval { qr($delimiter)  }; | 
|  | 26 |  |  |  |  | 464 |  | 
| 131 | 26 | 50 | 33 |  |  | 158 | if ($@ || !$r) { | 
| 132 | 0 |  |  |  |  | 0 | $self->error_message("Unable to interepret delimiter '".$self->delimiter.": $@"); | 
| 133 | 0 |  |  |  |  | 0 | return; | 
| 134 |  |  |  |  |  |  | } | 
| 135 | 26 |  |  |  |  | 68 | $self->{'_regex'} = $r; | 
| 136 |  |  |  |  |  |  | } | 
| 137 | 376 |  |  |  |  | 514 | return $self->{'_regex'}; | 
| 138 |  |  |  |  |  |  | } | 
| 139 |  |  |  |  |  |  |  | 
| 140 |  |  |  |  |  |  | # We're overriding server() so everyone else can have a single way of getting | 
| 141 |  |  |  |  |  |  | # the file's pathname instead of having to know about both server and file_list | 
| 142 |  |  |  |  |  |  | sub server { | 
| 143 | 45 |  |  | 45 | 1 | 51 | my $self = shift; | 
| 144 |  |  |  |  |  |  |  | 
| 145 | 45 | 100 |  |  |  | 95 | unless ($self->{'_cached_server'}) { | 
| 146 | 16 | 100 |  |  |  | 65 | if ($self->__server()) { | 
|  |  | 50 |  |  |  |  |  | 
| 147 | 15 |  |  |  |  | 39 | $self->{'_cached_server'} = $self->__server(); | 
| 148 |  |  |  |  |  |  | } elsif ($self->file_list) { | 
| 149 | 1 |  |  |  |  | 2 | my $files = $self->file_list; | 
| 150 | 1 |  |  |  |  | 2 | my $count = scalar(@$files); | 
| 151 | 1 |  |  |  |  | 3 | my $idx = $$ % $count; | 
| 152 | 1 |  |  |  |  | 3 | $self->{'_cached_server'} = $files->[$idx]; | 
| 153 |  |  |  |  |  |  | } else { | 
| 154 | 0 |  |  |  |  | 0 | die "Data source ",$self->id," didn't specify either server or file_list"; | 
| 155 |  |  |  |  |  |  | } | 
| 156 |  |  |  |  |  |  | } | 
| 157 | 45 |  |  |  |  | 81 | return $self->{'_cached_server'}; | 
| 158 |  |  |  |  |  |  | } | 
| 159 |  |  |  |  |  |  |  | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | # Should be divisible by 3 | 
| 162 |  |  |  |  |  |  | our $MAX_CACHE_SIZE = 99; | 
| 163 |  |  |  |  |  |  | # The offset cache is an arrayref containing three pieces of data: | 
| 164 |  |  |  |  |  |  | # 0: If this cache slot is being used by a loading iterator | 
| 165 |  |  |  |  |  |  | # 1: concatenated data from the sorted columns for comparison with where you are in the file | 
| 166 |  |  |  |  |  |  | # 2: the seek position that line came from | 
| 167 |  |  |  |  |  |  | sub _offset_cache { | 
| 168 | 1116 |  |  | 1116 |  | 974 | my $self = shift; | 
| 169 |  |  |  |  |  |  |  | 
| 170 | 1116 | 100 |  |  |  | 1896 | unless ($self->{'_offset_cache'}) { | 
| 171 | 25 |  |  |  |  | 58 | $self->{'_offset_cache'} = []; | 
| 172 |  |  |  |  |  |  | } | 
| 173 | 1116 |  |  |  |  | 1181 | return $self->{'_offset_cache'}; | 
| 174 |  |  |  |  |  |  | } | 
| 175 |  |  |  |  |  |  |  | 
| 176 |  |  |  |  |  |  | our %iterator_data_source; | 
| 177 |  |  |  |  |  |  | our %iterator_cache_slot_refs; | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | sub _allocate_offset_cache_slot { | 
| 180 | 372 |  |  | 372 |  | 381 | my $self = shift; | 
| 181 |  |  |  |  |  |  |  | 
| 182 | 372 |  |  |  |  | 620 | my $cache = $self->_offset_cache(); | 
| 183 | 372 |  |  |  |  | 454 | my $next = scalar(@$cache); | 
| 184 |  |  |  |  |  |  | #print STDERR "_allocate_offset_cache_slot ".$self->server." current size is $next "; | 
| 185 | 372 | 100 |  |  |  | 724 | if ($next > $MAX_CACHE_SIZE) { | 
| 186 |  |  |  |  |  |  | #print STDERR "searching... \n"; | 
| 187 | 282 |  |  |  |  | 493 | my $last_offset_cache_slot = $self->{'_last_offset_cache_slot'}; | 
| 188 | 282 | 100 |  |  |  | 552 | if ($last_offset_cache_slot >= $MAX_CACHE_SIZE) { | 
| 189 | 9 |  |  |  |  | 15 | $next = 0; | 
| 190 |  |  |  |  |  |  | } else { | 
| 191 | 273 |  |  |  |  | 385 | $next = $last_offset_cache_slot + 3; | 
| 192 |  |  |  |  |  |  | } | 
| 193 |  |  |  |  |  |  | # Search for an unused slot | 
| 194 | 282 |  | 33 |  |  | 869 | while ($cache->[$next] and $next != $last_offset_cache_slot) { | 
| 195 | 0 |  |  |  |  | 0 | $next += 3; | 
| 196 | 0 | 0 |  |  |  | 0 | $next = 0 if ($next > $MAX_CACHE_SIZE); | 
| 197 |  |  |  |  |  |  | } | 
| 198 | 282 | 50 | 33 |  |  | 1565 | if ($next > $MAX_CACHE_SIZE or $next eq $last_offset_cache_slot) { | 
| 199 |  |  |  |  |  |  | #print STDERR scalar(keys(%iterator_data_source))." items in iterator_data_source ".scalar(keys(%iterator_cache_slot))." in iterator_cache_slot\n"; | 
| 200 | 0 |  |  |  |  | 0 | Carp::carp("Unable to find an open file offset cache slot because there are too many outstanding loading iterators.  Temporarily expanding the cache..."); | 
| 201 |  |  |  |  |  |  | # We'll let it go ahead and expand the list | 
| 202 | 0 |  |  |  |  | 0 | $next = $MAX_CACHE_SIZE; | 
| 203 | 0 |  |  |  |  | 0 | $MAX_CACHE_SIZE += 3; | 
| 204 |  |  |  |  |  |  | } | 
| 205 |  |  |  |  |  |  | } | 
| 206 | 372 |  |  |  |  | 423 | $cache->[$next] = 1; | 
| 207 | 372 |  |  |  |  | 500 | $cache->[$next+1] = undef; | 
| 208 | 372 |  |  |  |  | 489 | $cache->[$next+2] = undef; | 
| 209 |  |  |  |  |  |  |  | 
| 210 | 372 |  |  |  |  | 436 | $self->{'_last_offset_cache_slot'} = $next; | 
| 211 |  |  |  |  |  |  | #print STDERR "using slot $next current size ".scalar(@$cache)."\n"; | 
| 212 | 372 |  |  |  |  | 550 | return $next; | 
| 213 |  |  |  |  |  |  | } | 
| 214 |  |  |  |  |  |  |  | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | sub _free_offset_cache_slot { | 
| 217 | 372 |  |  | 372 |  | 492 | my($self, $cache_slot) = @_; | 
| 218 |  |  |  |  |  |  |  | 
| 219 | 372 |  |  |  |  | 671 | my $cache = $self->_offset_cache(); | 
| 220 | 372 | 50 |  |  |  | 767 | unless ($cache_slot < scalar(@$cache)) { | 
| 221 | 0 |  |  |  |  | 0 | $self->warning_message("Freeing offset cache slot past the end.  Current size ".scalar(@$cache).", requested $cache_slot"); | 
| 222 | 0 |  |  |  |  | 0 | return; | 
| 223 |  |  |  |  |  |  | } | 
| 224 |  |  |  |  |  |  |  | 
| 225 | 372 | 50 |  |  |  | 804 | unless (defined $cache->[$cache_slot]) { | 
| 226 | 0 |  |  |  |  | 0 | $self->warning_message("Freeing unused offset cache slot $cache_slot"); | 
| 227 | 0 |  |  |  |  | 0 | return; | 
| 228 |  |  |  |  |  |  | } | 
| 229 |  |  |  |  |  |  |  | 
| 230 | 372 | 50 | 66 |  |  | 1018 | if ($cache->[$cache_slot+1] and scalar(@{$cache->[$cache_slot+1]}) == 0) { | 
|  | 169 |  |  |  |  | 489 |  | 
| 231 |  |  |  |  |  |  | # There's no data in here.  Must have happened when the reader went all the | 
| 232 |  |  |  |  |  |  | # way to the end of the file and found nothing.  Remove this entry completely | 
| 233 |  |  |  |  |  |  | # because it's not helpful | 
| 234 | 0 |  |  |  |  | 0 | splice(@$cache, $cache_slot,3); | 
| 235 |  |  |  |  |  |  |  | 
| 236 |  |  |  |  |  |  | } else { | 
| 237 |  |  |  |  |  |  | # There is data in here, mark it as a free slot | 
| 238 | 372 |  |  |  |  | 422 | $cache->[$cache_slot] = 0; | 
| 239 |  |  |  |  |  |  | } | 
| 240 | 372 |  |  |  |  | 524 | return 1; | 
| 241 |  |  |  |  |  |  | } | 
| 242 |  |  |  |  |  |  |  | 
| 243 |  |  |  |  |  |  |  | 
| 244 |  |  |  |  |  |  |  | 
| 245 |  |  |  |  |  |  | sub _invalidate_cache { | 
| 246 | 4 |  |  | 4 |  | 6 | my $self = shift; | 
| 247 |  |  |  |  |  |  |  | 
| 248 | 4 |  |  |  |  | 9 | $self->{'_offset_cache'} = []; | 
| 249 |  |  |  |  |  |  |  | 
| 250 | 4 |  |  |  |  | 15 | return 1; | 
| 251 |  |  |  |  |  |  | } | 
| 252 |  |  |  |  |  |  |  | 
| 253 |  |  |  |  |  |  | sub _generate_loading_templates_arrayref { | 
| 254 | 49 |  |  | 49 |  | 85 | my($self,$old_sql_cols) = @_; | 
| 255 |  |  |  |  |  |  |  | 
| 256 | 49 |  |  |  |  | 215 | my $columns_in_file = $self->column_order; | 
| 257 | 49 |  |  |  |  | 149 | my %column_to_position_map; | 
| 258 | 49 |  |  |  |  | 152 | for (my $i = 0; $i < @$columns_in_file; $i++) { | 
| 259 | 136 |  |  |  |  | 330 | $column_to_position_map{$columns_in_file->[$i]} = $i; | 
| 260 |  |  |  |  |  |  | } | 
| 261 |  |  |  |  |  |  |  | 
| 262 |  |  |  |  |  |  | # strip out columns that don't exist in the file | 
| 263 | 49 |  |  |  |  | 57 | my $sql_cols; | 
| 264 | 49 |  |  |  |  | 102 | foreach my $column_data ( @$old_sql_cols ) { | 
| 265 | 212 |  |  |  |  | 342 | my $propertys_column_name = $column_data->[1]->column_name; | 
| 266 | 212 | 100 | 66 |  |  | 567 | next unless ($propertys_column_name and exists($column_to_position_map{$propertys_column_name})); | 
| 267 |  |  |  |  |  |  |  | 
| 268 | 136 |  |  |  |  | 185 | push @$sql_cols, $column_data; | 
| 269 |  |  |  |  |  |  | } | 
| 270 |  |  |  |  |  |  |  | 
| 271 | 49 | 50 |  |  |  | 136 | unless ($sql_cols) { | 
| 272 | 0 |  |  |  |  | 0 | $self->error_message("Couldn't determine column information for data source " . $self->id); | 
| 273 | 0 |  |  |  |  | 0 | return; | 
| 274 |  |  |  |  |  |  | } | 
| 275 |  |  |  |  |  |  |  | 
| 276 |  |  |  |  |  |  | # reorder the requested columns to be in the same order as the file | 
| 277 |  |  |  |  |  |  | my @sql_cols_with_column_name = | 
| 278 | 49 |  |  |  |  | 87 | map{ [ $column_to_position_map{ $_->[1]->column_name }, $_ ] } | 
|  | 136 |  |  |  |  | 254 |  | 
| 279 |  |  |  |  |  |  | @$sql_cols; | 
| 280 |  |  |  |  |  |  | my @sorted_sql_cols = | 
| 281 | 136 |  |  |  |  | 182 | map { $_->[1] } | 
| 282 | 49 |  |  |  |  | 227 | sort { $a->[0] <=> $b->[0] } | 
|  | 125 |  |  |  |  | 205 |  | 
| 283 |  |  |  |  |  |  | @sql_cols_with_column_name; | 
| 284 | 49 |  |  |  |  | 89 | $sql_cols = \@sorted_sql_cols; | 
| 285 | 49 |  |  |  |  | 317 | my $templates = $self->SUPER::_generate_loading_templates_arrayref($sql_cols); | 
| 286 |  |  |  |  |  |  |  | 
| 287 | 49 | 100 |  |  |  | 241 | if (my $constant_values = $self->constant_values) { | 
| 288 |  |  |  |  |  |  | # Find the first unused index in the loading template | 
| 289 | 12 |  |  |  |  | 37 | my $next_template_slot = -1; | 
| 290 | 12 |  |  |  |  | 23 | foreach my $tmpl ( @$templates ) { | 
| 291 | 12 |  |  |  |  | 11 | foreach my $col ( @{$tmpl->{'column_positions'}} ) { | 
|  | 12 |  |  |  |  | 19 |  | 
| 292 | 36 | 50 |  |  |  | 49 | if ($col >= $next_template_slot) { | 
| 293 | 36 |  |  |  |  | 39 | $next_template_slot = $col + 1; | 
| 294 |  |  |  |  |  |  | } | 
| 295 |  |  |  |  |  |  | } | 
| 296 |  |  |  |  |  |  | } | 
| 297 | 12 | 50 |  |  |  | 30 | if ($next_template_slot == -1) { | 
| 298 | 0 |  |  |  |  | 0 | die "Couldn't determine last column in loading template for data source" . $self->id; | 
| 299 |  |  |  |  |  |  | } | 
| 300 |  |  |  |  |  |  |  | 
| 301 | 12 |  |  |  |  | 19 | foreach my $prop ( @$constant_values ) { | 
| 302 | 18 |  |  |  |  | 16 | push @{$templates->[0]->{'column_positions'}}, $next_template_slot++; | 
|  | 18 |  |  |  |  | 29 |  | 
| 303 | 18 |  |  |  |  | 15 | push @{$templates->[0]->{'property_names'}}, $prop; | 
|  | 18 |  |  |  |  | 30 |  | 
| 304 |  |  |  |  |  |  | } | 
| 305 |  |  |  |  |  |  | } | 
| 306 |  |  |  |  |  |  |  | 
| 307 | 49 |  |  |  |  | 220 | return $templates; | 
| 308 |  |  |  |  |  |  | } | 
| 309 |  |  |  |  |  |  |  | 
| 310 |  |  |  |  |  |  | sub _things_in_list_are_numeric { | 
| 311 | 260 |  |  | 260 |  | 258 | my $self = shift; | 
| 312 |  |  |  |  |  |  |  | 
| 313 | 260 |  |  |  |  | 241 | foreach ( @{$_[0]} ) { | 
|  | 260 |  |  |  |  | 493 |  | 
| 314 | 276 | 100 |  |  |  | 1125 | return 0 if (! Scalar::Util::looks_like_number($_)); | 
| 315 |  |  |  |  |  |  | } | 
| 316 | 213 |  |  |  |  | 715 | return 1; | 
| 317 |  |  |  |  |  |  | } | 
| 318 |  |  |  |  |  |  |  | 
| 319 |  |  |  |  |  |  | # Construct a closure to perform a test on the $index-th column of | 
| 320 |  |  |  |  |  |  | # @$$next_candidate_row.  The closures return 0 is the test is successful, | 
| 321 |  |  |  |  |  |  | # -1 if unsuccessful but the file's value was less than $value, and 1 | 
| 322 |  |  |  |  |  |  | # if unsuccessful and greater.  The iterator that churns throug the file | 
| 323 |  |  |  |  |  |  | # knows that if it's comparing an ID/sorted column, and the comparator | 
| 324 |  |  |  |  |  |  | # returns 1 then we've gone past the point where we can expect to ever | 
| 325 |  |  |  |  |  |  | # find another successful match and we should stop looking | 
| 326 |  |  |  |  |  |  | my $ALWAYS_FALSE = sub { -1 }; | 
| 327 |  |  |  |  |  |  | sub _comparator_for_operator_and_property { | 
| 328 | 359 |  |  | 359 |  | 618 | my($self,$property,$next_candidate_row, $index, $operator,$value) = @_; | 
| 329 |  |  |  |  |  |  |  | 
| 330 | 19 |  |  | 19 |  | 96 | no warnings 'uninitialized';  # we're handling ''/undef/null specially below where it matters | 
|  | 19 |  |  |  |  | 23 |  | 
|  | 19 |  |  |  |  | 39244 |  | 
| 331 |  |  |  |  |  |  |  | 
| 332 | 359 | 100 | 100 |  |  | 2629 | if ($operator eq 'between') { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 333 | 44 | 100 | 100 |  |  | 163 | if ($value->[0] eq '' or $value->[1] eq '') { | 
| 334 | 28 |  |  |  |  | 40 | return $ALWAYS_FALSE; | 
| 335 |  |  |  |  |  |  | } | 
| 336 |  |  |  |  |  |  |  | 
| 337 | 16 | 50 | 33 |  |  | 38 | if ($property->is_numeric and $self->_things_in_list_are_numeric($value)) { | 
| 338 | 16 | 50 |  |  |  | 31 | if ($value->[0] > $value->[1]) { | 
| 339 |  |  |  |  |  |  | # Will never be true | 
| 340 | 0 |  |  |  |  | 0 | Carp::carp "'between' comparison will never be true with values ".$value->[0]," and ".$value->[1]; | 
| 341 | 0 |  |  |  |  | 0 | return $ALWAYS_FALSE; | 
| 342 |  |  |  |  |  |  | } | 
| 343 |  |  |  |  |  |  |  | 
| 344 |  |  |  |  |  |  | # numeric 'between' comparison | 
| 345 |  |  |  |  |  |  | return sub { | 
| 346 | 32 | 50 |  | 32 |  | 67 | return -1 if ($$next_candidate_row->[$index] eq ''); | 
| 347 | 0 | 0 |  |  |  | 0 | if ($$next_candidate_row->[$index] < $value->[0]) { | 
|  |  | 0 |  |  |  |  |  | 
| 348 | 0 |  |  |  |  | 0 | return -1; | 
| 349 |  |  |  |  |  |  | } elsif ($$next_candidate_row->[$index] > $value->[1]) { | 
| 350 | 0 |  |  |  |  | 0 | return 1; | 
| 351 |  |  |  |  |  |  | } else { | 
| 352 | 0 |  |  |  |  | 0 | return 0; | 
| 353 |  |  |  |  |  |  | } | 
| 354 | 16 |  |  |  |  | 77 | }; | 
| 355 |  |  |  |  |  |  | } else { | 
| 356 | 0 | 0 |  |  |  | 0 | if ($value->[0] gt $value->[1]) { | 
| 357 | 0 |  |  |  |  | 0 | Carp::carp "'between' comparison will never be true with values ".$value->[0]," and ".$value->[1]; | 
| 358 | 0 |  |  |  |  | 0 | return $ALWAYS_FALSE; | 
| 359 |  |  |  |  |  |  | } | 
| 360 |  |  |  |  |  |  |  | 
| 361 |  |  |  |  |  |  | # A string 'between' comparison | 
| 362 |  |  |  |  |  |  | return sub { | 
| 363 | 0 | 0 |  | 0 |  | 0 | return -1 if ($$next_candidate_row->[$index] eq ''); | 
| 364 | 0 | 0 |  |  |  | 0 | if ($$next_candidate_row->[$index] lt $value->[0]) { | 
|  |  | 0 |  |  |  |  |  | 
| 365 | 0 |  |  |  |  | 0 | return -1; | 
| 366 |  |  |  |  |  |  | } elsif ($$next_candidate_row->[$index] gt $value->[1]) { | 
| 367 | 0 |  |  |  |  | 0 | return 1; | 
| 368 |  |  |  |  |  |  | } else { | 
| 369 | 0 |  |  |  |  | 0 | return 0; | 
| 370 |  |  |  |  |  |  | } | 
| 371 | 0 |  |  |  |  | 0 | }; | 
| 372 |  |  |  |  |  |  | } | 
| 373 |  |  |  |  |  |  |  | 
| 374 |  |  |  |  |  |  | } elsif ($operator eq 'in') { | 
| 375 | 20 | 100 |  |  |  | 50 | if (! @$value) { | 
| 376 | 8 |  |  |  |  | 14 | return $ALWAYS_FALSE; | 
| 377 |  |  |  |  |  |  | } | 
| 378 |  |  |  |  |  |  |  | 
| 379 | 12 | 100 | 66 |  |  | 51 | if ($property->is_numeric and $self->_things_in_list_are_numeric($value)) { | 
| 380 |  |  |  |  |  |  | # Numeric 'in' comparison  returns undef if we're within the range of the list | 
| 381 |  |  |  |  |  |  | # but don't actually match any of the items in the list | 
| 382 | 8 |  |  |  |  | 27 | @$value = sort { $a <=> $b } @$value;  # sort the values first | 
|  | 0 |  |  |  |  | 0 |  | 
| 383 |  |  |  |  |  |  | return sub { | 
| 384 | 16 | 50 |  | 16 |  | 40 | return -1 if ($$next_candidate_row->[$index] eq ''); | 
| 385 | 0 | 0 |  |  |  | 0 | if ($$next_candidate_row->[$index] < $value->[0]) { | 
|  |  | 0 |  |  |  |  |  | 
| 386 | 0 |  |  |  |  | 0 | return -1; | 
| 387 |  |  |  |  |  |  | } elsif ($$next_candidate_row->[$index] > $value->[-1]) { | 
| 388 | 0 |  |  |  |  | 0 | return 1; | 
| 389 |  |  |  |  |  |  | } else { | 
| 390 | 0 |  |  |  |  | 0 | foreach ( @$value ) { | 
| 391 | 0 | 0 |  |  |  | 0 | return 0 if $$next_candidate_row->[$index] == $_; | 
| 392 |  |  |  |  |  |  | } | 
| 393 | 0 |  |  |  |  | 0 | return -1; | 
| 394 |  |  |  |  |  |  | } | 
| 395 | 8 |  |  |  |  | 51 | }; | 
| 396 |  |  |  |  |  |  |  | 
| 397 |  |  |  |  |  |  | } else { | 
| 398 |  |  |  |  |  |  | # A string 'in' comparison | 
| 399 | 4 |  |  |  |  | 13 | @$value = sort { $a cmp $b } @$value; | 
|  | 0 |  |  |  |  | 0 |  | 
| 400 |  |  |  |  |  |  | return sub { | 
| 401 | 8 | 50 |  | 8 |  | 29 | if ($$next_candidate_row->[$index] lt $value->[0]) { | 
|  |  | 50 |  |  |  |  |  | 
| 402 | 0 |  |  |  |  | 0 | return -1; | 
| 403 |  |  |  |  |  |  | } elsif ($$next_candidate_row->[$index] gt $value->[-1]) { | 
| 404 | 0 |  |  |  |  | 0 | return 1; | 
| 405 |  |  |  |  |  |  | } else { | 
| 406 | 8 |  |  |  |  | 18 | foreach ( @$value ) { | 
| 407 | 8 | 50 |  |  |  | 19 | return 0 if $$next_candidate_row->[$index] eq $_; | 
| 408 |  |  |  |  |  |  | } | 
| 409 | 0 |  |  |  |  | 0 | return -1; | 
| 410 |  |  |  |  |  |  | } | 
| 411 | 4 |  |  |  |  | 26 | }; | 
| 412 |  |  |  |  |  |  |  | 
| 413 |  |  |  |  |  |  | } | 
| 414 |  |  |  |  |  |  |  | 
| 415 |  |  |  |  |  |  | } elsif ($operator eq 'not in') { | 
| 416 | 14 | 100 |  |  |  | 41 | if (! @$value) { | 
| 417 | 4 |  |  |  |  | 6 | return $ALWAYS_FALSE; | 
| 418 |  |  |  |  |  |  | } | 
| 419 |  |  |  |  |  |  |  | 
| 420 | 10 | 100 | 100 |  |  | 38 | if ($property->is_numeric and $self->_things_in_list_are_numeric($value)) { | 
| 421 |  |  |  |  |  |  | return sub { | 
| 422 | 8 | 50 |  | 8 |  | 18 | return -1 if ($$next_candidate_row->[$index] eq ''); | 
| 423 | 0 |  |  |  |  | 0 | foreach ( @$value ) { | 
| 424 | 0 | 0 |  |  |  | 0 | return -1 if $$next_candidate_row->[$index] == $_; | 
| 425 |  |  |  |  |  |  | } | 
| 426 | 0 |  |  |  |  | 0 | return 0; | 
| 427 |  |  |  |  |  |  | } | 
| 428 |  |  |  |  |  |  |  | 
| 429 | 4 |  |  |  |  | 21 | } else { | 
| 430 |  |  |  |  |  |  | return sub { | 
| 431 | 16 |  |  | 16 |  | 25 | foreach ( @$value ) { | 
| 432 | 22 | 100 |  |  |  | 53 | return -1 if $$next_candidate_row->[$index] eq $_; | 
| 433 |  |  |  |  |  |  | } | 
| 434 | 4 |  |  |  |  | 6 | return 0; | 
| 435 |  |  |  |  |  |  | } | 
| 436 | 6 |  |  |  |  | 37 | } | 
| 437 |  |  |  |  |  |  |  | 
| 438 |  |  |  |  |  |  | } elsif ($operator eq 'like') { | 
| 439 |  |  |  |  |  |  | # 'like' is always a string comparison.  In addition, we can't know if we're ahead | 
| 440 |  |  |  |  |  |  | # or behind in the file's ID columns, so the only two return values are 0 and 1 | 
| 441 |  |  |  |  |  |  |  | 
| 442 | 18 | 100 |  |  |  | 60 | return $ALWAYS_FALSE if ($value eq '');  # property like NULL is always false | 
| 443 |  |  |  |  |  |  |  | 
| 444 |  |  |  |  |  |  | # Convert SQL-type wildcards to Perl-type wildcards | 
| 445 |  |  |  |  |  |  | # Convert a % to a *, and _ to ., unless they're preceeded by \ to escape them. | 
| 446 |  |  |  |  |  |  | # Not that this isn't precisely correct, as \\% should really mean a literal \ | 
| 447 |  |  |  |  |  |  | # followed by a wildcard, but we can't be correct in all cases without including | 
| 448 |  |  |  |  |  |  | # a real parser.  This will catch most cases. | 
| 449 |  |  |  |  |  |  |  | 
| 450 | 14 |  |  |  |  | 82 | $value =~ s/(? | 
| 451 | 14 |  |  |  |  | 29 | $value =~ s/(? | 
| 452 | 14 |  |  |  |  | 109 | my $regex = qr($value); | 
| 453 |  |  |  |  |  |  | return sub { | 
| 454 | 32 | 100 |  | 32 |  | 86 | return -1 if ($$next_candidate_row->[$index] eq ''); | 
| 455 | 8 | 100 |  |  |  | 32 | if ($$next_candidate_row->[$index] =~ $regex) { | 
| 456 | 2 |  |  |  |  | 6 | return 0; | 
| 457 |  |  |  |  |  |  | } else { | 
| 458 | 6 |  |  |  |  | 8 | return 1; | 
| 459 |  |  |  |  |  |  | } | 
| 460 | 14 |  |  |  |  | 92 | }; | 
| 461 |  |  |  |  |  |  |  | 
| 462 |  |  |  |  |  |  | } elsif ($operator eq 'not like') { | 
| 463 | 16 | 100 |  |  |  | 53 | return $ALWAYS_FALSE if ($value eq '');  # property like NULL is always false | 
| 464 | 12 |  |  |  |  | 69 | $value =~ s/(? | 
| 465 | 12 |  |  |  |  | 29 | $value =~ s/(? | 
| 466 | 12 |  |  |  |  | 90 | my $regex = qr($value); | 
| 467 |  |  |  |  |  |  | return sub { | 
| 468 | 24 | 50 |  | 24 |  | 72 | return -1 if ($$next_candidate_row->[$index] eq ''); | 
| 469 | 0 | 0 |  |  |  | 0 | if ($$next_candidate_row->[$index] =~ $regex) { | 
| 470 | 0 |  |  |  |  | 0 | return 1; | 
| 471 |  |  |  |  |  |  | } else { | 
| 472 | 0 |  |  |  |  | 0 | return 0; | 
| 473 |  |  |  |  |  |  | } | 
| 474 | 12 |  |  |  |  | 82 | }; | 
| 475 |  |  |  |  |  |  |  | 
| 476 |  |  |  |  |  |  |  | 
| 477 |  |  |  |  |  |  | # FIXME - should we only be testing the numericness of the property? | 
| 478 |  |  |  |  |  |  | } elsif ($property->is_numeric and $self->_things_in_list_are_numeric([$value])) { | 
| 479 |  |  |  |  |  |  | # Basic numeric comparisons | 
| 480 | 185 | 100 | 33 |  |  | 614 | if ($operator eq '=') { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 481 |  |  |  |  |  |  | return sub { | 
| 482 | 1470 | 100 |  | 1470 |  | 2178 | return -1 if ($$next_candidate_row->[$index] eq ''); # null always != a number | 
| 483 | 1446 |  |  |  |  | 1873 | return $$next_candidate_row->[$index] <=> $value; | 
| 484 | 130 |  |  |  |  | 769 | }; | 
| 485 |  |  |  |  |  |  | } elsif ($operator eq '<') { | 
| 486 |  |  |  |  |  |  | return sub { | 
| 487 | 24 | 50 |  | 24 |  | 47 | return -1 if ($$next_candidate_row->[$index] eq ''); # null always != a number | 
| 488 | 0 | 0 |  |  |  | 0 | $$next_candidate_row->[$index] < $value ? 0 : 1; | 
| 489 | 12 |  |  |  |  | 56 | }; | 
| 490 |  |  |  |  |  |  | } elsif ($operator eq '<=') { | 
| 491 |  |  |  |  |  |  | return sub { | 
| 492 | 24 | 50 |  | 24 |  | 65 | return -1 if ($$next_candidate_row->[$index] eq ''); # null always != a number | 
| 493 | 0 | 0 |  |  |  | 0 | $$next_candidate_row->[$index] <= $value ? 0 : 1; | 
| 494 | 12 |  |  |  |  | 78 | }; | 
| 495 |  |  |  |  |  |  | } elsif ($operator eq '>') { | 
| 496 |  |  |  |  |  |  | return sub { | 
| 497 | 24 | 50 |  | 24 |  | 70 | return -1 if ($$next_candidate_row->[$index] eq ''); # null always != a number | 
| 498 | 0 | 0 |  |  |  | 0 | $$next_candidate_row->[$index] > $value ? 0 : -1; | 
| 499 | 12 |  |  |  |  | 92 | }; | 
| 500 |  |  |  |  |  |  | } elsif ($operator eq '>=') { | 
| 501 |  |  |  |  |  |  | return sub { | 
| 502 | 24 | 50 |  | 24 |  | 58 | return -1 if ($$next_candidate_row->[$index] eq ''); # null always != a number | 
| 503 | 0 | 0 |  |  |  | 0 | $$next_candidate_row->[$index] >= $value ? 0 : -1; | 
| 504 | 12 |  |  |  |  | 77 | }; | 
| 505 |  |  |  |  |  |  | } elsif ($operator eq 'true') { | 
| 506 |  |  |  |  |  |  | return sub { | 
| 507 | 0 | 0 |  | 0 |  | 0 | $$next_candidate_row->[$index] ? 0 : -1; | 
| 508 | 0 |  |  |  |  | 0 | }; | 
| 509 |  |  |  |  |  |  | } elsif ($operator eq 'false') { | 
| 510 |  |  |  |  |  |  | return sub { | 
| 511 | 2 | 50 |  | 2 |  | 5 | $$next_candidate_row->[$index] ? -1 : 0; | 
| 512 | 1 |  |  |  |  | 6 | }; | 
| 513 |  |  |  |  |  |  | } elsif ($operator eq '!=' or $operator eq 'ne') { | 
| 514 |  |  |  |  |  |  | return sub { | 
| 515 | 12 | 50 |  | 12 |  | 27 | return 0 if ($$next_candidate_row->[$index] eq '');  # null always != a number | 
| 516 | 0 | 0 |  |  |  | 0 | $$next_candidate_row->[$index] != $value ? 0 : -1; | 
| 517 |  |  |  |  |  |  | } | 
| 518 | 6 |  |  |  |  | 29 | } | 
| 519 |  |  |  |  |  |  |  | 
| 520 |  |  |  |  |  |  | } else { | 
| 521 |  |  |  |  |  |  | # Basic string comparisons | 
| 522 | 62 | 100 | 0 |  |  | 254 | if ($operator eq '=') { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 523 |  |  |  |  |  |  | return sub { | 
| 524 | 120 | 50 | 50 | 120 |  | 505 | return -1 if ($$next_candidate_row->[$index] eq '' xor $value eq ''); | 
| 525 | 120 |  |  |  |  | 168 | return $$next_candidate_row->[$index] cmp $value; | 
| 526 | 25 |  |  |  |  | 168 | }; | 
| 527 |  |  |  |  |  |  | } elsif ($operator eq '<') { | 
| 528 |  |  |  |  |  |  | return sub { | 
| 529 | 16 | 50 |  | 16 |  | 30 | $$next_candidate_row->[$index] lt $value ? 0 : 1; | 
| 530 | 8 |  |  |  |  | 38 | }; | 
| 531 |  |  |  |  |  |  | } elsif ($operator eq '<=') { | 
| 532 |  |  |  |  |  |  | return sub { | 
| 533 | 16 | 50 | 33 | 16 |  | 46 | return -1 if ($$next_candidate_row->[$index] eq '' or $value eq ''); | 
| 534 | 0 | 0 |  |  |  | 0 | $$next_candidate_row->[$index] le $value ? 0 : 1; | 
| 535 | 8 |  |  |  |  | 44 | }; | 
| 536 |  |  |  |  |  |  | } elsif ($operator eq '>') { | 
| 537 |  |  |  |  |  |  | return sub { | 
| 538 | 16 | 50 |  | 16 |  | 45 | $$next_candidate_row->[$index] gt $value ? 0 : -1; | 
| 539 | 8 |  |  |  |  | 54 | }; | 
| 540 |  |  |  |  |  |  | } elsif ($operator eq '>=') { | 
| 541 |  |  |  |  |  |  | return sub { | 
| 542 | 16 | 50 | 33 | 16 |  | 74 | return -1 if ($$next_candidate_row->[$index] eq '' or $value eq ''); | 
| 543 | 0 | 0 |  |  |  | 0 | $$next_candidate_row->[$index] ge $value ? 0 : -1; | 
| 544 | 8 |  |  |  |  | 46 | }; | 
| 545 |  |  |  |  |  |  | } elsif ($operator eq 'true') { | 
| 546 |  |  |  |  |  |  | return sub { | 
| 547 | 8 | 50 |  | 8 |  | 17 | $$next_candidate_row->[$index] ? 0 : -1; | 
| 548 | 4 |  |  |  |  | 29 | }; | 
| 549 |  |  |  |  |  |  | } elsif ($operator eq 'false') { | 
| 550 |  |  |  |  |  |  | return sub { | 
| 551 | 2 | 50 |  | 2 |  | 6 | $$next_candidate_row->[$index] ? -1 : 0; | 
| 552 | 1 |  |  |  |  | 5 | }; | 
| 553 |  |  |  |  |  |  | } elsif ($operator eq '!=' or $operator eq 'ne') { | 
| 554 |  |  |  |  |  |  | return sub { | 
| 555 | 0 | 0 |  | 0 |  | 0 | $$next_candidate_row->[$index] ne $value ? 0 : -1; | 
| 556 |  |  |  |  |  |  | } | 
| 557 | 0 |  |  |  |  | 0 | } | 
| 558 |  |  |  |  |  |  | } | 
| 559 |  |  |  |  |  |  | } | 
| 560 |  |  |  |  |  |  |  | 
| 561 |  |  |  |  |  |  | my $iterator_serial = 0; | 
| 562 |  |  |  |  |  |  | sub create_iterator_closure_for_rule { | 
| 563 | 372 |  |  | 372 | 1 | 381 | my($self,$rule) = @_; | 
| 564 |  |  |  |  |  |  |  | 
| 565 | 372 |  |  |  |  | 817 | my $class_name = $rule->subject_class_name; | 
| 566 | 372 |  |  |  |  | 1074 | my $class_meta = $class_name->__meta__; | 
| 567 | 372 |  |  |  |  | 707 | my $rule_template = $rule->template; | 
| 568 |  |  |  |  |  |  |  | 
| 569 | 372 |  |  |  |  | 1293 | my $csv_column_order_names = $self->column_order; | 
| 570 | 372 |  |  |  |  | 1280 | my $csv_column_count = scalar @$csv_column_order_names; | 
| 571 |  |  |  |  |  |  |  | 
| 572 | 372 |  |  |  |  | 1083 | my $operators_for_properties = $rule_template->operators_for_properties(); | 
| 573 | 372 |  |  |  |  | 814 | my $values_for_properties = $rule->legacy_params_hash; | 
| 574 | 372 |  |  |  |  | 1053 | foreach ( values %$values_for_properties ) { | 
| 575 | 1506 | 50 | 66 |  |  | 2826 | if (ref eq 'HASH' and exists $_->{'value'}) { | 
| 576 | 188 |  |  |  |  | 339 | $_ = $_->{'value'}; | 
| 577 |  |  |  |  |  |  | } | 
| 578 |  |  |  |  |  |  | } | 
| 579 |  |  |  |  |  |  |  | 
| 580 | 372 |  |  |  |  | 968 | my $sort_order_names = $self->sort_order; | 
| 581 | 372 |  |  |  |  | 1069 | my %sort_column_names = map { $_ => 1 } @$sort_order_names; | 
|  | 371 |  |  |  |  | 953 |  | 
| 582 | 372 |  |  |  |  | 525 | my @non_sort_column_names = grep { ! exists($sort_column_names{$_}) } @$csv_column_order_names; | 
|  | 1096 |  |  |  |  | 1712 |  | 
| 583 |  |  |  |  |  |  |  | 
| 584 | 372 |  |  |  |  | 397 | my %column_name_to_index_map; | 
| 585 | 372 |  |  |  |  | 882 | for (my $i = 0; $i < @$csv_column_order_names; $i++) { | 
| 586 | 1096 |  |  |  |  | 1957 | $column_name_to_index_map{$csv_column_order_names->[$i]} = $i; | 
| 587 |  |  |  |  |  |  | } | 
| 588 |  |  |  |  |  |  |  | 
| 589 |  |  |  |  |  |  | # Index in the split-file-data for each sorted column in order | 
| 590 | 372 |  |  |  |  | 460 | my @sort_order_column_indexes = map { $column_name_to_index_map{$_} } @$sort_order_names; | 
|  | 371 |  |  |  |  | 769 |  | 
| 591 |  |  |  |  |  |  |  | 
| 592 | 372 |  |  |  |  | 406 | my(%property_meta_for_column_name); | 
| 593 | 372 |  |  |  |  | 489 | foreach my $column_name ( @$csv_column_order_names ) { | 
| 594 | 1096 |  |  |  |  | 3445 | my $prop = UR::Object::Property->get(class_name => $class_name, column_name => $column_name); | 
| 595 | 1096 |  |  |  |  | 1007 | our %WARNED_ABOUT_COLUMN; | 
| 596 | 1096 | 50 | 33 |  |  | 2348 | unless ( $prop or $WARNED_ABOUT_COLUMN{$class_name . '::' . $column_name}++) { | 
| 597 | 0 |  |  |  |  | 0 | $self->warning_message("Couldn't find a property in class $class_name that goes with column $column_name"); | 
| 598 | 0 |  |  |  |  | 0 | next; | 
| 599 |  |  |  |  |  |  | } | 
| 600 | 1096 |  |  |  |  | 1906 | $property_meta_for_column_name{$column_name} = $prop; | 
| 601 |  |  |  |  |  |  | } | 
| 602 |  |  |  |  |  |  |  | 
| 603 | 372 |  |  |  |  | 398 | my @rule_columns_in_order;  # The order we should perform rule matches on - value is the index in @next_file_row to test | 
| 604 |  |  |  |  |  |  | my @comparison_for_column;  # closures to call to perform the match - same order as @rule_columns_in_order | 
| 605 | 372 |  |  |  |  | 393 | my $last_sort_column_in_rule = -1; # Last index in @rule_columns_in_order that applies when trying "the shortcut" | 
| 606 | 372 |  |  |  |  | 348 | my $looking_for_sort_columns = 1; | 
| 607 |  |  |  |  |  |  |  | 
| 608 | 372 |  |  |  |  | 405 | my $next_candidate_row;  # This will be filled in by the closure below | 
| 609 | 372 |  |  |  |  | 525 | foreach my $column_name ( @$sort_order_names, @non_sort_column_names ) { | 
| 610 | 1096 |  |  |  |  | 1215 | my $property_meta = $property_meta_for_column_name{$column_name}; | 
| 611 | 1096 | 50 |  |  |  | 1596 | unless ($property_meta) { | 
| 612 | 0 |  |  |  |  | 0 | Carp::croak("Class $class_name has no property connected to column named '$column_name' in data source ".$self->id); | 
| 613 |  |  |  |  |  |  | } | 
| 614 | 1096 |  |  |  |  | 2240 | my $property_name = $property_meta->property_name; | 
| 615 | 1096 | 100 | 66 |  |  | 2660 | if (! $operators_for_properties->{$property_name}) { | 
|  |  | 100 |  |  |  |  |  | 
| 616 | 737 |  |  |  |  | 623 | $looking_for_sort_columns = 0; | 
| 617 | 737 |  |  |  |  | 996 | next; | 
| 618 |  |  |  |  |  |  | } elsif ($looking_for_sort_columns && $sort_column_names{$column_name}) { | 
| 619 | 129 |  |  |  |  | 165 | $last_sort_column_in_rule++; | 
| 620 |  |  |  |  |  |  | } else { | 
| 621 |  |  |  |  |  |  | # There's been a gap in the ID column list in the rule, stop looking for | 
| 622 |  |  |  |  |  |  | # further ID columns | 
| 623 | 230 |  |  |  |  | 235 | $looking_for_sort_columns = 0; | 
| 624 |  |  |  |  |  |  | } | 
| 625 |  |  |  |  |  |  |  | 
| 626 | 359 |  |  |  |  | 590 | push @rule_columns_in_order, $column_name_to_index_map{$column_name}; | 
| 627 |  |  |  |  |  |  |  | 
| 628 | 359 |  |  |  |  | 482 | my $operator = $operators_for_properties->{$property_name}; | 
| 629 | 359 |  |  |  |  | 436 | my $rule_value = $values_for_properties->{$property_name}; | 
| 630 |  |  |  |  |  |  |  | 
| 631 |  |  |  |  |  |  | my $comparison_function = $self->_comparator_for_operator_and_property($property_meta, | 
| 632 |  |  |  |  |  |  | \$next_candidate_row, | 
| 633 | 359 |  |  |  |  | 1275 | $column_name_to_index_map{$column_name}, | 
| 634 |  |  |  |  |  |  | $operator, | 
| 635 |  |  |  |  |  |  | $rule_value); | 
| 636 | 359 | 50 |  |  |  | 898 | unless ($comparison_function) { | 
| 637 | 0 |  |  |  |  | 0 | Carp::croak("Unknown operator '$operator' in file data source filter"); | 
| 638 |  |  |  |  |  |  | } | 
| 639 | 359 |  |  |  |  | 583 | push @comparison_for_column, $comparison_function; | 
| 640 |  |  |  |  |  |  | } | 
| 641 |  |  |  |  |  |  |  | 
| 642 | 372 |  |  |  |  | 942 | my $split_regex = $self->_regex(); | 
| 643 |  |  |  |  |  |  |  | 
| 644 |  |  |  |  |  |  | # FIXME - another performance boost might be to do some kind of binary search | 
| 645 |  |  |  |  |  |  | # against the file to set the initial/next position? | 
| 646 | 372 |  |  |  |  | 385 | my $file_pos = 0; | 
| 647 |  |  |  |  |  |  |  | 
| 648 |  |  |  |  |  |  | # search in the offset cache for something helpful | 
| 649 | 372 |  |  |  |  | 761 | my $offset_cache = $self->_offset_cache(); | 
| 650 |  |  |  |  |  |  |  | 
| 651 |  |  |  |  |  |  | # If the rule doesn't touch the sorted columns, then we can't use the offset cache for help :( | 
| 652 | 372 | 100 |  |  |  | 820 | if ($last_sort_column_in_rule >= 0) { | 
| 653 |  |  |  |  |  |  | # Starting at index 1 because we're interested in the file and seek data, not if it's in use | 
| 654 |  |  |  |  |  |  | # offset 0 is the in-use flag, offset 1 is a ref to the file data and offset 2 is the file seek pos | 
| 655 |  |  |  |  |  |  | SEARCH_CACHE: | 
| 656 | 129 |  |  |  |  | 378 | for (my $i = 1; $i < @$offset_cache; $i+=3) { | 
| 657 | 3343 | 100 | 66 |  |  | 7703 | next unless (defined($offset_cache->[$i]) && defined($offset_cache->[$i+1])); | 
| 658 |  |  |  |  |  |  |  | 
| 659 | 1205 |  |  |  |  | 993 | $next_candidate_row = $offset_cache->[$i]; | 
| 660 | 1205 |  |  |  |  | 744 | my $matched = 0; | 
| 661 |  |  |  |  |  |  | COMPARE_VALUES: | 
| 662 | 1205 |  |  |  |  | 1551 | for (my $c = 0; $c <= $last_sort_column_in_rule; $c++) { | 
| 663 | 1205 |  |  |  |  | 1138 | my $comparison = $comparison_for_column[$c]->(); | 
| 664 |  |  |  |  |  |  |  | 
| 665 | 1205 | 100 |  |  |  | 2609 | next SEARCH_CACHE if $comparison > 0; | 
| 666 | 15 | 100 |  |  |  | 38 | if ($comparison < 0) { | 
| 667 | 10 |  |  |  |  | 12 | $matched = 1; | 
| 668 | 10 |  |  |  |  | 16 | last COMPARE_VALUES; | 
| 669 |  |  |  |  |  |  | } | 
| 670 |  |  |  |  |  |  | } | 
| 671 |  |  |  |  |  |  | # If we made it this far, then the file data in this slot is earlier in the file | 
| 672 |  |  |  |  |  |  | # than the data we're looking for.  So, if the seek pos data is later than what | 
| 673 |  |  |  |  |  |  | # we've found yet, use it instead | 
| 674 | 15 | 100 | 66 |  |  | 74 | if ($matched and $offset_cache->[$i+1] > $file_pos) { | 
| 675 | 10 |  |  |  |  | 26 | $file_pos = $offset_cache->[$i+1]; | 
| 676 |  |  |  |  |  |  | } | 
| 677 |  |  |  |  |  |  | } | 
| 678 |  |  |  |  |  |  | } | 
| 679 |  |  |  |  |  |  |  | 
| 680 | 372 |  |  |  |  | 388 | my($monitor_start_time,$monitor_printed_first_fetch); | 
| 681 | 372 | 50 |  |  |  | 951 | if ($ENV{'UR_DBI_MONITOR_SQL'}) { | 
| 682 | 0 |  |  |  |  | 0 | $monitor_start_time = Time::HiRes::time(); | 
| 683 | 0 |  |  |  |  | 0 | $monitor_printed_first_fetch = 0; | 
| 684 | 0 |  |  |  |  | 0 | my @filters_list; | 
| 685 | 0 |  |  |  |  | 0 | for (my $i = 0; $i < @rule_columns_in_order; $i++) { | 
| 686 | 0 |  |  |  |  | 0 | my $column = $rule_columns_in_order[$i]; | 
| 687 | 0 |  |  |  |  | 0 | my $column_name = $csv_column_order_names->[$column]; | 
| 688 | 0 | 0 |  |  |  | 0 | my $is_sorted = $i <= $last_sort_column_in_rule ? ' (sorted)' : ''; | 
| 689 | 0 |  | 0 |  |  | 0 | my $operator = $operators_for_properties->{$column_name} || '='; | 
| 690 | 0 |  |  |  |  | 0 | my $rule_value = $values_for_properties->{$column_name}; | 
| 691 | 0 | 0 |  |  |  | 0 | if (ref $rule_value eq 'ARRAY') { | 
| 692 | 0 |  |  |  |  | 0 | $rule_value = '[' . join(',', @$rule_value) . ']'; | 
| 693 |  |  |  |  |  |  | } | 
| 694 | 0 |  |  |  |  | 0 | my $filter_string = $column_name . " $operator $rule_value" . $is_sorted; | 
| 695 | 0 |  |  |  |  | 0 | push @filters_list, $filter_string; | 
| 696 |  |  |  |  |  |  | } | 
| 697 | 0 |  |  |  |  | 0 | my $filter_list = join("\n\t", @filters_list); | 
| 698 | 0 |  |  |  |  | 0 | UR::DBI->sql_fh->printf("\nFILE: %s\nFILTERS %s\n\n", $self->server, $filter_list); | 
| 699 |  |  |  |  |  |  | } | 
| 700 |  |  |  |  |  |  |  | 
| 701 | 372 |  | 100 |  |  | 1001 | $self->{'_last_read_serial'} ||= ''; | 
| 702 |  |  |  |  |  |  |  | 
| 703 | 372 |  |  |  |  | 1046 | my $record_separator = $self->record_separator; | 
| 704 | 372 |  |  |  |  | 788 | my $cache_slot = $self->_allocate_offset_cache_slot(); | 
| 705 | 372 |  |  |  |  | 389 | my $cache_insert_counter = 100;  # a "breadcrumb" will be left in the offset cache after this many lines are read | 
| 706 |  |  |  |  |  |  |  | 
| 707 | 372 |  |  |  |  | 311 | my $lines_read = 0; | 
| 708 | 372 |  |  |  |  | 272 | my $printed_first_match = 0; | 
| 709 | 372 |  |  |  |  | 343 | my $lines_matched = 0; | 
| 710 |  |  |  |  |  |  |  | 
| 711 | 372 |  |  |  |  | 338 | my $fh;  # File handle we'll be reading from | 
| 712 | 372 |  |  |  |  | 344 | my $this_iterator_serial = $iterator_serial++; | 
| 713 |  |  |  |  |  |  | my $iterator = sub { | 
| 714 |  |  |  |  |  |  |  | 
| 715 | 594 | 100 |  | 594 |  | 1065 | unless (ref($fh)) { | 
| 716 | 372 |  |  |  |  | 1129 | $fh = $self->get_default_handle(); | 
| 717 |  |  |  |  |  |  | # Lock the file for reading...  For more fine-grained locking we could move this to | 
| 718 |  |  |  |  |  |  | # after READ_LINE_FROM_FILE: but that would slow down read operations a bit.  If | 
| 719 |  |  |  |  |  |  | # there ends up being a problem with lock contention, go ahead and move it before $line = <$fh>; | 
| 720 |  |  |  |  |  |  | #flock($fh,LOCK_SH); | 
| 721 |  |  |  |  |  |  | } | 
| 722 |  |  |  |  |  |  |  | 
| 723 | 594 | 50 | 33 |  |  | 1623 | if ($monitor_start_time && ! $monitor_printed_first_fetch) { | 
| 724 | 0 |  |  |  |  | 0 | UR::DBI->sql_fh->printf("FILE: FIRST FETCH TIME: %.4f s\n", Time::HiRes::time() - $monitor_start_time); | 
| 725 | 0 |  |  |  |  | 0 | $monitor_printed_first_fetch = 1; | 
| 726 |  |  |  |  |  |  | } | 
| 727 |  |  |  |  |  |  |  | 
| 728 | 594 | 100 |  |  |  | 1779 | if ($self->{'_last_read_serial'} ne $this_iterator_serial) { | 
| 729 | 374 | 50 |  |  |  | 929 | UR::DBI->sql_fh->printf("FILE: Resetting file position to $file_pos\n") if $ENV{'UR_DBI_MONITOR_SQL'}; | 
| 730 |  |  |  |  |  |  | # The last read was from a different request, reset the position | 
| 731 | 374 |  |  |  |  | 1510 | $fh->seek($file_pos,0); | 
| 732 | 374 | 100 |  |  |  | 3121 | if ($file_pos == 0) { | 
| 733 | 365 |  |  |  |  | 1001 | my $skip = $self->skip_first_line; | 
| 734 | 365 |  |  |  |  | 1004 | while ($skip-- > 0) { | 
| 735 | 0 |  |  |  |  | 0 | scalar(<$fh>); | 
| 736 |  |  |  |  |  |  | } | 
| 737 |  |  |  |  |  |  | } | 
| 738 | 374 |  |  |  |  | 792 | $file_pos = $self->_file_position(); | 
| 739 |  |  |  |  |  |  |  | 
| 740 | 374 |  |  |  |  | 2025 | $self->{'_last_read_serial'} = $this_iterator_serial; | 
| 741 |  |  |  |  |  |  | } | 
| 742 |  |  |  |  |  |  |  | 
| 743 | 594 |  |  |  |  | 1777 | local $/;   # Make sure some wise guy hasn't changed this out from under us | 
| 744 | 594 |  |  |  |  | 846 | $/ = $record_separator; | 
| 745 |  |  |  |  |  |  |  | 
| 746 | 594 |  |  |  |  | 737 | my $line; | 
| 747 |  |  |  |  |  |  | READ_LINE_FROM_FILE: | 
| 748 | 594 |  |  |  |  | 1307 | until($line) { | 
| 749 |  |  |  |  |  |  |  | 
| 750 |  |  |  |  |  |  | # Hack for OSX 10.5. | 
| 751 |  |  |  |  |  |  | # At EOF, the getline below will return undef.  Most builds of Perl | 
| 752 |  |  |  |  |  |  | # will also set $! to 0 at EOF so you can distinguish between the cases | 
| 753 |  |  |  |  |  |  | # of EOF (which may have actually happened a while ago because of buffering) | 
| 754 |  |  |  |  |  |  | # and an actual read error.  OSX 10.5's Perl does not, and so $! | 
| 755 |  |  |  |  |  |  | # retains whatever value it had after the last failed syscall, likely | 
| 756 |  |  |  |  |  |  | # a stat() while looking for a Perl module.  This should have no effect | 
| 757 |  |  |  |  |  |  | # other platforms where you can't trust $! at arbitrary points in time | 
| 758 |  |  |  |  |  |  | # anyway | 
| 759 | 1095 |  |  |  |  | 1696 | $! = 0; | 
| 760 | 1095 |  |  |  |  | 8610 | $line = <$fh>; | 
| 761 |  |  |  |  |  |  |  | 
| 762 | 1095 | 100 |  |  |  | 1844 | unless (defined $line) { | 
| 763 | 249 | 50 |  |  |  | 707 | if ($!) { | 
| 764 | 0 | 0 | 0 |  |  | 0 | redo READ_LINE_FROM_FILE if ($! == EAGAIN or $! == EINTR); | 
| 765 | 0 |  |  |  |  | 0 | my $pathname = $self->server(); | 
| 766 | 0 |  |  |  |  | 0 | Carp::confess("getline() failed for DataSource $self pathname $pathname boolexpr $rule: $!"); | 
| 767 |  |  |  |  |  |  | } | 
| 768 |  |  |  |  |  |  |  | 
| 769 |  |  |  |  |  |  | # at EOF.  Close up shop and return | 
| 770 |  |  |  |  |  |  | #flock($fh,LOCK_UN); | 
| 771 | 249 |  |  |  |  | 260 | $fh = undef; | 
| 772 |  |  |  |  |  |  |  | 
| 773 | 249 | 50 |  |  |  | 486 | if ($monitor_start_time) { | 
| 774 | 0 |  |  |  |  | 0 | UR::DBI->sql_fh->printf("FILE: at EOF\nFILE: $lines_read lines read for this request.  $lines_matched matches\nFILE: TOTAL EXECUTE-FETCH TIME: %.4f s\n", Time::HiRes::time() - $monitor_start_time); | 
| 775 |  |  |  |  |  |  | } | 
| 776 |  |  |  |  |  |  |  | 
| 777 | 249 |  |  |  |  | 892 | return; | 
| 778 |  |  |  |  |  |  | } | 
| 779 |  |  |  |  |  |  |  | 
| 780 | 846 |  |  |  |  | 720 | $lines_read++; | 
| 781 | 846 |  |  |  |  | 810 | my $last_read_size = length($line); | 
| 782 | 846 |  |  |  |  | 974 | chomp $line; | 
| 783 |  |  |  |  |  |  | # FIXME - to support record-oriented files, we need some replacement for this... | 
| 784 | 846 |  |  |  |  | 4124 | $next_candidate_row = [ split($split_regex, $line, $csv_column_count) ]; | 
| 785 | 846 |  |  |  |  | 1321 | $#{$a} = $csv_column_count-1; | 
|  | 846 |  |  |  |  | 1209 |  | 
| 786 |  |  |  |  |  |  |  | 
| 787 | 846 |  |  |  |  | 1339 | $file_pos = $self->_file_position(); | 
| 788 | 846 |  |  |  |  | 3095 | my $file_pos_before_read = $file_pos - $last_read_size; | 
| 789 |  |  |  |  |  |  |  | 
| 790 |  |  |  |  |  |  | # Every so many lines read, leave a breadcrumb about what we've seen | 
| 791 | 846 | 50 |  |  |  | 1404 | unless ($lines_read % $cache_insert_counter) { | 
| 792 | 0 |  |  |  |  | 0 | $offset_cache->[$cache_slot+1] = $next_candidate_row; | 
| 793 | 0 |  |  |  |  | 0 | $offset_cache->[$cache_slot+2] = $file_pos_before_read; | 
| 794 | 0 |  |  |  |  | 0 | $self->_free_offset_cache_slot($cache_slot); | 
| 795 |  |  |  |  |  |  |  | 
| 796 |  |  |  |  |  |  | # get a new slot | 
| 797 | 0 |  |  |  |  | 0 | $cache_slot = $self->_allocate_offset_cache_slot(); | 
| 798 | 0 |  |  |  |  | 0 | $offset_cache->[$cache_slot+1] = $next_candidate_row; | 
| 799 | 0 |  |  |  |  | 0 | $offset_cache->[$cache_slot+2] = $file_pos_before_read; | 
| 800 |  |  |  |  |  |  |  | 
| 801 | 0 |  |  |  |  | 0 | $cache_insert_counter <<= 2;  # Double the insert counter | 
| 802 |  |  |  |  |  |  | } | 
| 803 |  |  |  |  |  |  |  | 
| 804 | 846 |  |  |  |  | 1554 | for (my $i = 0; $i < @rule_columns_in_order; $i++) { | 
| 805 | 801 |  |  |  |  | 1198 | my $comparison = $comparison_for_column[$i]->(); | 
| 806 |  |  |  |  |  |  |  | 
| 807 | 801 | 100 | 100 |  |  | 2580 | if ($comparison > 0 and $i <= $last_sort_column_in_rule) { | 
|  |  | 100 |  |  |  |  |  | 
| 808 |  |  |  |  |  |  | # We've gone past the last thing that could possibly match | 
| 809 |  |  |  |  |  |  |  | 
| 810 | 121 | 50 |  |  |  | 239 | if ($monitor_start_time) { | 
| 811 | 0 |  |  |  |  | 0 | UR::DBI->sql_fh->printf("FILE: $lines_read lines read for this request.  $lines_matched matches\nFILE: TOTAL EXECUTE-FETCH TIME: %.4f s\n", Time::HiRes::time() - $monitor_start_time); | 
| 812 |  |  |  |  |  |  | } | 
| 813 |  |  |  |  |  |  |  | 
| 814 |  |  |  |  |  |  | #flock($fh,LOCK_UN); | 
| 815 |  |  |  |  |  |  |  | 
| 816 |  |  |  |  |  |  | # Save the info from the last row we read | 
| 817 | 121 |  |  |  |  | 224 | $offset_cache->[$cache_slot+1] = $next_candidate_row; | 
| 818 | 121 |  |  |  |  | 248 | $offset_cache->[$cache_slot+2] = $file_pos_before_read; | 
| 819 | 121 |  |  |  |  | 489 | return; | 
| 820 |  |  |  |  |  |  |  | 
| 821 |  |  |  |  |  |  | } elsif ($comparison) { | 
| 822 |  |  |  |  |  |  | # comparison didn't match, read another line from the file | 
| 823 | 501 |  |  |  |  | 710 | redo READ_LINE_FROM_FILE; | 
| 824 |  |  |  |  |  |  | } | 
| 825 |  |  |  |  |  |  |  | 
| 826 |  |  |  |  |  |  | # That comparison worked... stay in the for() loop for other comparisons | 
| 827 |  |  |  |  |  |  | } | 
| 828 |  |  |  |  |  |  | # All the comparisons return '0', meaning they passed | 
| 829 |  |  |  |  |  |  |  | 
| 830 |  |  |  |  |  |  | # Now see if the offset cache file data is different than the row we just read | 
| 831 |  |  |  |  |  |  | COMPARE_TO_CACHE: | 
| 832 | 224 |  |  |  |  | 425 | foreach my $column ( @sort_order_column_indexes) { | 
| 833 | 19 |  |  | 19 |  | 103 | no warnings 'uninitialized'; | 
|  | 19 |  |  |  |  | 26 |  | 
|  | 19 |  |  |  |  | 26062 |  | 
| 834 | 223 | 50 |  |  |  | 719 | if ($offset_cache->[$cache_slot+1]->[$column] ne $next_candidate_row->[$column]) { | 
| 835 |  |  |  |  |  |  | # They're different.  Update the offset cache data | 
| 836 | 223 |  |  |  |  | 304 | $offset_cache->[$cache_slot+1] = $next_candidate_row; | 
| 837 | 223 |  |  |  |  | 315 | $offset_cache->[$cache_slot+2] = $file_pos_before_read; | 
| 838 | 223 |  |  |  |  | 309 | last COMPARE_TO_CACHE; | 
| 839 |  |  |  |  |  |  | } | 
| 840 |  |  |  |  |  |  | } | 
| 841 |  |  |  |  |  |  |  | 
| 842 | 224 | 50 | 33 |  |  | 1031 | if (! $printed_first_match and $monitor_start_time) { | 
| 843 | 0 |  |  |  |  | 0 | UR::DBI->sql_fh->printf("FILE: First match after reading $lines_read lines\n"); | 
| 844 | 0 |  |  |  |  | 0 | $printed_first_match=1; | 
| 845 |  |  |  |  |  |  | } | 
| 846 | 224 |  |  |  |  | 229 | $lines_matched++; | 
| 847 |  |  |  |  |  |  |  | 
| 848 | 224 |  |  |  |  | 910 | return $next_candidate_row; | 
| 849 |  |  |  |  |  |  | } | 
| 850 | 372 |  |  |  |  | 2408 | }; # end sub $iterator | 
| 851 |  |  |  |  |  |  |  | 
| 852 | 372 |  |  |  |  | 2380 | Sub::Name::subname('UR::DataSource::File::__datasource_iterator(closure)__', $iterator); | 
| 853 |  |  |  |  |  |  |  | 
| 854 | 372 |  | 100 |  |  | 1026 | my $count = $self->_open_query_count() || 0; | 
| 855 | 372 |  |  |  |  | 890 | $self->_open_query_count($count+1); | 
| 856 | 372 |  |  |  |  | 1171 | bless $iterator, 'UR::DataSource::File::Tracker'; | 
| 857 | 372 |  |  |  |  | 1017 | $iterator_data_source{$iterator} = $self; | 
| 858 | 372 |  |  |  |  | 646 | $iterator_cache_slot_refs{$iterator} = \$cache_slot; | 
| 859 |  |  |  |  |  |  |  | 
| 860 | 372 |  |  |  |  | 2815 | return $iterator; | 
| 861 |  |  |  |  |  |  | } | 
| 862 |  |  |  |  |  |  |  | 
| 863 |  |  |  |  |  |  |  | 
| 864 |  |  |  |  |  |  | sub UR::DataSource::File::Tracker::DESTROY { | 
| 865 | 372 |  |  | 372 |  | 348 | my $iterator = shift; | 
| 866 | 372 |  |  |  |  | 893 | my $ds = delete $iterator_data_source{$iterator}; | 
| 867 | 372 | 50 |  |  |  | 781 | return unless $ds;   # The data source may have gone out of scope first during global destruction | 
| 868 |  |  |  |  |  |  |  | 
| 869 | 372 |  |  |  |  | 672 | my $cache_slot_ref = delete $iterator_cache_slot_refs{$iterator}; | 
| 870 | 372 | 50 | 33 |  |  | 1480 | if (defined($cache_slot_ref) and defined($$cache_slot_ref)) { | 
| 871 |  |  |  |  |  |  | # Mark this slot unused | 
| 872 |  |  |  |  |  |  | #print STDERR "Freeing cache slot $cache_slot\n"; | 
| 873 |  |  |  |  |  |  | #$ds->_offset_cache->[$$cache_slot_ref] = 0; | 
| 874 | 372 |  |  |  |  | 1065 | $ds->_free_offset_cache_slot($$cache_slot_ref); | 
| 875 |  |  |  |  |  |  | } | 
| 876 |  |  |  |  |  |  |  | 
| 877 | 372 |  |  |  |  | 938 | my $count = $ds->_open_query_count(); | 
| 878 | 372 |  |  |  |  | 783 | $ds->_open_query_count(--$count); | 
| 879 |  |  |  |  |  |  |  | 
| 880 | 372 | 100 |  |  |  | 938 | return unless ($ds->quick_disconnect); | 
| 881 | 368 | 100 | 66 |  |  | 1469 | if ($count == 0 && $ds->has_default_handle) { | 
| 882 |  |  |  |  |  |  | # All open queries have supposedly been fulfilled.  Close the | 
| 883 |  |  |  |  |  |  | # file handle and undef it so get_default_handle() will re-open if necessary | 
| 884 | 366 |  |  |  |  | 692 | my $fh = $ds->get_default_handle; | 
| 885 |  |  |  |  |  |  |  | 
| 886 | 366 | 50 |  |  |  | 844 | UR::DBI->sql_fh->printf("FILE: CLOSING fileno ".fileno($fh)."\n") if ($ENV{'UR_DBI_MONITOR_SQL'}); | 
| 887 |  |  |  |  |  |  | #flock($fh,LOCK_UN); | 
| 888 | 366 |  |  |  |  | 1114 | $fh->close(); | 
| 889 | 366 |  |  |  |  | 13968 | $ds->__invalidate_get_default_handle__; | 
| 890 |  |  |  |  |  |  | } | 
| 891 |  |  |  |  |  |  | } | 
| 892 |  |  |  |  |  |  |  | 
| 893 |  |  |  |  |  |  | # Names of creation params that we should force to be listrefs | 
| 894 |  |  |  |  |  |  | our %creation_param_is_list = map { $_ => 1 } qw( column_order file_list sort_order constant_values ); | 
| 895 |  |  |  |  |  |  | sub create_from_inline_class_data { | 
| 896 | 3 |  |  | 3 | 1 | 5 | my($class, $class_data, $ds_data) = @_; | 
| 897 |  |  |  |  |  |  |  | 
| 898 |  |  |  |  |  |  | # User didn't specify columns in the file.  Assumme every property is a column, and in the same order | 
| 899 | 3 | 100 |  |  |  | 11 | unless (exists $ds_data->{'column_order'}) { | 
| 900 | 1 |  |  |  |  | 299 | Carp::croak "data_source has no column_order specified"; | 
| 901 |  |  |  |  |  |  | } | 
| 902 |  |  |  |  |  |  |  | 
| 903 | 2 |  | 33 |  |  | 14 | $ds_data->{'server'} ||= $ds_data->{'path'} || $ds_data->{'file'}; | 
|  |  |  | 66 |  |  |  |  | 
| 904 |  |  |  |  |  |  |  | 
| 905 | 2 |  |  |  |  | 2 | my %ds_creation_params; | 
| 906 | 2 |  |  |  |  | 5 | foreach my $param ( qw( delimiter record_separator column_order skip_first_line server file_list sort_order constant_values ) ) { | 
| 907 | 16 | 100 |  |  |  | 27 | if (exists $ds_data->{$param}) { | 
| 908 | 7 | 50 | 66 |  |  | 28 | if ($creation_param_is_list{$param} and ref($ds_data->{$param}) ne 'ARRAY') { | 
| 909 | 0 |  |  |  |  | 0 | $ds_creation_params{$param} = \( $ds_data->{$param} ); | 
| 910 |  |  |  |  |  |  | } else { | 
| 911 | 7 |  |  |  |  | 10 | $ds_creation_params{$param} = $ds_data->{$param}; | 
| 912 |  |  |  |  |  |  | } | 
| 913 |  |  |  |  |  |  | } | 
| 914 |  |  |  |  |  |  | } | 
| 915 |  |  |  |  |  |  |  | 
| 916 | 2 |  |  |  |  | 15 | my($namespace, $class_name) = ($class_data->{'class_name'} =~ m/^(\w+?)::(.*)/); | 
| 917 | 2 |  |  |  |  | 5 | my $ds_id = "${namespace}::DataSource::${class_name}"; | 
| 918 | 2 |  |  |  |  | 4 | my $ds_type = delete $ds_data->{'is'}; | 
| 919 | 2 |  |  |  |  | 13 | my $ds = $ds_type->create( %ds_creation_params, id => $ds_id ); | 
| 920 | 2 |  |  |  |  | 9 | return $ds; | 
| 921 |  |  |  |  |  |  | } | 
| 922 |  |  |  |  |  |  |  | 
| 923 |  |  |  |  |  |  |  | 
| 924 |  |  |  |  |  |  | # The string used to join fields of a row together | 
| 925 |  |  |  |  |  |  | # | 
| 926 |  |  |  |  |  |  | # Since the 'delimiter' property is interpreted as a regex in the reading | 
| 927 |  |  |  |  |  |  | # code, we'll try to be smart about making a real string from that. | 
| 928 |  |  |  |  |  |  | # | 
| 929 |  |  |  |  |  |  | # subclasses can override this to provide a different implementation | 
| 930 |  |  |  |  |  |  | sub join_pattern { | 
| 931 | 4 |  |  | 4 | 0 | 6 | my $self = shift; | 
| 932 |  |  |  |  |  |  |  | 
| 933 | 4 |  |  |  |  | 11 | my $join_pattern = $self->delimiter; | 
| 934 |  |  |  |  |  |  |  | 
| 935 |  |  |  |  |  |  | # make some common substitutions... | 
| 936 | 4 | 50 |  |  |  | 11 | if ($join_pattern eq '\s*,\s*') { | 
| 937 |  |  |  |  |  |  | # The default... | 
| 938 | 0 |  |  |  |  | 0 | return ', '; | 
| 939 |  |  |  |  |  |  | } | 
| 940 |  |  |  |  |  |  |  | 
| 941 | 4 |  |  |  |  | 9 | $join_pattern =~ s/\\s*//g;  # Turn 0-or-more whitespaces to nothing | 
| 942 | 4 |  |  |  |  | 4 | $join_pattern =~ s/\\t/\t/;  # tab | 
| 943 | 4 |  |  |  |  | 6 | $join_pattern =~ s/\\s/ /;   # whitespace | 
| 944 |  |  |  |  |  |  |  | 
| 945 | 4 |  |  |  |  | 5 | return $join_pattern; | 
| 946 |  |  |  |  |  |  | } | 
| 947 |  |  |  |  |  |  |  | 
| 948 |  |  |  |  |  |  |  | 
| 949 |  |  |  |  |  |  |  | 
| 950 |  |  |  |  |  |  | sub _sync_database { | 
| 951 | 4 |  |  | 4 |  | 5 | my $self = shift; | 
| 952 | 4 |  |  |  |  | 8 | my %params = @_; | 
| 953 |  |  |  |  |  |  |  | 
| 954 | 4 | 50 |  |  |  | 11 | unless (ref($self)) { | 
| 955 | 0 | 0 |  |  |  | 0 | if ($self->isa("UR::Singleton")) { | 
| 956 | 0 |  |  |  |  | 0 | $self = $self->_singleton_object; | 
| 957 |  |  |  |  |  |  | } | 
| 958 |  |  |  |  |  |  | else { | 
| 959 | 0 |  |  |  |  | 0 | die "Called as a class-method on a non-singleton datasource!"; | 
| 960 |  |  |  |  |  |  | } | 
| 961 |  |  |  |  |  |  | } | 
| 962 |  |  |  |  |  |  |  | 
| 963 | 4 |  |  |  |  | 15 | my $read_fh = $self->get_default_handle(); | 
| 964 | 4 | 50 |  |  |  | 9 | unless ($read_fh) { | 
| 965 | 0 |  |  |  |  | 0 | Carp::croak($self->class . ": Can't _sync_database(): Can't open file " . $self->server . " for reading: $!"); | 
| 966 |  |  |  |  |  |  | } | 
| 967 |  |  |  |  |  |  |  | 
| 968 | 4 |  |  |  |  | 10 | my $original_data_file = $self->server; | 
| 969 | 4 |  |  |  |  | 152 | my $original_data_dir  = File::Basename::dirname($original_data_file); | 
| 970 | 4 |  |  |  |  | 7 | my $use_quick_rename; | 
| 971 | 4 | 50 |  |  |  | 46 | unless (-d $original_data_dir){ | 
| 972 | 0 |  |  |  |  | 0 | File::Path::mkpath($original_data_dir); | 
| 973 |  |  |  |  |  |  | } | 
| 974 | 4 | 50 |  |  |  | 29 | if (-w $original_data_dir) { | 
|  |  | 0 |  |  |  |  |  | 
| 975 | 4 |  |  |  |  | 5 | $use_quick_rename = 1;  # We can write to the data dir | 
| 976 |  |  |  |  |  |  | } elsif (! -w $original_data_file) { | 
| 977 | 0 |  |  |  |  | 0 | $self->error_message("Neither the directory nor the file for $original_data_file are writable - cannot sync_database"); | 
| 978 | 0 |  |  |  |  | 0 | return; | 
| 979 |  |  |  |  |  |  | } | 
| 980 |  |  |  |  |  |  |  | 
| 981 |  |  |  |  |  |  |  | 
| 982 | 4 |  |  |  |  | 12 | my $split_regex = $self->_regex(); | 
| 983 | 4 |  |  |  |  | 24 | my $join_pattern = $self->join_pattern; | 
| 984 | 4 |  |  |  |  | 11 | my $record_separator = $self->record_separator; | 
| 985 | 4 |  |  |  |  | 13 | local $/;   # Make sure some wise guy hasn't changed this out from under us | 
| 986 | 4 |  |  |  |  | 6 | $/ = $record_separator; | 
| 987 |  |  |  |  |  |  |  | 
| 988 | 4 |  |  |  |  | 9 | my $csv_column_order_names = $self->column_order; | 
| 989 | 4 |  |  |  |  | 9 | my $csv_column_count = scalar(@$csv_column_order_names); | 
| 990 | 4 |  |  |  |  | 3 | my %column_name_to_index_map; | 
| 991 | 4 |  |  |  |  | 14 | for (my $i = 0; $i < @$csv_column_order_names; $i++) { | 
| 992 | 12 |  |  |  |  | 27 | $column_name_to_index_map{$csv_column_order_names->[$i]} = $i; | 
| 993 |  |  |  |  |  |  | } | 
| 994 |  |  |  |  |  |  |  | 
| 995 | 4 |  |  |  |  | 7 | my $changed_objects = delete $params{changed_objects}; | 
| 996 |  |  |  |  |  |  |  | 
| 997 |  |  |  |  |  |  |  | 
| 998 |  |  |  |  |  |  | # We're going to assumme all the passed-in objects are of the same class *gulp* | 
| 999 | 4 |  |  |  |  | 14 | my $class_name = $changed_objects->[0]->class; | 
| 1000 | 4 |  |  |  |  | 29 | my $class_meta = UR::Object::Type->get(class_name => $class_name); | 
| 1001 | 15 |  |  |  |  | 22 | my %column_name_to_property_meta = map { $_->column_name => $_ } | 
| 1002 | 4 |  |  |  |  | 35 | grep { $_->column_name } | 
|  | 19 |  |  |  |  | 29 |  | 
| 1003 |  |  |  |  |  |  | $class_meta->all_property_metas; | 
| 1004 | 4 |  |  |  |  | 9 | my @property_names_in_column_order; | 
| 1005 | 4 |  |  |  |  | 9 | foreach my $column_name ( @$csv_column_order_names ) { | 
| 1006 | 12 |  |  |  |  | 13 | my $prop_meta = $column_name_to_property_meta{$column_name}; | 
| 1007 | 12 | 50 |  |  |  | 20 | unless ($prop_meta) { | 
| 1008 | 0 |  |  |  |  | 0 | die "Data source " . $self->class . " id " . $self->id . | 
| 1009 |  |  |  |  |  |  | " could not resolve a $class_name property for the data source's column named $column_name"; | 
| 1010 |  |  |  |  |  |  | } | 
| 1011 |  |  |  |  |  |  |  | 
| 1012 | 12 |  |  |  |  | 19 | push @property_names_in_column_order, $prop_meta->property_name; | 
| 1013 |  |  |  |  |  |  | } | 
| 1014 |  |  |  |  |  |  |  | 
| 1015 | 4 |  |  |  |  | 6 | my $insert = []; | 
| 1016 | 4 |  |  |  |  | 6 | my $update = {}; | 
| 1017 | 4 |  |  |  |  | 5 | my $delete = {}; | 
| 1018 | 4 |  |  |  |  | 7 | foreach my $obj ( @$changed_objects ) { | 
| 1019 | 16 | 100 |  |  |  | 50 | if ($obj->isa('UR::Object::Ghost')) { | 
|  |  | 100 |  |  |  |  |  | 
| 1020 |  |  |  |  |  |  | # This should be removed from the file | 
| 1021 | 4 |  |  |  |  | 6 | my $original = $obj->{'db_committed'}; | 
| 1022 | 4 |  |  |  |  | 4 | my $line = join($join_pattern, @{$original}{@property_names_in_column_order}) . $record_separator; | 
|  | 4 |  |  |  |  | 12 |  | 
| 1023 | 4 |  |  |  |  | 7 | $delete->{$line} = $obj; | 
| 1024 |  |  |  |  |  |  |  | 
| 1025 |  |  |  |  |  |  | } elsif ($obj->{'db_committed'}) { | 
| 1026 |  |  |  |  |  |  | # This object is changed since it was read in the file | 
| 1027 | 4 |  |  |  |  | 6 | my $original = $obj->{'db_committed'}; | 
| 1028 | 4 |  |  |  |  | 4 | my $original_line = join($join_pattern, @{$original}{@property_names_in_column_order}) . $record_separator; | 
|  | 4 |  |  |  |  | 13 |  | 
| 1029 | 4 |  |  |  |  | 6 | my $changed_line = join($join_pattern, @{$obj}{@property_names_in_column_order}) . $record_separator; | 
|  | 4 |  |  |  |  | 7 |  | 
| 1030 | 4 |  |  |  |  | 9 | $update->{$original_line} = $changed_line; | 
| 1031 |  |  |  |  |  |  |  | 
| 1032 |  |  |  |  |  |  | } else { | 
| 1033 |  |  |  |  |  |  | # This object is new and should be added to the file | 
| 1034 | 8 |  |  |  |  | 15 | push @$insert, [ @{$obj}{@property_names_in_column_order} ]; | 
|  | 8 |  |  |  |  | 17 |  | 
| 1035 |  |  |  |  |  |  | } | 
| 1036 |  |  |  |  |  |  | } | 
| 1037 |  |  |  |  |  |  |  | 
| 1038 | 4 |  |  |  |  | 11 | my $sort_order_names = $self->sort_order; | 
| 1039 | 4 |  |  |  |  | 14 | foreach my $sort_column_name ( @$sort_order_names ) { | 
| 1040 | 4 | 50 |  |  |  | 12 | unless (exists $column_name_to_index_map{$sort_column_name}) { | 
| 1041 | 0 |  |  |  |  | 0 | Carp::croak("Column name '$sort_column_name' appears in the sort_order list, but not in the column_order list for data source ".$self->id); | 
| 1042 |  |  |  |  |  |  | } | 
| 1043 |  |  |  |  |  |  | } | 
| 1044 | 4 |  |  |  |  | 6 | my $file_is_sorted = scalar(@$sort_order_names); | 
| 1045 | 4 |  |  |  |  | 7 | my %column_sorts_numerically = map { $_->column_name => $_->is_numeric } | 
|  | 15 |  |  |  |  | 28 |  | 
| 1046 |  |  |  |  |  |  | values %column_name_to_property_meta; | 
| 1047 |  |  |  |  |  |  | my $row_sort_sub = sub ($$) { | 
| 1048 | 20 |  |  | 20 |  | 15 | my $comparison; | 
| 1049 |  |  |  |  |  |  |  | 
| 1050 | 20 |  |  |  |  | 18 | foreach my $column_name ( @$sort_order_names ) { | 
| 1051 | 20 |  |  |  |  | 21 | my $i = $column_name_to_index_map{$column_name}; | 
| 1052 | 20 | 50 |  |  |  | 24 | if ($column_sorts_numerically{$column_name}) { | 
| 1053 | 20 |  |  |  |  | 25 | $comparison = $_[0]->[$i] <=> $_[1]->[$i]; | 
| 1054 |  |  |  |  |  |  | } else { | 
| 1055 | 0 |  |  |  |  | 0 | $comparison = $_[0]->[$i] cmp $_[1]->[$i]; | 
| 1056 |  |  |  |  |  |  | } | 
| 1057 | 20 | 50 |  |  |  | 39 | return $comparison if $comparison != 0; | 
| 1058 |  |  |  |  |  |  | } | 
| 1059 | 0 |  |  |  |  | 0 | return 0; | 
| 1060 | 4 |  |  |  |  | 21 | }; | 
| 1061 | 4 | 50 | 33 |  |  | 25 | if ($sort_order_names && $file_is_sorted && scalar(@$insert)) { | 
|  |  |  | 50 |  |  |  |  | 
| 1062 |  |  |  |  |  |  | # the inserted things should be sorted the same way as the file | 
| 1063 | 4 |  |  |  |  | 9 | my @sorted = sort $row_sort_sub @$insert; | 
| 1064 | 4 |  |  |  |  | 6 | $insert = \@sorted; | 
| 1065 |  |  |  |  |  |  | } | 
| 1066 |  |  |  |  |  |  |  | 
| 1067 | 4 |  |  |  |  | 6 | my $write_fh; | 
| 1068 |  |  |  |  |  |  | my $temp_file_name; | 
| 1069 | 4 | 50 |  |  |  | 7 | if ($use_quick_rename) { | 
| 1070 | 4 |  |  |  |  | 28 | $temp_file_name = sprintf("%s/.%d.%d" , $original_data_dir, time(), $$); | 
| 1071 | 4 |  |  |  |  | 21 | $write_fh = IO::File->new($temp_file_name, O_WRONLY|O_CREAT); | 
| 1072 |  |  |  |  |  |  | } else { | 
| 1073 | 0 |  |  |  |  | 0 | $write_fh = File::Temp->new(UNLINK => 1); | 
| 1074 | 0 | 0 |  |  |  | 0 | $temp_file_name = $write_fh->filename if ($write_fh); | 
| 1075 |  |  |  |  |  |  | } | 
| 1076 | 4 | 50 |  |  |  | 504 | unless ($write_fh) { | 
| 1077 | 0 |  |  |  |  | 0 | Carp::croak "Can't create temporary file for writing: $!"; | 
| 1078 |  |  |  |  |  |  | } | 
| 1079 |  |  |  |  |  |  |  | 
| 1080 | 4 |  |  |  |  | 6 | my $monitor_start_time; | 
| 1081 | 4 | 50 |  |  |  | 18 | if ($ENV{'UR_DBI_MONITOR_SQL'}) { | 
| 1082 | 0 |  |  |  |  | 0 | $monitor_start_time = Time::HiRes::time(); | 
| 1083 | 0 |  |  |  |  | 0 | my $time = time(); | 
| 1084 | 0 |  |  |  |  | 0 | UR::DBI->sql_fh->printf("\nFILE: SYNC_DATABASE AT %d [%s].  Started transaction for %s to temp file %s\n", | 
| 1085 |  |  |  |  |  |  | $time, scalar(localtime($time)), $original_data_file, $temp_file_name); | 
| 1086 |  |  |  |  |  |  |  | 
| 1087 |  |  |  |  |  |  | } | 
| 1088 |  |  |  |  |  |  |  | 
| 1089 | 4 | 50 |  |  |  | 29 | unless (flock($read_fh,LOCK_SH)) { | 
| 1090 | 0 | 0 |  |  |  | 0 | unless ($! == EOPNOTSUPP ) { | 
| 1091 | 0 |  |  |  |  | 0 | Carp::croak($self->class(). ": Can't get exclusive lock for file ".$self->server.": $!"); | 
| 1092 |  |  |  |  |  |  | } | 
| 1093 |  |  |  |  |  |  | } | 
| 1094 |  |  |  |  |  |  |  | 
| 1095 |  |  |  |  |  |  | # write headers to the new file | 
| 1096 | 4 |  |  |  |  | 17 | for (my $i = 0; $i < $self->skip_first_line; $i++) { | 
| 1097 | 0 |  |  |  |  | 0 | my $line = <$read_fh>; | 
| 1098 | 0 |  |  |  |  | 0 | $write_fh->print($line); | 
| 1099 |  |  |  |  |  |  | } | 
| 1100 |  |  |  |  |  |  |  | 
| 1101 |  |  |  |  |  |  |  | 
| 1102 | 4 |  |  |  |  | 6 | my $line; | 
| 1103 |  |  |  |  |  |  | READ_A_LINE: | 
| 1104 | 4 |  |  |  |  | 5 | while(1) { | 
| 1105 | 20 | 100 |  |  |  | 26 | unless ($line) { | 
| 1106 | 15 |  |  |  |  | 90 | $line = <$read_fh>; | 
| 1107 | 15 | 100 |  |  |  | 25 | last unless defined $line; | 
| 1108 |  |  |  |  |  |  | } | 
| 1109 |  |  |  |  |  |  |  | 
| 1110 | 16 | 100 | 50 |  |  | 38 | if ($file_is_sorted && scalar(@$insert)) { | 
| 1111 |  |  |  |  |  |  | # there are sorted things waiting to insert | 
| 1112 | 15 |  |  |  |  | 16 | my $chomped = $line; | 
| 1113 | 15 |  |  |  |  | 14 | chomp $chomped; | 
| 1114 | 15 |  |  |  |  | 51 | my $row = [ split($split_regex, $chomped, $csv_column_count) ]; | 
| 1115 | 15 |  |  |  |  | 23 | my $comparison = $row_sort_sub->($row, $insert->[0]); | 
| 1116 | 15 | 100 |  |  |  | 29 | if ($comparison > 0) { | 
| 1117 |  |  |  |  |  |  | # write the object's data | 
| 1118 | 19 |  |  | 19 |  | 104 | no warnings 'uninitialized';   # Some of the object's data may be undef | 
|  | 19 |  |  |  |  | 32 |  | 
|  | 19 |  |  |  |  | 3899 |  | 
| 1119 | 5 |  |  |  |  | 7 | my $new_row = shift @$insert; | 
| 1120 | 5 |  |  |  |  | 12 | my $new_line = join($join_pattern, @$new_row) . $record_separator; | 
| 1121 |  |  |  |  |  |  |  | 
| 1122 | 5 | 50 |  |  |  | 10 | if ($ENV{'UR_DBI_MONITOR_SQL'}) { | 
| 1123 | 0 |  |  |  |  | 0 | UR::DBI->sql_fh->print("INSERT >>$new_line<<\n"); | 
| 1124 |  |  |  |  |  |  | } | 
| 1125 |  |  |  |  |  |  |  | 
| 1126 | 5 |  |  |  |  | 13 | $write_fh->print($new_line); | 
| 1127 |  |  |  |  |  |  | # Don't undef the last line read, meaning it could still be written to the output... | 
| 1128 | 5 |  |  |  |  | 35 | next READ_A_LINE; | 
| 1129 |  |  |  |  |  |  | } | 
| 1130 |  |  |  |  |  |  | } | 
| 1131 |  |  |  |  |  |  |  | 
| 1132 | 11 | 100 |  |  |  | 32 | if (my $obj = delete $delete->{$line}) { | 
|  |  | 100 |  |  |  |  |  | 
| 1133 | 2 | 50 |  |  |  | 8 | if ($ENV{'UR_DBI_MONITOR_SQL'}) { | 
| 1134 | 0 |  |  |  |  | 0 | UR::DBI->sql_fh->print("DELETE >>$line<<\n"); | 
| 1135 |  |  |  |  |  |  | } | 
| 1136 | 2 |  |  |  |  | 2 | $line = undef; | 
| 1137 | 2 |  |  |  |  | 4 | next; | 
| 1138 |  |  |  |  |  |  |  | 
| 1139 |  |  |  |  |  |  | } elsif (my $changed = delete $update->{$line}) { | 
| 1140 | 4 | 50 |  |  |  | 11 | if ($ENV{'UR_DBI_MONITOR_SQL'}) { | 
| 1141 | 0 |  |  |  |  | 0 | UR::DBI->sql_fh->print("UPDATE replace >>$line<< with >>$changed<<\n"); | 
| 1142 |  |  |  |  |  |  | } | 
| 1143 | 4 |  |  |  |  | 8 | $write_fh->print($changed); | 
| 1144 | 4 |  |  |  |  | 15 | $line = undef; | 
| 1145 | 4 |  |  |  |  | 5 | next; | 
| 1146 |  |  |  |  |  |  |  | 
| 1147 |  |  |  |  |  |  | } else { | 
| 1148 |  |  |  |  |  |  | # This line from the file was unchanged in the app | 
| 1149 | 5 |  |  |  |  | 10 | $write_fh->print($line); | 
| 1150 | 5 |  |  |  |  | 24 | $line = undef; | 
| 1151 |  |  |  |  |  |  | } | 
| 1152 |  |  |  |  |  |  | } | 
| 1153 |  |  |  |  |  |  |  | 
| 1154 | 4 | 50 |  |  |  | 12 | if (keys %$delete) { | 
| 1155 | 0 |  |  |  |  | 0 | $self->warning_message("There were ",scalar(keys %$delete)," deleted $class_name objects that did not match data in the file"); | 
| 1156 |  |  |  |  |  |  | } | 
| 1157 | 4 | 50 |  |  |  | 14 | if (keys %$update) { | 
| 1158 | 0 |  |  |  |  | 0 | $self->warning_message("There were ",scalar(keys %$update)," updated $class_name objects that did not match data in the file"); | 
| 1159 |  |  |  |  |  |  | } | 
| 1160 |  |  |  |  |  |  |  | 
| 1161 |  |  |  |  |  |  | # finish out by writing the rest of the new data | 
| 1162 | 4 |  |  |  |  | 7 | foreach my $new_row ( @$insert ) { | 
| 1163 | 19 |  |  | 19 |  | 89 | no warnings 'uninitialized';   # Some of the object's data may be undef | 
|  | 19 |  |  |  |  | 32 |  | 
|  | 19 |  |  |  |  | 8317 |  | 
| 1164 | 3 |  |  |  |  | 11 | my $new_line = join($join_pattern, @$new_row) . $record_separator; | 
| 1165 | 3 | 50 |  |  |  | 9 | if ($ENV{'UR_DBI_MONITOR_SQL'}) { | 
| 1166 | 0 |  |  |  |  | 0 | UR::DBI->sql_fh->print("INSERT >>$new_line<<\n"); | 
| 1167 |  |  |  |  |  |  | } | 
| 1168 | 3 |  |  |  |  | 7 | $write_fh->print($new_line); | 
| 1169 |  |  |  |  |  |  | } | 
| 1170 | 4 |  |  |  |  | 28 | $write_fh->close(); | 
| 1171 |  |  |  |  |  |  |  | 
| 1172 | 4 | 50 |  |  |  | 253 | if ($use_quick_rename) { | 
| 1173 | 4 | 50 |  |  |  | 13 | if ($ENV{'UR_DBI_MONITOR_SQL'}) { | 
| 1174 | 0 |  |  |  |  | 0 | UR::DBI->sql_fh->print("FILE: COMMIT rename $temp_file_name over $original_data_file\n"); | 
| 1175 |  |  |  |  |  |  | } | 
| 1176 |  |  |  |  |  |  |  | 
| 1177 | 4 | 50 |  |  |  | 195 | unless(rename($temp_file_name, $original_data_file)) { | 
| 1178 | 0 |  |  |  |  | 0 | $self->error_message("Can't rename the temp file over the original file: $!"); | 
| 1179 | 0 |  |  |  |  | 0 | return; | 
| 1180 |  |  |  |  |  |  | } | 
| 1181 |  |  |  |  |  |  | } else { | 
| 1182 |  |  |  |  |  |  | # We have to copy the data from the temp file to the original file | 
| 1183 |  |  |  |  |  |  |  | 
| 1184 | 0 | 0 |  |  |  | 0 | if ($ENV{'UR_DBI_MONITOR_SQL'}) { | 
| 1185 | 0 |  |  |  |  | 0 | UR::DBI->sql_fh->print("FILE: COMMIT write over $original_data_file in place\n"); | 
| 1186 |  |  |  |  |  |  | } | 
| 1187 | 0 |  |  |  |  | 0 | my $new_write_fh = IO::File->new($original_data_file, O_WRONLY|O_TRUNC); | 
| 1188 | 0 | 0 |  |  |  | 0 | unless ($new_write_fh) { | 
| 1189 | 0 |  |  |  |  | 0 | $self->error_message("Can't open $original_data_file for writing: $!"); | 
| 1190 | 0 |  |  |  |  | 0 | return; | 
| 1191 |  |  |  |  |  |  | } | 
| 1192 |  |  |  |  |  |  |  | 
| 1193 | 0 |  |  |  |  | 0 | my $temp_file_fh = IO::File->new($temp_file_name); | 
| 1194 | 0 | 0 |  |  |  | 0 | unless ($temp_file_fh) { | 
| 1195 | 0 |  |  |  |  | 0 | $self->error_message("Can't open $temp_file_name for reading: $!"); | 
| 1196 | 0 |  |  |  |  | 0 | return; | 
| 1197 |  |  |  |  |  |  | } | 
| 1198 |  |  |  |  |  |  |  | 
| 1199 | 0 |  |  |  |  | 0 | while(<$temp_file_fh>) { | 
| 1200 | 0 |  |  |  |  | 0 | $new_write_fh->print($_); | 
| 1201 |  |  |  |  |  |  | } | 
| 1202 |  |  |  |  |  |  |  | 
| 1203 | 0 |  |  |  |  | 0 | $new_write_fh->close(); | 
| 1204 |  |  |  |  |  |  | } | 
| 1205 |  |  |  |  |  |  |  | 
| 1206 |  |  |  |  |  |  | # Because of the rename/copy process during syncing, the previously opened filehandle may | 
| 1207 |  |  |  |  |  |  | # not be valid anymore.  get_default_handle will reopen the file next time it's needed | 
| 1208 | 4 |  |  |  |  | 27 | $self->_invalidate_cache(); | 
| 1209 | 4 |  |  |  |  | 33 | $self->__invalidate_get_default_handle__; | 
| 1210 |  |  |  |  |  |  |  | 
| 1211 | 4 | 50 |  |  |  | 9 | if ($ENV{'UR_DBI_MONITOR_SQL'}) { | 
| 1212 | 0 |  |  |  |  | 0 | UR::DBI->sql_fh->printf("FILE: TOTAL COMMIT TIME: %.4f s\n", Time::HiRes::time() - $monitor_start_time); | 
| 1213 |  |  |  |  |  |  | } | 
| 1214 |  |  |  |  |  |  |  | 
| 1215 | 4 |  |  |  |  | 32 | flock($read_fh, LOCK_UN); | 
| 1216 | 4 |  |  |  |  | 11 | $read_fh->close(); | 
| 1217 |  |  |  |  |  |  |  | 
| 1218 |  |  |  |  |  |  | # FIXME - this is ugly... With RDBMS-type data sources, they will call $dbh->commit() which | 
| 1219 |  |  |  |  |  |  | # gets to UR::DBI->commit(), which calls _set_object_saved_committed for them.  Since we're | 
| 1220 |  |  |  |  |  |  | # not using DBI we have to do this 2-part thing ourselves.  In the future, we might break | 
| 1221 |  |  |  |  |  |  | # out things so the saving to the temp file goes in _sync_database(), and moving the temp | 
| 1222 |  |  |  |  |  |  | # file over the original goes in commit() | 
| 1223 | 4 | 50 |  |  |  | 2092 | unless ($self->_set_specified_objects_saved_uncommitted($changed_objects)) { | 
| 1224 | 0 |  |  |  |  | 0 | Carp::croak("Error setting objects to a saved state after sync_database.  Exiting."); | 
| 1225 | 0 |  |  |  |  | 0 | return; | 
| 1226 |  |  |  |  |  |  | } | 
| 1227 |  |  |  |  |  |  |  | 
| 1228 | 4 |  |  |  |  | 47 | $self->_set_specified_objects_saved_committed($changed_objects); | 
| 1229 | 4 |  |  |  |  | 73 | return 1; | 
| 1230 |  |  |  |  |  |  | } | 
| 1231 |  |  |  |  |  |  |  | 
| 1232 |  |  |  |  |  |  |  | 
| 1233 |  |  |  |  |  |  |  | 
| 1234 |  |  |  |  |  |  | sub initializer_should_create_column_name_for_class_properties { | 
| 1235 | 28 |  |  | 28 | 0 | 353 | 1; | 
| 1236 |  |  |  |  |  |  | } | 
| 1237 |  |  |  |  |  |  |  | 
| 1238 |  |  |  |  |  |  |  | 
| 1239 |  |  |  |  |  |  | 1; | 
| 1240 |  |  |  |  |  |  |  | 
| 1241 |  |  |  |  |  |  | =pod | 
| 1242 |  |  |  |  |  |  |  | 
| 1243 |  |  |  |  |  |  | =head1 NAME | 
| 1244 |  |  |  |  |  |  |  | 
| 1245 |  |  |  |  |  |  | UR::DataSource::File - Parent class for file-based data sources | 
| 1246 |  |  |  |  |  |  |  | 
| 1247 |  |  |  |  |  |  | =head1 DEPRECATED | 
| 1248 |  |  |  |  |  |  |  | 
| 1249 |  |  |  |  |  |  | This module is deprecated.  Use UR::DataSource::Filesystem instead. | 
| 1250 |  |  |  |  |  |  |  | 
| 1251 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 1252 |  |  |  |  |  |  |  | 
| 1253 |  |  |  |  |  |  | package MyNamespace::DataSource::MyFile; | 
| 1254 |  |  |  |  |  |  | class MyNamespace::DataSource::MyFile { | 
| 1255 |  |  |  |  |  |  | is => ['UR::DataSource::File', 'UR::Singleton'], | 
| 1256 |  |  |  |  |  |  | }; | 
| 1257 |  |  |  |  |  |  | sub server { '/path/to/file' } | 
| 1258 |  |  |  |  |  |  | sub delimiter { "\t" } | 
| 1259 |  |  |  |  |  |  | sub column_order { ['thing_id', 'thing_name', 'thing_color' ] } | 
| 1260 |  |  |  |  |  |  | sub sort_order { ['thing_id'] } | 
| 1261 |  |  |  |  |  |  |  | 
| 1262 |  |  |  |  |  |  | package main; | 
| 1263 |  |  |  |  |  |  | class MyNamespace::Thing { | 
| 1264 |  |  |  |  |  |  | id_by => 'thing_id', | 
| 1265 |  |  |  |  |  |  | has => [ 'thing_id', 'thing_name', 'thing_color' ], | 
| 1266 |  |  |  |  |  |  | data_source => 'MyNamespace::DataSource::MyFile', | 
| 1267 |  |  |  |  |  |  | } | 
| 1268 |  |  |  |  |  |  | my @objs = MyNamespace::Thing->get(thing_name => 'Bob'); | 
| 1269 |  |  |  |  |  |  |  | 
| 1270 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 1271 |  |  |  |  |  |  |  | 
| 1272 |  |  |  |  |  |  | Classes which wish to retrieve their data from a regular file can use a UR::DataSource::File-based | 
| 1273 |  |  |  |  |  |  | data source.  The modules implementing these data sources live under the DataSource subdirectory | 
| 1274 |  |  |  |  |  |  | of the application's Namespace, by convention.  Besides defining a class for your data source | 
| 1275 |  |  |  |  |  |  | inheriting from UR::DataSource::File, it should have the following methods, either as properties | 
| 1276 |  |  |  |  |  |  | or functions in the package. | 
| 1277 |  |  |  |  |  |  |  | 
| 1278 |  |  |  |  |  |  | =head2 Configuration | 
| 1279 |  |  |  |  |  |  |  | 
| 1280 |  |  |  |  |  |  | These methods determine the configuration for your data source. | 
| 1281 |  |  |  |  |  |  |  | 
| 1282 |  |  |  |  |  |  | =over 4 | 
| 1283 |  |  |  |  |  |  |  | 
| 1284 |  |  |  |  |  |  | =item server() | 
| 1285 |  |  |  |  |  |  |  | 
| 1286 |  |  |  |  |  |  | server() should return a string representing the pathname of the file where the data is stored. | 
| 1287 |  |  |  |  |  |  |  | 
| 1288 |  |  |  |  |  |  | =item file_list() | 
| 1289 |  |  |  |  |  |  |  | 
| 1290 |  |  |  |  |  |  | The file_list() method should return a listref of pathnames to one or more identical files | 
| 1291 |  |  |  |  |  |  | where data is stored.   Use file_list() instead of server() when you want to load-balance several NFS | 
| 1292 |  |  |  |  |  |  | servers, for example. | 
| 1293 |  |  |  |  |  |  |  | 
| 1294 |  |  |  |  |  |  | You must have either server() or file_list() in your module, but not both.  The existence of server() | 
| 1295 |  |  |  |  |  |  | takes precedence over file_list(). | 
| 1296 |  |  |  |  |  |  |  | 
| 1297 |  |  |  |  |  |  | =item delimiter() | 
| 1298 |  |  |  |  |  |  |  | 
| 1299 |  |  |  |  |  |  | delimiter() should return a string representing how the fields in each record are split into | 
| 1300 |  |  |  |  |  |  | columns.  This string is interpreted as a regex internally.  The default delimiter is "\s*,\s*" | 
| 1301 |  |  |  |  |  |  | meaning that the file is separated by commas. | 
| 1302 |  |  |  |  |  |  |  | 
| 1303 |  |  |  |  |  |  | =item record_separator() | 
| 1304 |  |  |  |  |  |  |  | 
| 1305 |  |  |  |  |  |  | record_separator() should return a string that gets stored in $/ before getline() is called on the | 
| 1306 |  |  |  |  |  |  | file's filehandle.  The default record_separator() is "\n" meaning that the file's records are | 
| 1307 |  |  |  |  |  |  | separated by newlines. | 
| 1308 |  |  |  |  |  |  |  | 
| 1309 |  |  |  |  |  |  | =item skip_first_line() | 
| 1310 |  |  |  |  |  |  |  | 
| 1311 |  |  |  |  |  |  | skip_first_line() should return a boolean value.  If true, the first line of the file is ignored, for | 
| 1312 |  |  |  |  |  |  | example if the first line defines the columns in the file. | 
| 1313 |  |  |  |  |  |  |  | 
| 1314 |  |  |  |  |  |  | =item column_order() | 
| 1315 |  |  |  |  |  |  |  | 
| 1316 |  |  |  |  |  |  | column_order() should return a listref of column names in the file.  column_order is required; there | 
| 1317 |  |  |  |  |  |  | is no default. | 
| 1318 |  |  |  |  |  |  |  | 
| 1319 |  |  |  |  |  |  | =item sort_order() | 
| 1320 |  |  |  |  |  |  |  | 
| 1321 |  |  |  |  |  |  | If the data file is sorted in some way, sort_order() should return a listref of column names (which must | 
| 1322 |  |  |  |  |  |  | exist in column_order()) by which the file is sorted.  This gives the system a hint about how the file | 
| 1323 |  |  |  |  |  |  | is structured, and is able to make shortcuts when reading the file to speed up data access.  The default | 
| 1324 |  |  |  |  |  |  | is to assumme the file is not sorted. | 
| 1325 |  |  |  |  |  |  |  | 
| 1326 |  |  |  |  |  |  | =back | 
| 1327 |  |  |  |  |  |  |  | 
| 1328 |  |  |  |  |  |  | =head1 INHERITANCE | 
| 1329 |  |  |  |  |  |  |  | 
| 1330 |  |  |  |  |  |  | UR::DataSource | 
| 1331 |  |  |  |  |  |  |  | 
| 1332 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 1333 |  |  |  |  |  |  |  | 
| 1334 |  |  |  |  |  |  | UR, UR::DataSource | 
| 1335 |  |  |  |  |  |  |  | 
| 1336 |  |  |  |  |  |  | =cut |