| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package IMDB::Local; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Suggestions for improvements |
|
5
|
|
|
|
|
|
|
# - |
|
6
|
|
|
|
|
|
|
# |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
17036
|
use 5.006; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
48
|
|
|
10
|
1
|
|
|
1
|
|
6
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
41
|
|
|
11
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
|
1
|
|
|
|
|
6
|
|
|
|
1
|
|
|
|
|
80
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
IMDB::Local - Tools to download and manage a local copy of the IMDB list files in a database. |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=cut |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
our $VERSION = '1.3'; |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
Quick summary of what the module does. |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
Perhaps a little code snippet. |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
my $foo = new IMDB::Local('imdbDir' => "./imdb-data", |
|
28
|
|
|
|
|
|
|
'listsDir' => "./imdb-data/lists", |
|
29
|
|
|
|
|
|
|
'showProgressBar' => 1); |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
for my $type ( $foo->listTypes() ) { |
|
32
|
|
|
|
|
|
|
if ( $foo->importList($type) != 0 ) { |
|
33
|
|
|
|
|
|
|
warn("$type import failed, check $foo->{imdbDir}/stage-$type.log"); |
|
34
|
|
|
|
|
|
|
} |
|
35
|
|
|
|
|
|
|
} |
|
36
|
|
|
|
|
|
|
... |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 EXPORT |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
A list of functions that can be exported. You can delete this section |
|
41
|
|
|
|
|
|
|
if you don't export anything, such as for a purely object-oriented module. |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head1 SUBROUTINES/METHODS |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=cut |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# Use Term::ProgressBar if installed. |
|
48
|
1
|
|
|
|
|
2
|
# Compile-time flag: true only when Term::ProgressBar can be loaded and its
# $VERSION is at least 2.  A failed require is trapped by the eval, so a
# missing module simply leaves the flag false.
use constant Have_bar => eval {
    require Term::ProgressBar;
    $Term::ProgressBar::VERSION >= 2;
};
|
|
1
|
|
|
|
|
1
|
|
|
52
|
|
|
|
|
|
|
|
|
53
|
1
|
|
|
1
|
|
362
|
use IMDB::Local::DB; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
292
|
|
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head2 new |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
Create new IMDB::Local object. |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
Arguments: |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
imdbDir - required or die |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
verbose - optional, default is 0. |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
listsDir - folder where list files exist (see IMDB::Local::Download). |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
showProgressBar - if non-zero and Term::ProgressBar is available progress bars in import methods will be displayed. Ignored if Term::ProgressBar is not available. |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=cut |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# Constructor.  Every name => value pair passed after the class name is
# stored as an attribute of the new object.
#
#   imdbDir         - required; dies with "invalid usage - no imdbDir" if absent.
#   verbose         - optional; defaults to 0 (as the POD for new documents).
#   listsDir        - optional; when given, an IMDB::Local::ListFiles object is
#                     created for that folder and stored in {listFiles}.
#   showProgressBar - optional; forced to 0 when Have_bar is false.
#
# Also derives {moviedbInfo} and {moviedbOffline} paths from imdbDir.
# Returns the blessed object.
sub new
{
    my ($type) = shift;
    my $self={ @_ };            # remaining args become attributes

    die "invalid usage - no imdbDir" if ( !defined($self->{imdbDir}) );

    # verbose is documented as optional and the SYNOPSIS does not pass it,
    # so supply the documented default rather than dying.
    $self->{verbose}=0 if ( !defined($self->{verbose}) );

    #$self->{stages} = { 1=>'movies', 2=>'directors', 3=>'actors', 4=>'actresses', 5=>'genres', 6=>'ratings', 7=>'keywords', 8=>'plot' };
    #$self->{optionalStages} = { 'keywords' => 7, 'plot' => 8 };  # list of optional stages - no need to download files for these

    $self->{moviedbInfo}="$self->{imdbDir}/moviedb.info";
    $self->{moviedbOffline}="$self->{imdbDir}/moviedb.offline";

    if ( defined($self->{listsDir}) ) {
        # direct method call instead of the ambiguous indirect-object form
        $self->{listFiles}=IMDB::Local::ListFiles->new(listsDir=>$self->{listsDir});
    }

    # only leave the progress bar on if it is available
    if ( !Have_bar ) {
        $self->{showProgressBar}=0;
    }

    bless($self, $type);
    return($self);
}
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
#sub openDB |
|
100
|
|
|
|
|
|
|
#{ |
|
101
|
|
|
|
|
|
|
# my ($self)=@_; |
|
102
|
|
|
|
|
|
|
# |
|
103
|
|
|
|
|
|
|
# my $DB=new IMDB::Local::DB(database=>"$self->{imdbDir}/imdb.db"); |
|
104
|
|
|
|
|
|
|
# |
|
105
|
|
|
|
|
|
|
# if ( !$DB->connect() ) { |
|
106
|
|
|
|
|
|
|
# carp "imdbdb connect failed:$DBI::errstr"; |
|
107
|
|
|
|
|
|
|
# } |
|
108
|
|
|
|
|
|
|
# $self->{DB}=$DB; |
|
109
|
|
|
|
|
|
|
# |
|
110
|
|
|
|
|
|
|
# return($DB); |
|
111
|
|
|
|
|
|
|
#} |
|
112
|
|
|
|
|
|
|
# |
|
113
|
|
|
|
|
|
|
#sub closeDB |
|
114
|
|
|
|
|
|
|
#{ |
|
115
|
|
|
|
|
|
|
# my ($self)=@_; |
|
116
|
|
|
|
|
|
|
# |
|
117
|
|
|
|
|
|
|
# $self->{DB}->disconnect(); |
|
118
|
|
|
|
|
|
|
# undef $self->{DB}; |
|
119
|
|
|
|
|
|
|
#} |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=head2 listTypes |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Returns an array of list files supported (currently 'movies', 'directors', 'actors', 'actresses', 'genres', 'ratings', 'keywords', 'plot') |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=cut |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
# Return whatever the object's {listFiles} helper reports from its types()
# method.  {listFiles} is only populated by new() when listsDir was given.
sub listTypes($)
{
    my ($self)=@_;

    my $listFiles=$self->{listFiles};
    return $listFiles->types();
}
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# Report an error message followed by a newline.  When the object has a
# {logfd} handle the message goes there and {errorCountInLog} is incremented;
# otherwise the message is written to STDERR.
sub error($$)
{
    my ($self, $message)=@_;
    my $text=$message."\n";

    if ( !defined($self->{logfd}) ) {
        print STDERR $text;
    }
    else {
        print {$self->{logfd}} $text;
        $self->{errorCountInLog}++;
    }
}
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# Print a progress/status message (plus newline) to STDERR, but only when
# the object's {verbose} attribute is true.
sub status($$)
{
    my ($self, $message)=@_;

    print STDERR $message."\n" if ( $self->{verbose} );
}
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# Return a copy of the given value with a comma inserted before every group
# of three digits that runs to the end of the string (1234567 -> 1,234,567).
sub withThousands ($)
{
    my $formatted=shift;

    # lazily take 1-3 digits that are followed by whole groups of 3 digits
    $formatted =~ s/(\d{1,3}?)(?=(\d{3})+$)/$1,/g;

    return $formatted;
}
|
162
|
|
|
|
|
|
|
|
|
163
|
1
|
|
|
|
|
1
|
# Compile-time flag: true when IO::Uncompress::Gunzip could be loaded.  The
# eval traps a failed require, leaving the flag false in that case.
use constant Have_gunzip => eval {
    require IO::Uncompress::Gunzip;
};
|
|
1
|
|
|
|
|
1
|
|
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# Open $file for reading, transparently decompressing names ending in ".gz".
#
#   - plain files are opened through IO::File;
#   - .gz files use IO::Uncompress::Gunzip when Have_gunzip is true;
#   - otherwise an external "gzip -dc" process is started and its output
#     pipe is returned.
#
# Returns a readable handle/object, or undef (empty list in list context)
# when nothing could be opened, so callers can test the result for truth.
sub openMaybeGunzip($)
{
    my ($file)=@_;

    if ( $file!~m/\.gz$/ ) {
        require IO::File;
        # explicit mode argument: the file name is never parsed for a mode
        return IO::File->new($file, '<');
    }

    if ( Have_gunzip ) {
        return IO::Uncompress::Gunzip->new($file);
    }

    # List-form pipe open runs gzip directly without a shell, so spaces or
    # shell metacharacters in $file cannot alter the command.  "--" stops a
    # leading "-" in the file name from being read as an option.
    my $fd;
    if ( open($fd, '-|', 'gzip', '-dc', '--', $file) ) {
        return($fd);
    }

    # Carp is loaded here because no import of it is visible in this file.
    require Carp;
    Carp::carp("no suitable gzip decompression found");

    # explicit failure value; do not leak carp's true return value
    return;
}
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
# Close a handle obtained from openMaybeGunzip($file).  The only case that
# uses the builtin close() is a ".gz" name opened while Have_gunzip is false
# (the external gzip pipe); every other handle is closed via its close()
# method.
sub closeMaybeGunzip($$)
{
    my ($file, $fd)=@_;

    return close($fd) if ( $file=~m/\.gz$/ && !Have_gunzip );

    return $fd->close();
}
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
# Break an IMDB list key such as
#     "Studio One" (1948) {Twelve Angry Men (#7.1)}
# into its parts and return them as a hash reference with the keys
# parentId, series, episode, title, qualifier, dbkey, year and searchTitle.
#
#   $self    - object; {seriesKeys} is read and updated, {QualifierTypeIDs}
#              is read, and $self->error() is called for undecodable years.
#   $DB      - object providing makeSearchableTitle($text, 0).
#   $dbkey   - the raw key text from the list file.
#   $year    - when defined, overrides any year found in the key.
#   $titleID - id recorded in {seriesKeys} when the key names a series.
sub decodeImdbKey($$$)
{
    my ($self, $DB, $dbkey, $year, $titleID)=@_;

    my %info=(parentId=>0, series=>0, episode=>0);

    # drop episode information - ex: "Studio One" (1948) {Twelve Angry Men (#7.1)}
    if ( $dbkey=~s/\s*\{([^\}]+)\}//o ) {
        my $episodeText=$1;

        if ( $episodeText=~s/\s*\(\#(\d+)\.(\d+)\)$// ) {
            @info{'series', 'episode'}=($1, $2);

            # attempt to locate parentId matching series title
            my $seriesKey=$dbkey;
            $seriesKey=~s/^\"//o;
            $seriesKey=~s/\" \(/ \(/o;

            my $knownParent=$self->{seriesKeys}->{$seriesKey};
            $info{parentId}=$knownParent if ( defined($knownParent) );
        }

        # with or without a (#series.episode) suffix, what is left is the title
        $info{title}=$episodeText;
    }

    # A title wrapped in double-quotes is a tv series; otherwise a trailing
    # (TV), (V) or (VG) marker selects the qualifier, defaulting to movie.
    my $qualifierName;
    if ( $dbkey=~s/^\"//o && $dbkey=~s/\" \(/ \(/o) {
        my $isMini=( $dbkey=~s/\s+\(mini\)$//o ) ? 1 : 0;

        if ( $info{parentId} == 0 ) {
            # no known parent: treat this entry as the series itself
            $qualifierName=$isMini ? "tv_mini_series" : "tv_series";
            $self->{seriesKeys}->{$dbkey}=$titleID;
        }
        else {
            $qualifierName=$isMini ? "episode_of_tv_mini_series" : "episode_of_tv_series";
        }
    }
    elsif ( $dbkey=~s/\s+\(TV\)$//o ) {
        # how rude, some entries have (TV) appearing more than once.
        #$dbkey=~s/\s*\(TV\)$//o;
        $qualifierName="tv_movie";
    }
    elsif ( $dbkey=~s/\s+\(V\)$//o ) {
        $qualifierName="video_movie";
    }
    elsif ( $dbkey=~s/\s+\(VG\)$//o ) {
        $qualifierName="video_game";
    }
    else {
        $qualifierName="movie";
    }
    $info{qualifier}=$self->{QualifierTypeIDs}->{$qualifierName};

    $info{dbkey}=$dbkey;

    my $title=$dbkey;

    # todo - this is the wrong year for episode titles
    if ( $title=~m/^\"/o && $title=~m/\"\s*\(/o ) { #"
        $title=~s/^\"//o; #"
        $title=~s/\"(\s*\()/$1/o; #"
    }

    # Strip the trailing (YYYY), (YYYY/II), (????) or (????/II) marker and
    # remember the year it carried (0 when unknown or undecodable).
    my $yearInKey;
    if ( $title=~s/\s+\((\d\d\d\d)\)$//o ||
         $title=~s/\s+\((\d\d\d\d)\/[IVXL]+\)$//o ) {
        $yearInKey=$1;
    }
    elsif ( $title=~s/\s+\((\?\?\?\?)\)$//o ||
            $title=~s/\s+\((\?\?\?\?)\/[IVXL]+\)$//o ) {
        $yearInKey=0;
    }
    else {
        $self->error("movie list format failed to decode year from title '$title'");
        $yearInKey=0;
    }

    # over-ride with what is given
    $info{year}=defined($year) ? $year : $yearInKey;

    # move a trailing article back to the front: "Matrix, The" -> "The Matrix"
    $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og;

    # leave searchtitle empty for tv series'
    if ( $info{series} != 0 ) {
        $info{searchTitle}='';
    }
    elsif ( !defined($info{title}) ) {
        $info{title}=$title;
        $info{searchTitle}=$DB->makeSearchableTitle($title, 0);

        # todo - is this more useful ?
        #$info{searchTitleWithYear}=MakeSearchtitle($DB, $title."(".$info{year}.")", 0);
    }
    else {
        $info{searchTitle}=$DB->makeSearchableTitle($title."(".$info{year}.")", 0);

        # todo - is this more useful ?
        #$info{searchTitleWithYear}=$DB->makeSearchableTitle($title."(".$info{year}.")", 0);
    }

    return(\%info);
}
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
# Import the movies list file into the Titles table.
#
#   $self          - object; {QualifierTypeIDs} and {imdbMovie2DBKey} are
#                    filled in, {showProgressBar} controls the progress bar.
#   $countEstimate - expected number of titles, used to size the progress bar.
#   $file          - list file name (may end in .gz).
#   $DB            - database object; select2Hash, prepare and commit are
#                    called on it, and it is passed to decodeImdbKey.
#
# Returns the number of titles inserted, -2 when the file cannot be opened,
# or -1 when the expected "MOVIES LIST" header is missing or malformed.
sub importMovies($$$$)
{
    my ($self, $countEstimate, $file, $DB)=@_;
    my $startTime=time();
    my $lineCount=0;

    my $fh = openMaybeGunzip($file) || return(-2);

    # skip the preamble: "MOVIES LIST", a line of '=' and an empty line
    while(<$fh>) {
        $lineCount++;
        if ( m/^MOVIES LIST/o ) {
            if ( !($_=<$fh>) || !m/^===========/o ) {
                $self->error("missing ======= after 'MOVIES LIST' at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            if ( !($_=<$fh>) || !m/^\s*$/o ) {
                $self->error("missing empty line after ======= at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            last;
        }
        elsif ( $lineCount > 1000 ) {
            $self->error("$file: stopping at line $lineCount, didn't see \"MOVIES LIST\" line");
            closeMaybeGunzip($file, $fh);
            return(-1);
        }
    }

    # Declare first, assign inside a block: "my $x = ... if (...)" has
    # undefined behaviour according to perlsyn.
    my $progress;
    if ( $self->{showProgressBar} ) {
        $progress=Term::ProgressBar->new({name  => "importing Movies",
                                          count => $countEstimate,
                                          ETA   => 'linear'});
        $progress->minor(0);
        $progress->max_update_rate(1);
    }
    my $next_update=0;

    # preload qualifier types
    $self->{QualifierTypeIDs}=$DB->select2Hash("select Name, QualifierTypeID from QualifierTypes");

    #$DB->runSQL("BEGIN TRANSACTION");

    my $count=0;
    my $tableInsert_sth=$DB->prepare('INSERT INTO Titles (TitleID, SearchTitle, Title, QualifierTypeID, Year, ParentID, Series, Episode) VALUES (?,?,?,?,?,?,?,?)');

    my $potentialEntries=0;

    while(<$fh>) {
        $lineCount++;
        my $line=$_;

        # end is line consisting of only '-'
        last if ( $line=~m/^\-\-\-\-\-\-\-+/o );

        next if ( $line=~m/\{\{SUSPENDED\}\}/o );
        $line=~s/\n$//o;

        #print "read line $lineCount:$line\n";
        $potentialEntries++;

        my $tab=index($line, "\t");
        if ( $tab == -1 ) {
            $self->error("$file:$lineCount: unrecognized format (missing tab)");
            next;
        }

        # Everything after the first tab is the year column.  The patterns
        # are bound to $ykey with =~ (the previous "$ykey=m/.../" matched $_
        # and overwrote $ykey with the match result).  "(?:^|\s)" accepts
        # both a single separating tab (year starts the field) and several.
        my $ykey=substr($line, $tab+1);
        if ( $ykey=~m/(?:^|\s)(\d\d\d\d)$/ ) {
            $ykey=$1;
        }
        elsif ( $ykey=~m/(?:^|\s)(\?\?\?\?)$/ ) {
            $ykey=undef;
        }
        elsif ( $ykey=~m/(?:^|\s)(\d\d\d\d)\-(\?\?\?\?)$/ ) {
            $ykey=$1;
        }
        elsif ( $ykey=~m/(?:^|\s)(\d\d\d\d)\-(\d\d\d\d)$/ ) {
            $ykey=$1;
        }
        else {
            warn("invalid year ($ykey) - $line");
            # no usable year column: let decodeImdbKey fall back to the
            # year embedded in the title key
            $ykey=undef;
        }

        my $mkey=substr($line, 0, $tab);

        # lets not import video games
        #if ( $decoded->{qualifier} != $self->{QualifierTypeIDs}->{'video_game'} ) {
        # returned count is number of titles found
        $count++;

        my $decoded=$self->decodeImdbKey($DB, $mkey, $ykey, $count);

        $tableInsert_sth->execute($count,
                                  $decoded->{searchTitle},
                                  $decoded->{title},
                                  $decoded->{qualifier},
                                  $decoded->{year},
                                  $decoded->{parentId},
                                  $decoded->{series},
                                  $decoded->{episode});

        # remember the id assigned to this raw key
        $self->{imdbMovie2DBKey}->{$mkey}=$count;

        #if ( ($count % 50000) == 0 ) {
        #$DB->commit();
        #}
        #}

        if ( $progress ) {
            # re-adjust target so progress bar doesn't seem too wonky
            if ( $count > $countEstimate ) {
                $countEstimate = $progress->target($count+1000);
                $next_update=$progress->update($count);
            }
            elsif ( $count > $next_update ) {
                $next_update=$progress->update($count);
            }
        }
    }
    #$DB->runSQL("END TRANSACTION");

    $progress->update($countEstimate) if ( $progress );

    $self->status(sprintf("importing Movies found ".withThousands($count)." in ".
                          withThousands($potentialEntries)." entries in %d seconds",time()-$startTime));

    closeMaybeGunzip($file, $fh);
    $DB->commit();
    return($count);
}
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
# Import the genres list file into the Titles2Genres table, creating rows
# in Genres for genre names as they are first seen.
#
#   $self          - object; {imdbMovie2DBKey} is consulted to map list keys
#                    to title ids, {GenreID} caches genre ids, and
#                    {showProgressBar} controls the progress bar.
#   $countEstimate - expected number of entries, used to size the progress bar.
#   $file          - list file name (may end in .gz).
#   $DB            - database object; select2Hash, prepare, insert_row and
#                    commit are called on it.
#
# Returns the number of title/genre rows inserted, -2 when the file cannot
# be opened, or -1 when the "8: THE GENRES LIST" header is missing or
# malformed.
sub importGenres($$$$)
{
    my ($self, $countEstimate, $file, $DB)=@_;
    my $startTime=time();
    my $lineCount=0;

    my $fh = openMaybeGunzip($file) || return(-2);

    # skip the preamble: header line, a line of '=' and an empty line
    while(<$fh>) {
        $lineCount++;
        if ( m/^8: THE GENRES LIST/o ) {
            if ( !($_=<$fh>) || !m/^===========/o ) {
                $self->error("missing ======= after 'THE GENRES LIST' at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            if ( !($_=<$fh>) || !m/^\s*$/o ) {
                $self->error("missing empty line after ======= at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            last;
        }
        elsif ( $lineCount > 1000 ) {
            $self->error("$file: stopping at line $lineCount, didn't see \"THE GENRES LIST\" line");
            closeMaybeGunzip($file, $fh);
            return(-1);
        }
    }

    # Declare first, assign inside a block: "my $x = ... if (...)" has
    # undefined behaviour according to perlsyn.
    my $progress;
    if ( $self->{showProgressBar} ) {
        $progress=Term::ProgressBar->new({name  => "importing Genres",
                                          count => $countEstimate,
                                          ETA   => 'linear'});
        $progress->minor(0);
        $progress->max_update_rate(1);
    }
    my $next_update=0;

    # preload qualifier types
    $self->{QualifierTypeIDs}=$DB->select2Hash("select Name, QualifierTypeID from QualifierTypes");

    #$DB->runSQL("BEGIN TRANSACTION");

    my $count=0;
    my $potentialEntries=0;
    my $tableInsert_sth=$DB->prepare('INSERT INTO Titles2Genres (TitleID, GenreID) VALUES (?,?)');

    while(<$fh>) {
        $lineCount++;
        my $line=$_;
        #print "read line $lineCount:$line\n";

        # end is line consisting of only '-'
        last if ( $line=~m/^\-\-\-\-\-\-\-+/o );
        next if ( $line=~m/\s*\{\{SUSPENDED\}\}/o);

        $potentialEntries++;

        $line=~s/\n$//o;

        my $tab=index($line, "\t");
        if ( $tab == -1 ) {
            $self->error("$file:$lineCount: unrecognized format (missing tab)");
            next;
        }

        my $mkey=substr($line, 0, $tab);

        # ignore {Twelve Angry Men (1954)}
        # TODO - do we want this ?
        #$mkey=~s/\s*\{[^\}]+\}//go;

        # skip entries that have {} in them since they're tv episodes
        #next if ( $mkey=~s/\s*\{[^\}]+\}$//o );

        my $genre=substr($line, $tab);

        # genres sometimes has more than one tab
        $genre=~s/^\t+//og;

        # only keys recorded in {imdbMovie2DBKey} are linked to genres
        if ( $self->{imdbMovie2DBKey}->{$mkey} ) {
            # insert into db as discovered
            if ( ! defined($self->{GenreID}->{$genre}) ) {
                $self->{GenreID}->{$genre}=$DB->insert_row('Genres', 'GenreID', Name=>$genre);
            }
            $tableInsert_sth->execute($self->{imdbMovie2DBKey}->{$mkey},
                                      $self->{GenreID}->{$genre});

            # returned count is number of titles found
            $count++;

            if ( ($count % 50000) ==0 ) {
                $DB->commit();
            }
        }

        if ( $progress ) {
            # re-adjust target so progress bar doesn't seem too wonky
            if ( $count > $countEstimate ) {
                $countEstimate = $progress->target($count+1000);
                $next_update=$progress->update($count);
            }
            elsif ( $count > $next_update ) {
                $next_update=$progress->update($count);
            }
        }
    }
    #$DB->runSQL("END TRANSACTION");

    $progress->update($countEstimate) if ( $progress );

    $self->status(sprintf("importing Genres found ".withThousands($count)." in ".
                          withThousands($potentialEntries)." entries in %d seconds",time()-$startTime));

    closeMaybeGunzip($file, $fh);
    $DB->commit();
    return($count);
}
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
sub importActors($$$$) |
|
599
|
|
|
|
|
|
|
{ |
|
600
|
0
|
|
|
0
|
0
|
|
my ($self, $whichCastType, $castCountEstimate, $file, $DB)=@_; |
|
601
|
0
|
|
|
|
|
|
my $startTime=time(); |
|
602
|
|
|
|
|
|
|
|
|
603
|
0
|
0
|
|
|
|
|
if ( $whichCastType eq "Actors" ) { |
|
604
|
0
|
0
|
0
|
|
|
|
if ( $DB->table_row_count('Actors') > 0 || |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
605
|
|
|
|
|
|
|
$DB->table_row_count('Titles2Actors') > 0 || |
|
606
|
|
|
|
|
|
|
$DB->table_row_count('Titles2Hosts') > 0 || |
|
607
|
|
|
|
|
|
|
$DB->table_row_count('Titles2Narrators') > 0 ) { |
|
608
|
0
|
|
|
|
|
|
$self->status("clearing previously loaded data.."); |
|
609
|
0
|
|
|
|
|
|
$DB->table_clear('Actors'); |
|
610
|
0
|
|
|
|
|
|
$DB->table_clear('Titles2Actors'); |
|
611
|
0
|
|
|
|
|
|
$DB->table_clear('Titles2Hosts'); |
|
612
|
0
|
|
|
|
|
|
$DB->table_clear('Titles2Narrators'); |
|
613
|
|
|
|
|
|
|
} |
|
614
|
|
|
|
|
|
|
} |
|
615
|
|
|
|
|
|
|
|
|
616
|
0
|
|
|
|
|
|
my $header; |
|
617
|
|
|
|
|
|
|
my $whatAreWeParsing; |
|
618
|
0
|
|
|
|
|
|
my $lineCount=0; |
|
619
|
|
|
|
|
|
|
|
|
620
|
0
|
0
|
|
|
|
|
if ( $whichCastType eq "Actors" ) { |
|
|
|
0
|
|
|
|
|
|
|
621
|
0
|
|
|
|
|
|
$header="THE ACTORS LIST"; |
|
622
|
0
|
|
|
|
|
|
$whatAreWeParsing=1; |
|
623
|
|
|
|
|
|
|
} |
|
624
|
|
|
|
|
|
|
elsif ( $whichCastType eq "Actresses" ) { |
|
625
|
0
|
|
|
|
|
|
$header="THE ACTRESSES LIST"; |
|
626
|
0
|
|
|
|
|
|
$whatAreWeParsing=2; |
|
627
|
|
|
|
|
|
|
} |
|
628
|
|
|
|
|
|
|
else { |
|
629
|
0
|
|
|
|
|
|
die "why are we here ?"; |
|
630
|
|
|
|
|
|
|
} |
|
631
|
|
|
|
|
|
|
|
|
632
|
0
|
|
0
|
|
|
|
my $fh = openMaybeGunzip($file) || return(-2); |
|
633
|
0
|
0
|
|
|
|
|
my $progress=Term::ProgressBar->new({name => "importing $whichCastType", |
|
634
|
|
|
|
|
|
|
count => $castCountEstimate, |
|
635
|
|
|
|
|
|
|
ETA => 'linear'}) |
|
636
|
|
|
|
|
|
|
if ($self->{showProgressBar}); |
|
637
|
0
|
0
|
|
|
|
|
$progress->minor(0) if ($self->{showProgressBar}); |
|
638
|
0
|
0
|
|
|
|
|
$progress->max_update_rate(1) if ($self->{showProgressBar}); |
|
639
|
0
|
|
|
|
|
|
my $next_update=0; |
|
640
|
|
|
|
|
|
|
|
|
641
|
0
|
|
|
|
|
|
while(<$fh>) { |
|
642
|
0
|
|
|
|
|
|
$lineCount++; |
|
643
|
0
|
0
|
|
|
|
|
if ( m/^$header/ ) { |
|
|
|
0
|
|
|
|
|
|
|
644
|
0
|
0
|
0
|
|
|
|
if ( !($_=<$fh>) || !m/^===========/o ) { |
|
645
|
0
|
|
|
|
|
|
$self->error("missing ======= after $header at line $lineCount"); |
|
646
|
0
|
|
|
|
|
|
closeMaybeGunzip($file, $fh); |
|
647
|
0
|
|
|
|
|
|
return(-1); |
|
648
|
|
|
|
|
|
|
} |
|
649
|
0
|
0
|
0
|
|
|
|
if ( !($_=<$fh>) || !m/^\s*$/o ) { |
|
650
|
0
|
|
|
|
|
|
$self->error("missing empty line after ======= at line $lineCount"); |
|
651
|
0
|
|
|
|
|
|
closeMaybeGunzip($file, $fh); |
|
652
|
0
|
|
|
|
|
|
return(-1); |
|
653
|
|
|
|
|
|
|
} |
|
654
|
0
|
0
|
0
|
|
|
|
if ( !($_=<$fh>) || !m/^Name\s+Titles\s*$/o ) { |
|
655
|
0
|
|
|
|
|
|
$self->error("missing name/titles line after ======= at line $lineCount"); |
|
656
|
0
|
|
|
|
|
|
closeMaybeGunzip($file, $fh); |
|
657
|
0
|
|
|
|
|
|
return(-1); |
|
658
|
|
|
|
|
|
|
} |
|
659
|
0
|
0
|
0
|
|
|
|
if ( !($_=<$fh>) || !m/^[\s\-]+$/o ) { |
|
660
|
0
|
|
|
|
|
|
$self->error("missing name/titles suffix line after ======= at line $lineCount"); |
|
661
|
0
|
|
|
|
|
|
closeMaybeGunzip($file, $fh); |
|
662
|
0
|
|
|
|
|
|
return(-1); |
|
663
|
|
|
|
|
|
|
} |
|
664
|
0
|
|
|
|
|
|
last; |
|
665
|
|
|
|
|
|
|
} |
|
666
|
|
|
|
|
|
|
elsif ( $lineCount > 1000 ) { |
|
667
|
0
|
|
|
|
|
|
$self->error("$file: stopping at line $lineCount, didn't see \"$header\" line"); |
|
668
|
0
|
|
|
|
|
|
closeMaybeGunzip($file, $fh); |
|
669
|
0
|
|
|
|
|
|
return(-1); |
|
670
|
|
|
|
|
|
|
} |
|
671
|
|
|
|
|
|
|
} |
|
672
|
|
|
|
|
|
|
|
|
673
|
0
|
|
|
|
|
|
my $cur_name; |
|
674
|
0
|
|
|
|
|
|
my $count=0; |
|
675
|
0
|
|
|
|
|
|
my $castNames=0; |
|
676
|
0
|
|
|
|
|
|
my $tableInsert_sth1=$DB->prepare('INSERT INTO Actors (ActorID, SearchName, Name) VALUES (?,?,?)'); |
|
677
|
0
|
|
|
|
|
|
my $tableInsert_sth2=$DB->prepare('INSERT INTO Titles2Hosts (TitleID, ActorID) VALUES (?,?)'); |
|
678
|
0
|
|
|
|
|
|
my $tableInsert_sth3=$DB->prepare('INSERT INTO Titles2Narrators (TitleID, ActorID) VALUES (?,?)'); |
|
679
|
0
|
|
|
|
|
|
my $tableInsert_sth4=$DB->prepare('INSERT INTO Titles2Actors (TitleID, ActorID, Billing) VALUES (?,?,?)'); |
|
680
|
|
|
|
|
|
|
|
|
681
|
0
|
|
|
|
|
|
my $cur_actorId=$DB->select2Scalar('Select MAX(ActorID) from Actors'); |
|
682
|
0
|
0
|
|
|
|
|
if ( !defined($cur_actorId) ) { |
|
683
|
0
|
|
|
|
|
|
$cur_actorId=0; |
|
684
|
|
|
|
|
|
|
} |
|
685
|
|
|
|
|
|
|
|
|
686
|
0
|
|
|
|
|
|
my $potentialEntries=0; |
|
687
|
0
|
|
|
|
|
|
while(<$fh>) { |
|
688
|
0
|
|
|
|
|
|
$lineCount++; |
|
689
|
0
|
|
|
|
|
|
my $line=$_; |
|
690
|
0
|
|
|
|
|
|
$line=~s/\n$//o; |
|
691
|
|
|
|
|
|
|
#$self->status("read line $lineCount:$line"); |
|
692
|
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
# end is line consisting of only '-' |
|
694
|
0
|
0
|
|
|
|
|
last if ( $line=~m/^\-\-\-\-\-\-\-+/o ); |
|
695
|
|
|
|
|
|
|
|
|
696
|
0
|
0
|
|
|
|
|
next if ( length($line) == 0 ); |
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
# try ignoring these |
|
699
|
0
|
0
|
|
|
|
|
next if ($line=~m/\s*\{\{SUSPENDED\}\}/o); |
|
700
|
|
|
|
|
|
|
|
|
701
|
0
|
|
|
|
|
|
$potentialEntries++; |
|
702
|
0
|
|
|
|
|
|
my $billing=9999; |
|
703
|
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
# actors or actresses |
|
705
|
0
|
0
|
|
|
|
|
if ( $line=~s/\s*<(\d+)>//o ) { |
|
706
|
0
|
|
|
|
|
|
$billing=int($1); |
|
707
|
0
|
0
|
|
|
|
|
next if ( $billing >3 ); |
|
708
|
|
|
|
|
|
|
} |
|
709
|
|
|
|
|
|
|
|
|
710
|
0
|
0
|
|
|
|
|
if ( $line=~s/^([^\t]+)\t+//o ) { |
|
711
|
0
|
|
|
|
|
|
$cur_name=$1; |
|
712
|
0
|
|
|
|
|
|
$castNames++; |
|
713
|
|
|
|
|
|
|
|
|
714
|
0
|
|
|
|
|
|
$cur_actorId++; |
|
715
|
|
|
|
|
|
|
|
|
716
|
0
|
|
|
|
|
|
my $c=$cur_name; |
|
717
|
0
|
|
|
|
|
|
$c=~s/\s*\([IVXL]+\)//o; |
|
718
|
0
|
|
|
|
|
|
$tableInsert_sth1->execute($cur_actorId, $DB->makeSearchableTitle($c, 0), $cur_name); |
|
719
|
|
|
|
|
|
|
|
|
720
|
0
|
0
|
|
|
|
|
if ( $self->{showProgressBar} ) { |
|
721
|
|
|
|
|
|
|
# re-adjust target so progress bar doesn't seem too wonky |
|
722
|
0
|
0
|
|
|
|
|
if ( $castNames > $castCountEstimate ) { |
|
|
|
0
|
|
|
|
|
|
|
723
|
0
|
|
|
|
|
|
$castCountEstimate = $progress->target($castNames+100); |
|
724
|
0
|
|
|
|
|
|
$next_update=$progress->update($castNames); |
|
725
|
|
|
|
|
|
|
} |
|
726
|
|
|
|
|
|
|
elsif ( $castNames > $next_update ) { |
|
727
|
0
|
|
|
|
|
|
$next_update=$progress->update($castNames); |
|
728
|
|
|
|
|
|
|
} |
|
729
|
|
|
|
|
|
|
} |
|
730
|
|
|
|
|
|
|
} |
|
731
|
|
|
|
|
|
|
|
|
732
|
0
|
|
|
|
|
|
my $isHost=0; |
|
733
|
0
|
|
|
|
|
|
my $isNarrator=0; |
|
734
|
0
|
0
|
|
|
|
|
if ( (my $start=index($line, " [")) != -1 ) { |
|
735
|
|
|
|
|
|
|
#my $end=rindex($line, "]"); |
|
736
|
0
|
|
|
|
|
|
my $ex=substr($line, $start+1); |
|
737
|
|
|
|
|
|
|
|
|
738
|
0
|
0
|
|
|
|
|
if ( $ex=~s/Host//o ) { |
|
739
|
0
|
|
|
|
|
|
$isHost=1; |
|
740
|
|
|
|
|
|
|
} |
|
741
|
0
|
0
|
|
|
|
|
if ( $ex=~s/Narrator//o ) { |
|
742
|
0
|
|
|
|
|
|
$isNarrator=1; |
|
743
|
|
|
|
|
|
|
} |
|
744
|
0
|
|
|
|
|
|
$line=substr($line, 0, $start); |
|
745
|
|
|
|
|
|
|
# ignore character name |
|
746
|
|
|
|
|
|
|
} |
|
747
|
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
# TODO - do we want to just ignore these ? |
|
749
|
0
|
0
|
|
|
|
|
if ( $line=~s/\s*\(aka ([^\)]+)\).*$//o ) { |
|
750
|
|
|
|
|
|
|
#$attrs=$1; |
|
751
|
|
|
|
|
|
|
} |
|
752
|
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
# TODO - what are we ignoring here ? |
|
754
|
0
|
0
|
|
|
|
|
if ( $line=~s/ (\(.*)$//o ) { |
|
755
|
|
|
|
|
|
|
#$attrs=$1; |
|
756
|
|
|
|
|
|
|
} |
|
757
|
0
|
|
|
|
|
|
$line=~s/^\s+//og; |
|
758
|
0
|
|
|
|
|
|
$line=~s/\s+$//og; |
|
759
|
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
# TODO - does this exist ? |
|
761
|
0
|
0
|
|
|
|
|
if ( $line=~s/\s+Narrator$//o ) { |
|
762
|
0
|
|
|
|
|
|
$self->error("extra narrator on line: $lineCount"); |
|
763
|
|
|
|
|
|
|
# TODO - do we want to store this ? Does it actually occur ? |
|
764
|
|
|
|
|
|
|
# ignore |
|
765
|
|
|
|
|
|
|
} |
|
766
|
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
#if ( $line=~s/\s*\([A-Z]+\)$//o ) { |
|
768
|
|
|
|
|
|
|
#} |
|
769
|
|
|
|
|
|
|
|
|
770
|
0
|
|
|
|
|
|
my $titleID=$self->{imdbMovie2DBKey}->{$line}; |
|
771
|
0
|
0
|
|
|
|
|
if ( $titleID ) { |
|
772
|
0
|
0
|
|
|
|
|
if ( $isHost ) { |
|
773
|
0
|
|
|
|
|
|
$tableInsert_sth2->execute($titleID, $cur_actorId); |
|
774
|
|
|
|
|
|
|
} |
|
775
|
0
|
0
|
|
|
|
|
if ( $isNarrator ) { |
|
776
|
0
|
|
|
|
|
|
$tableInsert_sth3->execute($titleID, $cur_actorId); |
|
777
|
|
|
|
|
|
|
} |
|
778
|
0
|
0
|
0
|
|
|
|
if ( !$isHost && !$isNarrator ) { |
|
779
|
0
|
|
|
|
|
|
$tableInsert_sth4->execute($titleID, $cur_actorId, $billing); |
|
780
|
|
|
|
|
|
|
} |
|
781
|
|
|
|
|
|
|
|
|
782
|
0
|
|
|
|
|
|
$count++; |
|
783
|
0
|
0
|
|
|
|
|
if ( ($count % 50000) == 0 ) { |
|
784
|
0
|
|
|
|
|
|
$DB->commit(); |
|
785
|
|
|
|
|
|
|
} |
|
786
|
|
|
|
|
|
|
} |
|
787
|
|
|
|
|
|
|
else { |
|
788
|
|
|
|
|
|
|
#warn($line); |
|
789
|
|
|
|
|
|
|
} |
|
790
|
|
|
|
|
|
|
} |
|
791
|
0
|
0
|
|
|
|
|
$progress->update($castCountEstimate) if ($self->{showProgressBar}); |
|
792
|
|
|
|
|
|
|
|
|
793
|
0
|
|
|
|
|
|
$self->status(sprintf("importing $whichCastType found ".withThousands($castNames)." names, ". |
|
794
|
|
|
|
|
|
|
withThousands($count)." titles in ".withThousands($potentialEntries)." entries in %d seconds",time()-$startTime)); |
|
795
|
|
|
|
|
|
|
|
|
796
|
0
|
|
|
|
|
|
closeMaybeGunzip($file, $fh); |
|
797
|
|
|
|
|
|
|
|
|
798
|
0
|
|
|
|
|
|
$DB->commit(); |
|
799
|
0
|
|
|
|
|
|
return($castNames); |
|
800
|
|
|
|
|
|
|
} |
|
801
|
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
# importDirectors - load an IMDb "directors" list file into the Directors and
# Titles2Directors tables.
#
#   $self              - importer object; reads {showProgressBar} and
#                        {imdbMovie2DBKey} (title key => TitleID), and calls
#                        status()/error() for logging
#   $castCountEstimate - expected number of names, used to size the progress bar
#   $file              - path handed to openMaybeGunzip()
#   $DB                - database handle object (table_row_count, table_clear,
#                        prepare, commit)
#
# Returns the number of name lines seen, -2 if the file cannot be opened, or
# -1 if the expected list header is missing or malformed.
sub importDirectors($$$)
{
    my ($self, $castCountEstimate, $file, $DB)=@_;
    my $startTime=time();
    my $lineCount=0;

    # a previous (possibly partial) import leaves rows behind; start clean
    if ( $DB->table_row_count('Directors') > 0 ||
         $DB->table_row_count('Titles2Directors') > 0 ) {
        $self->status("clearing previously loaded data..");
        $DB->table_clear('Directors');
        $DB->table_clear('Titles2Directors');
    }

    my $fh = openMaybeGunzip($file) || return(-2);

    # Declare first, then assign conditionally: the behaviour of
    # "my $x = ... if COND" is documented as undefined.
    my $progress;
    if ( $self->{showProgressBar} ) {
        $progress=Term::ProgressBar->new({name  => "importing Directors",
                                          count => $castCountEstimate,
                                          ETA   => 'linear'});
        $progress->minor(0);
        $progress->max_update_rate(1);
    }
    my $next_update=0;

    # The four lines that must follow "THE DIRECTORS LIST", each paired with
    # the error reported when that line is missing or does not match.
    my @headerChecks=(
        [ qr/^===========/,        "missing ======= after THE DIRECTORS LIST at line" ],
        [ qr/^\s*$/,               "missing empty line after ======= at line" ],
        [ qr/^Name\s+Titles\s*$/,  "missing name/titles line after ======= at line" ],
        [ qr/^[\s\-]+$/,           "missing name/titles suffix line after ======= at line" ],
    );

    # skip the preamble until the list header is found (give up after 1000 lines)
    while ( my $headerLine=<$fh> ) {
        $lineCount++;
        if ( $headerLine=~m/^THE DIRECTORS LIST/ ) {
            for my $check ( @headerChecks ) {
                my ($pattern, $complaint)=@{$check};
                my $next=<$fh>;
                if ( !$next || $next!~$pattern ) {
                    $self->error("$complaint $lineCount");
                    closeMaybeGunzip($file, $fh);
                    return(-1);
                }
            }
            last;
        }
        elsif ( $lineCount > 1000 ) {
            $self->error("$file: stopping at line $lineCount, didn't see \"THE DIRECTORS LIST\" line");
            closeMaybeGunzip($file, $fh);
            return(-1);
        }
    }

    my $cur_name;             # director the current title line belongs to
    my $count=0;              # title links inserted
    my $castNames=0;          # name lines seen
    my %found;                # director name => DirectorID already inserted
    my $directorCount=0;      # last DirectorID handed out
    my $potentialEntries=0;   # non-empty, non-suspended lines examined

    my $tableInsert_sth=$DB->prepare('INSERT INTO Directors (DirectorID, SearchName, Name) VALUES (?,?,?)');
    my $tableInsert_sth2=$DB->prepare('INSERT INTO Titles2Directors (TitleID, DirectorID) VALUES (?,?)');

    while ( my $line=<$fh> ) {
        $lineCount++;
        $line=~s/\n$//;

        # end is line consisting of only '-'
        last if ( $line=~m/^\-\-\-\-\-\-\-+/ );
        next if ( length($line) == 0 );

        # try ignoring these
        next if ( $line=~m/\s*\{\{SUSPENDED\}\}/ );

        $potentialEntries++;

        # a line that starts with "<name><TAB>" begins a new director;
        # continuation lines (leading tab) keep the previous name
        if ( $line=~s/^([^\t]+)\t+// ) {
            $cur_name=$1;
            $castNames++;

            if ( $self->{showProgressBar} ) {
                # re-adjust target so progress bar doesn't seem too wonky
                if ( $castNames > $castCountEstimate ) {
                    $castCountEstimate = $progress->target($castNames+100);
                    $next_update=$progress->update($castNames);
                }
                elsif ( $castNames > $next_update ) {
                    $next_update=$progress->update($castNames);
                }
            }
        }

        # Sometimes there are extra bits of info attached at the end of
        # lines, e.g. "(as Tomasz Baginski)", "(co-director)",
        # "(segment "Epicac")"; they are ignored.
        #
        # The separator must be TWO spaces: a single space would also match
        # the " (1997)" that is part of the title key and no lookup below
        # could ever succeed.
        # NOTE(review): assumes the list files separate attributes from the
        # title with two spaces - confirm against a real directors.list.
        $line=~s/ {2}(\(.*)$//;

        $line=~s/^\s+//;
        $line=~s/\s+$//;

        my $titleID=$self->{imdbMovie2DBKey}->{$line};
        if ( $titleID ) {
            # a director row is only created once one of their titles matches
            if ( !defined($found{$cur_name}) ) {
                $directorCount++;
                $found{$cur_name}=$directorCount;

                # search name drops the "(I)", "(II)", ... disambiguation suffix
                my $searchName=$cur_name;
                $searchName=~s/\s*\([IVXL]+\)//;
                $tableInsert_sth->execute($directorCount, $DB->makeSearchableTitle($searchName, 0), $cur_name);
            }

            $tableInsert_sth2->execute($titleID, $found{$cur_name});
            $count++;

            # commit in batches to keep the open transaction bounded
            if ( ($count % 50000) == 0 ) {
                $DB->commit();
            }
        }
        else {
            $self->error("$lineCount: unable to match title key '$line'");
        }
    }
    $progress->update($castCountEstimate) if ($self->{showProgressBar});

    $self->status(sprintf("importing Directors found ".withThousands($castNames)." names, ".
                          withThousands($count)." titles in ".withThousands($potentialEntries)." entries in %d seconds",time()-$startTime));

    closeMaybeGunzip($file, $fh);

    $DB->commit();
    return($castNames);
}
|
947
|
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
# importRatings - load an IMDb "ratings" list file into the Ratings table.
#
#   $self          - importer object; reads {showProgressBar} and
#                    {imdbMovie2DBKey} (title key => TitleID), and calls
#                    status()/error() for logging
#   $countEstimate - expected number of entries, used to size the progress bar
#   $file          - path handed to openMaybeGunzip()
#   $DB            - database handle object (table_row_count, table_clear,
#                    prepare, commit)
#
# Returns the number of well-formed rating lines seen (whether or not their
# title was known), -2 if the file cannot be opened, or -1 if the report
# header is missing or malformed.
sub importRatings($$)
{
    my ($self, $countEstimate, $file, $DB)=@_;
    my $startTime=time();
    my $lineCount=0;

    if ( $DB->table_row_count('Ratings') > 0 ) {
        $self->status("clearing previously loaded data..");
        $DB->table_clear('Ratings');
    }

    my $fh = openMaybeGunzip($file) || return(-2);

    # skip the preamble until the report header is found (give up after 1000 lines)
    while ( my $headerLine=<$fh> ) {
        $lineCount++;
        if ( $headerLine=~m/^MOVIE RATINGS REPORT/ ) {
            my $next=<$fh>;
            if ( !$next || $next!~m/^\s*$/ ) {
                $self->error("missing empty line after \"MOVIE RATINGS REPORT\" at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            # column spacing in the header line is not significant, so match
            # any run of whitespace between the column names
            $next=<$fh>;
            if ( !$next || $next!~m/^New\s+Distribution\s+Votes\s+Rank\s+Title/ ) {
                $self->error("missing \"New Distribution Votes Rank Title\" at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            last;
        }
        elsif ( $lineCount > 1000 ) {
            $self->error("$file: stopping at line $lineCount, didn't see \"MOVIE RATINGS REPORT\" line");
            closeMaybeGunzip($file, $fh);
            return(-1);
        }
    }

    # Declare first, then assign conditionally: the behaviour of
    # "my $x = ... if COND" is documented as undefined.
    my $progress;
    if ( $self->{showProgressBar} ) {
        $progress=Term::ProgressBar->new({name  => "importing Ratings",
                                          count => $countEstimate,
                                          ETA   => 'linear'});
        $progress->minor(0);
        $progress->max_update_rate(1);
    }
    my $next_update=0;

    my $countImported=0;      # rows actually inserted
    my $count=0;              # well-formed rating lines seen
    my $potentialEntries=0;   # non-empty lines examined
    my $tableInsert_sth=$DB->prepare('INSERT INTO Ratings (TitleID, Distribution, Votes, Rank) VALUES (?,?,?,?)');

    while ( my $line=<$fh> ) {
        $lineCount++;
        $line=~s/\n$//;

        # skip empty lines (only really appear right before last line ending with ----
        next if ( $line=~m/^\s*$/ );
        # end is line consisting of only '-'
        last if ( $line=~m/^\-\-\-\-\-\-\-+/ );

        $potentialEntries++;

        # columns: distribution, votes, rank, title
        # e.g. "0000000133  225568  8.9  12 Angry Men (1957)"
        if ( $line=~m/^\s+([\.|\*|\d]+)\s+(\d+)\s+(\d+\.\d+)\s+(.+)$/ ) {
            my ($distribution, $votes, $rank, $title)=($1, $2, $3, $4);

            # ratings for titles that were never imported are silently dropped
            my $titleID=$self->{imdbMovie2DBKey}->{$title};
            if ( $titleID ) {
                $tableInsert_sth->execute($titleID, $distribution, $votes, $rank);
                $countImported++;

                # commit in batches to keep the open transaction bounded
                if ( ($countImported % 50000) == 0 ) {
                    $DB->commit();
                }
            }

            $count++;

            if ( $self->{showProgressBar} ) {
                # re-adjust target so progress bar doesn't seem too wonky
                if ( $count > $countEstimate ) {
                    $countEstimate = $progress->target($count+1000);
                    $next_update=$progress->update($count);
                }
                elsif ( $count > $next_update ) {
                    $next_update=$progress->update($count);
                }
            }
        }
        else {
            $self->error("$file:$lineCount: unrecognized format");
        }
    }
    $progress->update($countEstimate) if ($self->{showProgressBar});

    $self->status(sprintf("importing Ratings found ".withThousands($count)." in ".
                          withThousands($potentialEntries)." entries in %d seconds",time()-$startTime));

    closeMaybeGunzip($file, $fh);

    $DB->commit();
    return($count);
}
|
1049
|
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
# importKeywords - load an IMDb "keywords" list file into the Keywords and
# Titles2Keywords tables.
#
#   $self          - importer object; reads {showProgressBar} and
#                    {imdbMovie2DBKey} (title key => TitleID), and calls
#                    status()/error() for logging
#   $countEstimate - expected number of entries, used to size the progress bar
#   $file          - path handed to openMaybeGunzip()
#   $DB            - database handle object (table_row_count, table_clear,
#                    prepare, commit)
#
# Returns the number of "<title> <keyword>" lines recognised (whether or not
# their title was known), -2 if the file cannot be opened, or -1 if the list
# header is missing or malformed.
sub importKeywords($$$$)
{
    my ($self, $countEstimate, $file, $DB)=@_;
    my $startTime=time();
    my $lineCount=0;

    # KeywordIDs are re-assigned from 1 on every run, so links left over from
    # an earlier import must go together with the keywords themselves
    if ( $DB->table_row_count('Keywords') > 0 ||
         $DB->table_row_count('Titles2Keywords') > 0 ) {
        $self->status("clearing previously loaded data..");
        $DB->table_clear('Keywords');
        $DB->table_clear('Titles2Keywords');
    }

    my $fh = openMaybeGunzip($file) || return(-2);

    # skip the preamble until the list header is found; this file's preamble
    # is long, hence the generous 200000-line limit
    while ( my $headerLine=<$fh> ) {
        $lineCount++;

        if ( $headerLine=~m/THE KEYWORDS LIST/ ) {
            my $next=<$fh>;
            if ( !$next || $next!~m/^===========/ ) {
                $self->error("missing ======= after \"THE KEYWORDS LIST\" at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            $next=<$fh>;
            if ( !$next || $next!~m/^\s*$/ ) {
                $self->error("missing empty line after ======= at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            last;
        }
        elsif ( $lineCount > 200000 ) {
            $self->error("$file: stopping at line $lineCount, didn't see \"THE KEYWORDS LIST\" line");
            closeMaybeGunzip($file, $fh);
            return(-1);
        }
    }

    # Declare first, then assign conditionally: the behaviour of
    # "my $x = ... if COND" is documented as undefined.
    my $progress;
    if ( $self->{showProgressBar} ) {
        $progress=Term::ProgressBar->new({name  => "importing Keywords",
                                          count => $countEstimate,
                                          ETA   => 'linear'});
        $progress->minor(0);
        $progress->max_update_rate(1);
    }
    my $next_update=0;

    my $count=0;              # recognised "<title> <keyword>" lines
    my $countImported=0;      # title/keyword links actually inserted
    my %found;                # keyword => KeywordID already inserted
    my $tableInsert_sth1=$DB->prepare('INSERT INTO Keywords (KeywordID, Name) VALUES (?,?)');
    my $tableInsert_sth2=$DB->prepare('INSERT INTO Titles2Keywords (TitleID, KeywordID) VALUES (?,?)');
    my $keywordCount=0;       # last KeywordID handed out
    my $potentialEntries=0;   # non-empty lines examined

    while ( my $line=<$fh> ) {
        $lineCount++;
        chomp($line);
        next if ($line =~ m/^\s*$/);

        $potentialEntries++;

        # the keyword is the last whitespace-delimited token; everything
        # before it is the title key
        my ($title, $keyword) = ($line =~ m/^(.*)\s+(\S+)\s*$/);
        if ( defined($title) and defined($keyword) ) {

            my $titleID=$self->{imdbMovie2DBKey}->{$title};
            if ( $titleID ) {
                # first sighting of a keyword allocates its ID
                if ( !defined($found{$keyword}) ) {
                    $keywordCount++;
                    $found{$keyword}=$keywordCount;
                    $tableInsert_sth1->execute($keywordCount, $keyword);
                }
                $tableInsert_sth2->execute($titleID, $found{$keyword});

                $countImported++;
                # commit in batches to keep the open transaction bounded
                if ( ($countImported % 50000) == 0 ) {
                    $DB->commit();
                }
            }
            $count++;

            if ( $self->{showProgressBar} ) {
                # re-adjust target so progress bar doesn't seem too wonky
                if ( $count > $countEstimate ) {
                    $countEstimate = $progress->target($count+1000);
                    $next_update=$progress->update($count);
                }
                elsif ( $count > $next_update ) {
                    $next_update=$progress->update($count);
                }
            }
        }
        else {
            $self->error("$file:$lineCount: unrecognized format \"$line\"");
        }
    }
    $progress->update($countEstimate) if ($self->{showProgressBar});

    $self->status(sprintf("importing Keywords found ".withThousands($count)." in ".
                          withThousands($potentialEntries)." entries in %d seconds",time()-$startTime));

    closeMaybeGunzip($file, $fh);
    $DB->commit();
    return($count);
}
|
1156
|
|
|
|
|
|
|
|
|
1157
|
|
|
|
|
|
|
# importPlots - load an IMDb "plot" list file into the Plots table.
#
# Each record starts with an "MV: <title>" line, followed by one or more
# summaries. A summary is a run of "PL:" lines closed by a "BY: <author>"
# line; the record ends at a line of dashes.
#
#   $self          - importer object; reads {showProgressBar} and
#                    {imdbMovie2DBKey} (title key => TitleID), and calls
#                    status()/error() for logging
#   $countEstimate - expected number of plots, used to size the progress bar
#   $file          - path handed to openMaybeGunzip()
#   $DB            - database handle object (table_row_count, table_clear,
#                    prepare, commit)
#
# Returns the number of plot rows inserted, -2 if the file cannot be opened,
# or -1 if the list header is missing or malformed.
sub importPlots($$$$)
{
    my ($self, $countEstimate, $file, $DB)=@_;
    my $startTime=time();
    my $lineCount=0;

    if ( $DB->table_row_count('Plots') > 0 ) {
        $self->status("clearing previously loaded data..");
        $DB->table_clear('Plots');
    }

    my $fh = openMaybeGunzip($file) || return(-2);

    # skip the preamble until the list header is found (give up after 500 lines)
    while ( my $headerLine=<$fh> ) {
        $lineCount++;

        if ( $headerLine=~m/PLOT SUMMARIES LIST/ ) {
            my $next=<$fh>;
            if ( !$next || $next!~m/^===========/ ) {
                $self->error("missing ======= after \"PLOT SUMMARIES LIST\" at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            $next=<$fh>;
            if ( !$next || $next!~m/^-----------/ ) {
                $self->error("missing ------- line after ======= at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            last;
        }
        elsif ( $lineCount > 500 ) {
            $self->error("$file: stopping at line $lineCount, didn't see \"PLOT SUMMARIES LIST\" line");
            closeMaybeGunzip($file, $fh);
            return(-1);
        }
    }

    # Declare first, then assign conditionally: the behaviour of
    # "my $x = ... if COND" is documented as undefined.
    my $progress;
    if ( $self->{showProgressBar} ) {
        $progress=Term::ProgressBar->new({name  => "importing Plots",
                                          count => $countEstimate,
                                          ETA   => 'linear'});
        $progress->minor(0);
        $progress->max_update_rate(1);
    }
    my $next_update=0;

    my $count=0;              # plot rows inserted
    my $potentialEntries=0;   # non-empty, non-suspended top-level lines examined
    my $tableInsert_sth=$DB->prepare('INSERT INTO Plots (TitleID, Sequence, Description, Author) VALUES (?,?,?,?)');

    while ( my $line=<$fh> ) {
        $lineCount++;
        chomp($line);
        next if ($line =~ m/^\s*$/);
        next if ($line=~m/\s*\{\{SUSPENDED\}\}/);

        $potentialEntries++;

        my ($title) = ($line =~ m/^MV:\s(.*?)\s?(\{.*\})?$/);
        if ( defined($title) ) {

            # what remains of the line (title plus any "{episode}" part) is
            # the key looked up in imdbMovie2DBKey
            $line =~s/^MV:\s*//;

            my $sequence=1;   # position of the summary within this title
            my $plot = '';    # text accumulated from "PL:" lines

            while ( my $plotLine = <$fh> ) {
                $lineCount++;
                chomp($plotLine);

                next if ($plotLine =~ m/^\s*$/);

                # plot summary is a number of lines starting "PL:"
                if ( $plotLine =~ m/PL:\s(.*)$/ ) {
                    $plot .= ($plot ne '' ?' ':'') . $1;
                }

                # "BY:" closes one summary; a dashed line closes the record
                # (and any summary that had no "BY:" line)
                if ( $plotLine =~ m/BY:\s(.*)$/ || $plotLine =~ m/^(\-\-\-\-\-\-\-\-)/ ) {
                    my $token=$1;
                    my $author=$1;

                    if ( $token eq "\-\-\-\-\-\-\-\-" ) {
                        # nothing pending: the record is simply over
                        if ( $plot eq '' ) {
                            last;
                        }
                        $author='';
                    }

                    my $titleID=$self->{imdbMovie2DBKey}->{$line};
                    if ( $titleID ) {
                        $tableInsert_sth->execute($titleID, $sequence, $plot, $author);

                        $count++;
                        # commit in batches to keep the open transaction bounded
                        if ( ($count % 50000) == 0 ) {
                            $DB->commit();
                        }
                    }
                    else {
                        $self->error("$lineCount: unable to match title key '$line'");
                    }

                    $plot='';
                    $sequence++;

                    if ( $token eq "\-\-\-\-\-\-\-\-" ) {
                        last;
                    }
                }
            }

            # the file ended in the middle of a summary
            if ( length($plot) ) {
                $self->error("$lineCount: truncated plot with title key '$line'");
            }

            if ( $self->{showProgressBar} ) {
                # re-adjust target so progress bar doesn't seem too wonky
                if ( $count > $countEstimate ) {
                    $countEstimate = $progress->target($count+1000);
                    $next_update=$progress->update($count);
                }
                elsif ( $count > $next_update ) {
                    $next_update=$progress->update($count);
                }
            }
        }
        else {
            # skip lines up to the next "MV:"
            if ($line !~ m/^(---|PL:|BY:)/ ) {
                $self->error("$file:$lineCount: unrecognized format \"$line\"");
            }
            # keep the bar moving (a debugging warn() used to fire here on
            # every update and has been dropped)
            if ( $count > $next_update && $self->{showProgressBar} ) {
                $next_update=$progress->update($count);
            }
        }
    }
    $progress->update($countEstimate) if ($self->{showProgressBar});

    $self->status(sprintf("importing Plots found ".withThousands($count)." in ".
                          withThousands($potentialEntries)." entries in %d seconds",time()-$startTime));

    closeMaybeGunzip($file, $fh);
    $DB->commit();
    return($count);
}
|
1300
|
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
# loadDBInfo - read a "key:value" info file into a hash.
#
#   $file - path of the info file
#
# Every line of the form "<key>:<value>" contributes one entry; the key is
# everything before the first colon. Lines without a colon are ignored.
#
# Returns a hash reference on success (undef if the file held no usable
# lines), or an error message string if the file cannot be opened. Callers
# tell the two apart with ref().
sub loadDBInfo($)
{
    my $file=shift;
    my $info;

    # three-argument open with a lexical handle: the path is never parsed for
    # mode characters and the handle cannot collide with another INFO handle
    open(my $fh, '<', $file) || return("imdbDir index file \"$file\":$!");
    while ( my $line=<$fh> ) {
        # chomp, not chop: a final line without a newline must keep its
        # last character
        chomp($line);
        if ( $line=~s/^([^:]+):// ) {
            $info->{$1}=$line;
        }
    }
    close($fh);
    return($info);
}
|
1316
|
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
# dbinfoLoad - populate $self->{dbinfo} from the file named by
# $self->{moviedbInfo}.
#
# Returns undef on success and 1 when loadDBInfo() did not hand back a hash
# reference (in which case $self->{dbinfo} is left untouched).
sub dbinfoLoad($)
{
    my ($self)=@_;

    my $loaded=loadDBInfo($self->{moviedbInfo});

    # anything but a hash ref (error string, undef) counts as failure
    return(1) unless ( ref($loaded) eq 'HASH' );

    $self->{dbinfo}=$loaded;
    return(undef);
}
|
1328
|
|
|
|
|
|
|
|
|
1329
|
|
|
|
|
|
|
# dbinfoAdd - set (or overwrite) one entry of the in-memory db info.
#
#   $key   - entry name
#   $value - value to store
#
# The entry only reaches disk on the next dbinfoSave(). Evaluates to the
# stored value.
sub dbinfoAdd($$$)
{
    my $self=shift;
    my ($key, $value)=@_;

    $self->{dbinfo}{$key}=$value;
}
|
1334
|
|
|
|
|
|
|
|
|
1335
|
|
|
|
|
|
|
# dbinfoGet - look up one entry of the in-memory db info.
#
#   $key          - entry name
#   $defaultValue - returned when the entry is missing or undefined
#
# A stored value of 0 or '' is returned as-is; only undef falls back to the
# default.
sub dbinfoGet($$$)
{
    my ($self, $key, $defaultValue)=@_;

    my $stored=$self->{dbinfo}->{$key};
    return( defined($stored) ? $stored : $defaultValue );
}
|
1343
|
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
# dbinfoSave - write $self->{dbinfo} to the file named by
# $self->{moviedbInfo}, one "key:value" line per entry, sorted by key.
#
# Returns 0 on success and 1 if the file cannot be opened or the data cannot
# be flushed to it.
sub dbinfoSave($)
{
    my $self=shift;

    # three-argument open with a lexical handle: the path is never parsed for
    # mode characters and the handle cannot collide with another INFO handle
    open(my $out, '>', $self->{moviedbInfo}) || return(1);
    for my $key (sort keys %{$self->{dbinfo}}) {
        print {$out} "".$key.":".$self->{dbinfo}->{$key}."\n";
    }
    # buffered write errors (disk full, ...) only surface at close
    close($out) || return(1);
    return(0);
}
|
1354
|
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
# dbinfoGetFileSize - report the uncompressed size, in bytes, of a list file.
#
#   $key - name under which the file is registered in $self->{listFiles}
#
# Returns 0 when the registered path is not a plain file. For paths ending
# in ".gz" the uncompressed size is taken from `gzip -l`; if gzip cannot be
# run or its output cannot be parsed, the size is estimated by assuming the
# file compressed by 65%.
#
# Dies if $self->{listFiles}->paths_isset($key) returns undef.
sub dbinfoGetFileSize($$)
{
    my ($self, $key)=@_;

    if ( !defined($self->{listFiles}->paths_isset($key) ) ) {
        die ("invalid call for $key");
    }
    my $filePath=$self->{listFiles}->paths_index($key);
    if ( ! -f $filePath ) {
        return(0);
    }

    my $fileSize=int(-s $filePath);

    # if compressed, then attempt to run gzip -l
    # (the dot is escaped so that only a real ".gz" suffix qualifies)
    if ( $filePath=~m/\.gz$/ ) {
        # until gzip says otherwise, default to a rough guess of 65% compression
        $fileSize=int(($fileSize*100)/(100-65));

        # List-form pipe open runs gzip without a shell, so spaces or shell
        # metacharacters in the path are harmless. The eval covers perls and
        # platforms where the list form is not implemented and open() dies.
        my $fd;
        my $opened=eval { open($fd, '-|', 'gzip', '-l', $filePath) };
        if ( $opened ) {
            while ( my $row=<$fd> ) {
                # data row: "<compressed> <uncompressed> ..."
                if ( $row=~m/^\s*\d+\s+(\d+)/ ) {
                    $fileSize=$1;
                }
            }
            close($fd);
        }
    }
    return($fileSize);
}
|
1390
|
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
# _redirect - open or close the per-stage log file.
#
#   $file - path of the log file to (re)create, or undef to close the
#           current log
#
# On open, the handle is stored in $self->{logfd} and
# $self->{errorCountInLog} is reset to 0. On close, $self->{logfd} is set to
# undef.
#
# Returns 1 on success; 0 (after printing the reason to STDERR) if the file
# cannot be opened.
sub _redirect($$)
{
    my ($self, $file)=@_;

    if ( defined($file) ) {
        # three-argument open: the path is never parsed for mode characters
        if ( !open($self->{logfd}, '>', $file) ) {
            print STDERR "$file:$!\n";
            return(0);
        }
        $self->{errorCountInLog}=0;
    }
    else {
        # closing when no log is open is a harmless no-op
        close($self->{logfd}) if ( defined($self->{logfd}) );
        $self->{logfd}=undef;
    }
    return(1);
}
|
1408
|
|
|
|
|
|
|
|
|
1409
|
|
|
|
|
|
|
=head2 importListComplete |
|
1410
|
|
|
|
|
|
|
|
|
1411
|
|
|
|
|
|
|
Check to see if the specified list file has been successfully imported |
|
1412
|
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
=cut |
|
1414
|
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
# Returns 1 when the stage log "<imdbDir>/stage-<type>.log" exists as a
# plain file, 0 otherwise.
sub importListComplete($)
{
    my $self=shift;
    my $type=shift;

    my $stageLog="$self->{imdbDir}/stage-$type.log";
    return( ( -f $stageLog ) ? 1 : 0 );
}
|
1424
|
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
# _prepStage - get ready to import one list type.
#
#   $type - list type about to be imported
#
# Creates the IMDB::Local::DB object for "<imdbDir>/imdb.db". For the
# 'movies' type the database is deleted and every stage log is removed
# first. Logging is then redirected to "<imdbDir>/stage-<type>.log", the
# database is connected, and synchronous writes are switched off.
#
# Returns the connected database object, or 1 if the stage log cannot be
# opened. Dies if the database connection fails.
sub _prepStage
{
    my ($self, $type)=@_;

    # direct method call; indirect object syntax ("new Class(...)") is
    # parsed by heuristic
    my $DB=IMDB::Local::DB->new(database=>"$self->{imdbDir}/imdb.db");

    # if we're restarting, lets start fresh
    if ( $type eq 'movies' ) {
        $DB->delete();

        for my $staleType ( $self->listTypes() ) {
            unlink("$self->{imdbDir}/stage-$staleType.log");
        }
    }

    # plain interpolation: $type must never be treated as part of a
    # sprintf format
    if ( !$self->_redirect("$self->{imdbDir}/stage-$type.log") ) {
        return(1);
    }

    if ( !$DB->connect() ) {
        die "imdbdb connect failed:$DBI::errstr";
    }

    $DB->runSQL("PRAGMA synchronous = OFF");
    return($DB);
}
|
1454
|
|
|
|
|
|
|
|
|
1455
|
|
|
|
|
|
|
# _unprepStage($db)
#
# Counterpart of _prepStage: commit and disconnect the stage's database
# handle, then close the stage log.  Yields whatever _redirect(undef)
# returns.
sub _unprepStage
{
    my $self=shift;
    my $db=shift;

    $db->commit();
    $db->disconnect();

    return $self->_redirect(undef);
}
|
1464
|
|
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
|
# importListFile($DB, $type)
#
# Import the list file for $type into the connected database object $DB
# and record file-size and entry-count statistics through dbinfoAdd().
#
# Parameters:
#   $DB   - connected database object (importList passes the result of
#           _prepStage)
#   $type - one of the names returned by listTypes()
#
# Returns 0 on success and 1 on failure (list file not present, importer
# returned a negative count, or no handler exists for $type).  Dies when
# $type is not one of listTypes(), or when titles.tsv cannot be written
# or read.
#
# Stage ordering: 'movies' writes "$self->{imdbDir}/titles.tsv" (database
# key, TAB, IMDB movie key).  Every other stage loads that file into
# $self->{imdbMovie2DBKey} when the map is not already in memory.
sub importListFile($$$)
{
    my ($self, $DB, $type)=@_;

    # exact string comparison: using $type as a pattern would let regex
    # metacharacters in the argument match unrelated list names
    if ( !grep { $_ eq $type } $self->listTypes() ) {
        die "invalid type $type";
    }

    my $titlesFile="$self->{imdbDir}/titles.tsv";

    # Estimate the number of entries in list $key (size reported by
    # dbinfoGetFileSize divided by a per-type average entry size) and
    # record the list file statistics.
    my $dbinfoCalcEstimate=sub {
        my ($self, $key)=@_;

        my %estimateSizePerEntry=(movies=>47,
                                  directors=>258,
                                  actors=>695,
                                  actresses=>779,
                                  genres=>38,
                                  ratings=>68,
                                  keywords=>47,
                                  plot=>731);
        my $fileSize=$self->dbinfoGetFileSize($key);

        my $countEstimate=int($fileSize/$estimateSizePerEntry{$key});

        my $filePath=$self->{listFiles}->paths_index($key);

        $self->dbinfoAdd($key."_list_file", $filePath);
        $self->dbinfoAdd($key."_list_file_size", "".int(-s $filePath));
        $self->dbinfoAdd($key."_list_file_size_uncompressed", $fileSize);
        $self->dbinfoAdd($key."_list_count_estimate", $countEstimate);
        return($countEstimate);
    };

    # Actual bytes per entry, used to suggest a better estimate constant.
    my $dbinfoCalcBytesPerEntry=sub {
        my ($self, $key, $calcActualForThisNumber)=@_;

        # an import that found no entries must not divide by zero
        return(0) if ( !$calcActualForThisNumber );

        my $fileSize=$self->dbinfoGetFileSize($key);
        return(int($fileSize/$calcActualForThisNumber));
    };

    # Shared post-import check.  Returns true when the importer failed
    # ($num negative; -2 additionally reports which file to download).
    # Otherwise logs a status note when $num is further from the estimate
    # than the given tolerance, and returns false.
    #   tolerance     - allowed drift as a fraction of the estimate
    #   bytesPerEntry - include the measured bytes/entry in the note
    #   seeInterfaces - append the imdb.com/interfaces hint to the error
    #   label         - name used in the note (defaults to $key)
    my $importFailed=sub {
        my ($key, $num, $countEstimate, %opt)=@_;

        if ( $num < 0 ) {
            if ( $num == -2 ) {
                my $hint=$opt{seeInterfaces} ? " (see http://www.imdb.com/interfaces)" : "";
                $self->error("you need to download ".$self->{listFiles}->paths_index($key)." from ftp.imdb.com".$hint);
            }
            return(1);
        }

        if ( abs($num - $countEstimate) > $countEstimate*$opt{tolerance} ) {
            my $label=defined($opt{label}) ? $opt{label} : $key;
            if ( $opt{bytesPerEntry} ) {
                my $better=$dbinfoCalcBytesPerEntry->($self, $key, $num);
                $self->status("ARG estimate of $countEstimate for $label needs updating, found $num ($better bytes/entry)");
            }
            else {
                $self->status("ARG estimate of $countEstimate for $label needs updating, found $num");
            }
        }
        return(0);
    };

    if ( ! -f $self->{listFiles}->paths_index($type) ) {
        $self->status("no $type file available");
        return(1);
    }

    if ( $type eq 'movies' ) {

        $DB->drop_table_indexes('Titles');

        my $countEstimate=$dbinfoCalcEstimate->($self, "movies");

        my $num=$self->importMovies($countEstimate, $self->{listFiles}->paths_index('movies'), $DB);
        if ( $importFailed->("movies", $num, $countEstimate, tolerance=>0.10, bytesPerEntry=>1) ) {
            return(1);
        }

        # persist the movie key map for the follow-up stages
        open(my $out, '>', $titlesFile) || die "$titlesFile:$!";
        for my $mkey (sort keys %{$self->{imdbMovie2DBKey}}) {
            print $out "".$self->{imdbMovie2DBKey}->{$mkey}."\t".$mkey."\n";
        }
        # buffered write errors only surface at close
        close($out) || die "$titlesFile:$!";

        $self->dbinfoAdd("db_stat_movie_count", "$num");

        $self->status("Creating Table indexes..");
        $DB->create_table_indexes('Titles');

        return(0);
    }

    # read in keys so we have them for follow-up stages
    if ( !defined($self->{imdbMovie2DBKey}) ) {
        open(my $in, '<', $titlesFile) || die "$titlesFile:$!";
        while ( my $line=<$in> ) {
            chomp($line);
            if ( $line =~ m/^(\d+)\t(.+)/ ) {
                $self->{imdbMovie2DBKey}->{$2}=$1;
            }
        }
        close($in);
    }

    # every stage below needs the movie keys loaded above
    if ( $type eq 'directors' ) {

        $DB->drop_table_indexes('Directors');

        my $countEstimate=$dbinfoCalcEstimate->($self, "directors");

        my $num=$self->importDirectors($countEstimate, $self->{listFiles}->paths_index('directors'), $DB);
        if ( $importFailed->("directors", $num, $countEstimate, tolerance=>0.10, bytesPerEntry=>1, seeInterfaces=>1) ) {
            return(1);
        }
        $self->dbinfoAdd("db_stat_director_count", "$num");

        $self->status("Creating Table indexes..");
        $DB->create_table_indexes('Directors');

        return(0);
    }

    # 'actors' drops the Actors indexes and 'actresses' (below) recreates
    # them; the indexes are deliberately not rebuilt here
    if ( $type eq 'actors' ) {
        $DB->drop_table_indexes('Actors');

        my $countEstimate=$dbinfoCalcEstimate->($self, "actors");

        my $num=$self->importActors("Actors", $countEstimate, $self->{listFiles}->paths_index('actors'), $DB);
        if ( $importFailed->("actors", $num, $countEstimate, tolerance=>0.10, bytesPerEntry=>1, seeInterfaces=>1) ) {
            return(1);
        }
        $self->dbinfoAdd("db_stat_actor_count", "$num");
        return(0);
    }

    if ( $type eq 'actresses' ) {

        my $countEstimate=$dbinfoCalcEstimate->($self, "actresses");
        my $num=$self->importActors("Actresses", $countEstimate, $self->{listFiles}->paths_index('actresses'), $DB);
        if ( $importFailed->("actresses", $num, $countEstimate, tolerance=>0.10, bytesPerEntry=>1, seeInterfaces=>1) ) {
            return(1);
        }
        $self->dbinfoAdd("db_stat_actress_count", "$num");

        $self->status("Creating Table indexes..");
        $DB->create_table_indexes('Actors');

        return(0);
    }

    if ( $type eq 'genres' ) {
        $DB->drop_table_indexes('Genres');

        my $countEstimate=$dbinfoCalcEstimate->($self, "genres");

        my $num=$self->importGenres($countEstimate, $self->{listFiles}->paths_index('genres'), $DB);
        if ( $importFailed->("genres", $num, $countEstimate, tolerance=>0.10, bytesPerEntry=>1) ) {
            return(1);
        }
        $self->dbinfoAdd("db_stat_genres_count", "$num");

        $self->status("Creating Table indexes..");
        $DB->create_table_indexes('Genres');

        return(0);
    }

    if ( $type eq 'ratings' ) {
        $DB->drop_table_indexes('Ratings');

        my $countEstimate=$dbinfoCalcEstimate->($self, "ratings");

        my $num=$self->importRatings($countEstimate, $self->{listFiles}->paths_index('ratings'), $DB);
        if ( $importFailed->("ratings", $num, $countEstimate, tolerance=>0.10, bytesPerEntry=>1) ) {
            return(1);
        }
        $self->dbinfoAdd("db_stat_ratings_count", "$num");

        $self->status("Creating Table indexes..");
        $DB->create_table_indexes('Ratings');

        return(0);
    }

    if ( $type eq 'keywords' ) {
        $DB->drop_table_indexes('Keywords');

        my $countEstimate=$dbinfoCalcEstimate->($self, "keywords");

        my $num=$self->importKeywords($countEstimate, $self->{listFiles}->paths_index('keywords'), $DB);
        if ( $importFailed->("keywords", $num, $countEstimate, tolerance=>0.05) ) {
            return(1);
        }
        $self->dbinfoAdd("keywords_list_file", $self->{listFiles}->paths_index('keywords'));
        $self->dbinfoAdd("keywords_list_file_size", -s $self->{listFiles}->paths_index('keywords'));
        $self->dbinfoAdd("db_stat_keywords_count", "$num");

        $self->status("Creating Table indexes..");
        $DB->create_table_indexes('Keywords');

        return(0);
    }

    if ( $type eq 'plot' ) {
        $DB->drop_table_indexes('Plots');

        my $countEstimate=$dbinfoCalcEstimate->($self, "plot");
        my $num=$self->importPlots($countEstimate, $self->{listFiles}->paths_index('plot'), $DB);
        if ( $importFailed->("plot", $num, $countEstimate, tolerance=>0.05, label=>"plots") ) {
            return(1);
        }
        $self->dbinfoAdd("plots_list_file", $self->{listFiles}->paths_index('plot'));
        $self->dbinfoAdd("plots_list_file_size", -s $self->{listFiles}->paths_index('plot'));
        $self->dbinfoAdd("db_stat_plots_count", "$num");

        $self->status("Creating Table indexes..");
        $DB->create_table_indexes('Plots');

        return(0);
    }

    # listed by listTypes() but no import handler above
    $self->error("invalid type $type");
    return(1);
}
|
1729
|
|
|
|
|
|
|
|
|
1730
|
|
|
|
|
|
|
=head2 importList |
|
1731
|
|
|
|
|
|
|
|
|
1732
|
|
|
|
|
|
|
Import a list file from 'listsDir' into the IMDB::Local Database. |
|
1733
|
|
|
|
|
|
|
|
|
1734
|
|
|
|
|
|
|
=cut |
|
1735
|
|
|
|
|
|
|
|
|
1736
|
|
|
|
|
|
|
sub importList($$)
{
    my ($self, $type)=@_;

    # _prepStage returns a plain 1 instead of a database object when the
    # stage log cannot be opened; report that as a failed import rather
    # than calling methods on a non-object
    my $DB=$self->_prepStage($type);
    if ( !ref($DB) ) {
        return(1);
    }

    # lets load our stats
    $self->dbinfoLoad();

    my $startTime=time();
    if ( $self->importListFile($DB, $type) != 0 ) {
        # failed stage: drop the connection without committing
        $DB->disconnect();
        return(1);
    }

    # record how long the stage took, then persist the stats
    $self->dbinfoAdd("seconds_to_complete_prep_stage_$type", (time()-$startTime));
    $self->dbinfoSave();

    # commit, disconnect and close the stage log
    $self->_unprepStage($DB);
    return(0);
}
|
1757
|
|
|
|
|
|
|
|
|
1758
|
|
|
|
|
|
|
=head2 importAll |
|
1759
|
|
|
|
|
|
|
|
|
1760
|
|
|
|
|
|
|
Import all available list files from 'listsDir' into the IMDB::Local Database. |
|
1761
|
|
|
|
|
|
|
Returns # of list files that produced errors. |
|
1762
|
|
|
|
|
|
|
|
|
1763
|
|
|
|
|
|
|
=cut |
|
1764
|
|
|
|
|
|
|
|
|
1765
|
|
|
|
|
|
|
sub importAll($$)
{
    my $self=shift;

    # run every stage in listTypes() order; a failing stage is reported
    # with warn() and counted, and the remaining stages still run
    my $failed=0;
    foreach my $type ( $self->listTypes() ) {
        next if ( $self->importList($type) == 0 );

        warn("list import $type failed to load, $self->{errorCountInLog} errors in $self->{imdbDir}/stage-$type.log");
        $failed++;
    }
    return($failed);
}
|
1778
|
|
|
|
|
|
|
|
|
1779
|
|
|
|
|
|
|
=head2 optimize |
|
1780
|
|
|
|
|
|
|
|
|
1781
|
|
|
|
|
|
|
Optimize the database for better performance. |
|
1782
|
|
|
|
|
|
|
|
|
1783
|
|
|
|
|
|
|
=cut |
|
1784
|
|
|
|
|
|
|
sub optimize($)
{
    my $self=shift;

    # VACUUM is issued on an auto-commit connection to imdb.db
    my $dbFile="$self->{imdbDir}/imdb.db";
    my $DB=IMDB::Local::DB->new(database=>$dbFile, db_AutoCommit=>1);

    $DB->connect()
        || die "imdbdb connect failed:$DBI::errstr";

    $DB->runSQL("VACUUM");
    $DB->disconnect();

    # always reports success once the connection could be made
    return(1);
}
|
1798
|
|
|
|
|
|
|
|
|
1799
|
|
|
|
|
|
|
# _NOT_USED_checkSantity()
#
# Unused (per its name).  Stamps the db_version into dbinfo, saves it,
# then runs sanityCheckDatabase() on a fresh IMDB::Local object for the
# same imdbDir.
#
# When the check reports a problem, a report is written to
# $self->{moviedbOffline} (error line, dbinfo dump, "database taken
# offline") and then replayed line by line through $self->error().
#
# Returns 0 when the check passes; returns 1 when dbinfoSave() reports
# failure (a true value) or the check fails.  Dies when the offline
# report cannot be written or re-read.
sub _NOT_USED_checkSantity($)
{
    my ($self)=@_;

    $self->dbinfoAdd("db_version", $IMDB::Local::VERSION);

    if ( $self->dbinfoSave() ) {
        $self->error("$self->{moviedbInfo}:$!");
        return(1);
    }

    $self->status("running quick sanity check on database indexes...");
    my $imdb=IMDB::Local->new('imdbDir' => $self->{imdbDir},
                              'verbose' => $self->{verbose});

    # a stale offline marker from an earlier run is removed first
    if ( -e "$self->{moviedbOffline}" ) {
        unlink("$self->{moviedbOffline}");
    }

    if ( my $errline=$imdb->sanityCheckDatabase() ) {
        open(my $off, '>', $self->{moviedbOffline}) || die "$self->{moviedbOffline}:$!";
        print $off $errline."\n";
        print $off "one of the prep stages' must have produced corrupt data\n";
        print $off "report the following details to xmltv-devel\@lists.sf.net\n";

        my $info=loadDBInfo($self->{moviedbInfo});
        if ( ref $info eq 'HASH' ) {
            for my $key (sort keys %{$info}) {
                print $off "\t$key:$info->{$key}\n";
            }
        }
        else {
            # a non-hash result is printed as-is
            print $off "\tdbinfo file corrupt\n";
            print $off "\t$info";
        }
        print $off "database taken offline\n";
        # buffered write errors only surface at close
        close($off) || die "$self->{moviedbOffline}:$!";

        # replay the report through the error channel
        open(my $in, '<', $self->{moviedbOffline}) || die "$self->{moviedbOffline}:$!";
        while ( my $line=<$in> ) {
            # chomp, not chop: only a trailing newline may be removed
            chomp($line);
            $self->error($line);
        }
        close($in);
        return(1);
    }
    $self->status("sanity intact :)");
    return(0);
}
|
1847
|
|
|
|
|
|
|
|
|
1848
|
|
|
|
|
|
|
=head1 AUTHOR |
|
1849
|
|
|
|
|
|
|
|
|
1850
|
|
|
|
|
|
|
jerryv |
|
1851
|
|
|
|
|
|
|
|
|
1852
|
|
|
|
|
|
|
=head1 BUGS |
|
1853
|
|
|
|
|
|
|
|
|
1854
|
|
|
|
|
|
|
Please report any bugs or feature requests to C<bug-imdb-local at rt.cpan.org>, or through |
|
1855
|
|
|
|
|
|
|
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=IMDB-Local>. I will be notified, and then you'll |
|
1856
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
|
1857
|
|
|
|
|
|
|
|
|
1858
|
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
|
|
1860
|
|
|
|
|
|
|
|
|
1861
|
|
|
|
|
|
|
=head1 SUPPORT |
|
1862
|
|
|
|
|
|
|
|
|
1863
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
|
1864
|
|
|
|
|
|
|
|
|
1865
|
|
|
|
|
|
|
perldoc IMDB::Local |
|
1866
|
|
|
|
|
|
|
|
|
1867
|
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
You can also look for information at: |
|
1869
|
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
=over 4 |
|
1871
|
|
|
|
|
|
|
|
|
1872
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
|
1873
|
|
|
|
|
|
|
|
|
1874
|
|
|
|
|
|
|
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=IMDB-Local> |
|
1875
|
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
|
1877
|
|
|
|
|
|
|
|
|
1878
|
|
|
|
|
|
|
L<http://annocpan.org/dist/IMDB-Local> |
|
1879
|
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
=item * CPAN Ratings |
|
1881
|
|
|
|
|
|
|
|
|
1882
|
|
|
|
|
|
|
L<http://cpanratings.perl.org/d/IMDB-Local> |
|
1883
|
|
|
|
|
|
|
|
|
1884
|
|
|
|
|
|
|
=item * Search CPAN |
|
1885
|
|
|
|
|
|
|
|
|
1886
|
|
|
|
|
|
|
L<http://search.cpan.org/dist/IMDB-Local/> |
|
1887
|
|
|
|
|
|
|
|
|
1888
|
|
|
|
|
|
|
=back |
|
1889
|
|
|
|
|
|
|
|
|
1890
|
|
|
|
|
|
|
|
|
1891
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
|
1892
|
|
|
|
|
|
|
|
|
1893
|
|
|
|
|
|
|
|
|
1894
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
|
1895
|
|
|
|
|
|
|
|
|
1896
|
|
|
|
|
|
|
Copyright 2015 jerryv. |
|
1897
|
|
|
|
|
|
|
|
|
1898
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
|
1899
|
|
|
|
|
|
|
under the terms of the Artistic License (2.0). You may obtain a |
|
1900
|
|
|
|
|
|
|
copy of the full license at: |
|
1901
|
|
|
|
|
|
|
|
|
1902
|
|
|
|
|
|
|
L<http://www.perlfoundation.org/artistic_license_2_0> |
|
1903
|
|
|
|
|
|
|
|
|
1904
|
|
|
|
|
|
|
Any use, modification, and distribution of the Standard or Modified |
|
1905
|
|
|
|
|
|
|
Versions is governed by this Artistic License. By using, modifying or |
|
1906
|
|
|
|
|
|
|
distributing the Package, you accept this license. Do not use, modify, |
|
1907
|
|
|
|
|
|
|
or distribute the Package, if you do not accept this license. |
|
1908
|
|
|
|
|
|
|
|
|
1909
|
|
|
|
|
|
|
If your Modified Version has been derived from a Modified Version made |
|
1910
|
|
|
|
|
|
|
by someone other than you, you are nevertheless required to ensure that |
|
1911
|
|
|
|
|
|
|
your Modified Version complies with the requirements of this license. |
|
1912
|
|
|
|
|
|
|
|
|
1913
|
|
|
|
|
|
|
This license does not grant you the right to use any trademark, service |
|
1914
|
|
|
|
|
|
|
mark, tradename, or logo of the Copyright Holder. |
|
1915
|
|
|
|
|
|
|
|
|
1916
|
|
|
|
|
|
|
This license includes the non-exclusive, worldwide, free-of-charge |
|
1917
|
|
|
|
|
|
|
patent license to make, have made, use, offer to sell, sell, import and |
|
1918
|
|
|
|
|
|
|
otherwise transfer the Package with respect to any patent claims |
|
1919
|
|
|
|
|
|
|
licensable by the Copyright Holder that are necessarily infringed by the |
|
1920
|
|
|
|
|
|
|
Package. If you institute patent litigation (including a cross-claim or |
|
1921
|
|
|
|
|
|
|
counterclaim) against any party alleging that the Package constitutes |
|
1922
|
|
|
|
|
|
|
direct or contributory patent infringement, then this Artistic License |
|
1923
|
|
|
|
|
|
|
to you shall terminate on the date that such litigation is filed. |
|
1924
|
|
|
|
|
|
|
|
|
1925
|
|
|
|
|
|
|
Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER |
|
1926
|
|
|
|
|
|
|
AND CONTRIBUTORS "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. |
|
1927
|
|
|
|
|
|
|
THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR |
|
1928
|
|
|
|
|
|
|
PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY |
|
1929
|
|
|
|
|
|
|
YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR |
|
1930
|
|
|
|
|
|
|
CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR |
|
1931
|
|
|
|
|
|
|
CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, |
|
1932
|
|
|
|
|
|
|
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
1933
|
|
|
|
|
|
|
|
|
1934
|
|
|
|
|
|
|
|
|
1935
|
|
|
|
|
|
|
=cut |
|
1936
|
|
|
|
|
|
|
|
|
1937
|
|
|
|
|
|
|
1; # End of IMDB::Local |