line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package DataCube::FileSplitter; |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
2595
|
use lib '..'; |
|
1
|
|
|
|
|
868
|
|
|
1
|
|
|
|
|
7
|
|
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
131
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
27
|
|
9
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
30
|
|
10
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
6
|
use Fcntl; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
337
|
|
12
|
1
|
|
|
1
|
|
5
|
use URI::file; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
36
|
|
13
|
1
|
|
|
1
|
|
6
|
use Digest::MD5; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
38
|
|
14
|
1
|
|
|
1
|
|
6
|
use Time::HiRes; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
11
|
|
15
|
1
|
|
|
1
|
|
113
|
use Data::Dumper; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
58
|
|
16
|
1
|
|
|
1
|
|
5
|
use Cwd qw(getcwd); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
38
|
|
17
|
1
|
|
|
1
|
|
4
|
use Storable qw(nstore retrieve); |
|
1
|
|
|
|
|
8
|
|
|
1
|
|
|
|
|
38
|
|
18
|
|
|
|
|
|
|
|
19
|
1
|
|
|
1
|
|
53
|
use DataCube; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use DataCube::Schema; |
21
|
|
|
|
|
|
|
use DataCube::MeasureUpdater; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# Constructor.
#
# Takes a flat list of key/value options and returns a new hash-based object
# holding a shallow copy of them.  May be invoked on a class name or on an
# existing instance; in the latter case the new object is blessed into the
# instance's class (the instance's own fields are not copied).
sub new {
    my ($invocant, %settings) = @_;

    my $package  = ref($invocant) || $invocant;
    my $instance = { %settings };

    return bless $instance, $package;
}
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Split a Storable-serialised data cube file into a directory tree of smaller
# Storable files.
#
# Calling conventions:
#   $splitter->split($path);
#   $splitter->split(file => $path, prefix => $n);
#
# Options:
#   file   - path of the file to read with Storable::retrieve (required).
#   prefix - number of leading hex characters of each key's MD5 digest used
#            to bucket the cube's keys.  Defaults to 2; any false value also
#            means 2.
#
# Output layout, where <base> is the input path with a trailing 1-4 character
# ".ext" removed:
#   <base>/<name_digest>/.schema   the cube's schema
#   <base>/<name_digest>/<prefix>  hash of the cube entries whose key digest
#                                  starts with <prefix>
# Every output file is written with Storable::nstore.
#
# Returns $self.
#
# Dies when no file is given, when the path has no final file-name component,
# when the input cannot be retrieved, when a directory cannot be created
# (including when it already exists), or when an output file cannot be written.
sub split {
    my ($self, @opts) = @_;
    my %opts;

    # A lone argument is the file path; an even-sized list of two or more
    # arguments is a key/value option list.  Anything else supplies no options.
    if (@opts == 1) {
        $opts{file} = $opts[0];
    }
    elsif (@opts > 1 && @opts % 2 == 0) {
        %opts = @opts;
    }

    my $path = $opts{file};
    die "DataCube::FileSplitter(split):\nno file given to split\n"
        unless defined $path && length $path;

    my $pref = $opts{prefix} || 2;

    # $dir keeps its trailing separator (or is empty); $file is the last
    # path component.  Checking the match avoids working on stale captures.
    my ($dir, $file) = $path =~ /^((?:.*?[\/\\])?)([^\/\\]+?)$/
        or die "DataCube::FileSplitter(split):\ncant find a file name in path:$path\n";
    $file =~ s/\..{1,4}$//i;

    # Read the input before creating anything on disk, so an unreadable
    # input does not leave an empty output directory behind.
    my $data_cube = Storable::retrieve($path);
    die "DataCube::FileSplitter(split):\ncant retrieve data cube from:\n$path\n$!\n"
        unless ref $data_cube;

    mkdir($dir.$file)
        or die "DataCube::FileSplitter(split):\ncant make directory:\n$dir$file\nfrom path:$path\n$!\n";

    my $cube_store = $data_cube->{cube_store};
    my $cubes      = $cube_store->cubes;

    for my $cube_name (keys %$cubes) {
        my $cube_data = $cube_store->fetch($cube_name);
        my $name_dige = $cube_data->{schema}->{name_digest};
        my $cube_hash = $cube_data->{cube};
        my $cube_targ = $dir.$file.'/'.$name_dige;

        mkdir($cube_targ)
            or die "DataCube::FileSplitter(split):\ncant make directory:\n".
                   "$cube_targ\nfrom cube named\n$cube_name\n$name_dige\n$!\n";

        # nstore returns undef on I/O errors rather than dying, so check it.
        Storable::nstore($cube_data->{schema}, $cube_targ."/.schema")
            or die "DataCube::FileSplitter(split):\ncant store schema in:\n$cube_targ/.schema\n$!\n";

        # Bucket the cube's keys by the first $pref hex characters of their
        # MD5 digest.
        my %keys_for;
        for my $key (keys %$cube_hash) {
            my $prefix = substr(Digest::MD5::md5_hex($key), 0, $pref);
            push @{ $keys_for{$prefix} }, $key;
        }

        for my $prefix (keys %keys_for) {
            my @bucket = @{ $keys_for{$prefix} };
            my %cube_hunk;
            @cube_hunk{@bucket} = @{$cube_hash}{@bucket};

            Storable::nstore(\%cube_hunk, $cube_targ . "/$prefix")
                or die "DataCube::FileSplitter(split):\ncant store cube part in:\n$cube_targ/$prefix\n$!\n";
        }
    }

    return $self;
}
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# Prepare "$dir/merge" as a merge target directory.
#
# Parameters:
#   $dir - directory to work in.
#
# Reads $dir through $self->dir (which dies if it cannot be opened) and
# creates "$dir/merge" unless it already exists.
#
# Returns $self, matching split/merge/merge_files.  Earlier versions had no
# explicit return, so the value was whatever the final `unless` evaluated to.
#
# Dies if $dir cannot be read or "$dir/merge" cannot be created.
#
# NOTE(review): the original also built a list of the entries of $dir other
# than "merge" but never used it, and nothing is merged here -- this looks
# like an unfinished stub; confirm the intended behaviour before relying on it.
sub merge_all {
    my ($self, $dir) = @_;

    # Called only for its failure mode: an unreadable $dir is reported
    # before anything is created.
    $self->dir($dir);

    my $merge_dir = "$dir/merge";
    unless (-d $merge_dir) {
        mkdir($merge_dir)
            or die "DataCube::FileSplitter(merge_all):\ncant make directory:$merge_dir\n$!\n";
    }

    return $self;
}
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# Merge several split-cube directory trees into one target tree.
#
# Options:
#   target       - directory to write the merged tree into; created if it
#                  does not exist.
#   source_files - array reference of source directories, each laid out as
#                  <source>/<cube_dir>/{.schema,<part>...}.
#
# For every cube directory the schema is read from the FIRST source and a
# DataCube::MeasureUpdater is built from it.  Part files with the same name
# across the sources are then combined via $self->merge_files into
# <target>/<cube_dir>/<part>.
#
# Returns $self.
#
# Dies when source_files is missing or empty, when a hex-named cube directory
# of the first source is absent from another source, when a later source holds
# a cube directory the first source lacks, when a schema cannot be retrieved,
# or when a directory cannot be created.
sub merge {
    my ($self, %opts) = @_;

    my $target       = $opts{target};
    my $source_files = $opts{source_files};

    # Fail early, before anything is created on disk.
    die "DataCube::FileSplitter(merge):\nsource_files must be a non-empty array reference\n"
        unless ref $source_files && @$source_files;

    unless (-d $target) {
        mkdir($target)
            or die "DataCube::FileSplitter(merge : mkdir):\ncant make target directory:\n$target\n$!\n";
    }

    # Every hex-named cube directory of the first source must exist in all
    # sources.
    my $first = $source_files->[0];
    for my $name (grep { /^[a-f0-9]+$/i } $self->dir($first)) {
        for my $source (@$source_files) {
            die "DataCube::FileSplitter(merge : base_check):\nmissing cube named:\n$name\nin merge source:\n$source"
                unless -d "$source/$name";
        }
    }

    # cube_dir => { schema => ..., updater => ..., parts => { part => [paths] } }
    my %sources;

    for my $i (0 .. $#$source_files) {
        my $dir = $source_files->[$i];

        for my $cube_dir ($self->dir($dir)) {

            # Only directories can be cube directories; stray files in a
            # source are ignored rather than aborting the merge.
            next unless -d "$dir/$cube_dir";

            if ($i == 0) {
                my $schema = Storable::retrieve("$dir/$cube_dir/.schema");
                die "DataCube::FileSplitter(merge):\ncant retrieve schema from:\n$dir/$cube_dir/.schema\n$!\n"
                    unless ref $schema;
                $sources{$cube_dir}{schema}  = $schema;
                $sources{$cube_dir}{updater} = DataCube::MeasureUpdater->new($schema);
            }
            elsif (!$sources{$cube_dir}) {
                # Without a schema from the first source there is no updater
                # to merge with; report it here instead of failing later.
                die "DataCube::FileSplitter(merge):\ncube named:\n$cube_dir\nin merge source:\n$dir\n".
                    "is missing from first merge source:\n$first\n";
            }

            for my $prefix (grep { $_ ne '.schema' } $self->dir("$dir/$cube_dir")) {
                push @{ $sources{$cube_dir}{parts}{$prefix} }, "$dir/$cube_dir/$prefix";
            }
        }
    }

    for my $cube_name (keys %sources) {
        unless (-d "$target/$cube_name") {
            mkdir("$target/$cube_name")
                or die "DataCube::FileSplitter(merge : mkdir):\ncant make target directory:\n$target/$cube_name\n$!\n";
        }

        # A cube with no part files has no {parts} entry; treat it as empty
        # instead of dereferencing undef.
        my $parts = $sources{$cube_name}{parts} || {};

        for my $prefix (sort keys %$parts) {
            $self->merge_files(
                files   => $parts->{$prefix},
                target  => $target . "/$cube_name/$prefix",
                updater => $sources{$cube_name}{updater},
            );
        }
    }

    return $self;
}
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# Combine several Storable part files into a single Storable file.
#
# Options:
#   files   - array reference of part-file paths to read.
#   target  - path of the merged file to write.  If it already exists as a
#             plain file it is read first, so its contents are folded into
#             the result.
#   updater - object whose update() method is called once per key of every
#             part with the named arguments target, source, source_key and
#             target_key; it is responsible for combining the values.
#
# The caller's `files` array is never modified.
#
# Returns $self.
#
# Dies when a part cannot be retrieved as a reference or when the merged
# file cannot be written.
sub merge_files {
    my ($self, %opts) = @_;

    my $files   = $opts{files};
    my $target  = $opts{target};
    my $updater = $opts{updater};

    # Work on a private copy: prepending the target to the caller's array
    # would grow it on every call made with the same reference.
    my @inputs = @{ $files || [] };
    unshift @inputs, $target if -f $target;

    my $big_hunk = {};

    for my $file (@inputs) {
        my $small_hunk = Storable::retrieve($file);

        unless (ref $small_hunk) {
            die "DataCube::FileSplitter(merge_files):\nStorable returned a non-ref\n$!"
        }

        for my $key (keys %$small_hunk) {
            $updater->update(
                target     => $big_hunk,
                source     => $small_hunk,
                source_key => $key,
                target_key => $key,
            );
        }
    }

    # nstore returns undef on I/O errors rather than dying, so check it.
    Storable::nstore($big_hunk, $target)
        or die "DataCube::FileSplitter(merge_files):\ncant store merged file:\n$target\n$!\n";

    return $self;
}
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# List the entries of a directory.
#
# Parameters:
#   $path - directory to read.
#
# Returns the entry names that contain at least one character other than a
# dot, which leaves out "." and "..".  Names are returned in readdir order
# and without the directory prefix.  In scalar context the number of such
# entries is returned.
#
# Dies if the directory cannot be opened.
sub dir {
    my ($self, $path) = @_;

    my $handle;
    unless (opendir($handle, $path)) {
        die "DataCube::FileSplitter(dir):\ncant open directory:$path\n$!\n";
    }

    return grep { $_ =~ /[^\.]/ } readdir($handle);
}
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
1; |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
__END__ |