line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::JBrowse::Store::NCList; |
2
|
|
|
|
|
|
|
BEGIN { |
3
|
1
|
|
|
1
|
|
79438
|
$Bio::JBrowse::Store::NCList::AUTHORITY = 'cpan:RBUELS'; |
4
|
|
|
|
|
|
|
} |
5
|
|
|
|
|
|
|
{ |
6
|
|
|
|
|
|
|
$Bio::JBrowse::Store::NCList::VERSION = '0.1'; |
7
|
|
|
|
|
|
|
} |
8
|
|
|
|
|
|
|
#ABSTRACT: stores feature data in an on-disk lazy nested-containment list optimized for fetching over HTTP |
9
|
|
|
|
|
|
|
|
10
|
1
|
|
|
1
|
|
13
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
459
|
|
11
|
1
|
|
|
1
|
|
8
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
41
|
|
12
|
|
|
|
|
|
|
|
13
|
1
|
|
|
1
|
|
6
|
use Carp (); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
16
|
|
14
|
1
|
|
|
1
|
|
6
|
use Scalar::Util (); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
16
|
|
15
|
|
|
|
|
|
|
|
16
|
1
|
|
|
1
|
|
2528
|
use Storable (); |
|
1
|
|
|
|
|
5791
|
|
|
1
|
|
|
|
|
29
|
|
17
|
|
|
|
|
|
|
|
18
|
1
|
|
|
1
|
|
872
|
use Bio::JBrowse::Store::NCList::ArrayRepr (); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
25
|
|
19
|
1
|
|
|
1
|
|
744
|
use Bio::JBrowse::Store::NCList::IntervalStore (); |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
28
|
|
20
|
1
|
|
|
1
|
|
822
|
use Bio::JBrowse::Store::NCList::JSONFileStorage (); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# TODO IMPLEMENT RETRIEVAL |
24
|
|
|
|
|
|
|
# # retrieve feature data from the store |
25
|
|
|
|
|
|
|
# my $fstream = $store->get_features({ seq_id => 'chr1', start => 60, end => 85 }); |
26
|
|
|
|
|
|
|
# while( my $feature = $fstream->() ) { |
27
|
|
|
|
|
|
|
# # do something with the feature |
28
|
|
|
|
|
|
|
# } |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# Constructor for a store opened for writing.
#
# Takes the invocant and a hashref of options.  The options are copied
# into a fresh hash with write => 1 forced on (overriding any 'write'
# value the caller supplied), and construction is delegated to _new.
sub new {
    my ( $class, $args ) = @_;

    my %options = ( %$args, write => 1 );

    return $class->_new( \%options );
}
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Constructor for a store opened without write access.
#
# Takes the invocant and a hashref of options.  The options are copied
# into a fresh hash with write => 0 forced (overriding any 'write'
# value the caller supplied), and construction is delegated to _new.
sub open {
    my ( $class, $args ) = @_;

    my %options = ( %$args, write => 0 );

    return $class->_new( \%options );
}
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# Shared constructor behind new() and open().
#
# Arguments:
#   $class - class name to bless into
#   $args  - hashref of options, shallow-copied into the object.  Keys
#            read here: 'path' (the store directory, required) and
#            'write' (boolean).
#
# When 'write' is true, any existing file or directory at 'path' is
# removed and a fresh directory is created.  In every mode the path
# must end up being an existing directory.
#
# Returns the new object.  Dies if 'path' is missing or empty, if the
# directory cannot be created, or if the path does not exist or is not
# a directory.
sub _new {
    my ( $class, $args ) = @_;

    my $self = bless { %$args }, $class;

    # Fail early with a clear message instead of letting an undefined
    # path reach the file tests and File::Path below.
    my $path = $self->{path};
    defined $path && length $path
        or Carp::croak( "A 'path' argument is required to construct a $class" );

    $self->{array_rep} = Bio::JBrowse::Store::NCList::ArrayRepr->new;

    if( $self->{write} ) {
        # Load File::Path here rather than relying on some other module
        # having loaded it already.
        require File::Path;

        # Writing always starts from an empty directory.
        File::Path::rmtree( $path ) if -e $path;
        File::Path::mkpath( $path );
        -d $path
            or die "$! attempting to make directory '$path'\n";
    }

    -e $path
        or die "Target directory $path does not exist, and cannot create.\n";
    -d $path
        or die "Target directory $path exists, but is not a directory.\n";

    return $self;
}
63
|
|
|
|
|
|
|
# || Bio::JBrowse::Store::NCList::JSONFileStorage->new( $outDir, $args->{compress}), |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# Load features that are already in sorted order into the store.
#
# Arguments: zero or more feature streams (code references); each call
# to a stream yields the next feature hashref, or a false value when
# the stream is done.  The streams are joined with _combine_streams.
#
# Every time the seq_id of the incoming feature differs from that of
# the previous one, the current IntervalStore (if any) is finished and
# a new one is started, backed by a JSONFileStorage rooted at
# _refseq_path( seq_id ).  Each feature is converted with the shared
# array representation before being handed to addSorted.
#
# NOTE(review): features for one seq_id are assumed to be contiguous in
# the input; a seq_id that reappears later starts a second store at the
# same path -- confirm callers always pass sorted input.
sub insert_presorted {
    my ( $self, @streams ) = @_;

    my $array_rep = $self->{array_rep};
    my $features  = $self->_combine_streams( @streams );

    # sentinel that is not expected to match any real seq_id
    my $active_refseq = 'no reference sequence yet, cousin.';
    my $loader;

    while( my $feature = $features->() ) {
        my $seq_id = $feature->{seq_id};

        if( !$loader || $active_refseq ne $seq_id ) {
            # close out the previous reference sequence, if there was one
            $loader->finishLoad if $loader;

            $loader = Bio::JBrowse::Store::NCList::IntervalStore->new({
                store => Bio::JBrowse::Store::NCList::JSONFileStorage->new(
                    $self->_refseq_path( $seq_id ),
                    $self->{compress}
                    ),
                arrayRepr => $array_rep
            });
            # NOTE(review): the callback and the 2_000 are passed through
            # unchanged; their meaning is defined by IntervalStore::startLoad.
            $loader->startLoad( sub { 1 }, 2_000 );
            $active_refseq = $seq_id;
        }

        my $converted = $array_rep->convert_hashref( $feature );
        $loader->addSorted( $converted );
    }

    $loader->finishLoad if $loader;
}
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# Return the directory used for one reference sequence's data: the
# store's 'path' joined with the reference sequence name.
#
# Arguments: $refseq_name - name of the reference sequence.
# Returns:   the joined directory path as a string.
#
# NOTE(review): the name is used as a path component verbatim; confirm
# that callers never pass names containing directory separators.
sub _refseq_path {
    my ( $self, $refseq_name ) = @_;

    # Load File::Spec here rather than relying on some other module
    # having loaded it already.
    require File::Spec;

    return File::Spec->catdir( $self->{path}, $refseq_name );
}
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# Insert features from zero or more unsorted feature streams.
#
# The streams are first passed through _sort, and whatever _sort
# returns is handed on to insert_presorted.  The return value is that
# of insert_presorted.
sub insert {
    my ( $self, @streams ) = @_;

    my @sorted_streams = $self->_sort( @streams );

    return $self->insert_presorted( @sorted_streams );
}
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# Take zero or more streams and make one stream that feeds from all of
# them, in the order given.
#
# A stream is a code reference; each call returns the next item, or a
# false value once the stream is exhausted.
#
# Returns:
#   - with no streams: a stream that never yields anything
#   - with one stream: that same stream, unchanged
#   - otherwise: a stream that drains each input stream in turn
sub _combine_streams {
    my ( $self, @streams ) = @_;

    return sub {} unless @streams;

    return $streams[0] if @streams == 1;

    my @pending = @streams;
    return sub {
        while( 1 ) {
            my $item = $pending[0]->();

            # Hand back real items right away.  A false value is passed
            # on only when it comes from the last remaining stream, so
            # an empty stream in the middle cannot end the combined
            # stream early.
            return $item if $item || @pending == 1;

            shift @pending;
        }
    };
}
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# Sort the features from zero or more streams using an external
# (disk-backed) sorter, and return a single stream that yields them in
# sorted order.
#
# Each feature is fed to the sorter as one record laid out as:
#
#   seq_id, a NUL byte, pack('N', start), pack('N', ~end), frozen feature
#
# so the sorter orders records by reference sequence and then start
# coordinate.  The end coordinate is stored bit-complemented;
# presumably so that, for equal starts, features with larger ends come
# first -- confirm against Sort::External's default sort order.
#
# Returns a code reference; each call yields the next feature hashref,
# or nothing once the sorter is empty.
#
# NOTE(review): assumes seq_id never contains a NUL byte, since the
# first NUL in a record is used to locate the frozen payload.
sub _sort {
    my ( $self, @streams ) = @_;

    require Sort::External;

    # collapse all the input streams into one
    my $unsorted = $self->_combine_streams( @streams );

    my $sorter = Sort::External->new( cache_size => 1_000_000 );
    while( my $feature = $unsorted->() ) {
        my $sort_key = "$feature->{seq_id}\0"
            . pack( 'NN', $feature->{start}, ~( 0 + $feature->{end} ) );
        $sorter->feed( $sort_key . Storable::freeze( $feature ) );
    }
    $sorter->finish;

    # the returned stream pulls records back out of the sorter and
    # strips the sort key off the front of each one
    return sub {
        my $record = $sorter->fetch
            or return;

        # skip the seq_id, its NUL terminator, and the two 4-byte
        # packed coordinates
        my $payload_offset = index( $record, "\0" ) + 1 + 8;

        return Storable::thaw( substr( $record, $payload_offset ) );
    };
}
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
1; |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
__END__ |