line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::GFF3::LowLevel::Parser; |
2
|
|
|
|
|
|
|
BEGIN { |
3
|
2
|
|
|
2
|
|
133908
|
$Bio::GFF3::LowLevel::Parser::AUTHORITY = 'cpan:RBUELS'; |
4
|
|
|
|
|
|
|
} |
5
|
|
|
|
|
|
|
{ |
6
|
|
|
|
|
|
|
$Bio::GFF3::LowLevel::Parser::VERSION = '2.0'; |
7
|
|
|
|
|
|
|
} |
8
|
|
|
|
|
|
|
# ABSTRACT: a fast, low-level gff3 parser |
9
|
|
|
|
|
|
|
|
10
|
2
|
|
|
2
|
|
18
|
use strict; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
51
|
|
11
|
2
|
|
|
2
|
|
53
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
61
|
|
12
|
2
|
|
|
2
|
|
9
|
use Carp; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
111
|
|
13
|
|
|
|
|
|
|
|
14
|
2
|
|
|
2
|
|
9
|
use IO::Handle (); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
27
|
|
15
|
2
|
|
|
2
|
|
9
|
use Scalar::Util (); |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
41
|
|
16
|
|
|
|
|
|
|
|
17
|
2
|
|
|
2
|
|
1771
|
use List::MoreUtils (); |
|
2
|
|
|
|
|
2595
|
|
|
2
|
|
|
|
|
38
|
|
18
|
|
|
|
|
|
|
|
19
|
2
|
|
|
2
|
|
1188
|
use Bio::GFF3::LowLevel (); |
|
2
|
|
|
|
|
8
|
|
|
2
|
|
|
|
|
2084
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
sub open { |
24
|
40
|
|
|
40
|
1
|
145154
|
my $class = shift; |
25
|
40
|
|
|
|
|
214
|
return bless { |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
filethings => \@_, |
28
|
|
|
|
|
|
|
filehandles => [ map $class->_open($_), @_ ], |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# features that are ready to go out and be flushed |
31
|
|
|
|
|
|
|
item_buffer => [], |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# features that we have to keep on hand for now because they |
34
|
|
|
|
|
|
|
# might be referenced by something else |
35
|
|
|
|
|
|
|
under_construction_top_level => [], |
36
|
|
|
|
|
|
|
# index of the above by ID |
37
|
|
|
|
|
|
|
under_construction_by_id => {}, |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# features that reference something we have not seen yet |
40
|
|
|
|
|
|
|
# structured as: |
41
|
|
|
|
|
|
|
# { 'some_id' => { |
42
|
|
|
|
|
|
|
# 'Parent' => [ orphans that have a Parent attr referencing it ], |
43
|
|
|
|
|
|
|
# 'Derives_from' => [ orphans that have a Derives_from attr referencing it ], |
44
|
|
|
|
|
|
|
# } |
45
|
|
|
|
|
|
|
under_construction_orphans => {}, |
46
|
|
|
|
|
|
|
}, $class; |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
sub _open { |
49
|
40
|
|
|
40
|
|
94
|
my ( $class, $thing ) = @_; |
50
|
40
|
100
|
33
|
|
|
373
|
return $thing if ref $thing eq 'GLOB' || Scalar::Util::blessed( $thing ) && $thing->can('getline'); |
|
|
|
66
|
|
|
|
|
51
|
2
|
50
|
|
2
|
|
165
|
CORE::open my $f, '<', $thing or croak "$! opening '$thing' for reading"; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
16
|
|
|
38
|
|
|
|
|
2531
|
|
52
|
38
|
|
|
|
|
3935
|
return $f; |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
sub max_lookback { |
57
|
17
|
|
|
17
|
1
|
102
|
my ( $self, $count ) = @_; |
58
|
17
|
|
|
|
|
49
|
$self->{max_lookback} = $count |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub new { |
63
|
16
|
|
|
16
|
1
|
72400
|
my $class = shift; |
64
|
16
|
|
|
|
|
1368
|
require Bio::GFF3::LowLevel::Parser::1_0_backcompat; |
65
|
16
|
|
|
|
|
126
|
return Bio::GFF3::LowLevel::Parser::1_0_backcompat->new( @_ ); |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
sub next_item { |
70
|
2330
|
|
|
2330
|
1
|
8490
|
my ( $self ) = @_; |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# try to get more items if the buffer is empty |
73
|
2330
|
100
|
|
|
|
3873
|
$self->_buffer_items unless $self->_buffered_items_count; |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# return the next item if we have some |
76
|
2328
|
100
|
|
|
|
4996
|
return shift @{ $self->{item_buffer}} if $self->_buffered_items_count; |
|
2294
|
|
|
|
|
6476
|
|
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# if we were not able to get any more items, return nothing |
79
|
34
|
|
|
|
|
144
|
return; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
sub _buffer_item { |
83
|
2100
|
|
|
2100
|
|
2453
|
push @{$_[0]->{item_buffer}}, $_[1]; |
|
2100
|
|
|
|
|
5336
|
|
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub _buffered_items_count { |
87
|
9923
|
|
|
9923
|
|
12673
|
scalar @{ $_[0]->{item_buffer} } |
|
9923
|
|
|
|
|
42321
|
|
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
## get and parse lines from the files(s) to add at least one item to |
91
|
|
|
|
|
|
|
## the buffer |
92
|
|
|
|
|
|
|
sub _buffer_items { |
93
|
2251
|
|
|
2251
|
|
2533
|
my ( $self ) = @_; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# File position buffer, needed for correct parsing of |
96
|
|
|
|
|
|
|
# implicit beginning of a FASTA section |
97
|
2251
|
|
|
|
|
2434
|
my $pos_buffer = 0; |
98
|
2251
|
|
|
|
|
9450
|
while( my $line = $self->_next_line ) { |
99
|
5267
|
100
|
|
|
|
21275
|
if( $line =~ /^ \s* [^#\s>] /x ) { #< feature line, most common case |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# Remember current position |
101
|
5100
|
|
|
|
|
6372
|
$pos_buffer = tell; |
102
|
|
|
|
|
|
|
|
103
|
5100
|
|
|
|
|
19227
|
my $f = Bio::GFF3::LowLevel::gff3_parse_feature( $line ); |
104
|
5100
|
|
|
|
|
11807
|
$self->_buffer_feature( $f ); |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
# directive or comment |
107
|
|
|
|
|
|
|
elsif( my ( $hashsigns, $contents ) = $line =~ /^ \s* (\#+) (.*) /x ) { |
108
|
|
|
|
|
|
|
# Remember current position |
109
|
133
|
|
|
|
|
202
|
$pos_buffer = tell; |
110
|
|
|
|
|
|
|
|
111
|
133
|
100
|
|
|
|
387
|
if( length $hashsigns == 3 ) { #< sync directive, all forward-references are resolved. |
|
|
100
|
|
|
|
|
|
112
|
63
|
|
|
|
|
143
|
$self->_buffer_all_under_construction_features; |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
elsif( length $hashsigns == 2 ) { |
115
|
64
|
|
|
|
|
248
|
my $directive = Bio::GFF3::LowLevel::gff3_parse_directive( $line ); |
116
|
64
|
100
|
|
|
|
171
|
if( $directive->{directive} eq 'FASTA' ) { |
117
|
11
|
|
|
|
|
30
|
$self->_buffer_all_under_construction_features; |
118
|
11
|
|
|
|
|
22
|
$self->_buffer_item( { directive => 'FASTA', filehandle => shift @{$self->{filehandles} } }); |
|
11
|
|
|
|
|
55
|
|
119
|
11
|
|
|
|
|
15
|
shift @{$self->{filethings}}; |
|
11
|
|
|
|
|
34
|
|
120
|
|
|
|
|
|
|
} else { |
121
|
53
|
|
|
|
|
118
|
$self->_buffer_item( $directive ); |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
else { |
125
|
6
|
|
|
|
|
73
|
$contents =~ s/\s*$//; |
126
|
6
|
|
|
|
|
27
|
$self->_buffer_item( { comment => $contents } ); |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
elsif( $line =~ /^ \s* $/x ) { |
130
|
|
|
|
|
|
|
# blank line, do nothing |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# Remember current position |
133
|
30
|
|
|
|
|
48
|
$pos_buffer = tell; |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
elsif( $line =~ /^ \s* > /x ) { |
136
|
|
|
|
|
|
|
# implicit beginning of a FASTA section. a very stupid |
137
|
|
|
|
|
|
|
# idea to include this in the format spec. increases |
138
|
|
|
|
|
|
|
# implementation complexity by a lot. |
139
|
4
|
|
|
|
|
12
|
$self->_buffer_all_under_construction_features; |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# rewind to previous file position to include the fasta header |
142
|
4
|
|
|
|
|
7
|
my $fh = shift @{$self->{filehandles}}; |
|
4
|
|
|
|
|
10
|
|
143
|
4
|
|
|
|
|
40
|
seek $fh, $pos_buffer, 0; |
144
|
|
|
|
|
|
|
|
145
|
4
|
|
|
|
|
21
|
$self->_buffer_item( { directive => 'FASTA', filehandle => $fh } ); |
146
|
4
|
|
|
|
|
7
|
shift @{$self->{filethings}}; |
|
4
|
|
|
|
|
9
|
|
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# update file position |
149
|
4
|
|
|
|
|
9
|
$pos_buffer = tell; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
else { # it's a parse error |
152
|
|
|
|
|
|
|
# Remember current position |
153
|
0
|
|
|
|
|
0
|
$pos_buffer = tell; |
154
|
|
|
|
|
|
|
|
155
|
0
|
|
|
|
|
0
|
chomp $line; |
156
|
0
|
|
|
|
|
0
|
$self->_parse_error("parse error. Cannot parse '$line'."); |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# buffer old features if we are starting to approach our mem limit |
160
|
5265
|
|
|
|
|
15584
|
$self->_ensure_lookback_limit; |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# return now if we were able to find some things to put in the |
163
|
|
|
|
|
|
|
# output buffer |
164
|
5265
|
100
|
|
|
|
9260
|
return if $self->_buffered_items_count; |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# if we are out of lines, buffer all under-construction features |
168
|
52
|
|
|
|
|
143
|
$self->_buffer_all_under_construction_features; |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub _parse_error { |
172
|
2
|
|
|
2
|
|
3
|
my ( $self, $error ) = @_; |
173
|
2
|
|
|
|
|
417
|
croak "$self->{filethings}[0]:$.: $error"; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
## take all under-construction features and put them in the |
177
|
|
|
|
|
|
|
## item_buffer to be output |
178
|
|
|
|
|
|
|
sub _buffer_all_under_construction_features { |
179
|
130
|
|
|
130
|
|
176
|
my ( $self ) = @_; |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# since the under_construction_top_level buffer is likely to be |
182
|
|
|
|
|
|
|
# much larger than the item_buffer, we swap them and unshift the |
183
|
|
|
|
|
|
|
# existing buffer onto it to avoid a big copy. |
184
|
130
|
|
|
|
|
245
|
my $old_buffer = $self->{item_buffer}; |
185
|
130
|
|
|
|
|
233
|
$self->{item_buffer} = $self->{under_construction_top_level}; |
186
|
130
|
|
|
|
|
151
|
unshift @{$self->{item_buffer}}, @$old_buffer; |
|
130
|
|
|
|
|
256
|
|
187
|
130
|
|
|
|
|
184
|
undef $old_buffer; |
188
|
|
|
|
|
|
|
|
189
|
130
|
|
|
|
|
236
|
$self->{under_construction_top_level} = []; |
190
|
130
|
|
|
|
|
210
|
$self->{under_construction_by_id} = {}; |
191
|
130
|
|
|
|
|
1095
|
$self->{completed_references} = {}; |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# if we have any orphans hanging around still, this is a problem. die with a parse error |
194
|
130
|
50
|
|
|
|
988
|
if( grep %$_, values %{$self->{under_construction_orphans}} ) { |
|
130
|
|
|
|
|
512
|
|
195
|
0
|
|
|
|
|
0
|
die <_unresolved_references_report; |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
GFF3 parse error: some features reference other features that do not exist in the file (or in the same '###' scope). A list of them: |
198
|
|
|
|
|
|
|
EOM |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
sub _ensure_lookback_limit { |
203
|
5265
|
|
|
5265
|
|
6421
|
my ( $self ) = @_; |
204
|
|
|
|
|
|
|
|
205
|
5265
|
100
|
|
|
|
12771
|
return unless defined $self->{max_lookback}; |
206
|
|
|
|
|
|
|
|
207
|
2562
|
|
|
|
|
3487
|
my $toplevel = $self->{under_construction_top_level}; |
208
|
2562
|
|
|
|
|
3400
|
my $byid = $self->{under_construction_by_id}; |
209
|
2562
|
|
|
|
|
3340
|
my $out_buffer = $self->{item_buffer}; |
210
|
2
|
|
|
2
|
|
12
|
no warnings 'uninitialized'; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
356
|
|
211
|
2562
|
|
|
|
|
8505
|
while( @$toplevel > $self->{max_lookback} ) { |
212
|
40
|
|
|
|
|
75
|
my $f = shift @$toplevel; |
213
|
40
|
|
|
|
|
173
|
delete $byid->{$_} for map @{$_->{attributes}{ID}}, @$f; |
|
40
|
|
|
|
|
294
|
|
214
|
40
|
|
|
|
|
178
|
push @$out_buffer, $f; |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
# makes a string with a tabular report of all the currently unresolved |
219
|
|
|
|
|
|
|
# references: Parent, Derives_from, etc. |
220
|
|
|
|
|
|
|
sub _unresolved_references_report { |
221
|
0
|
|
|
0
|
|
0
|
my ( $self ) = @_; |
222
|
|
|
|
|
|
|
|
223
|
0
|
|
|
|
|
0
|
require IO::Handle; |
224
|
|
|
|
|
|
|
|
225
|
0
|
|
|
|
|
0
|
our $id; |
226
|
0
|
|
|
|
|
0
|
our $reference_string; |
227
|
|
|
|
|
|
|
|
228
|
0
|
|
|
|
|
0
|
my $report = ''; |
229
|
0
|
|
|
|
|
0
|
CORE::open my $fh, '>', \$report; |
230
|
0
|
|
|
|
|
0
|
$fh->print(<<''); |
231
|
|
|
|
|
|
|
ID | Cannot Find |
232
|
|
|
|
|
|
|
---------------------------------------------------------------------- |
233
|
|
|
|
|
|
|
|
234
|
2
|
|
|
2
|
|
10
|
no strict 'subs'; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
622
|
|
235
|
|
|
|
|
|
|
format UNRESOLVED_REFERENCES_REPORT = |
236
|
|
|
|
|
|
|
@<<<<<<<<<<<<<<<<<< | @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< |
237
|
|
|
|
|
|
|
$id, $reference_string |
238
|
|
|
|
|
|
|
. |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
|
242
|
0
|
|
|
|
|
0
|
for my $refid ( keys %{$self->{under_construction_orphans}} ) { |
|
0
|
|
|
|
|
0
|
|
243
|
0
|
|
|
|
|
0
|
my $escaped_refid = Bio::GFF3::LowLevel::gff3_escape( $refid ); |
244
|
0
|
|
|
|
|
0
|
my $references = $self->{under_construction_orphans}{$refid}; |
245
|
0
|
|
|
|
|
0
|
for my $reftype ( keys %$references ) { |
246
|
0
|
|
|
|
|
0
|
my $references = $references->{$reftype}; |
247
|
0
|
|
|
|
|
0
|
for my $feature (@$references) { |
248
|
0
|
|
|
|
|
0
|
for my $location ( @$feature ) { |
249
|
0
|
|
0
|
|
|
0
|
$id = (($location->{attributes}||{})->{ID}||[])->[0]; #< avoid autovivification |
250
|
0
|
0
|
|
|
|
0
|
$id = '(no id)' unless defined $id; |
251
|
0
|
|
|
|
|
0
|
$reference_string = "$reftype=$escaped_refid"; |
252
|
0
|
|
|
|
|
0
|
$fh->format_write( Bio::GFF3::LowLevel::Parser::UNRESOLVED_REFERENCES_REPORT ); |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
} |
257
|
0
|
|
|
|
|
0
|
return $report; |
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
## get the next line from our file(s), returning nothing if we are out |
262
|
|
|
|
|
|
|
## of lines and files |
263
|
|
|
|
|
|
|
sub _next_line { |
264
|
2
|
|
|
2
|
|
12
|
no warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
1511
|
|
265
|
|
|
|
|
|
|
# fast code path for reading a line from the first filehandle, |
266
|
5319
|
|
|
5319
|
|
9260
|
my $first_fh = $_[0]->{filehandles}[0]; |
267
|
5319
|
|
66
|
|
|
34470
|
return <$first_fh> || do { |
268
|
|
|
|
|
|
|
# slower case where we are at the end, or need to change |
269
|
|
|
|
|
|
|
# filehandles |
270
|
|
|
|
|
|
|
my ( $self ) = @_; |
271
|
|
|
|
|
|
|
my $filehandles = $self->{filehandles}; |
272
|
|
|
|
|
|
|
while ( @$filehandles ) { |
273
|
|
|
|
|
|
|
my $line = $filehandles->[0]->getline; |
274
|
|
|
|
|
|
|
return $line if $line; |
275
|
|
|
|
|
|
|
shift @$filehandles; |
276
|
|
|
|
|
|
|
shift @{$self->{filethings}}; |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
return; |
279
|
|
|
|
|
|
|
}; |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
my %container_attributes = ( Parent => 'child_features', Derives_from => 'derived_features' ); |
283
|
|
|
|
|
|
|
## do the right thing with a newly-parsed feature line |
284
|
|
|
|
|
|
|
sub _buffer_feature { |
285
|
5100
|
|
|
5100
|
|
6441
|
my ( $self, $feature_line ) = @_; |
286
|
|
|
|
|
|
|
|
287
|
5100
|
|
|
|
|
11167
|
$feature_line->{'child_features'} = []; |
288
|
5100
|
|
|
|
|
8686
|
$feature_line->{'derived_features'} = []; |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# NOTE: a feature is an arrayref of one or more feature lines. |
291
|
5100
|
|
100
|
|
|
20417
|
my $ids = $feature_line->{attributes}{ID} || []; |
292
|
5100
|
|
100
|
|
|
15692
|
my $parents = $feature_line->{attributes}{Parent} || []; |
293
|
5100
|
|
100
|
|
|
19142
|
my $derives = $feature_line->{attributes}{Derives_from} || []; |
294
|
|
|
|
|
|
|
|
295
|
5100
|
50
|
66
|
|
|
23200
|
if( !@$ids && !@$parents && !@$derives ) { |
|
|
|
66
|
|
|
|
|
296
|
|
|
|
|
|
|
# if it has no IDs and does not refer to anything, we can just |
297
|
|
|
|
|
|
|
# output it |
298
|
2026
|
|
|
|
|
6017
|
$self->_buffer_item( [ $feature_line ] ); |
299
|
2026
|
|
|
|
|
4762
|
return; |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
|
302
|
3074
|
|
|
|
|
2901
|
my $feature; |
303
|
3074
|
|
|
|
|
4642
|
for my $id ( @$ids ) { |
304
|
1097
|
100
|
|
|
|
2836
|
if( my $existing = $self->{under_construction_by_id}{$id} ) { |
305
|
|
|
|
|
|
|
# another location of the same feature |
306
|
27
|
100
|
|
|
|
89
|
unless( $existing->[-1]->{type} eq $feature_line->{type} ) { |
307
|
2
|
|
|
|
|
13
|
$self->_parse_error("type ".$feature_line->{type}." is not the same as previous type ".$existing->[-1]->{type}." for ID '".$id."'"); |
308
|
|
|
|
|
|
|
} |
309
|
25
|
|
|
|
|
42
|
push @$existing, $feature_line; |
310
|
25
|
|
|
|
|
66
|
$feature = $existing; |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
else { |
313
|
|
|
|
|
|
|
# haven't seen it yet |
314
|
1070
|
|
|
|
|
1807
|
$feature = [ $feature_line ]; |
315
|
1070
|
100
|
66
|
|
|
4956
|
if( !@$parents && !@$derives ) { |
316
|
204
|
|
|
|
|
218
|
push @{ $self->{under_construction_top_level} }, $feature; |
|
204
|
|
|
|
|
839
|
|
317
|
|
|
|
|
|
|
} |
318
|
1070
|
|
|
|
|
6534
|
$self->{under_construction_by_id}{$id} = $feature; |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# see if we have anything buffered that refers to it |
321
|
1070
|
|
|
|
|
2078
|
$self->_resolve_references_to( $feature, $id ); |
322
|
|
|
|
|
|
|
} |
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
# try to resolve all its references |
326
|
3072
|
|
100
|
|
|
22872
|
$self->_resolve_references_from( $feature || [ $feature_line ], { Parent => $parents, Derives_from => $derives }, $ids ); |
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
sub _resolve_references_to { |
330
|
1070
|
|
|
1070
|
|
1951
|
my ( $self, $feature, $id ) = @_; |
331
|
1070
|
100
|
|
|
|
4658
|
my $references = $self->{under_construction_orphans}{$id} |
332
|
|
|
|
|
|
|
or return; |
333
|
2
|
|
|
|
|
6
|
for my $attrname ( keys %$references ) { |
334
|
3
|
|
33
|
|
|
11
|
my $pname = $container_attributes{$attrname} || lc $attrname; |
335
|
3
|
|
|
|
|
7
|
for my $loc ( @$feature ) { |
336
|
3
|
|
|
|
|
6
|
push @{ $loc->{$pname} }, |
|
3
|
|
|
|
|
19
|
|
337
|
3
|
|
|
|
|
4
|
@{ delete $references->{$attrname} }; |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
sub _resolve_references_from { |
342
|
3072
|
|
|
3072
|
|
4478
|
my ( $self, $feature, $references, $ids ) = @_; |
343
|
|
|
|
|
|
|
# go through our references |
344
|
|
|
|
|
|
|
# if we have the feature under construction, put this feature in the right place |
345
|
|
|
|
|
|
|
# otherwise, put this feature in the right slot in the orphans |
346
|
|
|
|
|
|
|
|
347
|
3072
|
|
|
|
|
7767
|
for my $attrname ( keys %$references ) { |
348
|
6144
|
|
|
|
|
6399
|
my $pname; |
349
|
6144
|
|
|
|
|
5956
|
for my $to_id ( @{ $references->{ $attrname } } ) { |
|
6144
|
|
|
|
|
14987
|
|
350
|
2884
|
100
|
|
|
|
7274
|
if( my $other_feature = $self->{under_construction_by_id}{ $to_id } ) { |
351
|
2879
|
|
33
|
|
|
12423
|
$pname ||= $container_attributes{$attrname} || lc $attrname; |
|
|
|
66
|
|
|
|
|
352
|
2879
|
100
|
|
|
|
18305
|
unless( grep $self->{completed_references}{$_}{$attrname}{$to_id}++, @$ids ) { |
353
|
2861
|
|
|
|
|
4087
|
for my $loc ( @$other_feature ) { |
354
|
2873
|
|
|
|
|
2828
|
push @{ $loc->{ $pname } }, $feature; |
|
2873
|
|
|
|
|
15844
|
|
355
|
|
|
|
|
|
|
} |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
else { |
359
|
5
|
|
100
|
|
|
7
|
push @{ $self->{under_construction_orphans}{$to_id}{$attrname} ||= [] }, $feature; |
|
5
|
|
|
|
|
41
|
|
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
} |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
1; |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
__END__ |