line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::AGP::LowLevel; |
2
|
1
|
|
|
1
|
|
986
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
34
|
|
3
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
31
|
|
4
|
1
|
|
|
1
|
|
781
|
use English; |
|
1
|
|
|
|
|
837
|
|
|
1
|
|
|
|
|
6
|
|
5
|
1
|
|
|
1
|
|
670
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
63
|
|
6
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
6
|
use File::Basename; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
74
|
|
8
|
1
|
|
|
1
|
|
1066
|
use UNIVERSAL qw/isa/; |
|
1
|
|
|
|
|
13
|
|
|
1
|
|
|
|
|
5
|
|
9
|
1
|
|
|
1
|
|
468
|
use List::Util qw/first/; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
103
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Bio::AGP::LowLevel - functions for dealing with AGP files |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 SYNOPSIS |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
$lines_arrayref = agp_parse('my_agp_file.agp'); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
agp_write( $lines => 'my_agp_file.agp'); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 DESCRIPTION |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
functions for working with AGP files. |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 FUNCTIONS |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
All functions below are EXPORT_OK. |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=cut |
33
|
|
|
|
|
|
|
|
34
|
1
|
|
|
1
|
|
5
|
use base qw/ Exporter /; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
142
|
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
our @EXPORT_OK; |
37
|
|
|
|
|
|
|
BEGIN { |
38
|
1
|
|
|
1
|
|
401
|
@EXPORT_OK = qw( |
39
|
|
|
|
|
|
|
agp_parse |
40
|
|
|
|
|
|
|
agp_write |
41
|
|
|
|
|
|
|
agp_format_part |
42
|
|
|
|
|
|
|
agp_contigs |
43
|
|
|
|
|
|
|
); |
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head2 str_in |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
Usage: print "it's valid" if str_in($thingy,qw/foo bar baz/); |
50
|
|
|
|
|
|
|
Desc : return 1 if the first argument is string equal to at least one of the |
51
|
|
|
|
|
|
|
subsequent arguments |
52
|
|
|
|
|
|
|
Ret : 1 or 0 |
53
|
|
|
|
|
|
|
Args : string to search for, array of strings to search in |
54
|
|
|
|
|
|
|
Side Effects: none |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
I kept writing this over and over in validation code and got sick of it. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=cut |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
sub str_in { |
61
|
1138
|
|
|
1138
|
1
|
1742
|
my $needle = shift; |
62
|
1138
|
50
|
|
2402
|
|
3946
|
return defined(first {$needle eq $_} @_) ? 1 : 0; |
|
2402
|
|
|
|
|
4928
|
|
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head2 is_filehandle |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Usage: print "it's a filehandle" if is_filehandle($my_thing); |
68
|
|
|
|
|
|
|
Desc : check whether the given thing is usable as a filehandle. |
69
|
|
|
|
|
|
|
I put this in a module cause a filehandle might be either |
70
|
|
|
|
|
|
|
a GLOB or isa IO::Handle or isa Apache::Upload |
71
|
|
|
|
|
|
|
Ret : true if it is a filehandle, false otherwise |
72
|
|
|
|
|
|
|
Args : a single thing |
73
|
|
|
|
|
|
|
Side Effects: none |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=cut |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub is_filehandle { |
78
|
9
|
|
|
9
|
1
|
17
|
my ($thing) = @_; |
79
|
9
|
|
66
|
|
|
161
|
return isa($thing,'IO::Handle') || isa($thing,'Apache::Upload') || ref($thing) eq 'GLOB'; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=head2 agp_parse |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Usage: my $lines = agp_parse('~/myagp.agp',validate_syntax => 1, validate_identifiers => 1); |
86
|
|
|
|
|
|
|
Desc : parse an agp file |
87
|
|
|
|
|
|
|
Args : filename or filehandle, hash-style list of options as |
88
|
|
|
|
|
|
|
validate_syntax => if true, error |
89
|
|
|
|
|
|
|
if there are any syntax errors, |
90
|
|
|
|
|
|
|
validate_identifiers => if true, error |
91
|
|
|
|
|
|
|
if there are any identifiers that |
92
|
|
|
|
|
|
|
CXGN::Tools::Identifiers doesn't recognize |
93
|
|
|
|
|
|
|
IMPLIES validate_syntax |
94
|
|
|
|
|
|
|
error_array => an arrayref. if given, will push |
95
|
|
|
|
|
|
|
error descriptions onto this array instead of |
96
|
|
|
|
|
|
|
using warn to print them to stderr |
97
|
|
|
|
|
|
|
Ret : undef if error, otherwise return an |
98
|
|
|
|
|
|
|
arrayref containing line records, each of which is like: |
99
|
|
|
|
|
|
|
{ comment => 'text' } if a comment, |
100
|
|
|
|
|
|
|
or if a data line: |
101
|
|
|
|
|
|
|
{ objname => the name of the object being assembled |
102
|
|
|
|
|
|
|
(same for every record), |
103
|
|
|
|
|
|
|
ostart => start coordinate for this component (object), |
104
|
|
|
|
|
|
|
oend => end coordinate for this component (object), |
105
|
|
|
|
|
|
|
partnum => the part number appearing in the 4th column, |
106
|
|
|
|
|
|
|
linenum => the line number in the file, |
107
|
|
|
|
|
|
|
type => letter type present in the file (/[ADFGNOPUW]/), |
108
|
|
|
|
|
|
|
typedesc => description of the type, one of: |
109
|
|
|
|
|
|
|
- (A) active_finishing |
110
|
|
|
|
|
|
|
- (D) draft |
111
|
|
|
|
|
|
|
- (F) finished |
112
|
|
|
|
|
|
|
- (G) wgs_finishing |
113
|
|
|
|
|
|
|
- (N) known_gap |
114
|
|
|
|
|
|
|
- (O) other |
115
|
|
|
|
|
|
|
- (P) predraft |
116
|
|
|
|
|
|
|
- (U) unknown_gap |
117
|
|
|
|
|
|
|
- (W) wgs_contig |
118
|
|
|
|
|
|
|
ident => identifier of the component, if any, |
119
|
|
|
|
|
|
|
length => length of the component, |
120
|
|
|
|
|
|
|
is_gap => 1 if the line is some kind of gap, 0 if it |
121
|
|
|
|
|
|
|
is covered by a component, |
122
|
|
|
|
|
|
|
gap_type => one of: |
123
|
|
|
|
|
|
|
fragment: gap between two sequence contigs (also |
124
|
|
|
|
|
|
|
called a "sequence gap"), |
125
|
|
|
|
|
|
|
clone: a gap between two clones that do not overlap. |
126
|
|
|
|
|
|
|
contig: a gap between clone contigs (also called a |
127
|
|
|
|
|
|
|
"layout gap"). |
128
|
|
|
|
|
|
|
centromere: a gap inserted for the centromere. |
129
|
|
|
|
|
|
|
short_arm: a gap inserted at the start of an |
130
|
|
|
|
|
|
|
acrocentric chromosome. |
131
|
|
|
|
|
|
|
heterochromatin: a gap inserted for an especially |
132
|
|
|
|
|
|
|
large region of heterochromatic sequence (may also |
133
|
|
|
|
|
|
|
include the centromere). |
134
|
|
|
|
|
|
|
telomere: a gap inserted for the telomere. |
135
|
|
|
|
|
|
|
repeat: an unresolvable repeat. |
136
|
|
|
|
|
|
|
cstart => start coordinate relative to the component, |
137
|
|
|
|
|
|
|
cend => end coordinate relative to the component, |
138
|
|
|
|
|
|
|
linkage => 'yes' or 'no', only set for type of 'N', |
139
|
|
|
|
|
|
|
orient => '+', '-', 0, or 'na' |
140
|
|
|
|
|
|
|
orientation of the component |
141
|
|
|
|
|
|
|
relative to the object, |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
Side Effects: unless error_array is given, will print error |
145
|
|
|
|
|
|
|
descriptions to STDERR with warn() |
146
|
|
|
|
|
|
|
Example: |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=cut |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
sub agp_parse { |
151
|
6
|
|
|
6
|
1
|
4161
|
my $agpfile = shift; |
152
|
6
|
|
|
|
|
28
|
our %opt = @_; |
153
|
|
|
|
|
|
|
|
154
|
6
|
50
|
|
|
|
23
|
$agpfile or croak 'must provide an AGP filename'; |
155
|
|
|
|
|
|
|
|
156
|
6
|
50
|
|
|
|
21
|
if($opt{validate_identifiers}) { |
157
|
0
|
|
|
|
|
0
|
$opt{validate_syntax} = 1; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
#if the argument is a filehandle, use it, otherwise try to use it as |
161
|
|
|
|
|
|
|
#a filename |
162
|
6
|
|
|
|
|
11
|
my $agp_in; #< filehandle for reading AGP |
163
|
6
|
|
|
|
|
7
|
our $bn; #< basename of file we're parsing |
164
|
6
|
|
|
|
|
10
|
($agp_in,$bn) = do { |
165
|
6
|
50
|
|
|
|
60
|
if( is_filehandle($agpfile) ) { |
166
|
0
|
|
|
|
|
0
|
($agpfile,'') |
167
|
|
|
|
|
|
|
} else { |
168
|
6
|
50
|
|
|
|
430
|
open my $f,$agpfile |
169
|
|
|
|
|
|
|
or die "$! opening '$agpfile'\n"; |
170
|
6
|
|
|
|
|
23
|
($f,$agpfile) |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
}; |
173
|
|
|
|
|
|
|
|
174
|
6
|
|
|
|
|
13
|
our $parse_error_flag = 0; |
175
|
|
|
|
|
|
|
sub parse_error(@) { |
176
|
|
|
|
|
|
|
|
177
|
0
|
0
|
|
0
|
0
|
0
|
return unless $opt{validate_syntax}; |
178
|
|
|
|
|
|
|
|
179
|
0
|
|
|
|
|
0
|
$parse_error_flag = 1; |
180
|
0
|
|
|
|
|
0
|
my $errstr = "$bn:$.: ".join('',@_)."\n"; |
181
|
|
|
|
|
|
|
#if we're pushing errors onto an error_array, do that |
182
|
0
|
0
|
|
|
|
0
|
if ($opt{error_array}) { |
183
|
0
|
|
|
|
|
0
|
push @{$opt{error_array}},$errstr; |
|
0
|
|
|
|
|
0
|
|
184
|
|
|
|
|
|
|
} else { # otherwise just warn |
185
|
0
|
|
|
|
|
0
|
warn $errstr; |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
|
189
|
6
|
|
|
|
|
9
|
my @records; |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
my $last_end; |
192
|
0
|
|
|
|
|
0
|
my $last_partnum; |
193
|
0
|
|
|
|
|
0
|
my $last_objname; |
194
|
|
|
|
|
|
|
|
195
|
6
|
|
|
|
|
11
|
my $assembled_sequence = ''; |
196
|
6
|
|
|
|
|
294
|
while (my $line = <$agp_in>) { |
197
|
1
|
|
|
1
|
|
5
|
no warnings 'uninitialized'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
1248
|
|
198
|
|
|
|
|
|
|
# warn "parsing $line"; |
199
|
646
|
|
|
|
|
698
|
chomp $line; |
200
|
646
|
|
|
|
|
851
|
$line =~ s/\r//g; #remove windows \r chars |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
#deal with comments |
203
|
646
|
100
|
|
|
|
1277
|
if($line =~ /#/) { |
204
|
4
|
50
|
|
|
|
25
|
if( $line =~ s/^#// ) { |
205
|
4
|
|
|
|
|
12
|
push @records, { comment => $line }; |
206
|
4
|
|
|
|
|
16
|
next; |
207
|
|
|
|
|
|
|
} |
208
|
0
|
|
|
|
|
0
|
parse_error("not a valid comment line, # must be first character on line"); |
209
|
0
|
|
|
|
|
0
|
next; |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
642
|
|
|
|
|
4429
|
my @fields = split /\t/,$line,10; |
213
|
642
|
50
|
|
|
|
1440
|
@fields == 9 |
214
|
|
|
|
|
|
|
or parse_error "This line contains ".scalar(@fields)." columns. All lines must have 9 columns."; |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
#if there just really aren't many columns, this probably isn't a valid AGP line |
217
|
642
|
50
|
33
|
|
|
2382
|
next unless @fields >= 5 && @fields <= 10; |
218
|
|
|
|
|
|
|
|
219
|
642
|
|
|
|
|
1740
|
my %r = (linenum => $.); #< the record we're building for this line, starting with line number |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
#parse and check the first 5 cols |
222
|
642
|
|
|
|
|
1761
|
@r{qw( objname ostart oend partnum type )} = splice @fields,0,5; |
223
|
642
|
50
|
|
|
|
1318
|
$r{objname} |
224
|
|
|
|
|
|
|
or parse_error "'$r{obj_name}' is a valid object name"; |
225
|
|
|
|
|
|
|
#end |
226
|
642
|
50
|
66
|
|
|
3626
|
if ( defined $last_end && defined $last_objname && $r{objname} eq $last_objname ) { |
|
|
|
66
|
|
|
|
|
227
|
636
|
50
|
|
|
|
1782
|
$r{ostart} == $last_end+1 |
228
|
|
|
|
|
|
|
or parse_error "start coordinate not contiguous with previous line's end"; |
229
|
|
|
|
|
|
|
} |
230
|
642
|
|
|
|
|
850
|
$last_end = $r{oend}; |
231
|
642
|
|
|
|
|
749
|
$last_objname = $r{objname}; |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
#start |
234
|
642
|
50
|
|
|
|
1443
|
$r{oend} >= $r{ostart} or parse_error("end must be >= start"); |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
#part num |
237
|
642
|
|
100
|
|
|
1011
|
$last_partnum ||= 0; |
238
|
642
|
50
|
|
|
|
1336
|
$r{partnum} == $last_partnum + 1 |
239
|
|
|
|
|
|
|
or parse_error("part numbers not sequential"); |
240
|
|
|
|
|
|
|
|
241
|
642
|
|
|
|
|
871
|
$last_partnum = $r{partnum}; |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
#type |
244
|
642
|
100
|
|
|
|
1841
|
if ( $r{type} =~ /^[NU]$/ ) { |
|
|
50
|
|
|
|
|
|
245
|
496
|
|
|
|
|
1990
|
(@r{qw( length gap_type linkage)}, my $empty, my $undefined) = @fields; |
246
|
496
|
|
|
|
|
817
|
@fields = (); |
247
|
496
|
|
|
|
|
1099
|
my %descmap = qw/ U unknown_gap N known_gap /; |
248
|
496
|
50
|
|
|
|
1344
|
$r{typedesc} = $descmap{$r{type}} |
249
|
|
|
|
|
|
|
or parse_error("unregistered type $r{type}"); |
250
|
496
|
|
|
|
|
669
|
$r{is_gap} = 1; |
251
|
|
|
|
|
|
|
|
252
|
496
|
|
33
|
|
|
1703
|
my $gap_size_to_use = $opt{gap_length} || $r{length}; |
253
|
|
|
|
|
|
|
|
254
|
496
|
50
|
|
|
|
1338
|
$r{length} == $r{oend} - $r{ostart} + 1 |
255
|
|
|
|
|
|
|
or parse_error("gap size of '$r{length}' does not agree with ostart, oend of ($r{ostart},$r{oend})"); |
256
|
|
|
|
|
|
|
|
257
|
496
|
50
|
|
|
|
1037
|
str_in($r{gap_type},qw/fragment clone contig centromere short_arm heterochromatin telomere repeat/) |
258
|
|
|
|
|
|
|
or parse_error("invalid gap type '$r{gap_type}'"); |
259
|
|
|
|
|
|
|
|
260
|
496
|
50
|
|
|
|
1496
|
str_in($r{linkage},qw/yes no/) |
261
|
|
|
|
|
|
|
or parse_error("linkage (column 8) should be 'yes' or 'no'\n"); |
262
|
|
|
|
|
|
|
|
263
|
496
|
50
|
33
|
|
|
2264
|
defined $empty && $empty eq '' |
264
|
|
|
|
|
|
|
or parse_error("9th column should be present and empty\n"); |
265
|
|
|
|
|
|
|
|
266
|
496
|
|
|
|
|
2456
|
push @records,\%r; |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
} elsif ( $r{type} =~ /^[ADFGOPW]$/ ) { |
269
|
146
|
|
|
|
|
731
|
my %descmap = qw/A active_finishing D draft F finished G wgs_finishing N known_gap O other P predraft U unknown_gap W wgs_contig/; |
270
|
146
|
50
|
|
|
|
380
|
$r{typedesc} = $descmap{$r{type}} |
271
|
|
|
|
|
|
|
or parse_error("unregistered type $r{type}"); |
272
|
146
|
|
|
|
|
322
|
$r{is_gap} = 0; |
273
|
|
|
|
|
|
|
|
274
|
146
|
|
|
|
|
437
|
@r{qw(ident cstart cend orient)} = @fields; |
275
|
146
|
50
|
|
|
|
355
|
if($opt{validate_identifiers}) { |
276
|
0
|
0
|
|
|
|
0
|
my $comp_type = identifier_namespace($r{ident}) |
277
|
|
|
|
|
|
|
or parse_error("cannot guess type of '$r{ident}'"); |
278
|
|
|
|
|
|
|
} else { |
279
|
146
|
50
|
|
|
|
263
|
$r{ident} or parse_error("invalid identifier '$r{ident}'"); |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
|
282
|
146
|
50
|
|
|
|
311
|
str_in($r{orient},qw/+ - 0 na/) |
283
|
|
|
|
|
|
|
or parse_error("orientation must be one of +,-,0,na"); |
284
|
|
|
|
|
|
|
|
285
|
146
|
50
|
33
|
|
|
892
|
$r{cstart} >= 1 && $r{cend} > $r{cstart} |
286
|
|
|
|
|
|
|
or parse_error("invalid component start and/or end ($r{cstart},$r{cend})"); |
287
|
|
|
|
|
|
|
|
288
|
146
|
|
|
|
|
378
|
$r{length} = $r{cend}-$r{cstart}+1; |
289
|
|
|
|
|
|
|
|
290
|
146
|
50
|
|
|
|
315
|
$r{length} == $r{oend} - $r{ostart} + 1 |
291
|
|
|
|
|
|
|
or parse_error("distance between object start, end ($r{ostart},$r{oend}) does not agree with distance between component start, end ($r{cstart},$r{cend})"); |
292
|
|
|
|
|
|
|
|
293
|
146
|
|
|
|
|
932
|
push @records, \%r; |
294
|
|
|
|
|
|
|
} else { |
295
|
0
|
|
|
|
|
0
|
parse_error("invalid component type '$r{type}', it should be one of {A D F G N O P U W}"); |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
|
299
|
6
|
50
|
|
|
|
15
|
return if $parse_error_flag; |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
#otherwise, everything was well |
302
|
6
|
|
|
|
|
214
|
return \@records; |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
=head2 agp_write |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
Usage: agp_write($lines,$file); |
309
|
|
|
|
|
|
|
Desc : writes a properly formatted AGP file |
310
|
|
|
|
|
|
|
Args : arrayref of line records to write, with the line records being |
311
|
|
|
|
|
|
|
in the same format as those returned by agp_parse above, |
312
|
|
|
|
|
|
|
filename or filehandle to write to, |
313
|
|
|
|
|
|
|
Ret : nothing meaningful |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
Side Effects: dies on failure. if you gave it a filehandle, does |
316
|
|
|
|
|
|
|
not close it |
317
|
|
|
|
|
|
|
Example: |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
=cut |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
sub agp_write { |
322
|
3
|
|
|
3
|
1
|
61459
|
my ($lines,$file) = @_; |
323
|
3
|
50
|
|
|
|
18
|
$file or confess "must provide file to write to!\n"; |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
my $out_fh = is_filehandle($file) ? $file |
326
|
3
|
50
|
|
|
|
10
|
: do { |
327
|
0
|
0
|
|
|
|
0
|
open my $f,">$file" or croak "$! opening '$file' for writing"; |
328
|
0
|
|
|
|
|
0
|
$f |
329
|
|
|
|
|
|
|
}; |
330
|
|
|
|
|
|
|
|
331
|
3
|
|
|
|
|
12
|
foreach my $line (@$lines) { |
332
|
323
|
|
|
|
|
607
|
print $out_fh agp_format_part( $line ); |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
3
|
|
|
|
|
12
|
return; |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=head2 agp_format_part( $record ) |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
Format a single AGP part line (string terminated with a newline) from |
341
|
|
|
|
|
|
|
the given record hashref. |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
=cut |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
sub agp_format_part { |
346
|
323
|
|
|
323
|
1
|
348
|
my ( $line ) = @_; |
347
|
|
|
|
|
|
|
|
348
|
323
|
100
|
|
|
|
710
|
return "#$line->{comment}\n" if $line->{comment}; |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
#and all other lines |
351
|
321
|
|
|
|
|
324
|
my @fields = @{$line}{qw(objname ostart oend partnum type)}; |
|
321
|
|
|
|
|
1048
|
|
352
|
321
|
100
|
|
|
|
952
|
if( $line->{type} =~ /^[NU]$/ ) { |
353
|
248
|
|
|
|
|
235
|
push @fields, @{$line}{qw(length gap_type linkage)},''; |
|
248
|
|
|
|
|
644
|
|
354
|
|
|
|
|
|
|
} else { |
355
|
73
|
|
|
|
|
73
|
push @fields, @{$line}{qw(ident cstart cend orient)}; |
|
73
|
|
|
|
|
210
|
|
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
321
|
|
|
|
|
1683
|
return join("\t", @fields)."\n"; |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=head2 agp_contigs |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
Usage: my @contigs = agp_contigs( agp_parse($agp_filename) ); |
365
|
|
|
|
|
|
|
Desc : extract and number contigs from a parsed AGP file |
366
|
|
|
|
|
|
|
Args : arrayref of AGP lines, like those returned by agp_parse() above |
367
|
|
|
|
|
|
|
Ret : list of contigs, in the same order as they occur in the |
368
|
|
|
|
|
|
|
file, formatted as: |
369
|
|
|
|
|
|
|
[ agp_line_hashref, agp_line_hashref, ... ], |
370
|
|
|
|
|
|
|
[ agp_line_hashref, agp_line_hashref, ... ], |
371
|
|
|
|
|
|
|
... |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=cut |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
sub agp_contigs { |
376
|
3
|
|
|
3
|
1
|
88365
|
my $lines = shift; |
377
|
|
|
|
|
|
|
|
378
|
3
|
|
|
|
|
12
|
my @contigs = ([]); |
379
|
3
|
|
|
|
|
9
|
foreach my $l (@$lines) { |
380
|
323
|
100
|
|
|
|
620
|
next if $l->{comment}; |
381
|
321
|
100
|
|
|
|
895
|
if( $l->{typedesc} =~ /_gap$/ ) { |
382
|
248
|
100
|
|
|
|
221
|
push @contigs,[] if @{$contigs[-1]}; |
|
248
|
|
|
|
|
803
|
|
383
|
|
|
|
|
|
|
} else { |
384
|
73
|
|
|
|
|
67
|
push @{$contigs[-1]},$l; |
|
73
|
|
|
|
|
141
|
|
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
} |
387
|
3
|
100
|
|
|
|
6
|
pop @contigs if @{$contigs[-1]} == 0; |
|
3
|
|
|
|
|
13
|
|
388
|
3
|
|
|
|
|
24
|
return @contigs; |
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=head1 AUTHOR(S) |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
Robert Buels |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Sheena Scroggins |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=cut |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
### |
400
|
|
|
|
|
|
|
1;#do not remove |
401
|
|
|
|
|
|
|
### |