|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Bio::AGP::LowLevel;  | 
| 
2
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
986
 | 
 use strict;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
    | 
| 
3
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
4
 | 
 use warnings;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
    | 
| 
4
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
781
 | 
 use English;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
837
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
5
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
670
 | 
 use Carp;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
63
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
6
 | 
 use File::Basename;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
74
 | 
    | 
| 
8
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
1066
 | 
 use UNIVERSAL qw/isa/;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
9
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
468
 | 
 use List::Util qw/first/;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
103
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bio::AGP::LowLevel - functions for dealing with AGP files  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  $lines_arrayref = agp_parse('my_agp_file.agp');  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  agp_write( $lines => 'my_agp_file.agp');  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 functions for working with AGP files.  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 FUNCTIONS  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 All functions below are EXPORT_OK.  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
34
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
5
 | 
 use base qw/ Exporter /;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
142
 | 
    | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @EXPORT_OK;  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 BEGIN {  | 
| 
38
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
401
 | 
   @EXPORT_OK = qw(  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   agp_parse  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   agp_write  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   agp_format_part  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   agp_contigs                   | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  );  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 str_in  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Usage: print "it's valid" if str_in($thingy,qw/foo bar baz/);  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Desc : return 1 if the first argument is string equal to at least one of the  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          subsequent arguments  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Ret  : 1 or 0  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Args : string to search for, array of strings to search in  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Side Effects: none  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   I kept writing this over and over in validation code and got sick of it.  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub str_in {  | 
| 
61
 | 
1138
 | 
 
 | 
 
 | 
  
1138
  
 | 
  
1
  
 | 
1742
 | 
   my $needle = shift;  | 
| 
62
 | 
1138
 | 
  
 50
  
 | 
 
 | 
  
2402
  
 | 
 
 | 
3946
 | 
   return defined(first {$needle eq $_} @_) ? 1 : 0;  | 
| 
 
 | 
2402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4928
 | 
    | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 is_filehandle  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Usage: print "it's a filehandle" if is_filehandle($my_thing);  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Desc : check whether the given thing is usable as a filehandle.  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          I put this in a module cause a filehandle might be either  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          a GLOB or isa IO::Handle or isa Apache::Upload  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Ret  : true if it is a filehandle, false otherwise  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Args : a single thing  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Side Effects: none  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub is_filehandle {  | 
| 
78
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
  
1
  
 | 
17
 | 
   my ($thing) = @_;  | 
| 
79
 | 
9
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
161
 | 
   return isa($thing,'IO::Handle') || isa($thing,'Apache::Upload') || ref($thing) eq 'GLOB';  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 agp_parse  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Usage: my $lines = agp_parse('~/myagp.agp',validate_syntax => 1, validate_identifiers => 1);  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Desc : parse an agp file  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Args : filename or filehandle, hash-style list of options as   | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                        validate_syntax => if true, error  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                            if there are any syntax errors,  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                        validate_identifiers => if true, error  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           if there are any identifiers that  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           CXGN::Tools::Identifiers doesn't recognize  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           IMPLIES validate_syntax  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                        error_array => an arrayref.  if given, will push  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           error descriptions onto this array instead of  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           using warn to print them to stderr  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Ret  : undef if error, otherwise return an  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          arrayref containing line records, each of which is like:  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          { comment => 'text' } if a comment,  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          or if a data line:  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          {  objname  => the name of the object being assembled  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                        (same for every record),  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ostart   => start coordinate for this component (object),  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             oend     => end coordinate for this component   (object),  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             partnum  => the part number appearing in the 4th column,  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             linenum  => the line number in the file,  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             type     => letter type present in the file (/[ADFGNOPUW]/),  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             typedesc => description of the type, one of:  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          - (A) active_finishing  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          - (D) draft  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          - (F) finished  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          - (G) wgs_finishing  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          - (N) known_gap  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          - (O) other  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          - (P) predraft  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          - (U) unknown_gap  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          - (W) wgs_contig  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ident    => identifier of the component, if any,  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             length   => length of the component,  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             is_gap   => 1 if the line is some kind of gap, 0 if it  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         is covered by a component,  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             gap_type => one of:  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  fragment: gap between two sequence contigs (also  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     called a "sequence gap"),  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  clone: a gap between two clones that do not overlap.  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   		 contig: a gap between clone contigs (also called a  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   		    "layout gap").  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   		 centromere: a gap inserted for the centromere.  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   		 short_arm: a gap inserted at the start of an  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   		    acrocentric chromosome.  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   		 heterochromatin: a gap inserted for an especially  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      		    large region of heterochromatic sequence (may also  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   		    include the centromere).  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   		 telomere: a gap inserted for the telomere.  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   		 repeat: an unresolvable repeat.  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             cstart   => start coordinate relative to the component,  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             cend     => end coordinate relative to the component,  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             linkage  => 'yes' or 'no', only set for type of 'N',  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             orient   => '+', '-', 0, or 'na'  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         orientation of the component  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         relative to the object,  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          }  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Side Effects: unless error_array is given, will print error  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 descriptions to STDERR with warn()  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Example:  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub agp_parse {  | 
| 
151
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
  
1
  
 | 
4161
 | 
   my $agpfile = shift;  | 
| 
152
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
   our %opt = @_;  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
154
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
23
 | 
   $agpfile or croak 'must provide an AGP filename';  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
156
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
21
 | 
   if($opt{validate_identifiers}) {  | 
| 
157
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $opt{validate_syntax} = 1;  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #if the argument is a filehandle, use it, otherwise try to use it as  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #a filename  | 
| 
162
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
   my $agp_in; #< filehandle for reading AGP  | 
| 
163
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
   our $bn;    #< basename of file we're parsing  | 
| 
164
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
   ($agp_in,$bn) = do {  | 
| 
165
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
60
 | 
     if( is_filehandle($agpfile) ) {  | 
| 
166
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       ($agpfile,'')  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
168
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
430
 | 
       open my $f,$agpfile  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	or die "$! opening '$agpfile'\n";  | 
| 
170
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
       ($f,$agpfile)  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   };  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
174
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
   our $parse_error_flag = 0;  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   sub parse_error(@) {  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
177
 | 
0
 | 
  
  0
  
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
0
 | 
     return unless $opt{validate_syntax};  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
179
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $parse_error_flag = 1;  | 
| 
180
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $errstr = "$bn:$.: ".join('',@_)."\n";  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #if we're pushing errors onto an error_array, do that  | 
| 
182
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ($opt{error_array}) {  | 
| 
183
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       push @{$opt{error_array}},$errstr;  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else { # otherwise just warn  | 
| 
185
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       warn $errstr;  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
189
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
   my @records;  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $last_end;  | 
| 
192
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my $last_partnum;  | 
| 
193
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my $last_objname;  | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
195
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
   my $assembled_sequence = '';  | 
| 
196
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
294
 | 
   while (my $line = <$agp_in>) {  | 
| 
197
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
5
 | 
     no warnings 'uninitialized';  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1248
 | 
    | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    warn "parsing $line";  | 
| 
199
 | 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
698
 | 
     chomp $line;  | 
| 
200
 | 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
851
 | 
     $line =~ s/\r//g; #remove windows \r chars  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #deal with comments  | 
| 
203
 | 
646
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1277
 | 
     if($line =~ /#/) {  | 
| 
204
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
25
 | 
       if( $line =~ s/^#// ) {  | 
| 
205
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	push @records, { comment => $line };  | 
| 
206
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 	next;  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
208
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       parse_error("not a valid comment line, # must be first character on line");  | 
| 
209
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       next;  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
212
 | 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4429
 | 
     my @fields = split /\t/,$line,10;  | 
| 
213
 | 
642
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1440
 | 
     @fields == 9  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       or parse_error "This line contains ".scalar(@fields)." columns.  All lines must have 9 columns.";  | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #if there just really aren't many columns, this probably isn't a valid AGP line  | 
| 
217
 | 
642
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
2382
 | 
     next unless @fields >= 5 && @fields <= 10;  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
219
 | 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1740
 | 
     my %r = (linenum => $.); #< the record we're building for this line, starting with line number  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #parse and check the first 5 cols  | 
| 
222
 | 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1761
 | 
     @r{qw( objname ostart oend partnum type )} = splice @fields,0,5;  | 
| 
223
 | 
642
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1318
 | 
     $r{objname}  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       or parse_error "'$r{obj_name}' is a valid object name";  | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #end  | 
| 
226
 | 
642
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
3626
 | 
     if ( defined $last_end && defined $last_objname && $r{objname} eq $last_objname ) {  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
227
 | 
636
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1782
 | 
       $r{ostart} == $last_end+1  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	or parse_error "start coordinate not contiguous with previous line's end";  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
230
 | 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
850
 | 
     $last_end = $r{oend};  | 
| 
231
 | 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
749
 | 
     $last_objname = $r{objname};  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #start  | 
| 
234
 | 
642
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1443
 | 
     $r{oend} >= $r{ostart} or parse_error("end must be >= start");  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #part num  | 
| 
237
 | 
642
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
1011
 | 
     $last_partnum ||= 0;  | 
| 
238
 | 
642
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1336
 | 
     $r{partnum} == $last_partnum + 1  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       or parse_error("part numbers not sequential");  | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
241
 | 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
871
 | 
     $last_partnum = $r{partnum};  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #type  | 
| 
244
 | 
642
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1841
 | 
     if ( $r{type} =~ /^[NU]$/ ) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
245
 | 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1990
 | 
       (@r{qw( length gap_type linkage)}, my $empty, my $undefined) = @fields;  | 
| 
246
 | 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
817
 | 
       @fields = ();  | 
| 
247
 | 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1099
 | 
       my %descmap = qw/ U unknown_gap N known_gap /;  | 
| 
248
 | 
496
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1344
 | 
       $r{typedesc} = $descmap{$r{type}}  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	or parse_error("unregistered type $r{type}");  | 
| 
250
 | 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
669
 | 
       $r{is_gap}   = 1;  | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
252
 | 
496
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
1703
 | 
       my $gap_size_to_use = $opt{gap_length} || $r{length};  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
254
 | 
496
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1338
 | 
       $r{length} == $r{oend} - $r{ostart} + 1  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	or parse_error("gap size of '$r{length}' does not agree with ostart, oend of ($r{ostart},$r{oend})");  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
257
 | 
496
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1037
 | 
       str_in($r{gap_type},qw/fragment clone contig centromere short_arm heterochromatin telomere repeat/)  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	or parse_error("invalid gap type '$r{gap_type}'");  | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
260
 | 
496
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1496
 | 
       str_in($r{linkage},qw/yes no/)  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	or parse_error("linkage (column 8) should be 'yes' or 'no'\n");  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
263
 | 
496
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
2264
 | 
       defined $empty && $empty eq ''  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	or parse_error("9th column should be present and empty\n");  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
266
 | 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2456
 | 
       push @records,\%r;  | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif ( $r{type} =~ /^[ADFGOPW]$/ ) {  | 
| 
269
 | 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
731
 | 
       my %descmap = qw/A active_finishing D draft F finished G wgs_finishing N known_gap O other P predraft U unknown_gap W wgs_contig/;  | 
| 
270
 | 
146
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
380
 | 
       $r{typedesc} = $descmap{$r{type}}  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	or parse_error("unregistered type $r{type}");  | 
| 
272
 | 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
322
 | 
       $r{is_gap} = 0;  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
274
 | 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
437
 | 
       @r{qw(ident cstart cend orient)} = @fields;  | 
| 
275
 | 
146
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
355
 | 
       if($opt{validate_identifiers}) {  | 
| 
276
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $comp_type = identifier_namespace($r{ident})  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  or parse_error("cannot guess type of '$r{ident}'");  | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
279
 | 
146
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
263
 | 
 	$r{ident} or parse_error("invalid identifier '$r{ident}'");  | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
282
 | 
146
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
311
 | 
       str_in($r{orient},qw/+ - 0 na/)  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	or parse_error("orientation must be one of +,-,0,na");  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
285
 | 
146
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
892
 | 
       $r{cstart} >= 1 && $r{cend} > $r{cstart}  | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	or parse_error("invalid component start and/or end ($r{cstart},$r{cend})");  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
288
 | 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
378
 | 
       $r{length} = $r{cend}-$r{cstart}+1;  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
290
 | 
146
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
315
 | 
       $r{length} == $r{oend} - $r{ostart} + 1  | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	or parse_error("distance between object start, end ($r{ostart},$r{oend}) does not agree with distance between component start, end ($r{cstart},$r{cend})");  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
293
 | 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
932
 | 
       push @records, \%r;  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
295
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       parse_error("invalid component type '$r{type}', it should be one of {A D F G N O P U W}");  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
299
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
   return if $parse_error_flag;  | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #otherwise, everything was well  | 
| 
302
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
214
 | 
   return \@records;  | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 agp_write  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Usage: agp_write($lines,$file);  | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Desc : writes a properly formatted AGP file  | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Args : arrayref of line records to write, with the line records being  | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              in the same format as those returned by agp_parse above,  | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          filename or filehandle to write to,  | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Ret :  nothing meaningful  | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Side Effects: dies on failure.  if you gave it a filehandle, does  | 
| 
316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 not close it  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Example:  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub agp_write {  | 
| 
322
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
61459
 | 
   my ($lines,$file) = @_;  | 
| 
323
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
18
 | 
   $file or confess "must provide file to write to!\n";  | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $out_fh = is_filehandle($file) ? $file  | 
| 
326
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     : do {  | 
| 
327
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       open my $f,">$file" or croak "$! opening '$file' for writing";  | 
| 
328
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $f  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
331
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
   foreach my $line (@$lines) {  | 
| 
332
 | 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
607
 | 
       print $out_fh agp_format_part( $line );  | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
335
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
   return;  | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 agp_format_part( $record )  | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Format a single AGP part line (string terminated with a newline) from  | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the given record hashref.  | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub agp_format_part {  | 
| 
346
 | 
323
 | 
 
 | 
 
 | 
  
323
  
 | 
  
1
  
 | 
348
 | 
     my ( $line ) = @_;  | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
348
 | 
323
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
710
 | 
     return "#$line->{comment}\n" if $line->{comment};  | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #and all other lines  | 
| 
351
 | 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
324
 | 
     my @fields = @{$line}{qw(objname ostart oend partnum type)};  | 
| 
 
 | 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1048
 | 
    | 
| 
352
 | 
321
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
952
 | 
     if( $line->{type} =~ /^[NU]$/ ) {  | 
| 
353
 | 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
235
 | 
       push @fields, @{$line}{qw(length gap_type linkage)},'';  | 
| 
 
 | 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
644
 | 
    | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
355
 | 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
73
 | 
       push @fields, @{$line}{qw(ident cstart cend orient)};  | 
| 
 
 | 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
210
 | 
    | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
358
 | 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1683
 | 
     return join("\t", @fields)."\n";  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 agp_contigs  | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Usage: my @contigs = agp_contigs( agp_parse($agp_filename) );  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Desc : extract and number contigs from a parsed AGP file  | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Args : arrayref of AGP lines, like those returned by agp_parse() above  | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Ret  : list of contigs, in the same order as they occur in the  | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          file, formatted as:  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             [ agp_line_hashref, agp_line_hashref, ... ],  | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             [ agp_line_hashref, agp_line_hashref, ... ],  | 
| 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ...  | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub agp_contigs {  | 
| 
376
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
88365
 | 
   my $lines = shift;  | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
378
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
   my @contigs = ([]);  | 
| 
379
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
   foreach my $l (@$lines) {  | 
| 
380
 | 
323
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
620
 | 
     next if $l->{comment};  | 
| 
381
 | 
321
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
895
 | 
     if( $l->{typedesc} =~ /_gap$/ ) {  | 
| 
382
 | 
248
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
221
 | 
       push @contigs,[] if @{$contigs[-1]};  | 
| 
 
 | 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
803
 | 
    | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
384
 | 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67
 | 
       push @{$contigs[-1]},$l;  | 
| 
 
 | 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
141
 | 
    | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
387
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
   pop @contigs if @{$contigs[-1]} == 0;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    | 
| 
388
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
   return @contigs;  | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR(S)  | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Robert Buels  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Sheena Scroggins  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ###  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;#do not remove  | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ###  |