| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Bio::ToolBox::Data::core; | 
| 2 |  |  |  |  |  |  | our $VERSION = '1.69'; | 
| 3 |  |  |  |  |  |  |  | 
| 4 |  |  |  |  |  |  | =head1 NAME | 
| 5 |  |  |  |  |  |  |  | 
| 6 |  |  |  |  |  |  | Bio::ToolBox::Data::core - Common functions to Bio:ToolBox::Data family | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | Common methods for metadata and manipulation in a L | 
| 11 |  |  |  |  |  |  | data table and L file stream. This module | 
| 12 |  |  |  |  |  |  | should not be used directly. See the respective modules for more information. | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | =cut | 
| 15 |  |  |  |  |  |  |  | 
| 16 | 3 |  |  | 3 |  | 19 | use strict; | 
|  | 3 |  |  |  |  | 4 |  | 
|  | 3 |  |  |  |  | 73 |  | 
| 17 | 3 |  |  | 3 |  | 11 | use Carp qw(carp cluck croak confess); | 
|  | 3 |  |  |  |  | 4 |  | 
|  | 3 |  |  |  |  | 108 |  | 
| 18 | 3 |  |  | 3 |  | 12 | use base 'Bio::ToolBox::Data::file'; | 
|  | 3 |  |  |  |  | 5 |  | 
|  | 3 |  |  |  |  | 1195 |  | 
| 19 | 3 |  |  |  |  | 179 | use Bio::ToolBox::db_helper qw( | 
| 20 |  |  |  |  |  |  | open_db_connection | 
| 21 |  |  |  |  |  |  | verify_or_request_feature_types | 
| 22 |  |  |  |  |  |  | use_bam_adapter | 
| 23 |  |  |  |  |  |  | use_big_adapter | 
| 24 | 3 |  |  | 3 |  | 2047 | ); | 
|  | 3 |  |  |  |  | 14 |  | 
| 25 | 3 |  |  | 3 |  | 16 | use Module::Load; | 
|  | 3 |  |  |  |  | 4 |  | 
|  | 3 |  |  |  |  | 12 |  | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | 1; | 
| 28 |  |  |  |  |  |  |  | 
| 29 |  |  |  |  |  |  | #### Initialization and verification ############################################### | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | sub new { | 
| 32 | 73 |  |  | 73 | 1 | 109 | my $class = shift; | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | # in case someone calls this from an established object | 
| 35 | 73 | 100 |  |  |  | 176 | if (ref($class)) { | 
| 36 | 1 |  |  |  |  | 1 | $class = ref($class); | 
| 37 |  |  |  |  |  |  | } | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | # Initialize the hash structure | 
| 40 | 73 |  |  |  |  | 840 | my %data = ( | 
| 41 |  |  |  |  |  |  | 'program'        => undef, | 
| 42 |  |  |  |  |  |  | 'feature'        => undef, | 
| 43 |  |  |  |  |  |  | 'feature_type'   => undef, | 
| 44 |  |  |  |  |  |  | 'db'             => undef, | 
| 45 |  |  |  |  |  |  | 'format'         => '', | 
| 46 |  |  |  |  |  |  | 'gff'            => 0, | 
| 47 |  |  |  |  |  |  | 'bed'            => 0, | 
| 48 |  |  |  |  |  |  | 'ucsc'           => 0, | 
| 49 |  |  |  |  |  |  | 'vcf'            => 0, | 
| 50 |  |  |  |  |  |  | 'number_columns' => 0, | 
| 51 |  |  |  |  |  |  | 'last_row'       => 0, | 
| 52 |  |  |  |  |  |  | 'headers'        => 1, | 
| 53 |  |  |  |  |  |  | 'column_names'   => [], | 
| 54 |  |  |  |  |  |  | 'filename'       => undef, | 
| 55 |  |  |  |  |  |  | 'basename'       => undef, | 
| 56 |  |  |  |  |  |  | 'extension'      => undef, | 
| 57 |  |  |  |  |  |  | 'path'           => undef, | 
| 58 |  |  |  |  |  |  | 'comments'       => [], | 
| 59 |  |  |  |  |  |  | 'data_table'     => [], | 
| 60 |  |  |  |  |  |  | 'header_line_count' => 0, | 
| 61 |  |  |  |  |  |  | 'zerostart'      => 0, | 
| 62 |  |  |  |  |  |  | ); | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  | # Finished | 
| 65 | 73 |  |  |  |  | 217 | return bless \%data, $class; | 
| 66 |  |  |  |  |  |  | } | 
| 67 |  |  |  |  |  |  |  | 
| 68 |  |  |  |  |  |  |  | 
| 69 |  |  |  |  |  |  | sub verify { | 
| 70 |  |  |  |  |  |  | # this function does not rely on any self functions for two reasons | 
| 71 |  |  |  |  |  |  | # this is a low level integrity checker | 
| 72 |  |  |  |  |  |  | # this is very old code from before the days of an OO API of Bio::ToolBox | 
| 73 |  |  |  |  |  |  | # although a lot of things have been added and changed since then.... | 
| 74 | 52 |  |  | 52 | 1 | 692 | my $self = shift; | 
| 75 | 52 |  | 100 |  |  | 128 | my $silence = shift || 0; # default is to yell | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | # check for data table | 
| 78 | 52 | 50 | 33 |  |  | 247 | unless ( | 
| 79 |  |  |  |  |  |  | defined $self->{'data_table'} and | 
| 80 |  |  |  |  |  |  | ref $self->{'data_table'} eq 'ARRAY' | 
| 81 |  |  |  |  |  |  | ) { | 
| 82 | 0 | 0 |  |  |  | 0 | carp sprintf "\n DATA INTEGRITY ERROR: No data table in %s object!", ref $self | 
| 83 |  |  |  |  |  |  | unless $silence; | 
| 84 | 0 |  |  |  |  | 0 | return; | 
| 85 |  |  |  |  |  |  | } | 
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  | # check for last row index | 
| 88 | 52 | 50 |  |  |  | 104 | if (defined $self->{'last_row'}) { | 
| 89 | 52 |  |  |  |  | 55 | my $number = scalar( @{ $self->{'data_table'} } ) - 1; | 
|  | 52 |  |  |  |  | 104 |  | 
| 90 | 52 | 50 |  |  |  | 116 | if ($self->{'last_row'} != $number) { | 
| 91 |  |  |  |  |  |  | # 			carp sprintf "TABLE INTEGRITY ERROR: data table last_row index [%d] doesn't match " . | 
| 92 |  |  |  |  |  |  | # 				"metadata value [%d]!\n",  $number, $self->{'last_row'}; | 
| 93 |  |  |  |  |  |  | # fix it for them | 
| 94 | 0 |  |  |  |  | 0 | $self->{'last_row'} = $number; | 
| 95 |  |  |  |  |  |  | } | 
| 96 |  |  |  |  |  |  | } | 
| 97 |  |  |  |  |  |  | else { | 
| 98 |  |  |  |  |  |  | # define it for them | 
| 99 |  |  |  |  |  |  | $self->{'last_row'} = | 
| 100 | 0 |  |  |  |  | 0 | scalar( @{ $self->{'data_table'} } ) - 1; | 
|  | 0 |  |  |  |  | 0 |  | 
| 101 |  |  |  |  |  |  | } | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | # check for consistent number of columns | 
| 104 | 52 | 50 |  |  |  | 101 | if (defined $self->{'number_columns'}) { | 
| 105 | 52 |  |  |  |  | 82 | my $number = $self->{'number_columns'}; | 
| 106 | 52 |  |  |  |  | 78 | my @problems; | 
| 107 | 52 |  |  |  |  | 74 | my $too_low = 0; | 
| 108 | 52 |  |  |  |  | 69 | my $too_high = 0; | 
| 109 | 52 |  |  |  |  | 132 | for (my $row = 0; $row <= $self->{'last_row'}; $row++) { | 
| 110 | 945 |  |  |  |  | 786 | my $count = scalar @{ $self->{'data_table'}->[$row] }; | 
|  | 945 |  |  |  |  | 940 |  | 
| 111 | 945 | 50 |  |  |  | 1451 | if ($count != $number) { | 
| 112 | 0 |  |  |  |  | 0 | push @problems, $row; | 
| 113 | 0 | 0 |  |  |  | 0 | $too_low++ if $count < $number; | 
| 114 | 0 | 0 |  |  |  | 0 | $too_high++ if $count > $number; | 
| 115 | 0 |  |  |  |  | 0 | while ($count < $number) { | 
| 116 |  |  |  |  |  |  | # we can sort-of-fix this problem | 
| 117 | 0 |  |  |  |  | 0 | $self->{'data_table'}->[$row][$count] = '.'; | 
| 118 | 0 |  |  |  |  | 0 | $count++; | 
| 119 |  |  |  |  |  |  | } | 
| 120 |  |  |  |  |  |  | } | 
| 121 |  |  |  |  |  |  | } | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | # we found errors | 
| 124 | 52 | 50 |  |  |  | 109 | if (@problems) { | 
| 125 |  |  |  |  |  |  | # collapse problem list into compact string | 
| 126 |  |  |  |  |  |  | # from http://www.perlmonks.org/?node_id=87538 | 
| 127 | 0 |  |  |  |  | 0 | my $problem = join(',', @problems); | 
| 128 | 0 |  |  |  |  | 0 | $problem =~ s/(? | 
|  | 0 |  |  |  |  | 0 |  | 
| 129 |  |  |  |  |  |  |  | 
| 130 | 0 | 0 |  |  |  | 0 | if ($too_low) { | 
| 131 | 0 | 0 |  |  |  | 0 | print "\n COLUMN INCONSISTENCY ERRORS: $too_low rows had fewer than expected " . | 
| 132 |  |  |  |  |  |  | "columns!\n  padded rows $problem with null values\n" | 
| 133 |  |  |  |  |  |  | unless $silence; | 
| 134 |  |  |  |  |  |  | } | 
| 135 | 0 | 0 |  |  |  | 0 | if ($too_high) { | 
| 136 | 0 | 0 |  |  |  | 0 | print "\n COLUMN INCONSISTENCY ERRORS: $too_high rows had more columns than " . | 
| 137 |  |  |  |  |  |  | "expected!\n  Problem rows: $problem\n" | 
| 138 |  |  |  |  |  |  | unless $silence; | 
| 139 | 0 |  |  |  |  | 0 | return; | 
| 140 |  |  |  |  |  |  | } | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  | } | 
| 143 |  |  |  |  |  |  | else { | 
| 144 |  |  |  |  |  |  | # this wasn't set???? then set it | 
| 145 |  |  |  |  |  |  | $self->{'number_columns'} = | 
| 146 | 0 |  |  |  |  | 0 | scalar @{ $self->{'data_table'}->[0] }; | 
|  | 0 |  |  |  |  | 0 |  | 
| 147 |  |  |  |  |  |  | } | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | # check metadata and table names | 
| 150 | 52 |  |  |  |  | 74 | my $mdcheck = 0; | 
| 151 | 52 |  |  |  |  | 125 | for (my $i = 0; $i < $self->{'number_columns'}; $i++) { | 
| 152 | 415 | 50 |  |  |  | 653 | unless ( | 
| 153 |  |  |  |  |  |  | $self->{$i}{'name'} eq | 
| 154 |  |  |  |  |  |  | $self->{'data_table'}->[0][$i] | 
| 155 |  |  |  |  |  |  | ) { | 
| 156 |  |  |  |  |  |  | printf( "\n TABLE/METADATA MISMATCH ERROR: Column header names don't" . | 
| 157 |  |  |  |  |  |  | " match metadata name values for index $i!" . | 
| 158 |  |  |  |  |  |  | "\n  compare '%s' with '%s'\n", $self->{'data_table'}->[0][$i], | 
| 159 | 0 | 0 |  |  |  | 0 | $self->{$i}{'name'} ) unless $silence; | 
| 160 | 0 |  |  |  |  | 0 | $mdcheck++; | 
| 161 |  |  |  |  |  |  | } | 
| 162 | 415 | 50 |  |  |  | 765 | unless ($self->{$i}{'index'} == $i) { | 
| 163 |  |  |  |  |  |  | printf( "\n METADATA INDEX ERROR: index $i metadata doesn't match its " . | 
| 164 | 0 | 0 |  |  |  | 0 | "index value %s\n", $self->{$i}{'index'} ) unless $silence; | 
| 165 | 0 |  |  |  |  | 0 | $mdcheck++; | 
| 166 |  |  |  |  |  |  | } | 
| 167 |  |  |  |  |  |  | } | 
| 168 | 52 | 50 |  |  |  | 102 | return if $mdcheck; | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | ### Defined file format structure integrity | 
| 171 | 52 |  |  |  |  | 60 | my $error; | 
| 172 |  |  |  |  |  |  |  | 
| 173 |  |  |  |  |  |  | # check for proper gff structure | 
| 174 | 52 | 100 |  |  |  | 136 | if ($self->{'gff'}) { | 
| 175 |  |  |  |  |  |  | # if any of these checks fail, we will reset the gff version to | 
| 176 |  |  |  |  |  |  | # the default of 0, or no gff | 
| 177 | 6 |  |  |  |  | 9 | my $gff_check = 1; # start with assumption it is true | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | # check number of columns | 
| 180 | 6 | 100 |  |  |  | 16 | if ($self->{'number_columns'} != 9) { | 
| 181 | 1 |  |  |  |  | 2 | $gff_check = 0; | 
| 182 | 1 |  |  |  |  | 2 | $error .= " Number of columns not 9."; | 
| 183 |  |  |  |  |  |  | } | 
| 184 |  |  |  |  |  |  |  | 
| 185 |  |  |  |  |  |  | # check column indices | 
| 186 | 6 | 50 | 33 |  |  | 49 | if ( | 
| 187 |  |  |  |  |  |  | # column 0 should look like chromosome | 
| 188 |  |  |  |  |  |  | exists $self->{0} and | 
| 189 |  |  |  |  |  |  | $self->{0}{'name'} !~ | 
| 190 |  |  |  |  |  |  | m/^#?(?:chr|chromo|seq|refseq|ref_seq|seq|seq_id)/i | 
| 191 |  |  |  |  |  |  | ) { | 
| 192 | 0 |  |  |  |  | 0 | $gff_check = 0; | 
| 193 | 0 |  |  |  |  | 0 | $error .= " Column 0 name not chromosome-like."; | 
| 194 |  |  |  |  |  |  | } | 
| 195 | 6 | 50 | 33 |  |  | 46 | if ( | 
| 196 |  |  |  |  |  |  | # column 3 should look like start | 
| 197 |  |  |  |  |  |  | exists $self->{3} and | 
| 198 |  |  |  |  |  |  | $self->{3}{'name'} !~ m/start|pos|position/i | 
| 199 |  |  |  |  |  |  | ) { | 
| 200 | 0 |  |  |  |  | 0 | $gff_check = 0; | 
| 201 | 0 |  |  |  |  | 0 | $error .= " Column 3 name not start-like."; | 
| 202 |  |  |  |  |  |  | } | 
| 203 | 6 | 50 | 33 |  |  | 46 | if ( | 
| 204 |  |  |  |  |  |  | # column 4 should look like end | 
| 205 |  |  |  |  |  |  | exists $self->{4} and | 
| 206 |  |  |  |  |  |  | $self->{4}{'name'} !~ m/stop|end|pos|position/i | 
| 207 |  |  |  |  |  |  | ) { | 
| 208 | 0 |  |  |  |  | 0 | $gff_check = 0; | 
| 209 | 0 |  |  |  |  | 0 | $error .= " Column 4 name not stop-like."; | 
| 210 |  |  |  |  |  |  | } | 
| 211 | 6 | 50 | 33 |  |  | 26 | if ( | 
| 212 |  |  |  |  |  |  | # column 6 should look like strand | 
| 213 |  |  |  |  |  |  | exists $self->{6} and | 
| 214 |  |  |  |  |  |  | $self->{6}{'name'} !~ m/strand/i | 
| 215 |  |  |  |  |  |  | ) { | 
| 216 | 0 |  |  |  |  | 0 | $gff_check = 0; | 
| 217 | 0 |  |  |  |  | 0 | $error .= " Column 6 name not strand-like."; | 
| 218 |  |  |  |  |  |  | } | 
| 219 |  |  |  |  |  |  |  | 
| 220 |  |  |  |  |  |  | # check column data | 
| 221 | 6 | 50 |  |  |  | 24 | unless ($self->_column_is_integers(3,4)) { | 
| 222 | 0 |  |  |  |  | 0 | $gff_check = 0; | 
| 223 | 0 |  |  |  |  | 0 | $error .= " Columns 3,4 not integers."; | 
| 224 |  |  |  |  |  |  | } | 
| 225 | 6 | 50 |  |  |  | 22 | unless ($self->_column_is_numeric(5)) { | 
| 226 | 0 |  |  |  |  | 0 | $gff_check = 0; | 
| 227 | 0 |  |  |  |  | 0 | $error .= " Column 5 not numeric."; | 
| 228 |  |  |  |  |  |  | } | 
| 229 | 6 | 50 |  |  |  | 21 | unless ($self->_column_is_stranded(6)) { | 
| 230 | 0 |  |  |  |  | 0 | $gff_check = 0; | 
| 231 | 0 |  |  |  |  | 0 | $error .= " Column 6 not strand values."; | 
| 232 |  |  |  |  |  |  | } | 
| 233 |  |  |  |  |  |  |  | 
| 234 |  |  |  |  |  |  | # update gff value as necessary | 
| 235 | 6 | 100 |  |  |  | 16 | if ($gff_check == 0) { | 
| 236 |  |  |  |  |  |  | # reset metadata | 
| 237 | 1 |  |  |  |  | 2 | $self->{'gff'} = 0; | 
| 238 | 1 |  |  |  |  | 2 | $self->{'headers'} = 1; | 
| 239 |  |  |  |  |  |  |  | 
| 240 |  |  |  |  |  |  | # remove the AUTO key from the metadata | 
| 241 | 1 |  |  |  |  | 4 | for (my $i = 0; $i < $self->{'number_columns'}; $i++) { | 
| 242 | 8 | 50 |  |  |  | 11 | if (exists $self->{$i}{'AUTO'}) { | 
| 243 | 8 |  |  |  |  | 12 | delete $self->{$i}{'AUTO'}; | 
| 244 |  |  |  |  |  |  | } | 
| 245 |  |  |  |  |  |  | } | 
| 246 | 1 | 50 |  |  |  | 3 | print "\n GFF FILE FORMAT ERROR: $error\n" unless $silence; | 
| 247 |  |  |  |  |  |  | } | 
| 248 |  |  |  |  |  |  | } | 
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  | # check for proper BED structure | 
| 251 | 52 | 100 |  |  |  | 104 | if ($self->{'bed'}) { | 
| 252 |  |  |  |  |  |  | # if any of these checks fail, we will reset the bed flag to 0 | 
| 253 |  |  |  |  |  |  | # to make it not a bed file format | 
| 254 | 21 |  |  |  |  | 35 | my $bed_check = 1; # start with assumption it is correct | 
| 255 |  |  |  |  |  |  |  | 
| 256 |  |  |  |  |  |  | # check number of columns | 
| 257 | 21 | 50 |  |  |  | 61 | if ($self->{'number_columns'} < 3) { | 
| 258 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 259 | 0 |  |  |  |  | 0 | $error .= " Number of columns not at least 3."; | 
| 260 |  |  |  |  |  |  | } | 
| 261 |  |  |  |  |  |  |  | 
| 262 |  |  |  |  |  |  | # check column index names | 
| 263 | 21 | 50 | 33 |  |  | 166 | if ( | 
| 264 |  |  |  |  |  |  | exists $self->{0} and | 
| 265 |  |  |  |  |  |  | $self->{0}{'name'} !~ | 
| 266 |  |  |  |  |  |  | m/^#?(?:chr|chromo|seq|refseq|ref_seq|seq|seq_id)/i | 
| 267 |  |  |  |  |  |  | ) { | 
| 268 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 269 | 0 |  |  |  |  | 0 | $error .= " Column 0 name not chromosome-like."; | 
| 270 |  |  |  |  |  |  | } | 
| 271 | 21 | 50 | 33 |  |  | 156 | if ( | 
| 272 |  |  |  |  |  |  | exists $self->{1} and | 
| 273 |  |  |  |  |  |  | $self->{1}{'name'} !~ m/start|pos|position/i | 
| 274 |  |  |  |  |  |  | ) { | 
| 275 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 276 | 0 |  |  |  |  | 0 | $error .= " Column 1 name not start-like."; | 
| 277 |  |  |  |  |  |  | } | 
| 278 | 21 | 50 | 33 |  |  | 121 | if ( | 
| 279 |  |  |  |  |  |  | exists $self->{2} and | 
| 280 |  |  |  |  |  |  | $self->{2}{'name'} !~ m/stop|end|pos|position/i | 
| 281 |  |  |  |  |  |  | ) { | 
| 282 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 283 | 0 |  |  |  |  | 0 | $error .= " Column 2 name not stop-like."; | 
| 284 |  |  |  |  |  |  | } | 
| 285 | 21 | 50 | 66 |  |  | 95 | if ( | 
| 286 |  |  |  |  |  |  | exists $self->{5} and | 
| 287 |  |  |  |  |  |  | $self->{5}{'name'} !~ m/strand/i | 
| 288 |  |  |  |  |  |  | ) { | 
| 289 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 290 | 0 |  |  |  |  | 0 | $error .= " Column 5 name not strand-like."; | 
| 291 |  |  |  |  |  |  | } | 
| 292 | 21 | 50 | 100 |  |  | 129 | if ( | 
|  |  |  | 66 |  |  |  |  | 
| 293 |  |  |  |  |  |  | exists $self->{6} and | 
| 294 |  |  |  |  |  |  | $self->{'format'} !~ /narrow|broad/i and | 
| 295 |  |  |  |  |  |  | $self->{6}{'name'} !~ m/start|thick|cds/i | 
| 296 |  |  |  |  |  |  | ) { | 
| 297 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 298 | 0 |  |  |  |  | 0 | $error .= " Column 6 name not thickStart-like."; | 
| 299 |  |  |  |  |  |  | } | 
| 300 | 21 | 50 | 100 |  |  | 129 | if ( | 
|  |  |  | 66 |  |  |  |  | 
| 301 |  |  |  |  |  |  | exists $self->{7} and | 
| 302 |  |  |  |  |  |  | $self->{'format'} !~ /narrow|broad/i and | 
| 303 |  |  |  |  |  |  | $self->{7}{'name'} !~ m/end|stop|thick|cds/i | 
| 304 |  |  |  |  |  |  | ) { | 
| 305 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 306 | 0 |  |  |  |  | 0 | $error .= " Column 7 name not thickEnd-like."; | 
| 307 |  |  |  |  |  |  | } | 
| 308 | 21 | 50 | 100 |  |  | 104 | if ( | 
|  |  |  | 66 |  |  |  |  | 
| 309 |  |  |  |  |  |  | exists $self->{8} and | 
| 310 |  |  |  |  |  |  | $self->{'format'} !~ /narrow|broad/i and | 
| 311 |  |  |  |  |  |  | $self->{8}{'name'} !~ m/item|rgb|color/i | 
| 312 |  |  |  |  |  |  | ) { | 
| 313 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 314 | 0 |  |  |  |  | 0 | $error .= " Column 8 name not itemRGB-like."; | 
| 315 |  |  |  |  |  |  | } | 
| 316 | 21 | 50 | 100 |  |  | 129 | if ( | 
|  |  |  | 66 |  |  |  |  | 
| 317 |  |  |  |  |  |  | exists $self->{9} and | 
| 318 |  |  |  |  |  |  | $self->{'format'} !~ /narrow|broad/i and | 
| 319 |  |  |  |  |  |  | $self->{9}{'name'} !~ m/count|number|block|exon/i | 
| 320 |  |  |  |  |  |  | ) { | 
| 321 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 322 | 0 |  |  |  |  | 0 | $error .= " Column 9 name not blockCount-like."; | 
| 323 |  |  |  |  |  |  | } | 
| 324 | 21 | 50 | 66 |  |  | 75 | if ( | 
| 325 |  |  |  |  |  |  | exists $self->{10} and | 
| 326 |  |  |  |  |  |  | $self->{10}{'name'} !~ m/size|length|block|exon/i | 
| 327 |  |  |  |  |  |  | ) { | 
| 328 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 329 | 0 |  |  |  |  | 0 | $error .= " Column 10 name not blockSizes-like."; | 
| 330 |  |  |  |  |  |  | } | 
| 331 | 21 | 50 | 66 |  |  | 72 | if ( | 
| 332 |  |  |  |  |  |  | exists $self->{11} and | 
| 333 |  |  |  |  |  |  | $self->{11}{'name'} !~ m/start|block|exon/i | 
| 334 |  |  |  |  |  |  | ) { | 
| 335 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 336 | 0 |  |  |  |  | 0 | $error .= " Column 11 name not blockStarts-like."; | 
| 337 |  |  |  |  |  |  | } | 
| 338 |  |  |  |  |  |  |  | 
| 339 |  |  |  |  |  |  | # check column data | 
| 340 | 21 | 50 |  |  |  | 66 | unless ($self->_column_is_integers(1,2)) { | 
| 341 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 342 | 0 |  |  |  |  | 0 | $error .= " Columns 1,2 not integers."; | 
| 343 |  |  |  |  |  |  | } | 
| 344 | 21 | 100 |  |  |  | 54 | if ($self->{'number_columns'} >= 5) { | 
| 345 |  |  |  |  |  |  | # only check if it is actually present, since could be optional | 
| 346 | 15 | 50 |  |  |  | 43 | unless ($self->_column_is_numeric(4) ) { | 
| 347 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 348 | 0 |  |  |  |  | 0 | $error .= " Column 4 not numeric."; | 
| 349 |  |  |  |  |  |  | } | 
| 350 |  |  |  |  |  |  | } | 
| 351 | 21 | 100 |  |  |  | 55 | if ($self->{'number_columns'} >= 6) { | 
| 352 |  |  |  |  |  |  | # only check if it is actually present, since could be optional | 
| 353 | 15 | 50 |  |  |  | 49 | unless ($self->_column_is_stranded(5) ) { | 
| 354 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 355 | 0 |  |  |  |  | 0 | $error .= " Column 5 not strand values."; | 
| 356 |  |  |  |  |  |  | } | 
| 357 |  |  |  |  |  |  | } | 
| 358 | 21 | 100 | 100 |  |  | 93 | if ($self->{'format'} and | 
| 359 |  |  |  |  |  |  | $self->{'format'} =~ /narrow|broad/i) { | 
| 360 | 4 | 50 |  |  |  | 13 | unless ($self->_column_is_numeric(6,7,8) ) { | 
| 361 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 362 | 0 |  |  |  |  | 0 | $error .= " Columns 6,7,8 not numeric."; | 
| 363 |  |  |  |  |  |  | } | 
| 364 |  |  |  |  |  |  | } | 
| 365 | 21 | 100 |  |  |  | 50 | if ($self->{'number_columns'} == 12) { | 
| 366 |  |  |  |  |  |  | # bed12 has extra special limitations | 
| 367 | 3 | 50 |  |  |  | 7 | unless ($self->_column_is_integers(6,7,9) ) { | 
| 368 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 369 | 0 |  |  |  |  | 0 | $error .= " Column 6,7,9 not integers."; | 
| 370 |  |  |  |  |  |  | } | 
| 371 | 3 | 50 |  |  |  | 8 | unless ($self->_column_is_comma_integers(10,11) ) { | 
| 372 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 373 | 0 |  |  |  |  | 0 | $error .= " Column 10,11 not comma-delimited integers."; | 
| 374 |  |  |  |  |  |  | } | 
| 375 |  |  |  |  |  |  | } | 
| 376 | 21 | 100 | 66 |  |  | 715 | if ( | 
| 377 |  |  |  |  |  |  | $self->{'number_columns'} == 15 and | 
| 378 |  |  |  |  |  |  | $self->{'format'} =~ /gapped/i | 
| 379 |  |  |  |  |  |  | ) { | 
| 380 |  |  |  |  |  |  | # gappedPeak has extra special limitations | 
| 381 | 4 | 50 |  |  |  | 13 | unless ($self->_column_is_integers(6,7,9) ) { | 
| 382 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 383 | 0 |  |  |  |  | 0 | $error .= " Column 6,7,9 not integers."; | 
| 384 |  |  |  |  |  |  | } | 
| 385 | 4 | 50 |  |  |  | 18 | unless ($self->_column_is_comma_integers(10,11) ) { | 
| 386 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 387 | 0 |  |  |  |  | 0 | $error .= " Column 10,11 not comma-delimited integers."; | 
| 388 |  |  |  |  |  |  | } | 
| 389 | 4 | 50 |  |  |  | 9 | unless ($self->_column_is_numeric(12,13,14) ) { | 
| 390 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 391 | 0 |  |  |  |  | 0 | $error .= " Columns 12,13,14 not numeric."; | 
| 392 |  |  |  |  |  |  | } | 
| 393 |  |  |  |  |  |  | } | 
| 394 |  |  |  |  |  |  |  | 
| 395 |  |  |  |  |  |  | # peak file format | 
| 396 | 21 | 50 | 100 |  |  | 108 | if ($self->{'format'} and | 
|  |  |  | 66 |  |  |  |  | 
| 397 |  |  |  |  |  |  | $self->{'format'} =~ /narrowpeak/i and | 
| 398 |  |  |  |  |  |  | $self->{'number_columns'} != 10 | 
| 399 |  |  |  |  |  |  | ) { | 
| 400 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 401 | 0 |  |  |  |  | 0 | $error .= " NarrowPeak has 10 columns only."; | 
| 402 |  |  |  |  |  |  | } | 
| 403 | 21 | 50 | 66 |  |  | 85 | if ($self->{'format'} and | 
|  |  |  | 33 |  |  |  |  | 
| 404 |  |  |  |  |  |  | $self->{'format'} =~ /broadpeak/i and | 
| 405 |  |  |  |  |  |  | $self->{'number_columns'} != 9 | 
| 406 |  |  |  |  |  |  | ) { | 
| 407 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 408 | 0 |  |  |  |  | 0 | $error .= " BroadPeak has 9 columns only."; | 
| 409 |  |  |  |  |  |  | } | 
| 410 | 21 | 50 | 100 |  |  | 91 | if ($self->{'format'} and | 
|  |  |  | 66 |  |  |  |  | 
| 411 |  |  |  |  |  |  | $self->{'format'} =~ /gappedpeak/i and | 
| 412 |  |  |  |  |  |  | $self->{'number_columns'} != 15 | 
| 413 |  |  |  |  |  |  | ) { | 
| 414 | 0 |  |  |  |  | 0 | $bed_check = 0; | 
| 415 | 0 |  |  |  |  | 0 | $error .= " GappeddPeak has 15 columns only."; | 
| 416 |  |  |  |  |  |  | } | 
| 417 |  |  |  |  |  |  |  | 
| 418 |  |  |  |  |  |  | # reset the BED tag value as appropriate | 
| 419 | 21 | 50 |  |  |  | 45 | if ($bed_check) { | 
| 420 | 21 |  |  |  |  | 46 | $self->{'bed'} = $self->{'number_columns'}; # in case we had a fake true | 
| 421 |  |  |  |  |  |  | } | 
| 422 |  |  |  |  |  |  | else { | 
| 423 |  |  |  |  |  |  | # reset metadata | 
| 424 | 0 |  |  |  |  | 0 | $self->{'bed'} = 0; | 
| 425 | 0 |  |  |  |  | 0 | $self->{'headers'} = 1; | 
| 426 | 0 |  |  |  |  | 0 | my $ext = $self->{'extension'}; | 
| 427 | 0 |  |  |  |  | 0 | $self->{'filename'} =~ s/$ext/.txt/; | 
| 428 | 0 |  |  |  |  | 0 | $self->{'extension'} = '.txt'; | 
| 429 | 0 |  |  |  |  | 0 | $self->{'format'} = ''; | 
| 430 |  |  |  |  |  |  |  | 
| 431 |  |  |  |  |  |  | # remove the AUTO key from the metadata | 
| 432 | 0 |  |  |  |  | 0 | for (my $i = 0; $i < $self->{'number_columns'}; $i++) { | 
| 433 | 0 | 0 |  |  |  | 0 | if (exists $self->{$i}{'AUTO'}) { | 
| 434 | 0 |  |  |  |  | 0 | delete $self->{$i}{'AUTO'}; | 
| 435 |  |  |  |  |  |  | } | 
| 436 |  |  |  |  |  |  | } | 
| 437 | 0 | 0 |  |  |  | 0 | print "\n BED FILE FORMAT ERROR: $error\n" unless $silence; | 
| 438 |  |  |  |  |  |  | } | 
| 439 |  |  |  |  |  |  | } | 
| 440 |  |  |  |  |  |  |  | 
| 441 |  |  |  |  |  |  | # check refFlat or genePred gene structure | 
| 442 | 52 | 100 |  |  |  | 94 | if ($self->{'ucsc'}) { | 
| 443 |  |  |  |  |  |  | # if any of these checks fail, we will reset the extension | 
| 444 | 6 |  |  |  |  | 6 | my $ucsc_check = 1; # start with assumption it is correct | 
| 445 | 6 |  |  |  |  | 9 | my $ucsc_type; | 
| 446 |  |  |  |  |  |  |  | 
| 447 |  |  |  |  |  |  | # check number of columns | 
| 448 | 6 |  |  |  |  | 12 | my $colnumber = $self->{number_columns}; | 
| 449 | 6 | 50 | 0 |  |  | 11 | if ($colnumber == 16) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 450 | 6 |  |  |  |  | 9 | $ucsc_type = 'genePredExtBin'; | 
| 451 |  |  |  |  |  |  | # genePredExt with bin | 
| 452 |  |  |  |  |  |  | # bin name chrom strand txStart txEnd cdsStart cdsEnd | 
| 453 |  |  |  |  |  |  | # exonCount exonStarts exonEnds score name2 cdsStartSt | 
| 454 |  |  |  |  |  |  | # cdsEndStat exonFrames | 
| 455 | 6 | 50 |  |  |  | 27 | unless($self->{2}{name} =~ | 
| 456 |  |  |  |  |  |  | /^#?(?:chr|chromo|seq|refseq|ref_seq|seq|seq_id)/i) { | 
| 457 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 458 | 0 |  |  |  |  | 0 | $error .= " Column 2 name not chromosome-like."; | 
| 459 |  |  |  |  |  |  | } | 
| 460 | 6 | 100 |  |  |  | 26 | unless($self->{4}{name} =~ /start|position/i) { | 
| 461 | 3 |  |  |  |  | 5 | $ucsc_check = 0; | 
| 462 | 3 |  |  |  |  | 8 | $error .= " Column 4 name not start-like."; | 
| 463 |  |  |  |  |  |  | } | 
| 464 | 6 | 100 |  |  |  | 22 | unless($self->{5}{name} =~ /stop|end|position/i) { | 
| 465 | 3 |  |  |  |  | 6 | $ucsc_check = 0; | 
| 466 | 3 |  |  |  |  | 5 | $error .= " Column 5 name not stop-like."; | 
| 467 |  |  |  |  |  |  | } | 
| 468 | 6 | 100 |  |  |  | 21 | unless($self->{6}{name} =~ /start|position/i) { | 
| 469 | 3 |  |  |  |  | 4 | $ucsc_check = 0; | 
| 470 | 3 |  |  |  |  | 4 | $error .= " Column 6 name not start-like."; | 
| 471 |  |  |  |  |  |  | } | 
| 472 | 6 | 100 |  |  |  | 19 | unless($self->{7}{name} =~ /stop|end|position/i) { | 
| 473 | 3 |  |  |  |  | 5 | $ucsc_check = 0; | 
| 474 | 3 |  |  |  |  | 5 | $error .= " Column 7 name not stop-like."; | 
| 475 |  |  |  |  |  |  | } | 
| 476 | 6 | 50 |  |  |  | 17 | unless($self->_column_is_integers(4,5,6,7,8)) { | 
| 477 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 478 | 0 |  |  |  |  | 0 | $error .= " Columns 4,5,6,7,8 not integers."; | 
| 479 |  |  |  |  |  |  | } | 
| 480 | 6 | 50 |  |  |  | 26 | unless ($self->_column_is_comma_integers(9,10)) { | 
| 481 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 482 | 0 |  |  |  |  | 0 | $error .= " Columns 9,10 not comma-delimited integers."; | 
| 483 |  |  |  |  |  |  | } | 
| 484 | 6 | 50 |  |  |  | 14 | unless($self->_column_is_stranded(3)) { | 
| 485 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 486 | 0 |  |  |  |  | 0 | $error .= " Column 3 not strand values."; | 
| 487 |  |  |  |  |  |  | } | 
| 488 |  |  |  |  |  |  | } | 
| 489 |  |  |  |  |  |  | elsif ($colnumber == 15 or $colnumber == 12) { | 
| 490 | 0 | 0 |  |  |  | 0 | $ucsc_type = $colnumber == 15 ? 'genePredExt' : 'knownGene'; | 
| 491 |  |  |  |  |  |  | # GenePredExt | 
| 492 |  |  |  |  |  |  | # name chrom strand txStart txEnd cdsStart cdsEnd | 
| 493 |  |  |  |  |  |  | # exonCount exonStarts exonEnds score name2 cdsStartSt | 
| 494 |  |  |  |  |  |  | # cdsEndStat exonFrames | 
| 495 |  |  |  |  |  |  | # or knownGene | 
| 496 |  |  |  |  |  |  | # name chrom strand txStart txEnd cdsStart cdsEnd | 
| 497 |  |  |  |  |  |  | # exonCount exonStarts exonEnds proteinID alignID | 
| 498 | 0 | 0 |  |  |  | 0 | unless($self->{1}{name} =~ | 
| 499 |  |  |  |  |  |  | /^#?(?:chr|chromo|seq|refseq|ref_seq|seq|seq_id)/i) { | 
| 500 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 501 | 0 |  |  |  |  | 0 | $error .= " Column 1 name not chromosome-like."; | 
| 502 |  |  |  |  |  |  | } | 
| 503 | 0 | 0 |  |  |  | 0 | unless($self->{3}{name} =~ /start|position/i) { | 
| 504 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 505 | 0 |  |  |  |  | 0 | $error .= " Column 3 name not start-like."; | 
| 506 |  |  |  |  |  |  | } | 
| 507 | 0 | 0 |  |  |  | 0 | unless($self->{4}{name} =~ /stop|end|position/i) { | 
| 508 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 509 | 0 |  |  |  |  | 0 | $error .= " Column 4 name not stop-like."; | 
| 510 |  |  |  |  |  |  | } | 
| 511 | 0 | 0 |  |  |  | 0 | unless($self->{5}{name} =~ /start|position/i) { | 
| 512 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 513 | 0 |  |  |  |  | 0 | $error .= " Column 5 name not start-like."; | 
| 514 |  |  |  |  |  |  | } | 
| 515 | 0 | 0 |  |  |  | 0 | unless($self->{6}{name} =~ /stop|end|position/i) { | 
| 516 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 517 | 0 |  |  |  |  | 0 | $error .= " Column 6 name not stop-like."; | 
| 518 |  |  |  |  |  |  | } | 
| 519 | 0 | 0 |  |  |  | 0 | unless($self->_column_is_integers(3,4,5,6,7)) { | 
| 520 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 521 | 0 |  |  |  |  | 0 | $error .=  " Columns 3,4,5,6,7 not integers."; | 
| 522 |  |  |  |  |  |  | } | 
| 523 | 0 | 0 |  |  |  | 0 | unless ($self->_column_is_comma_integers(8,9)) { | 
| 524 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 525 | 0 |  |  |  |  | 0 | $error .= " Columns 8,9 not comma-delimited integers."; | 
| 526 |  |  |  |  |  |  | } | 
| 527 | 0 | 0 |  |  |  | 0 | unless($self->_column_is_stranded(2)) { | 
| 528 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 529 | 0 |  |  |  |  | 0 | $error .= " Column 2 not strand values."; | 
| 530 |  |  |  |  |  |  | } | 
| 531 |  |  |  |  |  |  | } | 
| 532 |  |  |  |  |  |  | elsif ($colnumber == 11) { | 
| 533 | 0 |  |  |  |  | 0 | $ucsc_type = 'refFlat'; | 
| 534 |  |  |  |  |  |  | # geneName transcriptName chrom strand txStart txEnd | 
| 535 |  |  |  |  |  |  | # cdsStart cdsEnd exonCount exonStarts exonEnds | 
| 536 | 0 | 0 |  |  |  | 0 | unless($self->{2}{name} =~ | 
| 537 |  |  |  |  |  |  | /^#?(?:chr|chromo|seq|refseq|ref_seq|seq|seq_id)/i) { | 
| 538 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 539 | 0 |  |  |  |  | 0 | $error .= " Column 2 name not chromosome-like."; | 
| 540 |  |  |  |  |  |  | } | 
| 541 | 0 | 0 |  |  |  | 0 | unless($self->{4}{name} =~ /start|position/i) { | 
| 542 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 543 | 0 |  |  |  |  | 0 | $error .= " Column 4 name not start-like."; | 
| 544 |  |  |  |  |  |  | } | 
| 545 | 0 | 0 |  |  |  | 0 | unless($self->{5}{name} =~ /stop|end|position/i) { | 
| 546 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 547 | 0 |  |  |  |  | 0 | $error .= " Column 5 name not stop-like."; | 
| 548 |  |  |  |  |  |  | } | 
| 549 | 0 | 0 |  |  |  | 0 | unless($self->{6}{name} =~ /start|position/i) { | 
| 550 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 551 | 0 |  |  |  |  | 0 | $error .= " Column 6 name not start-like."; | 
| 552 |  |  |  |  |  |  | } | 
| 553 | 0 | 0 |  |  |  | 0 | unless($self->{7}{name} =~ /stop|end|position/i) { | 
| 554 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 555 | 0 |  |  |  |  | 0 | $error .= " Column 7 name not stop-like."; | 
| 556 |  |  |  |  |  |  | } | 
| 557 | 0 | 0 |  |  |  | 0 | unless($self->_column_is_integers(4,5,6,7,8)) { | 
| 558 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 559 | 0 |  |  |  |  | 0 | $error .= " Columns 4,5,6,7,8 not integers."; | 
| 560 |  |  |  |  |  |  | } | 
| 561 | 0 | 0 |  |  |  | 0 | unless ($self->_column_is_comma_integers(9,10)) { | 
| 562 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 563 | 0 |  |  |  |  | 0 | $error .= " Columns 9,10 not comma-delimited integers."; | 
| 564 |  |  |  |  |  |  | } | 
| 565 | 0 | 0 |  |  |  | 0 | unless($self->_column_is_stranded(3)) { | 
| 566 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 567 | 0 |  |  |  |  | 0 | $error .= " Column 3 not strand values."; | 
| 568 |  |  |  |  |  |  | } | 
| 569 |  |  |  |  |  |  | } | 
| 570 |  |  |  |  |  |  | elsif ($colnumber == 10) { | 
| 571 | 0 |  |  |  |  | 0 | $ucsc_type = 'genePred'; | 
| 572 |  |  |  |  |  |  | # name chrom strand txStart txEnd cdsStart cdsEnd | 
| 573 |  |  |  |  |  |  | # exonCount exonStarts exonEnds | 
| 574 | 0 | 0 |  |  |  | 0 | unless($self->{1}{name} =~ | 
| 575 |  |  |  |  |  |  | /^#?(?:chr|chromo|seq|refseq|ref_seq|seq|seq_id)/i) { | 
| 576 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 577 | 0 |  |  |  |  | 0 | $error .= " Column 1 name not chromosome-like."; | 
| 578 |  |  |  |  |  |  | } | 
| 579 | 0 | 0 |  |  |  | 0 | unless($self->{3}{name} =~ /start|position/i) { | 
| 580 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 581 | 0 |  |  |  |  | 0 | $error .= " Column 3 name not start-like."; | 
| 582 |  |  |  |  |  |  | } | 
| 583 | 0 | 0 |  |  |  | 0 | unless($self->{4}{name} =~ /stop|end|position/i) { | 
| 584 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 585 | 0 |  |  |  |  | 0 | $error .= " Column 4 name not stop-like."; | 
| 586 |  |  |  |  |  |  | } | 
| 587 | 0 | 0 |  |  |  | 0 | unless($self->{5}{name} =~ /start|position/i) { | 
| 588 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 589 | 0 |  |  |  |  | 0 | $error .= " Column 5 name not start-like."; | 
| 590 |  |  |  |  |  |  | } | 
| 591 | 0 | 0 |  |  |  | 0 | unless($self->{6}{name} =~ /stop|end|position/i) { | 
| 592 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 593 | 0 |  |  |  |  | 0 | $error .= " Column 6 name not stop-like."; | 
| 594 |  |  |  |  |  |  | } | 
| 595 | 0 | 0 |  |  |  | 0 | unless($self->_column_is_integers(3,4,5,6,7)) { | 
| 596 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 597 | 0 |  |  |  |  | 0 | $error .= " Columns 3,4,5,6,7 not integers."; | 
| 598 |  |  |  |  |  |  | } | 
| 599 | 0 | 0 |  |  |  | 0 | unless ($self->_column_is_comma_integers(8,9)) { | 
| 600 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 601 | 0 |  |  |  |  | 0 | $error .= " Columns 8,9 not comma-delimited integers."; | 
| 602 |  |  |  |  |  |  | } | 
| 603 | 0 | 0 |  |  |  | 0 | unless($self->_column_is_stranded(2)) { | 
| 604 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 605 | 0 |  |  |  |  | 0 | $error .= " Column 2 not strand values."; | 
| 606 |  |  |  |  |  |  | } | 
| 607 |  |  |  |  |  |  | } | 
| 608 |  |  |  |  |  |  | else { | 
| 609 | 0 |  |  |  |  | 0 | $ucsc_type = 'UCSC'; | 
| 610 | 0 |  |  |  |  | 0 | $ucsc_check = 0; | 
| 611 | 0 |  |  |  |  | 0 | $error .= " Wrong # of columns."; | 
| 612 |  |  |  |  |  |  | } | 
| 613 |  |  |  |  |  |  |  | 
| 614 | 6 | 100 |  |  |  | 16 | if ($ucsc_check == 0) { | 
| 615 |  |  |  |  |  |  | # failed the check | 
| 616 | 3 |  |  |  |  | 7 | my $ext = $self->{'extension'}; | 
| 617 | 3 |  |  |  |  | 27 | $self->{'filename'} =~ s/$ext/.txt/; | 
| 618 | 3 |  |  |  |  | 8 | $self->{'extension'} = '.txt'; | 
| 619 | 3 |  |  |  |  | 5 | $self->{'ucsc'} = 0; | 
| 620 | 3 |  |  |  |  | 8 | $self->{'format'} = ''; | 
| 621 |  |  |  |  |  |  |  | 
| 622 |  |  |  |  |  |  | # remove the AUTO key | 
| 623 | 3 |  |  |  |  | 9 | for (my $i = 0; $i < $self->{'number_columns'}; $i++) { | 
| 624 | 48 | 50 |  |  |  | 66 | if (exists $self->{$i}{'AUTO'}) { | 
| 625 | 48 |  |  |  |  | 65 | delete $self->{$i}{'AUTO'}; | 
| 626 |  |  |  |  |  |  | } | 
| 627 |  |  |  |  |  |  | } | 
| 628 | 3 | 50 |  |  |  | 8 | print "\n $ucsc_type FILE FORMAT ERROR: $error\n" unless $silence; | 
| 629 |  |  |  |  |  |  | } | 
| 630 |  |  |  |  |  |  | } | 
| 631 |  |  |  |  |  |  |  | 
| 632 |  |  |  |  |  |  | # check VCF format | 
| 633 | 52 | 50 |  |  |  | 100 | if ($self->{vcf}) { | 
| 634 |  |  |  |  |  |  | # if any of these checks fail, we will reset the vcf flag to 0 | 
| 635 |  |  |  |  |  |  | # to make it not a vcf file format | 
| 636 | 0 |  |  |  |  | 0 | my $vcf_check = 1; # start with assumption it is correct | 
| 637 |  |  |  |  |  |  |  | 
| 638 |  |  |  |  |  |  | # check number of columns | 
| 639 | 0 | 0 |  |  |  | 0 | if ($self->{'number_columns'} < 8) { | 
| 640 | 0 |  |  |  |  | 0 | $vcf_check = 0; | 
| 641 | 0 |  |  |  |  | 0 | $error .= " Number of Columns is too few."; | 
| 642 |  |  |  |  |  |  | } | 
| 643 |  |  |  |  |  |  |  | 
| 644 |  |  |  |  |  |  | # check column index names | 
| 645 | 0 | 0 |  |  |  | 0 | if ($self->{0}{'name'} !~ m/chrom/i) { | 
| 646 | 0 |  |  |  |  | 0 | $vcf_check = 0; | 
| 647 | 0 |  |  |  |  | 0 | $error .= " Column 0 name not chromosome."; | 
| 648 |  |  |  |  |  |  | } | 
| 649 | 0 | 0 | 0 |  |  | 0 | if ( | 
| 650 |  |  |  |  |  |  | exists $self->{1} and | 
| 651 |  |  |  |  |  |  | $self->{1}{'name'} !~ m/^pos|start/i | 
| 652 |  |  |  |  |  |  | ) { | 
| 653 | 0 |  |  |  |  | 0 | $vcf_check = 0; | 
| 654 | 0 |  |  |  |  | 0 | $error .= " Column 1 name not position."; | 
| 655 |  |  |  |  |  |  | } | 
| 656 |  |  |  |  |  |  |  | 
| 657 |  |  |  |  |  |  | # check column data | 
| 658 | 0 | 0 |  |  |  | 0 | unless ($self->_column_is_integers(1)) { | 
| 659 | 0 |  |  |  |  | 0 | $vcf_check = 0; | 
| 660 | 0 |  |  |  |  | 0 | $error .= " Columns 1 not integers."; | 
| 661 |  |  |  |  |  |  | } | 
| 662 |  |  |  |  |  |  |  | 
| 663 |  |  |  |  |  |  | # reset the vcf tag value as appropriate | 
| 664 | 0 | 0 |  |  |  | 0 | if ($vcf_check) { | 
| 665 |  |  |  |  |  |  | # in case we had a fake true set it to a more reasonable value? | 
| 666 | 0 | 0 |  |  |  | 0 | $self->{'vcf'} = 4 if $self->{'vcf'} == 1; | 
| 667 |  |  |  |  |  |  | } | 
| 668 |  |  |  |  |  |  | else { | 
| 669 |  |  |  |  |  |  | # reset metadata | 
| 670 | 0 |  |  |  |  | 0 | $self->{'vcf'} = 0; | 
| 671 | 0 |  |  |  |  | 0 | $self->{'headers'} = 1; | 
| 672 | 0 |  |  |  |  | 0 | $self->{'format'} = ''; | 
| 673 |  |  |  |  |  |  |  | 
| 674 |  |  |  |  |  |  | # remove the AUTO key from the metadata | 
| 675 | 0 |  |  |  |  | 0 | for (my $i = 0; $i < $self->{'number_columns'}; $i++) { | 
| 676 | 0 | 0 |  |  |  | 0 | if (exists $self->{$i}{'AUTO'}) { | 
| 677 | 0 |  |  |  |  | 0 | delete $self->{$i}{'AUTO'}; | 
| 678 |  |  |  |  |  |  | } | 
| 679 |  |  |  |  |  |  | } | 
| 680 | 0 | 0 |  |  |  | 0 | print "\n VCF FILE FORMAT ERROR: $error\n" unless $silence; | 
| 681 |  |  |  |  |  |  | } | 
| 682 |  |  |  |  |  |  | } | 
| 683 |  |  |  |  |  |  |  | 
| 684 |  |  |  |  |  |  | # check proper SGR file structure | 
| 685 | 52 | 50 | 66 |  |  | 295 | if (exists $self->{'extension'} and | 
|  |  |  | 66 |  |  |  |  | 
| 686 |  |  |  |  |  |  | defined $self->{'extension'} and | 
| 687 |  |  |  |  |  |  | $self->{'extension'} =~ /sgr/i | 
| 688 |  |  |  |  |  |  | ) { | 
| 689 |  |  |  |  |  |  | # there is no sgr field in the data structure | 
| 690 |  |  |  |  |  |  | # so we're just checking for the extension | 
| 691 |  |  |  |  |  |  | # we will change the extension as necessary if it doesn't conform | 
| 692 | 0 |  |  |  |  | 0 | my $sgr_check = 1; | 
| 693 | 0 | 0 |  |  |  | 0 | if ($self->{'number_columns'} != 3) { | 
| 694 | 0 |  |  |  |  | 0 | $sgr_check = 0; | 
| 695 | 0 |  |  |  |  | 0 | $error .= " Column number is not 3."; | 
| 696 |  |  |  |  |  |  | } | 
| 697 | 0 | 0 |  |  |  | 0 | if ($self->{0}{'name'} !~ m/^#?(?:chr|chromo|seq|refseq|ref_seq|seq|seq_id)/i) { | 
| 698 | 0 |  |  |  |  | 0 | $sgr_check = 0; | 
| 699 | 0 |  |  |  |  | 0 | $error .= " Column 0 name not chromosome-like."; | 
| 700 |  |  |  |  |  |  | } | 
| 701 | 0 | 0 |  |  |  | 0 | if ($self->{1}{'name'} !~ /start|position/i) { | 
| 702 | 0 |  |  |  |  | 0 | $sgr_check = 0; | 
| 703 | 0 |  |  |  |  | 0 | $error .= " Column 1 name not start-like."; | 
| 704 |  |  |  |  |  |  | } | 
| 705 | 0 | 0 |  |  |  | 0 | unless ($self->_column_is_integers(1)) { | 
| 706 | 0 |  |  |  |  | 0 | $sgr_check = 0; | 
| 707 | 0 |  |  |  |  | 0 | $error .= " Columns 1 not integers."; | 
| 708 |  |  |  |  |  |  | } | 
| 709 | 0 | 0 |  |  |  | 0 | if ($sgr_check == 0) { | 
| 710 |  |  |  |  |  |  | # doesn't smell like a SGR file | 
| 711 |  |  |  |  |  |  | # change the extension so the write subroutine won't think it is | 
| 712 |  |  |  |  |  |  | # make it a text file | 
| 713 | 0 |  |  |  |  | 0 | $self->{'extension'} =~ s/sgr/txt/i; | 
| 714 | 0 |  |  |  |  | 0 | $self->{'filename'}  =~ s/sgr/txt/i; | 
| 715 | 0 |  |  |  |  | 0 | $self->{'headers'} = 1; | 
| 716 | 0 |  |  |  |  | 0 | $self->{'format'} = ''; | 
| 717 |  |  |  |  |  |  |  | 
| 718 |  |  |  |  |  |  | # remove the AUTO key from the metadata | 
| 719 | 0 |  |  |  |  | 0 | for (my $i = 0; $i < $self->{'number_columns'}; $i++) { | 
| 720 | 0 | 0 |  |  |  | 0 | if (exists $self->{$i}{'AUTO'}) { | 
| 721 | 0 |  |  |  |  | 0 | delete $self->{$i}{'AUTO'}; | 
| 722 |  |  |  |  |  |  | } | 
| 723 |  |  |  |  |  |  | } | 
| 724 | 0 | 0 |  |  |  | 0 | print "\n SGR FILE FORMAT ERROR: $error\n" unless $silence; | 
| 725 |  |  |  |  |  |  | } | 
| 726 |  |  |  |  |  |  | } | 
| 727 |  |  |  |  |  |  |  | 
| 728 |  |  |  |  |  |  | # check file headers value because this may have changed | 
| 729 |  |  |  |  |  |  | # this can happen when we reset bed/gff/vcf flags when we add columns | 
| 730 | 52 | 100 | 100 |  |  | 442 | if ( | 
|  |  | 100 | 100 |  |  |  |  | 
|  |  |  | 66 |  |  |  |  | 
|  |  |  | 66 |  |  |  |  | 
|  |  |  | 33 |  |  |  |  | 
|  |  |  | 33 |  |  |  |  | 
|  |  |  | 66 |  |  |  |  | 
|  |  |  | 33 |  |  |  |  | 
| 731 |  |  |  |  |  |  | $self->{'bed'} or | 
| 732 |  |  |  |  |  |  | $self->{'gff'} or | 
| 733 |  |  |  |  |  |  | $self->{'ucsc'} or | 
| 734 |  |  |  |  |  |  | ($self->{'extension'} and $self->{'extension'} =~ /sgr/i) | 
| 735 |  |  |  |  |  |  | ) { | 
| 736 | 29 |  |  |  |  | 62 | $self->{'headers'} = 0; | 
| 737 |  |  |  |  |  |  | } | 
| 738 |  |  |  |  |  |  | elsif ( | 
| 739 |  |  |  |  |  |  | $self->{'bed'} == 0 and | 
| 740 |  |  |  |  |  |  | $self->{'gff'} == 0 and | 
| 741 |  |  |  |  |  |  | $self->{'ucsc'} == 0 and | 
| 742 |  |  |  |  |  |  | ($self->{'extension'} and $self->{'extension'} !~ /sgr/i) | 
| 743 |  |  |  |  |  |  | ) { | 
| 744 | 18 | 50 |  |  |  | 47 | $self->{'headers'} = 1 unless $self->{'headers'} == -1; | 
| 745 |  |  |  |  |  |  | } | 
| 746 |  |  |  |  |  |  |  | 
| 747 |  |  |  |  |  |  | # if we have made it here, then there were no major structural problems | 
| 748 |  |  |  |  |  |  | # file is verified, any minor issues should have been fixed | 
| 749 | 52 |  |  |  |  | 120 | return 1; | 
| 750 |  |  |  |  |  |  | } | 
| 751 |  |  |  |  |  |  |  | 
| 752 |  |  |  |  |  |  | # internal method to check if a column is nothing but integers, i.e. start, stop | 
| 753 |  |  |  |  |  |  | sub _column_is_integers { | 
| 754 | 40 |  |  | 40 |  | 51 | my $self = shift; | 
| 755 | 40 |  |  |  |  | 82 | my @index = @_; | 
| 756 | 40 | 100 |  |  |  | 98 | return 1 if ($self->{last_row} == 0); # can't check if table is empty | 
| 757 | 38 |  |  |  |  | 68 | foreach (@index) { | 
| 758 | 101 | 50 |  |  |  | 201 | return 0 unless exists $self->{$_}; | 
| 759 |  |  |  |  |  |  | } | 
| 760 | 38 |  |  |  |  | 80 | for my $row (1 .. $self->{last_row}) { | 
| 761 | 709 |  |  |  |  | 723 | for my $i (@index) { | 
| 762 | 1648 | 50 |  |  |  | 2801 | return 0 unless ($self->{data_table}->[$row][$i] =~ /^\d+$/); | 
| 763 |  |  |  |  |  |  | } | 
| 764 |  |  |  |  |  |  | } | 
| 765 | 38 |  |  |  |  | 92 | return 1; | 
| 766 |  |  |  |  |  |  | } | 
| 767 |  |  |  |  |  |  |  | 
| 768 |  |  |  |  |  |  | # internal method to check if a column appears numeric, i.e. scores | 
| 769 |  |  |  |  |  |  | sub _column_is_numeric { | 
| 770 | 29 |  |  | 29 |  | 39 | my $self = shift; | 
| 771 | 29 |  |  |  |  | 44 | my @index = @_; | 
| 772 | 29 | 50 |  |  |  | 68 | return 1 if ($self->{last_row} == 0); # can't check if table is empty | 
| 773 | 29 |  |  |  |  | 54 | foreach (@index) { | 
| 774 | 45 | 50 |  |  |  | 91 | return 0 unless exists $self->{$_}; | 
| 775 |  |  |  |  |  |  | } | 
| 776 | 29 |  |  |  |  | 52 | for my $row (1 .. $self->{last_row}) { | 
| 777 | 327 |  |  |  |  | 348 | for my $i (@index) { | 
| 778 |  |  |  |  |  |  | # we have a very loose definition of numeric: exponents, signs, commas | 
| 779 | 407 | 50 |  |  |  | 748 | return 0 unless ($self->{data_table}->[$row][$i] =~ /^[\d\-\+\.,eE]+$/); | 
| 780 |  |  |  |  |  |  | } | 
| 781 |  |  |  |  |  |  | } | 
| 782 | 29 |  |  |  |  | 63 | return 1; | 
| 783 |  |  |  |  |  |  | } | 
| 784 |  |  |  |  |  |  |  | 
| 785 |  |  |  |  |  |  |  | 
| 786 |  |  |  |  |  |  |  | 
| 787 |  |  |  |  |  |  | # internal method to check if a column is nothing but comma delimited integers | 
| 788 |  |  |  |  |  |  | sub _column_is_comma_integers { | 
| 789 | 13 |  |  | 13 |  | 17 | my $self = shift; | 
| 790 | 13 |  |  |  |  | 22 | my @index = @_; | 
| 791 | 13 | 50 |  |  |  | 34 | return 1 if ($self->{last_row} == 0); # can't check if table is empty | 
| 792 | 13 |  |  |  |  | 23 | foreach (@index) { | 
| 793 | 26 | 50 |  |  |  | 53 | return 0 unless exists $self->{$_}; | 
| 794 |  |  |  |  |  |  | } | 
| 795 | 13 |  |  |  |  | 24 | for my $row (1 .. $self->{last_row}) { | 
| 796 | 110 |  |  |  |  | 121 | for my $i (@index) { | 
| 797 | 220 | 50 |  |  |  | 413 | return 0 unless ($self->{data_table}->[$row][$i] =~ /^[\d,]+$/); | 
| 798 |  |  |  |  |  |  | } | 
| 799 |  |  |  |  |  |  | } | 
| 800 | 13 |  |  |  |  | 37 | return 1; | 
| 801 |  |  |  |  |  |  | } | 
| 802 |  |  |  |  |  |  |  | 
| 803 |  |  |  |  |  |  | # internal method to check if a column looks like a strand column | 
| 804 |  |  |  |  |  |  | sub _column_is_stranded { | 
| 805 | 27 |  |  | 27 |  | 53 | my ($self, $index) = @_; | 
| 806 | 27 | 50 |  |  |  | 65 | return unless exists $self->{$index}; | 
| 807 | 27 |  |  |  |  | 63 | for my $row (1 .. $self->{last_row}) { | 
| 808 | 347 | 50 |  |  |  | 690 | return 0 if ($self->{data_table}->[$row][$index] !~ /^(?:\-1|0|1|\+|\-|\.)$/); | 
| 809 |  |  |  |  |  |  | } | 
| 810 | 27 |  |  |  |  | 62 | return 1; | 
| 811 |  |  |  |  |  |  | } | 
| 812 |  |  |  |  |  |  |  | 
| 813 |  |  |  |  |  |  |  | 
| 814 |  |  |  |  |  |  |  | 
| 815 |  |  |  |  |  |  |  | 
| 816 |  |  |  |  |  |  |  | 
| 817 |  |  |  |  |  |  | #### Database methods ############################################################## | 
| 818 |  |  |  |  |  |  |  | 
| 819 |  |  |  |  |  |  | sub open_database { | 
| 820 | 1 |  |  | 1 | 1 | 2 | my $self = shift; | 
| 821 | 1 | 50 | 0 |  |  | 3 | if (not defined $_[0]) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 822 | 1 |  |  |  |  | 4 | return $self->open_meta_database; | 
| 823 |  |  |  |  |  |  | } | 
| 824 |  |  |  |  |  |  | elsif ($_[0] eq '0' or $_[0] eq '1') { | 
| 825 |  |  |  |  |  |  | # likely a boolean value to indicate force | 
| 826 | 0 |  |  |  |  | 0 | return $self->open_meta_database($_[0]); | 
| 827 |  |  |  |  |  |  | } | 
| 828 |  |  |  |  |  |  | elsif ($_[0] =~ /[a-zA-Z]+/) { | 
| 829 |  |  |  |  |  |  | # likely the name of a database | 
| 830 | 0 |  |  |  |  | 0 | return $self->open_new_database(@_); | 
| 831 |  |  |  |  |  |  | } | 
| 832 |  |  |  |  |  |  | else { | 
| 833 |  |  |  |  |  |  | # original default | 
| 834 | 0 |  |  |  |  | 0 | return $self->open_meta_database(@_); | 
| 835 |  |  |  |  |  |  | } | 
| 836 |  |  |  |  |  |  | } | 
| 837 |  |  |  |  |  |  |  | 
| 838 |  |  |  |  |  |  | sub open_meta_database { | 
| 839 | 15 |  |  | 15 | 1 | 18 | my $self = shift; | 
| 840 | 15 |  | 50 |  |  | 37 | my $force = shift || 0; | 
| 841 | 15 | 50 |  |  |  | 25 | return unless $self->{db}; | 
| 842 | 15 | 50 |  |  |  | 31 | return if $self->{db} =~ /^Parsed:/; # we don't open parsed annotation files | 
| 843 | 15 | 100 |  |  |  | 23 | if (exists $self->{db_connection}) { | 
| 844 | 14 | 50 |  |  |  | 50 | return $self->{db_connection} unless $force; | 
| 845 |  |  |  |  |  |  | } | 
| 846 | 1 |  |  |  |  | 5 | my $db = open_db_connection($self->{db}, $force); | 
| 847 | 1 | 50 |  |  |  | 2 | return unless $db; | 
| 848 | 1 |  |  |  |  | 2 | $self->{db_connection} = $db; | 
| 849 | 1 |  |  |  |  | 2 | return $db; | 
| 850 |  |  |  |  |  |  | } | 
| 851 |  |  |  |  |  |  |  | 
| 852 |  |  |  |  |  |  | sub open_new_database { | 
| 853 | 0 |  |  | 0 | 1 | 0 | my $self = shift; | 
| 854 | 0 |  |  |  |  | 0 | my $database = shift; | 
| 855 | 0 |  | 0 |  |  | 0 | my $force = shift || 0; | 
| 856 | 0 |  |  |  |  | 0 | return open_db_connection($database, $force); | 
| 857 |  |  |  |  |  |  | } | 
| 858 |  |  |  |  |  |  |  | 
| 859 |  |  |  |  |  |  | sub verify_dataset { | 
| 860 | 12 |  |  | 12 | 1 | 21 | my ($self, $dataset, $database) = @_; | 
| 861 | 12 | 50 |  |  |  | 23 | return unless $dataset; | 
| 862 | 12 | 100 |  |  |  | 48 | if (exists $self->{verfied_dataset}{$dataset}) { | 
| 863 | 11 |  |  |  |  | 32 | return $self->{verfied_dataset}{$dataset}; | 
| 864 |  |  |  |  |  |  | } | 
| 865 |  |  |  |  |  |  | else { | 
| 866 | 1 | 50 |  |  |  | 5 | if ($dataset =~ /^(?:file|http|ftp)/) { | 
| 867 |  |  |  |  |  |  | # local or remote file already verified? | 
| 868 | 0 |  |  |  |  | 0 | $self->{verfied_dataset}{$dataset} = $dataset; | 
| 869 | 0 |  |  |  |  | 0 | return $dataset; | 
| 870 |  |  |  |  |  |  | } | 
| 871 | 1 |  | 33 |  |  | 2 | $database ||= $self->open_meta_database; | 
| 872 | 1 |  |  |  |  | 5 | my ($verified) = verify_or_request_feature_types( | 
| 873 |  |  |  |  |  |  | # normally returns an array of verified features, we're only checking one | 
| 874 |  |  |  |  |  |  | db      => $database, | 
| 875 |  |  |  |  |  |  | feature => $dataset, | 
| 876 |  |  |  |  |  |  | ); | 
| 877 | 1 | 50 |  |  |  | 3 | if ($verified) { | 
| 878 | 1 |  |  |  |  | 2 | $self->{verfied_dataset}{$dataset} = $verified; | 
| 879 | 1 |  |  |  |  | 3 | return $verified; | 
| 880 |  |  |  |  |  |  | } | 
| 881 |  |  |  |  |  |  | } | 
| 882 | 0 |  |  |  |  | 0 | return; | 
| 883 |  |  |  |  |  |  | } | 
| 884 |  |  |  |  |  |  |  | 
| 885 |  |  |  |  |  |  |  | 
| 886 |  |  |  |  |  |  |  | 
| 887 |  |  |  |  |  |  | #### Column Manipulation #### | 
| 888 |  |  |  |  |  |  |  | 
| 889 |  |  |  |  |  |  | sub delete_column { | 
| 890 | 2 |  |  | 2 | 1 | 4 | my $self = shift; | 
| 891 |  |  |  |  |  |  |  | 
| 892 |  |  |  |  |  |  | # check for Stream | 
| 893 | 2 | 50 |  |  |  | 5 | if (ref $self eq 'Bio::ToolBox::Data::Stream') { | 
| 894 | 0 | 0 |  |  |  | 0 | unless ($self->mode) { | 
| 895 | 0 |  |  |  |  | 0 | cluck "We have a read-only Stream object, cannot add columns"; | 
| 896 | 0 |  |  |  |  | 0 | return; | 
| 897 |  |  |  |  |  |  | } | 
| 898 | 0 | 0 |  |  |  | 0 | if (defined $self->{fh}) { | 
| 899 |  |  |  |  |  |  | # Stream file handle is opened | 
| 900 | 0 |  |  |  |  | 0 | cluck "Cannot modify columns when a Stream file handle is opened!"; | 
| 901 | 0 |  |  |  |  | 0 | return; | 
| 902 |  |  |  |  |  |  | } | 
| 903 |  |  |  |  |  |  | } | 
| 904 | 2 | 50 |  |  |  | 4 | unless (@_) { | 
| 905 | 0 |  |  |  |  | 0 | cluck "must provide a list"; | 
| 906 | 0 |  |  |  |  | 0 | return; | 
| 907 |  |  |  |  |  |  | } | 
| 908 |  |  |  |  |  |  |  | 
| 909 | 2 |  |  |  |  | 5 | my @deletion_list = sort {$a <=> $b} @_; | 
|  | 0 |  |  |  |  | 0 |  | 
| 910 | 2 |  |  |  |  | 3 | my @retain_list; | 
| 911 | 2 |  |  |  |  | 5 | for (my $i = 0; $i < $self->number_columns; $i++) { | 
| 912 |  |  |  |  |  |  | # compare each current index with the first one in the list of | 
| 913 |  |  |  |  |  |  | # deleted indices. if it matches, delete. if not, keep | 
| 914 | 18 | 100 |  |  |  | 20 | if ( $i == $deletion_list[0] ) { | 
| 915 |  |  |  |  |  |  | # this particular index should be deleted | 
| 916 | 2 |  |  |  |  | 4 | shift @deletion_list; | 
| 917 |  |  |  |  |  |  | } | 
| 918 |  |  |  |  |  |  | else { | 
| 919 |  |  |  |  |  |  | # this particular index should be kept | 
| 920 | 16 |  |  |  |  | 21 | push @retain_list, $i; | 
| 921 |  |  |  |  |  |  | } | 
| 922 |  |  |  |  |  |  | } | 
| 923 | 2 |  |  |  |  | 7 | return $self->reorder_column(@retain_list); | 
| 924 |  |  |  |  |  |  | } | 
| 925 |  |  |  |  |  |  |  | 
| 926 |  |  |  |  |  |  | sub reorder_column { | 
| 927 | 3 |  |  | 3 | 1 | 5 | my $self = shift; | 
| 928 |  |  |  |  |  |  |  | 
| 929 |  |  |  |  |  |  | # check for Stream | 
| 930 | 3 | 50 |  |  |  | 6 | if (ref $self eq 'Bio::ToolBox::Data::Stream') { | 
| 931 | 0 | 0 |  |  |  | 0 | unless ($self->mode) { | 
| 932 | 0 |  |  |  |  | 0 | cluck "We have a read-only Stream object, cannot add columns"; | 
| 933 | 0 |  |  |  |  | 0 | return; | 
| 934 |  |  |  |  |  |  | } | 
| 935 | 0 | 0 |  |  |  | 0 | if (defined $self->{fh}) { | 
| 936 |  |  |  |  |  |  | # Stream file handle is opened | 
| 937 | 0 |  |  |  |  | 0 | cluck "Cannot modify columns when a Stream file handle is opened!"; | 
| 938 | 0 |  |  |  |  | 0 | return; | 
| 939 |  |  |  |  |  |  | } | 
| 940 |  |  |  |  |  |  | } | 
| 941 |  |  |  |  |  |  |  | 
| 942 |  |  |  |  |  |  | # reorder data table | 
| 943 | 3 | 50 |  |  |  | 5 | unless (@_) { | 
| 944 | 0 |  |  |  |  | 0 | carp "must provide a list"; | 
| 945 | 0 |  |  |  |  | 0 | return; | 
| 946 |  |  |  |  |  |  | } | 
| 947 | 3 |  |  |  |  | 6 | my @order = @_; | 
| 948 | 3 |  |  |  |  | 7 | for (my $row = 0; $row <= $self->last_row; $row++) { | 
| 949 | 237 |  |  |  |  | 303 | my @old = $self->row_values($row); | 
| 950 | 237 |  |  |  |  | 252 | my @new = map { $old[$_] } @order; | 
|  | 1580 |  |  |  |  | 1660 |  | 
| 951 | 237 |  |  |  |  | 215 | splice( @{ $self->{data_table} }, $row, 1, \@new); | 
|  | 237 |  |  |  |  | 527 |  | 
| 952 |  |  |  |  |  |  | } | 
| 953 |  |  |  |  |  |  |  | 
| 954 |  |  |  |  |  |  | # reorder metadata | 
| 955 | 3 |  |  |  |  | 5 | my %old_metadata; | 
| 956 | 3 |  |  |  |  | 6 | for (my $i = 0; $i < $self->number_columns; $i++) { | 
| 957 |  |  |  |  |  |  | # copy the metadata info hash into a temporary hash | 
| 958 | 28 |  |  |  |  | 38 | $old_metadata{$i} = $self->{$i}; | 
| 959 | 28 |  |  |  |  | 37 | delete $self->{$i}; # delete original | 
| 960 |  |  |  |  |  |  | } | 
| 961 | 3 |  |  |  |  | 6 | for (my $i = 0; $i < scalar(@order); $i++) { | 
| 962 |  |  |  |  |  |  | # now copy back from the old_metadata into the main data hash | 
| 963 |  |  |  |  |  |  | # using the new index number in the @order array | 
| 964 |  |  |  |  |  |  | # must regenerate the hash, not just link to the old anonymous hash, in | 
| 965 |  |  |  |  |  |  | # case we're duplicating columns | 
| 966 | 20 |  |  |  |  | 25 | $self->{$i} = {}; | 
| 967 | 20 |  |  |  |  | 18 | foreach my $k (keys %{ $old_metadata{$order[$i]} }) { | 
|  | 20 |  |  |  |  | 32 |  | 
| 968 | 50 |  |  |  |  | 73 | $self->{$i}{$k} = $old_metadata{$order[$i]}{$k}; | 
| 969 |  |  |  |  |  |  | } | 
| 970 |  |  |  |  |  |  | # assign new index number | 
| 971 | 20 |  |  |  |  | 38 | $self->{$i}{'index'} = $i; | 
| 972 |  |  |  |  |  |  | } | 
| 973 | 3 |  |  |  |  | 3 | $self->{'number_columns'} = scalar @order; | 
| 974 | 3 | 100 |  |  |  | 9 | delete $self->{column_indices} if exists $self->{column_indices}; | 
| 975 | 3 | 50 | 66 |  |  | 6 | if ($self->gff or $self->bed or $self->ucsc or $self->vcf) { | 
|  |  |  | 66 |  |  |  |  | 
|  |  |  | 33 |  |  |  |  | 
| 976 |  |  |  |  |  |  | # check if we maintain integrity, at least insofar what we test | 
| 977 | 1 |  |  |  |  | 2 | $self->verify(1); # silence so user doesn't get these messages | 
| 978 |  |  |  |  |  |  | } | 
| 979 | 3 |  |  |  |  | 18 | return 1; | 
| 980 |  |  |  |  |  |  | } | 
| 981 |  |  |  |  |  |  |  | 
| 982 |  |  |  |  |  |  |  | 
| 983 |  |  |  |  |  |  |  | 
| 984 |  |  |  |  |  |  | #### General Metadata #### | 
| 985 |  |  |  |  |  |  |  | 
| 986 |  |  |  |  |  |  | sub feature { | 
| 987 | 54 |  |  | 54 | 1 | 404 | my $self = shift; | 
| 988 | 54 | 100 |  |  |  | 128 | if (@_) { | 
| 989 | 20 |  |  |  |  | 36 | $self->{feature} = shift; | 
| 990 |  |  |  |  |  |  | } | 
| 991 | 54 |  |  |  |  | 135 | return $self->{feature}; | 
| 992 |  |  |  |  |  |  | } | 
| 993 |  |  |  |  |  |  |  | 
| 994 |  |  |  |  |  |  | sub feature_type { | 
| 995 | 37 |  |  | 37 | 1 | 49 | my $self = shift; | 
| 996 | 37 | 50 |  |  |  | 64 | carp "feature_type is a read only method" if @_; | 
| 997 | 37 | 100 |  |  |  | 69 | if (defined $self->{feature_type}) { | 
| 998 | 24 |  |  |  |  | 70 | return $self->{feature_type}; | 
| 999 |  |  |  |  |  |  | } | 
| 1000 | 13 |  |  |  |  | 47 | my $feature_type; | 
| 1001 | 13 | 100 | 66 |  |  | 40 | if (defined $self->chromo_column and defined $self->start_column) { | 
|  |  | 50 | 0 |  |  |  |  | 
|  |  |  | 33 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
| 1002 | 8 |  |  |  |  | 12 | $feature_type = 'coordinate'; | 
| 1003 |  |  |  |  |  |  | } | 
| 1004 |  |  |  |  |  |  | elsif (defined $self->id_column or | 
| 1005 |  |  |  |  |  |  | ( defined $self->type_column and defined $self->name_column ) or | 
| 1006 |  |  |  |  |  |  | ( defined $self->feature and defined $self->name_column ) | 
| 1007 |  |  |  |  |  |  | ) { | 
| 1008 | 5 |  |  |  |  | 8 | $feature_type = 'named'; | 
| 1009 |  |  |  |  |  |  | } | 
| 1010 |  |  |  |  |  |  | else { | 
| 1011 | 0 |  |  |  |  | 0 | $feature_type = 'unknown'; | 
| 1012 |  |  |  |  |  |  | } | 
| 1013 | 13 |  |  |  |  | 26 | $self->{feature_type} = $feature_type; | 
| 1014 | 13 |  |  |  |  | 56 | return $feature_type; | 
| 1015 |  |  |  |  |  |  | } | 
| 1016 |  |  |  |  |  |  |  | 
| 1017 |  |  |  |  |  |  | sub program { | 
| 1018 | 68 |  |  | 68 | 1 | 90 | my $self = shift; | 
| 1019 | 68 | 100 |  |  |  | 133 | if (@_) { | 
| 1020 | 60 |  |  |  |  | 88 | $self->{program} = shift; | 
| 1021 |  |  |  |  |  |  | } | 
| 1022 | 68 |  |  |  |  | 166 | return $self->{program}; | 
| 1023 |  |  |  |  |  |  | } | 
| 1024 |  |  |  |  |  |  |  | 
| 1025 |  |  |  |  |  |  | sub database { | 
| 1026 | 46 |  |  | 46 | 1 | 396 | my $self = shift; | 
| 1027 | 46 | 100 |  |  |  | 116 | if (@_) { | 
| 1028 | 20 |  |  |  |  | 41 | $self->{db} = shift; | 
| 1029 | 20 | 0 | 33 |  |  | 43 | if (exists $self->{db_connection} and $self->{db_connection}) { | 
| 1030 | 0 |  |  |  |  | 0 | $self->open_meta_database(1); | 
| 1031 |  |  |  |  |  |  | } | 
| 1032 |  |  |  |  |  |  | } | 
| 1033 | 46 |  |  |  |  | 194 | return $self->{db}; | 
| 1034 |  |  |  |  |  |  | } | 
| 1035 |  |  |  |  |  |  |  | 
| 1036 |  |  |  |  |  |  | sub bam_adapter { | 
| 1037 | 0 |  |  | 0 | 1 | 0 | my $self = shift; | 
| 1038 | 0 |  |  |  |  | 0 | return use_bam_adapter(@_); | 
| 1039 |  |  |  |  |  |  | } | 
| 1040 |  |  |  |  |  |  |  | 
| 1041 |  |  |  |  |  |  | sub big_adapter { | 
| 1042 | 0 |  |  | 0 | 1 | 0 | my $self = shift; | 
| 1043 | 0 |  |  |  |  | 0 | return use_big_adapter(@_); | 
| 1044 |  |  |  |  |  |  | } | 
| 1045 |  |  |  |  |  |  |  | 
| 1046 |  |  |  |  |  |  | sub format { | 
| 1047 | 222 |  |  | 222 | 1 | 255 | my $self = shift; | 
| 1048 | 222 | 100 |  |  |  | 406 | if (defined $_[0]) { | 
| 1049 | 72 |  |  |  |  | 117 | $self->{format} = $_[0]; | 
| 1050 |  |  |  |  |  |  | } | 
| 1051 | 222 |  |  |  |  | 1014 | return $self->{format}; | 
| 1052 |  |  |  |  |  |  | } | 
| 1053 |  |  |  |  |  |  |  | 
| 1054 |  |  |  |  |  |  | sub gff { | 
| 1055 | 102 |  |  | 102 | 1 | 1672 | my $self = shift; | 
| 1056 | 102 | 100 | 66 |  |  | 270 | if (defined $_[0] and $_[0] =~ /^(?:0|1|2|2\.[2|5]|3)$/) { | 
| 1057 | 5 |  |  |  |  | 12 | $self->{gff} = $_[0]; | 
| 1058 |  |  |  |  |  |  | } | 
| 1059 | 102 |  |  |  |  | 383 | return $self->{gff}; | 
| 1060 |  |  |  |  |  |  | } | 
| 1061 |  |  |  |  |  |  |  | 
| 1062 |  |  |  |  |  |  | sub bed { | 
| 1063 | 137 |  |  | 137 | 1 | 508 | my $self = shift; | 
| 1064 | 137 | 100 | 66 |  |  | 449 | if (defined $_[0] and $_[0] =~ /^\d+$/) { | 
| 1065 | 43 |  |  |  |  | 86 | $self->{bed} = $_[0]; | 
| 1066 |  |  |  |  |  |  | } | 
| 1067 | 137 |  |  |  |  | 363 | return $self->{bed}; | 
| 1068 |  |  |  |  |  |  | } | 
| 1069 |  |  |  |  |  |  |  | 
| 1070 |  |  |  |  |  |  | sub ucsc { | 
| 1071 | 62 |  |  | 62 | 1 | 75 | my $self = shift; | 
| 1072 | 62 | 100 | 66 |  |  | 166 | if (defined $_[0] and $_[0] =~ /^\d+$/) { | 
| 1073 | 3 |  |  |  |  | 7 | $self->{ucsc} = $_[0]; | 
| 1074 |  |  |  |  |  |  | } | 
| 1075 | 62 |  |  |  |  | 212 | return $self->{ucsc}; | 
| 1076 |  |  |  |  |  |  | } | 
| 1077 |  |  |  |  |  |  |  | 
| 1078 |  |  |  |  |  |  | sub vcf { | 
| 1079 | 44 |  |  | 44 | 1 | 60 | my $self = shift; | 
| 1080 | 44 | 50 | 33 |  |  | 78 | if (defined $_[0] and $_[0] =~ /^[\d\.]+$/) { | 
| 1081 | 0 |  |  |  |  | 0 | $self->{vcf} = $_[0]; | 
| 1082 |  |  |  |  |  |  | } | 
| 1083 | 44 |  |  |  |  | 127 | return $self->{vcf}; | 
| 1084 |  |  |  |  |  |  | } | 
| 1085 |  |  |  |  |  |  |  | 
| 1086 |  |  |  |  |  |  | sub number_columns { | 
| 1087 | 553 |  |  | 553 | 1 | 7066 | my $self = shift; | 
| 1088 | 553 | 50 |  |  |  | 794 | carp "number_columns is a read only method" if @_; | 
| 1089 | 553 |  |  |  |  | 1210 | return $self->{number_columns}; | 
| 1090 |  |  |  |  |  |  | } | 
| 1091 |  |  |  |  |  |  |  | 
| 1092 |  |  |  |  |  |  | sub number_rows { | 
| 1093 | 3 |  |  | 3 | 1 | 7 | my $self = shift; | 
| 1094 | 3 | 50 |  |  |  | 10 | carp "number_rows is a read only method" if @_; | 
| 1095 | 3 |  |  |  |  | 10 | return $self->{last_row}; | 
| 1096 |  |  |  |  |  |  | } | 
| 1097 |  |  |  |  |  |  |  | 
| 1098 |  |  |  |  |  |  | sub last_column { | 
| 1099 | 0 |  |  | 0 | 1 | 0 | my $self = shift; | 
| 1100 | 0 | 0 |  |  |  | 0 | carp "last_column is a read only method" if @_; | 
| 1101 | 0 |  |  |  |  | 0 | return $self->{number_columns} - 1; | 
| 1102 |  |  |  |  |  |  | } | 
| 1103 |  |  |  |  |  |  |  | 
| 1104 |  |  |  |  |  |  | sub last_row { | 
| 1105 | 892 |  |  | 892 | 1 | 2803 | my $self = shift; | 
| 1106 | 892 | 50 |  |  |  | 1221 | carp "last_row is a read only method" if @_; | 
| 1107 | 892 |  |  |  |  | 1680 | return $self->{last_row}; | 
| 1108 |  |  |  |  |  |  | } | 
| 1109 |  |  |  |  |  |  |  | 
| 1110 |  |  |  |  |  |  | sub filename { | 
| 1111 | 138 |  |  | 138 | 1 | 181 | my $self = shift; | 
| 1112 | 138 | 50 |  |  |  | 250 | carp "filename is a read only method. Use add_file_metadata()." if @_; | 
| 1113 | 138 |  |  |  |  | 382 | return $self->{filename}; | 
| 1114 |  |  |  |  |  |  | } | 
| 1115 |  |  |  |  |  |  |  | 
| 1116 |  |  |  |  |  |  | sub basename { | 
| 1117 | 29 |  |  | 29 | 1 | 2782 | my $self = shift; | 
| 1118 | 29 | 50 |  |  |  | 74 | carp "basename is a read only method. Use add_file_metadata()." if @_; | 
| 1119 | 29 |  |  |  |  | 117 | return $self->{basename}; | 
| 1120 |  |  |  |  |  |  | } | 
| 1121 |  |  |  |  |  |  |  | 
| 1122 |  |  |  |  |  |  | sub path { | 
| 1123 | 0 |  |  | 0 | 1 | 0 | my $self = shift; | 
| 1124 | 0 | 0 |  |  |  | 0 | carp "path is a read only method. Use add_file_metadata()." if @_; | 
| 1125 | 0 |  |  |  |  | 0 | return $self->{path}; | 
| 1126 |  |  |  |  |  |  | } | 
| 1127 |  |  |  |  |  |  |  | 
| 1128 |  |  |  |  |  |  | sub extension { | 
| 1129 | 129 |  |  | 129 | 1 | 174 | my $self = shift; | 
| 1130 | 129 | 50 |  |  |  | 203 | carp "extension() is a read only method. Use add_file_metadata()." if @_; | 
| 1131 | 129 |  |  |  |  | 471 | return $self->{extension}; | 
| 1132 |  |  |  |  |  |  | } | 
| 1133 |  |  |  |  |  |  |  | 
| 1134 |  |  |  |  |  |  |  | 
| 1135 |  |  |  |  |  |  |  | 
| 1136 |  |  |  |  |  |  | #### General Comments #### | 
| 1137 |  |  |  |  |  |  |  | 
| 1138 |  |  |  |  |  |  | sub comments { | 
| 1139 | 26 |  |  | 26 | 1 | 40 | my $self = shift; | 
| 1140 | 26 |  |  |  |  | 36 | my @comments = @{ $self->{comments} }; | 
|  | 26 |  |  |  |  | 64 |  | 
| 1141 | 26 |  |  |  |  | 56 | foreach (@comments) {s/[\r\n]+//g} | 
|  | 25 |  |  |  |  | 92 |  | 
| 1142 |  |  |  |  |  |  | # comments are not chomped when loading | 
| 1143 |  |  |  |  |  |  | # side effect of dealing with rare commented header lines with null values at end | 
| 1144 | 26 |  |  |  |  | 68 | return @comments; | 
| 1145 |  |  |  |  |  |  | } | 
| 1146 |  |  |  |  |  |  |  | 
| 1147 |  |  |  |  |  |  | sub add_comment { | 
| 1148 | 53 |  |  | 53 | 1 | 516 | my $self = shift; | 
| 1149 | 53 | 50 |  |  |  | 118 | my $comment = shift or return; | 
| 1150 |  |  |  |  |  |  | # comment is not required to be prefixed with "# ", it will be added when saving | 
| 1151 | 53 |  |  |  |  | 78 | push @{ $self->{comments} }, $comment; | 
|  | 53 |  |  |  |  | 110 |  | 
| 1152 | 53 |  |  |  |  | 93 | return 1; | 
| 1153 |  |  |  |  |  |  | } | 
| 1154 |  |  |  |  |  |  |  | 
| 1155 |  |  |  |  |  |  | sub delete_comment { | 
| 1156 | 1 |  |  | 1 | 1 | 1153 | my $self = shift; | 
| 1157 | 1 |  |  |  |  | 1 | my $index = shift; | 
| 1158 | 1 | 50 |  |  |  | 3 | if (defined $index) { | 
| 1159 | 1 |  |  |  |  | 1 | eval {splice @{$self->{comments}}, $index, 1}; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 3 |  | 
| 1160 |  |  |  |  |  |  | } | 
| 1161 |  |  |  |  |  |  | else { | 
| 1162 | 0 |  |  |  |  | 0 | $self->{comments} = []; | 
| 1163 |  |  |  |  |  |  | } | 
| 1164 |  |  |  |  |  |  | } | 
| 1165 |  |  |  |  |  |  |  | 
| 1166 |  |  |  |  |  |  | sub vcf_headers { | 
| 1167 | 0 |  |  | 0 | 1 | 0 | my $self = shift; | 
| 1168 | 0 | 0 |  |  |  | 0 | return unless $self->vcf; | 
| 1169 | 0 | 0 |  |  |  | 0 | return $self->{vcf_headers} if exists $self->{vcf_headers}; | 
| 1170 | 0 |  |  |  |  | 0 | my $headers = {}; | 
| 1171 | 0 |  |  |  |  | 0 | foreach my $comment ($self->comments) { | 
| 1172 | 0 |  |  |  |  | 0 | my ($key, $value); | 
| 1173 | 0 | 0 |  |  |  | 0 | if ($comment =~ /^##([\w\-\.]+)=(.+)$/) { | 
| 1174 | 0 |  |  |  |  | 0 | $key = $1; | 
| 1175 | 0 |  |  |  |  | 0 | $value = $2; | 
| 1176 |  |  |  |  |  |  | } | 
| 1177 |  |  |  |  |  |  | else { | 
| 1178 |  |  |  |  |  |  | # invalid vcf header format!? | 
| 1179 | 0 |  |  |  |  | 0 | next; | 
| 1180 |  |  |  |  |  |  | } | 
| 1181 | 0 | 0 |  |  |  | 0 | if ($value !~ /^<.+>$/) { | 
| 1182 |  |  |  |  |  |  | # simple value | 
| 1183 | 0 |  |  |  |  | 0 | $headers->{$key} = $value; | 
| 1184 |  |  |  |  |  |  | } | 
| 1185 |  |  |  |  |  |  | else { | 
| 1186 |  |  |  |  |  |  | # process complex values | 
| 1187 |  |  |  |  |  |  | # extract ID with regex which should have | 
| 1188 | 0 |  |  |  |  | 0 | my $id = ($value =~ /ID=([\w\-\.:]+)/)[0]; | 
| 1189 | 0 |  |  |  |  | 0 | $headers->{$key}{$id} = $value; | 
| 1190 |  |  |  |  |  |  | } | 
| 1191 |  |  |  |  |  |  | } | 
| 1192 |  |  |  |  |  |  |  | 
| 1193 |  |  |  |  |  |  | # store and return | 
| 1194 | 0 |  |  |  |  | 0 | $self->{vcf_headers} = $headers; | 
| 1195 | 0 |  |  |  |  | 0 | return $headers; | 
| 1196 |  |  |  |  |  |  | } | 
| 1197 |  |  |  |  |  |  |  | 
| 1198 |  |  |  |  |  |  | sub rewrite_vcf_headers { | 
| 1199 | 0 |  |  | 0 | 1 | 0 | my $self = shift; | 
| 1200 | 0 | 0 |  |  |  | 0 | return unless $self->vcf; | 
| 1201 | 0 | 0 |  |  |  | 0 | return unless exists $self->{vcf_headers}; | 
| 1202 | 0 |  |  |  |  | 0 | my @newComments; | 
| 1203 |  |  |  |  |  |  |  | 
| 1204 |  |  |  |  |  |  | # file format always comes first | 
| 1205 |  |  |  |  |  |  | push @newComments, sprintf("##fileformat=%s\n", | 
| 1206 | 0 |  |  |  |  | 0 | $self->{vcf_headers}{fileformat}); | 
| 1207 |  |  |  |  |  |  |  | 
| 1208 |  |  |  |  |  |  | # common attributes | 
| 1209 | 0 |  |  |  |  | 0 | foreach my $key (sort {$a cmp $b} keys %{ $self->{vcf_headers} } ) { | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 1210 | 0 | 0 |  |  |  | 0 | next if $key eq 'fileformat'; | 
| 1211 | 0 | 0 |  |  |  | 0 | if (ref $self->{vcf_headers}{$key} eq 'HASH') { | 
| 1212 |  |  |  |  |  |  | # we have a complex VCF header with multiple keys | 
| 1213 |  |  |  |  |  |  | # we will rewrite for each ID | 
| 1214 | 0 |  |  |  |  | 0 | foreach my $id (sort {$a cmp $b} | 
|  | 0 |  |  |  |  | 0 |  | 
| 1215 | 0 |  |  |  |  | 0 | keys %{ $self->{vcf_headers}{$key} } | 
| 1216 |  |  |  |  |  |  | ) { | 
| 1217 |  |  |  |  |  |  | # to avoid complexity of writing correct formatting | 
| 1218 |  |  |  |  |  |  | push @newComments, sprintf("##%s=%s\n", $key, | 
| 1219 | 0 |  |  |  |  | 0 | $self->{vcf_headers}{$key}{$id} ); | 
| 1220 |  |  |  |  |  |  | } | 
| 1221 |  |  |  |  |  |  | } | 
| 1222 |  |  |  |  |  |  | else { | 
| 1223 |  |  |  |  |  |  | # a simple value | 
| 1224 |  |  |  |  |  |  | push @newComments, sprintf("##%s=%s\n", $key, | 
| 1225 | 0 |  |  |  |  | 0 | $self->{vcf_headers}{$key} ); | 
| 1226 |  |  |  |  |  |  | } | 
| 1227 |  |  |  |  |  |  | } | 
| 1228 |  |  |  |  |  |  |  | 
| 1229 |  |  |  |  |  |  | # replace the headers | 
| 1230 | 0 |  |  |  |  | 0 | $self->{comments} = \@newComments; | 
| 1231 |  |  |  |  |  |  | } | 
| 1232 |  |  |  |  |  |  |  | 
| 1233 |  |  |  |  |  |  |  | 
| 1234 |  |  |  |  |  |  |  | 
| 1235 |  |  |  |  |  |  | #### Column Metadata #### | 
| 1236 |  |  |  |  |  |  |  | 
| 1237 |  |  |  |  |  |  | sub list_columns { | 
| 1238 | 4 |  |  | 4 | 1 | 17 | my $self = shift; | 
| 1239 | 4 | 50 |  |  |  | 10 | carp "list_columns is a read only method" if @_; | 
| 1240 | 4 |  |  |  |  | 5 | my @list; | 
| 1241 | 4 |  |  |  |  | 9 | for (my $i = 0; $i < $self->number_columns; $i++) { | 
| 1242 | 26 |  |  |  |  | 48 | push @list, $self->{$i}{'name'}; | 
| 1243 |  |  |  |  |  |  | } | 
| 1244 | 4 | 100 |  |  |  | 15 | return wantarray ? @list : \@list; | 
| 1245 |  |  |  |  |  |  | } | 
| 1246 |  |  |  |  |  |  |  | 
| 1247 |  |  |  |  |  |  | sub name { | 
| 1248 | 24 |  |  | 24 | 1 | 1845 | my $self = shift; | 
| 1249 | 24 |  |  |  |  | 44 | my ($index, $new_name) = @_; | 
| 1250 | 24 | 50 |  |  |  | 49 | return unless defined $index; | 
| 1251 | 24 | 50 |  |  |  | 68 | return unless exists $self->{$index}{name}; | 
| 1252 | 24 | 100 |  |  |  | 46 | if (defined $new_name) { | 
| 1253 | 1 |  |  |  |  | 2 | $self->{$index}{name} = $new_name; | 
| 1254 | 1 | 50 |  |  |  | 3 | if (exists $self->{data_table}) { | 
|  |  | 0 |  |  |  |  |  | 
| 1255 | 1 |  |  |  |  | 3 | $self->{data_table}->[0][$index] = $new_name; | 
| 1256 |  |  |  |  |  |  | } | 
| 1257 |  |  |  |  |  |  | elsif (exists $self->{column_names}) { | 
| 1258 | 0 |  |  |  |  | 0 | $self->{column_names}->[$index] = $new_name; | 
| 1259 |  |  |  |  |  |  | } | 
| 1260 |  |  |  |  |  |  | } | 
| 1261 | 24 |  |  |  |  | 138 | return $self->{$index}{name}; | 
| 1262 |  |  |  |  |  |  | } | 
| 1263 |  |  |  |  |  |  |  | 
| 1264 |  |  |  |  |  |  | sub metadata { | 
| 1265 | 26 |  |  | 26 | 1 | 26 | my $self = shift; | 
| 1266 | 26 |  |  |  |  | 35 | my ($index, $key, $value) = @_; | 
| 1267 | 26 | 50 |  |  |  | 36 | return unless defined $index; | 
| 1268 | 26 | 50 |  |  |  | 44 | return unless exists $self->{$index}; | 
| 1269 | 26 | 100 | 100 |  |  | 51 | if ($key and $key eq 'name') { | 
| 1270 | 2 |  |  |  |  | 4 | return $self->name($index, $value); | 
| 1271 |  |  |  |  |  |  | } | 
| 1272 | 24 | 100 | 66 |  |  | 54 | if ($key and defined $value) { | 
|  |  | 50 | 33 |  |  |  |  | 
| 1273 |  |  |  |  |  |  | # we are setting a new value | 
| 1274 | 3 |  |  |  |  | 5 | $self->{$index}{$key} = $value; | 
| 1275 | 3 |  |  |  |  | 6 | return $value; | 
| 1276 |  |  |  |  |  |  | } | 
| 1277 |  |  |  |  |  |  | elsif ($key and not defined $value) { | 
| 1278 | 0 | 0 |  |  |  | 0 | if (exists $self->{$index}{$key}) { | 
| 1279 |  |  |  |  |  |  | # retrieve a value | 
| 1280 | 0 |  |  |  |  | 0 | return $self->{$index}{$key}; | 
| 1281 |  |  |  |  |  |  | } | 
| 1282 |  |  |  |  |  |  | else { | 
| 1283 |  |  |  |  |  |  | # key does not exist | 
| 1284 | 0 |  |  |  |  | 0 | return; | 
| 1285 |  |  |  |  |  |  | } | 
| 1286 |  |  |  |  |  |  | } | 
| 1287 |  |  |  |  |  |  | else { | 
| 1288 | 21 |  |  |  |  | 19 | my %hash = %{ $self->{$index} }; | 
|  | 21 |  |  |  |  | 64 |  | 
| 1289 | 21 | 100 |  |  |  | 83 | return wantarray ? %hash : \%hash; | 
| 1290 |  |  |  |  |  |  | } | 
| 1291 |  |  |  |  |  |  | } | 
| 1292 |  |  |  |  |  |  |  | 
| 1293 |  |  |  |  |  |  | sub delete_metadata { | 
| 1294 | 0 |  |  | 0 | 1 | 0 | my $self = shift; | 
| 1295 | 0 |  |  |  |  | 0 | my ($index, $key) = @_; | 
| 1296 | 0 | 0 |  |  |  | 0 | return unless defined $index; | 
| 1297 | 0 | 0 |  |  |  | 0 | if (defined $key) { | 
| 1298 | 0 | 0 |  |  |  | 0 | if (exists $self->{$index}{$key}) { | 
| 1299 | 0 |  |  |  |  | 0 | return delete $self->{$index}{$key}; | 
| 1300 |  |  |  |  |  |  | } | 
| 1301 |  |  |  |  |  |  | } | 
| 1302 |  |  |  |  |  |  | else { | 
| 1303 |  |  |  |  |  |  | # user wants to delete the metadata | 
| 1304 |  |  |  |  |  |  | # but we need to keep the basics name and index | 
| 1305 | 0 |  |  |  |  | 0 | foreach my $key (keys %{ $self->{$index} }) { | 
|  | 0 |  |  |  |  | 0 |  | 
| 1306 | 0 | 0 |  |  |  | 0 | next if $key eq 'name'; | 
| 1307 | 0 | 0 |  |  |  | 0 | next if $key eq 'index'; | 
| 1308 | 0 |  |  |  |  | 0 | delete $self->{$index}{$key}; | 
| 1309 |  |  |  |  |  |  | } | 
| 1310 |  |  |  |  |  |  | } | 
| 1311 |  |  |  |  |  |  | } | 
| 1312 |  |  |  |  |  |  |  | 
| 1313 |  |  |  |  |  |  | sub copy_metadata { | 
| 1314 | 2 |  |  | 2 | 1 | 4 | my ($self, $source, $target) = @_; | 
| 1315 | 2 | 50 | 33 |  |  | 9 | return unless (exists $self->{$source}{name} and exists $self->{$target}{name}); | 
| 1316 | 2 |  |  |  |  | 4 | my $md = $self->metadata($source); | 
| 1317 | 2 |  |  |  |  | 4 | delete $md->{name}; | 
| 1318 | 2 |  |  |  |  | 3 | delete $md->{'index'}; | 
| 1319 | 2 | 50 |  |  |  | 20 | delete $md->{'AUTO'} if exists $md->{'AUTO'}; # presume this is no longer auto index | 
| 1320 | 2 |  |  |  |  | 6 | foreach (keys %$md) { | 
| 1321 | 1 |  |  |  |  | 2 | $self->{$target}{$_} = $md->{$_}; | 
| 1322 |  |  |  |  |  |  | } | 
| 1323 | 2 |  |  |  |  | 5 | return 1; | 
| 1324 |  |  |  |  |  |  | } | 
| 1325 |  |  |  |  |  |  |  | 
| 1326 |  |  |  |  |  |  |  | 
| 1327 |  |  |  |  |  |  |  | 
| 1328 |  |  |  |  |  |  | #### Column Indices #### | 
| 1329 |  |  |  |  |  |  |  | 
| 1330 |  |  |  |  |  |  | sub find_column { | 
| 1331 | 220 |  |  | 220 | 1 | 340 | my ($self, $name) = @_; | 
| 1332 | 220 | 50 |  |  |  | 305 | return unless $name; | 
| 1333 |  |  |  |  |  |  |  | 
| 1334 |  |  |  |  |  |  | # the $name variable will be used as a regex in identifying the name | 
| 1335 |  |  |  |  |  |  | # fix it so that it will possible accept a # character at the beginning | 
| 1336 |  |  |  |  |  |  | # without a following space, in case the first column has a # prefix | 
| 1337 |  |  |  |  |  |  | # also place the remainder of the text in a non-capturing parentheses for | 
| 1338 |  |  |  |  |  |  | # grouping purposes while maintaining the anchors | 
| 1339 | 220 |  |  |  |  | 1050 | $name =~ s/ \A (\^?) (.+) (\$?)\Z /$1#?(?:$2)$3/x; | 
| 1340 |  |  |  |  |  |  |  | 
| 1341 |  |  |  |  |  |  | # walk through each column index | 
| 1342 | 220 |  |  |  |  | 276 | my $index; | 
| 1343 | 220 |  |  |  |  | 369 | for (my $i = 0; $i < $self->{'number_columns'}; $i++) { | 
| 1344 |  |  |  |  |  |  | # check the names of each column | 
| 1345 | 736 | 100 |  |  |  | 5613 | if ($self->{$i}{'name'} =~ /$name/i) { | 
| 1346 | 88 |  |  |  |  | 129 | $index = $i; | 
| 1347 | 88 |  |  |  |  | 96 | last; | 
| 1348 |  |  |  |  |  |  | } | 
| 1349 |  |  |  |  |  |  | } | 
| 1350 | 220 |  |  |  |  | 443 | return $index; | 
| 1351 |  |  |  |  |  |  | } | 
| 1352 |  |  |  |  |  |  |  | 
| 1353 |  |  |  |  |  |  | sub _find_column_indices { | 
| 1354 | 24 |  |  | 24 |  | 34 | my $self = shift; | 
| 1355 |  |  |  |  |  |  | # these are hard coded index name regex to accomodate different possibilities | 
| 1356 |  |  |  |  |  |  | # these do not include parentheses for grouping | 
| 1357 |  |  |  |  |  |  | # non-capturing parentheses will be added later in the sub for proper | 
| 1358 |  |  |  |  |  |  | # anchoring and grouping - long story why, don't ask | 
| 1359 | 24 |  |  |  |  | 69 | my $name   = $self->find_column('^name|gene.?name|transcript.?name|geneid|id|gene|alias'); | 
| 1360 | 24 |  |  |  |  | 53 | my $type   = $self->find_column('^type|class|primary_tag|biotype'); | 
| 1361 | 24 |  |  |  |  | 63 | my $id     = $self->find_column('^primary_id'); | 
| 1362 | 24 |  |  |  |  | 53 | my $chromo = $self->find_column('^chr|seq|ref|ref.?seq'); | 
| 1363 | 24 |  |  |  |  | 48 | my $start  = $self->find_column('^start|position|pos|txStart'); | 
| 1364 | 24 |  |  |  |  | 46 | my $stop   = $self->find_column('^stop|end|txEnd'); | 
| 1365 | 24 |  |  |  |  | 52 | my $strand = $self->find_column('^strand'); | 
| 1366 | 24 |  |  |  |  | 229 | my $score  = $self->find_column('^score$'); | 
| 1367 |  |  |  |  |  |  |  | 
| 1368 |  |  |  |  |  |  | # check for coordinate string | 
| 1369 | 24 |  |  |  |  | 44 | my $coord  = $self->find_column('^coordinate$'); | 
| 1370 | 24 | 50 | 66 |  |  | 120 | if (not defined $coord and not defined $chromo and not defined $start) { | 
|  |  |  | 66 |  |  |  |  | 
| 1371 |  |  |  |  |  |  | # check if ID or name id looks like a coordinate string | 
| 1372 | 12 | 50 | 33 |  |  | 171 | if (defined $id and defined $self->{data_table}->[1]) { | 
| 1373 | 12 | 100 |  |  |  | 61 | if ($self->{data_table}->[1][$id] =~ /^[\w\-\.]+:\d+(?:[\-\.]{1,2}\d+)?$/) { | 
| 1374 |  |  |  |  |  |  | # first value looks like a coordinate string | 
| 1375 | 8 |  |  |  |  | 15 | $coord = $id; | 
| 1376 |  |  |  |  |  |  | } | 
| 1377 |  |  |  |  |  |  | } | 
| 1378 | 12 | 50 | 66 |  |  | 53 | if ( | 
|  |  |  | 66 |  |  |  |  | 
| 1379 |  |  |  |  |  |  | not defined $coord and defined $name and | 
| 1380 |  |  |  |  |  |  | defined defined $self->{data_table}->[1] | 
| 1381 |  |  |  |  |  |  | ) { | 
| 1382 | 4 | 50 |  |  |  | 14 | if ($self->{data_table}->[1][$name] =~ /^[\w\-\.]+:\d+(?:[\-\.]{1,2}\d+)?$/) { | 
| 1383 |  |  |  |  |  |  | # first value looks like a coordinate string | 
| 1384 | 0 |  |  |  |  | 0 | $coord = $name; | 
| 1385 |  |  |  |  |  |  | } | 
| 1386 |  |  |  |  |  |  | } | 
| 1387 |  |  |  |  |  |  | } | 
| 1388 |  |  |  |  |  |  |  | 
| 1389 |  |  |  |  |  |  | # determine zero-based start coordinates | 
| 1390 | 24 | 100 | 100 |  |  | 115 | if ( | 
|  |  |  | 100 |  |  |  |  | 
| 1391 |  |  |  |  |  |  | $self->{zerostart} == 0 and | 
| 1392 |  |  |  |  |  |  | defined $start and | 
| 1393 |  |  |  |  |  |  | substr($self->name($start), -1) eq '0' | 
| 1394 |  |  |  |  |  |  | ) { | 
| 1395 | 1 |  |  |  |  | 3 | $self->{zerostart} = 1; | 
| 1396 |  |  |  |  |  |  | } | 
| 1397 |  |  |  |  |  |  |  | 
| 1398 |  |  |  |  |  |  | # cache the indexes | 
| 1399 |  |  |  |  |  |  | $self->{column_indices} = { | 
| 1400 | 24 |  |  |  |  | 209 | 'name'      => $name, | 
| 1401 |  |  |  |  |  |  | 'type'      => $type, | 
| 1402 |  |  |  |  |  |  | 'id'        => $id, | 
| 1403 |  |  |  |  |  |  | 'seq_id'    => $chromo, | 
| 1404 |  |  |  |  |  |  | 'chromo'    => $chromo, | 
| 1405 |  |  |  |  |  |  | 'start'     => $start, | 
| 1406 |  |  |  |  |  |  | 'stop'      => $stop, | 
| 1407 |  |  |  |  |  |  | 'end'       => $stop, | 
| 1408 |  |  |  |  |  |  | 'strand'    => $strand, | 
| 1409 |  |  |  |  |  |  | 'score'     => $score, | 
| 1410 |  |  |  |  |  |  | 'coord'     => $coord, | 
| 1411 |  |  |  |  |  |  | }; | 
| 1412 | 24 |  |  |  |  | 40 | return 1; | 
| 1413 |  |  |  |  |  |  | } | 
| 1414 |  |  |  |  |  |  |  | 
| 1415 |  |  |  |  |  |  | sub chromo_column { | 
| 1416 | 26 |  |  | 26 | 1 | 36 | my $self = shift; | 
| 1417 | 26 | 100 |  |  |  | 102 | $self->_find_column_indices unless exists $self->{column_indices}; | 
| 1418 | 26 | 100 |  |  |  | 52 | if (defined $_[0]) { | 
| 1419 | 2 | 50 | 33 |  |  | 14 | if ($_[0] =~ /^\d+$/ and $_[0] < $self->{number_columns}) { | 
| 1420 | 2 |  |  |  |  | 3 | $self->{column_indices}{chromo} = $_[0]; | 
| 1421 |  |  |  |  |  |  | } | 
| 1422 |  |  |  |  |  |  | else { | 
| 1423 | 0 |  |  |  |  | 0 | carp sprintf("chromo_column %s is not a valid index!", $_[0]); | 
| 1424 |  |  |  |  |  |  | } | 
| 1425 |  |  |  |  |  |  | } | 
| 1426 | 26 |  |  |  |  | 103 | return $self->{column_indices}{chromo}; | 
| 1427 |  |  |  |  |  |  | } | 
| 1428 |  |  |  |  |  |  |  | 
| 1429 |  |  |  |  |  |  | sub start_column { | 
| 1430 | 117 |  |  | 117 | 1 | 131 | my $self = shift; | 
| 1431 | 117 | 100 |  |  |  | 173 | $self->_find_column_indices unless exists $self->{column_indices}; | 
| 1432 | 117 | 100 |  |  |  | 156 | if (defined $_[0]) { | 
| 1433 | 2 | 50 | 33 |  |  | 11 | if ($_[0] =~ /^\d+$/ and $_[0] < $self->{number_columns}) { | 
| 1434 | 2 |  |  |  |  | 4 | $self->{column_indices}{start} = $_[0]; | 
| 1435 |  |  |  |  |  |  | } | 
| 1436 |  |  |  |  |  |  | else { | 
| 1437 | 0 |  |  |  |  | 0 | carp sprintf("start_column %s is not a valid index!", $_[0]); | 
| 1438 |  |  |  |  |  |  | } | 
| 1439 |  |  |  |  |  |  | } | 
| 1440 | 117 |  |  |  |  | 226 | return $self->{column_indices}{start}; | 
| 1441 |  |  |  |  |  |  | } | 
| 1442 |  |  |  |  |  |  |  | 
| 1443 |  |  |  |  |  |  | sub stop_column { | 
| 1444 | 21 |  |  | 21 | 1 | 33 | my $self = shift; | 
| 1445 | 21 | 50 |  |  |  | 47 | $self->_find_column_indices unless exists $self->{column_indices}; | 
| 1446 | 21 | 100 |  |  |  | 40 | if (defined $_[0]) { | 
| 1447 | 2 | 50 | 33 |  |  | 11 | if ($_[0] =~ /^\d+$/ and $_[0] < $self->{number_columns}) { | 
| 1448 | 2 |  |  |  |  | 3 | $self->{column_indices}{stop} = $_[0]; | 
| 1449 |  |  |  |  |  |  | } | 
| 1450 |  |  |  |  |  |  | else { | 
| 1451 | 0 |  |  |  |  | 0 | carp sprintf("stop_column %s is not a valid index!", $_[0]); | 
| 1452 |  |  |  |  |  |  | } | 
| 1453 |  |  |  |  |  |  | } | 
| 1454 | 21 |  |  |  |  | 51 | return $self->{column_indices}{stop}; | 
| 1455 |  |  |  |  |  |  | } | 
| 1456 |  |  |  |  |  |  | *end_column = \&stop_column; | 
| 1457 |  |  |  |  |  |  |  | 
| 1458 |  |  |  |  |  |  | sub coord_column { | 
| 1459 | 109 |  |  | 109 | 0 | 105 | my $self = shift; | 
| 1460 | 109 | 50 |  |  |  | 169 | $self->_find_column_indices unless exists $self->{column_indices}; | 
| 1461 | 109 | 50 |  |  |  | 140 | if (defined $_[0]) { | 
| 1462 | 0 | 0 | 0 |  |  | 0 | if ($_[0] =~ /^\d+$/ and $_[0] < $self->{number_columns}) { | 
| 1463 | 0 |  |  |  |  | 0 | $self->{column_indices}{coord} = $_[0]; | 
| 1464 |  |  |  |  |  |  | } | 
| 1465 |  |  |  |  |  |  | else { | 
| 1466 | 0 |  |  |  |  | 0 | carp sprintf("coord_column %s is not a valid index!", $_[0]); | 
| 1467 |  |  |  |  |  |  | } | 
| 1468 |  |  |  |  |  |  | } | 
| 1469 | 109 |  |  |  |  | 162 | return $self->{column_indices}{coord}; | 
| 1470 |  |  |  |  |  |  | } | 
| 1471 |  |  |  |  |  |  |  | 
| 1472 |  |  |  |  |  |  | sub strand_column { | 
| 1473 | 20 |  |  | 20 | 1 | 32 | my $self = shift; | 
| 1474 | 20 | 100 |  |  |  | 55 | $self->_find_column_indices unless exists $self->{column_indices}; | 
| 1475 | 20 | 100 |  |  |  | 44 | if (defined $_[0]) { | 
| 1476 | 2 | 50 | 33 |  |  | 11 | if ($_[0] =~ /^\d+$/ and $_[0] < $self->{number_columns}) { | 
| 1477 | 2 |  |  |  |  | 4 | $self->{column_indices}{strand} = $_[0]; | 
| 1478 |  |  |  |  |  |  | } | 
| 1479 |  |  |  |  |  |  | else { | 
| 1480 | 0 |  |  |  |  | 0 | carp sprintf("strand_column %s is not a valid index!", $_[0]); | 
| 1481 |  |  |  |  |  |  | } | 
| 1482 |  |  |  |  |  |  | } | 
| 1483 | 20 |  |  |  |  | 64 | return $self->{column_indices}{strand}; | 
| 1484 |  |  |  |  |  |  | } | 
| 1485 |  |  |  |  |  |  |  | 
| 1486 |  |  |  |  |  |  | sub name_column { | 
| 1487 | 72 |  |  | 72 | 1 | 76 | my $self = shift; | 
| 1488 | 72 | 100 |  |  |  | 109 | $self->_find_column_indices unless exists $self->{column_indices}; | 
| 1489 | 72 | 50 |  |  |  | 107 | if (defined $_[0]) { | 
| 1490 | 0 | 0 | 0 |  |  | 0 | if ($_[0] =~ /^\d+$/ and $_[0] < $self->{number_columns}) { | 
| 1491 | 0 |  |  |  |  | 0 | $self->{column_indices}{name} = $_[0]; | 
| 1492 |  |  |  |  |  |  | } | 
| 1493 |  |  |  |  |  |  | else { | 
| 1494 | 0 |  |  |  |  | 0 | carp sprintf("name_column %s is not a valid index!", $_[0]); | 
| 1495 |  |  |  |  |  |  | } | 
| 1496 |  |  |  |  |  |  | } | 
| 1497 | 72 |  |  |  |  | 152 | return $self->{column_indices}{name}; | 
| 1498 |  |  |  |  |  |  | } | 
| 1499 |  |  |  |  |  |  |  | 
| 1500 |  |  |  |  |  |  | sub type_column { | 
| 1501 | 14 |  |  | 14 | 1 | 20 | my $self = shift; | 
| 1502 | 14 | 100 |  |  |  | 49 | $self->_find_column_indices unless exists $self->{column_indices}; | 
| 1503 | 14 | 100 |  |  |  | 26 | if (defined $_[0]) { | 
| 1504 | 2 | 50 | 33 |  |  | 12 | if ($_[0] =~ /^\d+$/ and $_[0] < $self->{number_columns}) { | 
| 1505 | 2 |  |  |  |  | 3 | $self->{column_indices}{type} = $_[0]; | 
| 1506 |  |  |  |  |  |  | } | 
| 1507 |  |  |  |  |  |  | else { | 
| 1508 | 0 |  |  |  |  | 0 | carp sprintf("type_column %s is not a valid index!", $_[0]); | 
| 1509 |  |  |  |  |  |  | } | 
| 1510 |  |  |  |  |  |  | } | 
| 1511 | 14 |  |  |  |  | 34 | return $self->{column_indices}{type}; | 
| 1512 |  |  |  |  |  |  | } | 
| 1513 |  |  |  |  |  |  |  | 
| 1514 |  |  |  |  |  |  | sub id_column { | 
| 1515 | 71 |  |  | 71 | 1 | 83 | my $self = shift; | 
| 1516 | 71 | 100 |  |  |  | 111 | $self->_find_column_indices unless exists $self->{column_indices}; | 
| 1517 | 71 | 50 |  |  |  | 103 | if (defined $_[0]) { | 
| 1518 | 0 | 0 | 0 |  |  | 0 | if ($_[0] =~ /^\d+$/ and $_[0] < $self->{number_columns}) { | 
| 1519 | 0 |  |  |  |  | 0 | $self->{column_indices}{id} = $_[0]; | 
| 1520 |  |  |  |  |  |  | } | 
| 1521 |  |  |  |  |  |  | else { | 
| 1522 | 0 |  |  |  |  | 0 | carp sprintf("id_column %s is not a valid index!", $_[0]); | 
| 1523 |  |  |  |  |  |  | } | 
| 1524 |  |  |  |  |  |  | } | 
| 1525 | 71 |  |  |  |  | 133 | return $self->{column_indices}{id}; | 
| 1526 |  |  |  |  |  |  | } | 
| 1527 |  |  |  |  |  |  |  | 
| 1528 |  |  |  |  |  |  | sub score_column { | 
| 1529 | 0 |  |  | 0 | 1 | 0 | my $self = shift; | 
| 1530 | 0 | 0 |  |  |  | 0 | $self->_find_column_indices unless exists $self->{column_indices}; | 
| 1531 | 0 | 0 |  |  |  | 0 | if (defined $_[0]) { | 
| 1532 | 0 | 0 | 0 |  |  | 0 | if ($_[0] =~ /^\d+$/ and $_[0] < $self->{number_columns}) { | 
| 1533 | 0 |  |  |  |  | 0 | $self->{column_indices}{score} = $_[0]; | 
| 1534 |  |  |  |  |  |  | } | 
| 1535 |  |  |  |  |  |  | else { | 
| 1536 | 0 |  |  |  |  | 0 | carp sprintf("score_column %s is not a valid index!", $_[0]); | 
| 1537 |  |  |  |  |  |  | } | 
| 1538 |  |  |  |  |  |  | } | 
| 1539 | 0 |  |  |  |  | 0 | return $self->{column_indices}{score}; | 
| 1540 |  |  |  |  |  |  | } | 
| 1541 |  |  |  |  |  |  |  | 
| 1542 |  |  |  |  |  |  | *zero_start = \&interbase; | 
| 1543 |  |  |  |  |  |  | sub interbase { | 
| 1544 | 90 |  |  | 90 | 1 | 97 | my $self = shift; | 
| 1545 | 90 | 50 |  |  |  | 116 | if (@_) { | 
| 1546 | 0 |  |  |  |  | 0 | my $i = $self->start_column; | 
| 1547 | 0 |  |  |  |  | 0 | my $n = $self->name($i); | 
| 1548 | 0 | 0 | 0 |  |  | 0 | if ($_[0] eq '1' and $n =~ /^start$/i) { | 
|  |  | 0 | 0 |  |  |  |  | 
| 1549 | 0 |  |  |  |  | 0 | $self->{zerostart} = 1; | 
| 1550 | 0 |  |  |  |  | 0 | $self->name($i, 'Start0'); | 
| 1551 |  |  |  |  |  |  | } | 
| 1552 |  |  |  |  |  |  | elsif ($_[0] eq '0' and $n =~ /^start0$/i) { | 
| 1553 | 0 |  |  |  |  | 0 | $self->{zerostart} = 0; | 
| 1554 | 0 |  |  |  |  | 0 | $self->name($i, 'Start'); | 
| 1555 |  |  |  |  |  |  | } | 
| 1556 |  |  |  |  |  |  | else { | 
| 1557 | 0 |  |  |  |  | 0 | carp "use 1 (true) or 0 (false) to set interbase mode"; | 
| 1558 |  |  |  |  |  |  | } | 
| 1559 |  |  |  |  |  |  | } | 
| 1560 | 90 |  |  |  |  | 157 | return $self->{zerostart}; | 
| 1561 |  |  |  |  |  |  | } | 
| 1562 |  |  |  |  |  |  |  | 
| 1563 |  |  |  |  |  |  | #### Special Row methods #### | 
| 1564 |  |  |  |  |  |  |  | 
| 1565 |  |  |  |  |  |  | # Why is this in core and not in Data? I keep asking myself. | 
| 1566 |  |  |  |  |  |  | # Because this can get called from a Data::Feature object, which might be | 
| 1567 |  |  |  |  |  |  | # associated with a Data::Stream object. No, Stream objects don't have stored | 
| 1568 |  |  |  |  |  |  | # SeqFeatures, but I don't want the entire program to crash because of an | 
| 1569 |  |  |  |  |  |  | # undefined method because some doofus forgot. Since both Data and Stream | 
| 1570 |  |  |  |  |  |  | # objects inherit from Data::core, this is in here. | 
| 1571 |  |  |  |  |  |  | sub get_seqfeature { | 
| 1572 | 13 |  |  | 13 | 1 | 34 | my ($self, $row) = @_; | 
| 1573 | 13 | 50 |  |  |  | 51 | return unless (ref($self) eq 'Bio::ToolBox::Data'); | 
| 1574 | 13 | 50 | 33 |  |  | 78 | return unless ($row and $row <= $self->{last_row}); | 
| 1575 | 13 | 50 |  |  |  | 40 | return unless exists $self->{SeqFeatureObjects}; | 
| 1576 | 13 |  | 50 |  |  | 48 | return $self->{SeqFeatureObjects}->[$row] || undef; | 
| 1577 |  |  |  |  |  |  | } | 
| 1578 |  |  |  |  |  |  |  | 
| 1579 |  |  |  |  |  |  |  | 
| 1580 |  |  |  |  |  |  |  | 
| 1581 |  |  |  |  |  |  | __END__ |