|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package CracTools::Utils;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $CracTools::Utils::DIST = 'CracTools';  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ABSTRACT: A set of useful functions  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $CracTools::Utils::VERSION = '1.25';  | 
| 
7
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
 
 | 
13658
 | 
 use strict;  | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
173
 | 
    | 
| 
8
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
 
 | 
24
 | 
 use warnings;  | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
150
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
10
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
 
 | 
22
 | 
 use Carp;  | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
395
 | 
    | 
| 
11
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
 
 | 
27
 | 
 use Fcntl qw( SEEK_SET );  | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21024
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub reverseComplement($) {  | 
| 
15
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
6
 | 
   my $dna = shift;  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # reverse the DNA sequence  | 
| 
18
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
   my $revcomp = reverse $dna;  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # complement the reversed DNA sequence  | 
| 
21
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
   $revcomp =~ tr/ACGTacgt/TGCAtgca/;  | 
| 
22
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
   return $revcomp;  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub reverse_tab($) {  | 
| 
27
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
2
 | 
   my $string = shift;  | 
| 
28
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
   my @tab = split(/,/,$string);  | 
| 
29
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
   my $newString;  | 
| 
30
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   if(@tab > 0) {  | 
| 
31
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     for (my $i=$#tab ; $i > 0 ; $i--){  | 
| 
32
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
       $newString .= $tab[$i];  | 
| 
33
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
       $newString .= ",";  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
35
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     $newString .= $tab[0];  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
37
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   return $newString;  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub isVersionGreaterOrEqual($$) {  | 
| 
42
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
   my ($v1,$v2) = @_;  | 
| 
43
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my @v1_nums = split(/\./,$v1);  | 
| 
44
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my @v2_nums = split(/\./,$v2);  | 
| 
45
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   for(my $i = 0; $i < @v1_nums; $i++) {  | 
| 
46
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if($v1_nums[$i] >= $v2_nums[$i]) {  | 
| 
47
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return 1;  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }else {  | 
| 
49
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return 0;  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
52
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   if(scalar @v2_nums > @v1_nums) {  | 
| 
53
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return 0;  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
55
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return 1;  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my %conversion_hash = ( '+' => 1, '-' => '-1', '1' => '+', '-1' => '-');  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub convertStrand($) {  | 
| 
62
 | 
50
 | 
 
 | 
 
 | 
  
50
  
 | 
  
1
  
 | 
43
 | 
   my $strand = shift;  | 
| 
63
 | 
50
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
143
 | 
   return defined $strand? $conversion_hash{$strand} : undef;  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub removeChrPrefix($) {  | 
| 
67
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
   my $string = shift;  | 
| 
68
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   $string =~ s/^chr//i;  | 
| 
69
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   return $string;  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub addChrPrefix($) {  | 
| 
73
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
   my $string = shift;  | 
| 
74
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   return "chr".removeChrPrefix($string);  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $Base64_BITNESS = 6;  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @Base64_ENCODING = qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z 0 1 2 3 4 5 6 7 8 9 + /);  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Encode error list into base64  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub encodePosListToBase64 {  | 
| 
83
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
2533
 | 
   my @pos_list = @_;  | 
| 
84
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
   my @bitList;  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $encoded_str;  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
87
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
   return "" if @pos_list == 0;  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Compute the appropriate length for the bitList  | 
| 
90
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
   my $length = $pos_list[-1] + 1;  | 
| 
91
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   my $bitList_length = int($length / $Base64_BITNESS);  | 
| 
92
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   if ($length % $Base64_BITNESS > 0) {  | 
| 
93
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
       $bitList_length++;  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Put 0 at all position  | 
| 
97
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
   for (my $i = 0; $i < $bitList_length; $i++) {  | 
| 
98
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     $bitList[$i] = 0;  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Create bit vector in Base64_ENCODING  | 
| 
102
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
   foreach my $j (@pos_list) {  | 
| 
103
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     $bitList[int($j / $Base64_BITNESS)] |= (1 << ($j % $Base64_BITNESS));  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Convert bitList into string_Base64_ENCODING  | 
| 
107
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   for(my $k=0; $k < $bitList_length; $k++) {  | 
| 
108
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     $encoded_str .= scalar($Base64_ENCODING[$bitList[$k]]);  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
110
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
   return $encoded_str;  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Decode base 64 error list  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub decodePosListInBase64 {  | 
| 
116
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
4
 | 
   my $encoded_str = shift;  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
118
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   my @encoded_list = split "", $encoded_str;  | 
| 
119
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
   my (@index,@decoded_list);  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #Looking for index of each char  | 
| 
122
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
   foreach my $j (@encoded_list) {  | 
| 
123
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     my @ind = grep { $Base64_ENCODING[$_] eq $j } 0 .. $#Base64_ENCODING;  | 
| 
 
 | 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
279
 | 
    | 
| 
124
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     push(@index,$ind[0]);  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Convert into list position  | 
| 
128
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
   for (my $e = 0; $e < (0+@encoded_list);$e++) {  | 
| 
129
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     for(my $i=0; $i<$Base64_BITNESS; $i++) {  | 
| 
130
 | 
36
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
58
 | 
       if ($index[$e] & (1 << $i)) {  | 
| 
131
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
         my $pos = (int($i+$Base64_BITNESS*$e));  | 
| 
132
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
         push(@decoded_list,$pos);  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
137
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   return @decoded_list;  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub seqFileIterator {  | 
| 
144
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
2723
 | 
   my ($file,$format) = @_;  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
146
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
   croak "Missing file in argument of seqFileIterator" if !defined $file;  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Get file handle for $file  | 
| 
149
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
   my $fh = getReadingFileHandle($file);  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Automatic file extension detection  | 
| 
152
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
   if(!defined $format) {  | 
| 
153
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     if($file =~ /\.(fasta|fa)(\.|$)/) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
154
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
       $format = 'fasta';  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif($file =~ /\.(fastq|fq)(\.|$)/) {  | 
| 
156
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
       $format = 'fastq';  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
158
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       croak "Undefined file extension";  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
161
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $format = lc $format;  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # FASTA ITERATOR  | 
| 
165
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
   if ($format eq 'fasta') {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Read prev line for FASTA because we dont know the number  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # of line used for the sequence  | 
| 
168
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     my $prev_line = <$fh>;  | 
| 
169
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     chomp $prev_line;  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return sub {  | 
| 
171
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
3711
 | 
       my ($name,$seq,$qual);  | 
| 
172
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
       if(defined $prev_line) {  | 
| 
173
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
         ($name) = $prev_line =~ />(.*)$/;  | 
| 
174
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
         $prev_line = <$fh>;  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Until we find a new sequence identifier ">", we  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # concatenate the lines corresponding to the sequence  | 
| 
177
 | 
5
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
24
 | 
         while(defined $prev_line && $prev_line !~ /^>/) {  | 
| 
178
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
           chomp $prev_line;  | 
| 
179
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
           $seq .= $prev_line;  | 
| 
180
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
           $prev_line = <$fh>;  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
182
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
         return {name => $name, seq => $seq, qual => $qual};  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
184
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
         return undef;  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
186
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     };  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # FASTQ ITERATOR  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif ($format eq 'fastq') {  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return sub {  | 
| 
190
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
1398
 | 
       my ($name,$seq,$qual);  | 
| 
191
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
       my $line = <$fh>;  | 
| 
192
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
28
 | 
       ($name) = $line =~ /@(.*)$/ if defined $line;  | 
| 
193
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
       if(defined $name) {  | 
| 
194
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
         $seq = <$fh>;  | 
| 
195
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
         chomp $seq;  | 
| 
196
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
         <$fh>; # skip second seq name (useless line)  | 
| 
197
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
         $qual = <$fh>;  | 
| 
198
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
         chomp $qual;  | 
| 
199
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
         return {name => $name, seq => $seq, qual => $qual};  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
201
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
         return undef;  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
203
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     };  | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
205
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     croak "Undefined file format";  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub pairedEndSeqFileIterator {  | 
| 
211
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
1355
 | 
   my($file1,$file2,$format) = @_;  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
213
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
   my $it1 = seqFileIterator($file1,$format);  | 
| 
214
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
   my $it2 = seqFileIterator($file2,$format);  | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return sub {  | 
| 
217
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
1491
 | 
     my $entry1 = $it1->();  | 
| 
218
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     my $entry2 = $it2->();  | 
| 
219
 | 
2
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
9
 | 
     if(defined $entry1 && defined $entry2) {  | 
| 
220
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
       return { read1 => $entry1, read2 => $entry2 };  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
222
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return undef;  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
224
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   };  | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub writeSeq {  | 
| 
229
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
   my ($fh,$format,$name,$seq,$qual) = @_;  | 
| 
230
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   if($format eq 'fasta') {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
231
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     print $fh ">$name\n$seq\n";  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif($format eq 'fastq') {  | 
| 
233
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     print $fh '@'."$name\n$seq\n+\n$qual\n";  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
235
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     die "Unknown file format. Must be either a FASTA or a FASTQ";  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub bedFileIterator {  | 
| 
241
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
3017
 | 
   return getFileIterator(file => shift, type => 'bed');  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub gffFileIterator {  | 
| 
246
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
2413
 | 
   my $file = shift;  | 
| 
247
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
   my $type = shift;  | 
| 
248
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
   return getFileIterator(file => $file, type => $type);  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub vcfFileIterator {  | 
| 
253
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
4179
 | 
   my $file = shift;  | 
| 
254
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   return getFileIterator(file => $file, type => 'vcf');  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub chimCTFileIterator {  | 
| 
259
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
   return getFileIterator(file => shift, type => 'chimCT');  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub bamFileIterator {  | 
| 
264
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
   my ($file,$region) = @_;  | 
| 
265
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   $region = "" if !defined $region;  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
267
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my $fh;  | 
| 
268
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   if($file =~ /\.bam$/) {  | 
| 
269
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     open($fh, "-|", "samtools view $file $region" )or die "Cannot open $file, check if samtools are installed.";  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
271
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $fh = getReadingFileHandle($file);  | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return sub {  | 
| 
275
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
     return <$fh>;  | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
278
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 }  | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getSeqFromIndexedRef {  | 
| 
282
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
   my ($ref_file,$chr,$pos,$length,$format) = @_;  | 
| 
283
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   $format = 'fasta' if !defined $format;  | 
| 
284
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   $pos++; # +1 because samtools faidx is 1-based  | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # First we try without the "chr" prefix  | 
| 
286
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my $region = removeChrPrefix($chr).":$pos-".($pos+$length-1);  | 
| 
287
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my $fasta_seq = `samtools faidx $ref_file "$region"`;  | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # If it has not worked, we try without  | 
| 
289
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   if($fasta_seq !~ /^>\S+\n\S+/) {  | 
| 
290
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $region = addChrPrefix($chr).":$pos-".($pos+$length-1);  | 
| 
291
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $fasta_seq = `samtools faidx $ref_file "$region"`;  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
293
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   if($format eq 'raw') {  | 
| 
294
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my ($seq) = $fasta_seq =~ /^>\S+\n(\S+)/;  | 
| 
295
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $seq;  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
297
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   return $fasta_seq;  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parseBedLine {  | 
| 
302
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
2
 | 
   my $line = shift;  | 
| 
303
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
   my %args = @_;  | 
| 
304
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
   my($chr,$start,$end,$name,$score,$strand,$thick_start,$thick_end,$rgb,$block_count,$block_size,$block_starts) = split("\t",$line);  | 
| 
305
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
   my @blocks;  | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Manage blocks if we have a bed12  | 
| 
308
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
   if(defined $block_starts) {  | 
| 
309
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my @block_size = split(",",$block_size);  | 
| 
310
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my @block_starts = split(",",$block_starts);  | 
| 
311
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     my $cumulated_block_size = 0;  | 
| 
312
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     for(my $i = 0; $i < $block_count; $i++) {  | 
| 
313
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
       push(@blocks,{size        => $block_size[$i],  | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                    start        => $block_starts[$i],  | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                    end          => $block_starts[$i] + $block_size[$i],  | 
| 
316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                    block_start  => $cumulated_block_size,  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                    block_end    => $cumulated_block_size + $block_size[$i],  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                    ref_start    => $block_starts[$i] + $start,  | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                    ref_end      => $block_starts[$i] + $start + $block_size[$i],  | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  });  | 
| 
321
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
       $cumulated_block_size += $block_size[$i];  | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
325
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
   return { chr        => $chr,  | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     start       => $start,  | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     end         => $end,  | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     name        => $name,  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     score       => $score,  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     strand      => $strand,  | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     thick_start => $thick_start,  | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     thick_end   => $thick_end,  | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     rgb         => $rgb,  | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     blocks      => \@blocks,  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   };  | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parseGFFLine {  | 
| 
340
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
2
 | 
   my $line = shift;  | 
| 
341
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
   my $type = shift;  | 
| 
342
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
   my $attribute_split;  | 
| 
343
 | 
1
 | 
  
 50
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
3
 | 
   if($type =~ /gff3/i) {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
344
 | 
1
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
3
 | 
     $attribute_split = sub {my $attr = shift; return $attr =~ /(\S+)=(.*)/;};  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
    | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } elsif ($type eq 'gtf' || $type eq 'gff2') {  | 
| 
346
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
     $attribute_split = sub {my $attr = shift; return $attr  =~ /(\S+)\s+"(.*)"/;};  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
348
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     croak "Undefined GFF format (must be either gff3,gtf, or gff2)";  | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
350
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
   my($chr,$source,$feature,$start,$end,$score,$strand,$frame,$attributes) = split("\t",$line);  | 
| 
351
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   my @attributes_tab = split(";",$attributes);  | 
| 
352
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
   my %attributes_hash;  | 
| 
353
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
   foreach my $attr (@attributes_tab) {  | 
| 
354
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     my ($k,$v) = $attribute_split->($attr);  | 
| 
355
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     $attributes_hash{$k} = $v;  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
357
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
   return { chr        => $chr,  | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     source     => $source,  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     feature    => $feature,  | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     start      => $start,  | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     end        => $end,  | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     score      => $score,  | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     strand     => $strand,  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     frame      => $frame,  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     attributes => \%attributes_hash,  | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   };  | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parseVCFLine {  | 
| 
371
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
3
 | 
   my $line = shift;  | 
| 
372
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
   my($chr,$pos,$id,$ref,$alt,$qual,$filter,$info) = split("\t",$line);  | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
374
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
   my @alts = defined $alt? split(",",$alt) : ();  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my %infos = defined $info?  | 
| 
376
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   map { my ($k,$v) = split '='; $k => $v } split ';', $info : ();  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
378
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
   return { chr => $chr,  | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     pos     => $pos,  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     id      => $id,  | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ref     => $ref,  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     alt     => \@alts,  | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     qual    => $qual,  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     filter  => $filter,  | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     info    => \%infos,  | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   };  | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parseChimCTLine {  | 
| 
391
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
   my $line = shift;  | 
| 
392
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my($id,$name,$chr1,$pos1,$strand1,$chr2,$pos2,$strand2,$chim_value,$spanning_junction,$spanning_PE,$class,$comments,@others) = split("\t",$line);  | 
| 
393
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my($sample,$chim_key) = split(":",$id);  | 
| 
394
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my @comments = split(",",$comments);  | 
| 
395
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my %comments;  | 
| 
396
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   foreach my $com (@comments){  | 
| 
397
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       my ($key,$val) = split("=",$com);  | 
| 
398
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $comments{$key} = $val;  | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
400
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   my %extend_fields;  | 
| 
401
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   foreach my $field (@others) {  | 
| 
402
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my ($key,$val) = split("=",$field);  | 
| 
403
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
     if(defined $key && defined $val) {  | 
| 
404
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $extend_fields{$key} = $val;  | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return {  | 
| 
408
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     sample            => $sample,  | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     chim_key          => $chim_key,  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     name              => $name,  | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     chr1              => $chr1,  | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     pos1              => $pos1,  | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     strand1           => $strand1,  | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     chr2              => $chr2,  | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     pos2              => $pos2,  | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     strand2           => $strand2,  | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     chim_value        => $chim_value,  | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     spanning_junction => $spanning_junction,  | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     spanning_PE       => $spanning_PE,  | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     class             => $class,  | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     comments          => \%comments,  | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     extended_fields     => \%extend_fields,  | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   };  | 
| 
424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parseSAMLineLite {  | 
| 
428
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
2803
 | 
   my $line = shift;  | 
| 
429
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
   my ($qname,$flag,$rname,$pos,$mapq,$cigar,$rnext,$pnext,$tlen,$seq,$qual,@others) = split("\t",$line);  | 
| 
430
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
   my @cigar_hash = map { { op => substr($_,-1), nb => substr($_,0,length($_)-1)} } $cigar =~ /(\d+\D)/g;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return {  | 
| 
432
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     qname => $qname,  | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     flag => $flag,  | 
| 
434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     rname => $rname,  | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     pos => $pos,  | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     mapq => $mapq,  | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     cigar => \@cigar_hash,  | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     original_cigar => $cigar,  | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     rnext => $rnext,  | 
| 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     pnext => $pnext,  | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     tlen => $tlen,  | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     seq => $seq,  | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     qual => $qual,  | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     extended_fields => \@others,  | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   };  | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parseCigarChain {  | 
| 
450
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
8
 | 
   my $cigar_string = shift;  | 
| 
451
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
   my @cigar;  | 
| 
452
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   while($cigar_string =~ /(\d+)(\S)/g) {  | 
| 
453
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     push @cigar, { op => $2, nb => $1 };  | 
| 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
455
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
   return \@cigar;  | 
| 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getFileIterator {  | 
| 
461
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
12
 | 
   my %args = @_;  | 
| 
462
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
   my $file = $args{file};  | 
| 
463
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
   my $type = $args{type};  | 
| 
464
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   my $skip = $args{skip};  | 
| 
465
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
   my $header_regex = $args{header_regex};  | 
| 
466
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
   my $parsing_method = $args{parsing_method};  | 
| 
467
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
   my @parsing_arguments = ();  | 
| 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
470
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
13
 | 
   croak "Missing arguments in getFileIterator" if !defined $file;  | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
472
 | 
4
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
19
 | 
   if(!defined $parsing_method && defined $type) {  | 
| 
473
 | 
3
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
31
 | 
     if($type =~ /gff3/i || $type eq 'gtf' || $type eq 'gff2') {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
474
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
       $header_regex = '^#';  | 
| 
475
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
4
 | 
       $parsing_method = sub { return parseGFFLine(@_,$type) };  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
476
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
       push (@parsing_arguments,$type);  | 
| 
477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif($type =~ /bed/i) {  | 
| 
478
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
       $header_regex = '(^track\s)|(^#)';  | 
| 
479
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
       $parsing_method = \&parseBedLine;  | 
| 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($type =~ /vcf/i) {  | 
| 
481
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
       $header_regex = '^#';  | 
| 
482
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
       $parsing_method = \&parseVCFLine;  | 
| 
483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($type =~ /chimCT/i) {  | 
| 
484
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $header_regex = '^#';  | 
| 
485
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $parsing_method = \&parseChimCTLine;  | 
| 
486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($type =~ /SAM/i || $type =~ /BAM/i) {  | 
| 
487
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $header_regex = '^@';  | 
| 
488
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $parsing_method = \&parseSAMLineLite;  | 
| 
489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
490
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       croak "Undefined format type";  | 
| 
491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # set defaults  | 
| 
495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #$header_regex = '^#' if !defined $header_regex;  | 
| 
496
 | 
4
 | 
  
 50
  
 | 
 
 | 
  
0
  
 | 
 
 | 
11
 | 
   $parsing_method = sub{ return { line => shift } } if !defined $parsing_method;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # We get a filehandle on the file  | 
| 
499
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
   my $fh = getReadingFileHandle($file);  | 
| 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # $curr_pos will hold the SEEK_POS of the current_line  | 
| 
502
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
   my $curr_pos = tell($fh);  | 
| 
503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # $line will hold the content of the current_line  | 
| 
504
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
   my $line = <$fh>;  | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # if we need to skip lines  | 
| 
508
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
10
 | 
   if(defined $skip) {  | 
| 
509
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     for(my $i = 0; $i < $skip; $i++) {  | 
| 
510
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $curr_pos = tell($fh);  | 
| 
511
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       $line = <$fh>;  | 
| 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Skip line that match a specific regex  | 
| 
516
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
10
 | 
   if(defined $header_regex) {  | 
| 
517
 | 
4
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
63
 | 
     while($line && $line =~ /$header_regex/) {  | 
| 
518
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
       $curr_pos = tell($fh);  | 
| 
519
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
77
 | 
       $line = <$fh>;  | 
| 
520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # The iterator itself (an unnamed subroutine  | 
| 
524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return sub {  | 
| 
525
 | 
35
 | 
  
100
  
 | 
 
 | 
  
35
  
 | 
 
 | 
55
 | 
     if (defined $line) {  | 
| 
526
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
       chomp $line;  | 
| 
527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       # We parse the line with the appropriate methd  | 
| 
528
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
54
 | 
       my $parsed_line = $parsing_method->($line,@parsing_arguments);  | 
| 
529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       # Add seek pos to the output hashref  | 
| 
530
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
       $parsed_line->{seek_pos} = $curr_pos;  | 
| 
531
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
       $parsed_line->{_original_line} = $line;  | 
| 
532
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
       $curr_pos = tell($fh);  | 
| 
533
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
       $line = <$fh>; # Get next line  | 
| 
534
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
84
 | 
       return $parsed_line;  | 
| 
535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
536
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
       return undef;  | 
| 
537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
538
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
   };  | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getReadingFileHandle {  | 
| 
543
 | 
10
 | 
 
 | 
 
 | 
  
10
  
 | 
  
1
  
 | 
10
 | 
   my $file = shift;  | 
| 
544
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
   my $fh;  | 
| 
545
 | 
10
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
28
 | 
   if($file =~ /\.gz$/) {  | 
| 
546
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     open($fh,"gunzip -c $file |") or die ("Cannot open $file");  | 
| 
547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
548
 | 
10
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
120
 | 
     open($fh,"< $file") or die ("Cannot open $file");  | 
| 
549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
550
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
273
 | 
   return $fh;  | 
| 
551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getWritingFileHandle {  | 
| 
555
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
   my $file = shift;  | 
| 
556
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $fh;  | 
| 
557
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if($file =~ /\.gz$/) {  | 
| 
558
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     open($fh,"| gzip > $file") or die ("Cannot open $file");  | 
| 
559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
560
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     open($fh,"> $file") or die ("Cannot open $file");  | 
| 
561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
562
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return $fh;  | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getLineFromSeekPos {  | 
| 
567
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
   my($fh,$seek_pos) = @_;  | 
| 
568
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   seek($fh,$seek_pos,SEEK_SET);  | 
| 
569
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $line = <$fh>;  | 
| 
570
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   chomp($line);  | 
| 
571
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return $line;  | 
| 
572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |