line
stmt
bran
cond
sub
pod
time
code
1
# BioPerl module for Bio::Draw::Pictogram
2
#
3
# Please direct questions and support issues to
4
#
5
# Cared for by Shawn Hoon
6
#
7
# Copyright Shawn Hoon
8
#
9
# You may distribute this module under the same terms as perl itself
10
11
# POD documentation - main docs before the code
12
13
=head1 NAME
14
15
Bio::Draw::Pictogram - generate SVG output of Pictogram display for consensus motifs
16
17
=head1 SYNOPSIS
18
19
use Bio::Draw::Pictogram;
20
use Bio::SeqIO;
21
22
my $sio = Bio::SeqIO->new(-file=>$ARGV[0],-format=>'fasta');
23
my @seq;
24
while(my $seq = $sio->next_seq){
25
push @seq, $seq;
26
}
27
28
my $picto = Bio::Draw::Pictogram->new(-width=>"800",
29
-height=>"500",
30
-fontsize=>"60",
31
-plot_bits=>1,
32
-background=>{
33
'A'=>0.25,
34
'C'=>0.18,
35
'T'=>0.32,
36
'G'=>0.25},
37
-color=>{'A'=>'red',
38
'G'=>'blue',
39
'C'=>'green',
40
'T'=>'magenta'});
41
42
my $svg = $picto->make_svg(\@seq);
43
44
print $svg->xmlify."\n";
45
46
#Support for Bio::Matrix::PSM::SiteMatrix now included
47
48
use Bio::Matrix::PSM::IO;
49
50
my $picto = Bio::Draw::Pictogram->new(-width=>"800",
51
-height=>"500",
52
-fontsize=>"60",
53
-plot_bits=>1,
54
-background=>{
55
'A'=>0.25,
56
'C'=>0.18,
57
'T'=>0.32,
58
'G'=>0.25},
59
-color=>{'A'=>'red',
60
'G'=>'blue',
61
'C'=>'green',
62
'T'=>'magenta'});
63
64
my $psm = $psmIO->next_psm;
65
my $svg = $picto->make_svg($psm);
66
print $svg->xmlify;
67
68
=head1 DESCRIPTION
69
70
A module for generating SVG output of Pictogram display for consensus
71
motifs. This method of representation was describe by Burge and
72
colleagues: (Burge, C.B.,Tuschl, T., Sharp, P.A. in The RNA world II,
73
525-560, CSHL press, 1999)
74
75
This is a simple module that takes in an array of sequences (assuming
76
equal lengths) and calculates relative base frequencies where the
77
height of each letter reflects the frequency of each nucleotide at a
78
given position. It can also plot the information content at each
79
position scaled by the background frequencies of each nucleotide.
80
81
It requires the SVG-2.26 or later module by Ronan Oger available at
82
http://www.cpan.org
83
84
Recommended viewing of the SVG is the plugin available at Adobe:
85
http://www.adobe.com/svg
86
87
=head1 FEEDBACK
88
89
90
=head2 Mailing Lists
91
92
User feedback is an integral part of the evolution of this and other
93
Bioperl modules. Send your comments and suggestions preferably to one
94
of the Bioperl mailing lists. Your participation is much appreciated.
95
96
bioperl-l@bioperl.org - General discussion
97
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
98
99
=head2 Support
100
101
Please direct usage questions or support issues to the mailing list:
102
103
I
104
105
rather than to the module maintainer directly. Many experienced and
106
reponsive experts will be able look at the problem and quickly
107
address it. Please include a thorough description of the problem
108
with code and data examples if at all possible.
109
110
=head2 Reporting Bugs
111
112
Report bugs to the Bioperl bug tracking system to help us keep track
113
the bugs and their resolution. Bug reports can be submitted via the
114
web:
115
116
https://github.com/bioperl/bioperl-live/issues
117
118
=head1 AUTHOR - Shawn Hoon
119
120
Email shawnh@fugu-sg.org
121
122
=head1 APPENDIX
123
124
The rest of the documentation details each of the object
125
methods. Internal methods are usually preceded with a "_".
126
127
=cut
128
129
package Bio::Draw::Pictogram;
130
1
1
423
use strict;
1
2
1
36
131
1
1
3
use SVG 2.26;
1
28
1
5
132
1
1
923
use Bio::SeqIO;
1
3
1
30
133
1
1
4
use base qw(Bio::Root::Root);
1
1
1
59
134
135
1
1
4
use constant MAXBITS => 2;
1
2
1
1282
136
137
=head2 new
138
139
Title : new
140
Usage : my $picto = Bio::Draw::Pictogram->new(-width=>"800",
141
-height=>"500",
142
-fontsize=>"60",
143
-plot_bits=>1,
144
-background=>{
145
'A'=>0.25,
146
'C'=>0.18,
147
'T'=>0.32,
148
'G'=>0.25},
149
-color=>{'A'=>'red',
150
'G'=>'blue',
151
'C'=>'green',
152
'T'=>'magenta'});
153
Function: Constructor for Pictogram Object
154
Returns : L
155
156
=cut
157
158
sub new {
159
2
2
1
35
my ($caller,@args) = @_;
160
2
12
my $self = $caller->SUPER::new(@args);
161
2
12
my ($width,$height,$fontsize,$color,$background,$bit,$normalize) = $self->_rearrange([qw(WIDTH HEIGHT FONTSIZE COLOR BACKGROUND PLOT_BITS NORMALIZE)],@args);
162
2
50
7
$width||=800;
163
2
50
8
$height||=600;
164
2
16
my $svg = SVG->new(width=>$width,height=>$height);
165
2
465
$self->svg_obj($svg);
166
2
50
5
$fontsize ||= 80;
167
2
50
10
$self->fontsize($fontsize) if $fontsize;
168
2
50
4
$color = $color || {'T'=>'black','C'=>'blue','G'=>'green','A'=>'red'};
169
2
5
$self->color($color);
170
2
50
12
$background = $background || {'T'=>0.25,'C'=>0.25,'G'=>0.25,'A'=>0.25};
171
2
7
$self->background($background);
172
2
50
9
$self->plot_bits($bit) if $bit;
173
2
100
5
$self->normalize($normalize) if $normalize;
174
175
2
7
return $self;
176
}
177
178
=head2 make_svg
179
180
Title : make_svg
181
Usage : $picto->make_svg();
182
Function: make the SVG object
183
Returns : L
184
Arguments: A fasta file or array ref of L objects or a L
185
186
=cut
187
188
sub make_svg {
189
2
2
1
453
my ($self,$input) = @_;
190
2
5
my $fontsize = $self->fontsize;
191
2
7
my $size = $fontsize * 0.75;
192
2
2
my $width= $size;
193
2
4
my $height= $size+40;
194
2
3
my $color = $self->color;
195
196
#starting x coordinate for pictogram
197
2
3
my $x = 45+$size/2;
198
2
3
my $pos_y = $size * 2;
199
2
4
my $bit_y = $pos_y+40;
200
2
2
my @pwm;
201
202
2
2
my $bp = 1;
203
204
#input can be file or array ref of sequences
205
2
100
33
18
if(ref($input) eq 'ARRAY'){
50
206
1
1
@pwm = @{$self->_make_pwm($input)};
1
4
207
}
208
elsif(ref($input) && $input->isa("Bio::Matrix::PSM::SiteMatrixI")){
209
1
6
@pwm = $self->_make_pwm_from_site_matrix($input);
210
}
211
else {
212
0
0
my $sio = Bio::SeqIO->new(-file=>$input,-format=>"fasta");
213
0
0
my @seq;
214
0
0
while (my $seq = $sio->next_seq){
215
0
0
push @seq, $seq;
216
}
217
0
0
@pwm = @{$self->_make_pwm(\@seq)};
0
0
218
}
219
220
221
2
5
my $svg = $self->svg_obj;
222
2
6
my $seq_length = scalar(@pwm + 1) * $width + $x + $x;
223
2
2
my $seq_grp;
224
225
#scale the svg if length greater than svg width
226
2
100
10
if($seq_length > $svg->{-document}->{'width'}){
227
1
4
my $ratio = $svg->{-document}->{'width'}/($seq_length);
228
1
10
$seq_grp = $svg->group(transform=>"scale($ratio,1)");
229
}
230
else {
231
1
10
$seq_grp= $svg->group();
232
}
233
234
#do the drawing, each set is a base position
235
2
86
foreach my $set(@pwm){
236
34
50
my ($A,$C,$G,$T,$bits) = @$set;
237
34
27
my @array;
238
34
43
push @array, ['a',($A)];
239
34
37
push @array, ['g',($G)];
240
34
34
push @array, ['c',($C)];
241
34
33
push @array, ['t',($T)];
242
34
65
@array = sort {$b->[1]<=>$a->[1]}@array;
158
136
243
34
27
my $count = 1;
244
34
73
my $pos_group = $seq_grp->group(id=>"bp $bp");
245
34
1059
my $prev_size;
246
my $y_trans;
247
248
#draw each letter at each position
249
34
33
foreach my $letter(@array){
250
136
78
my $scale;
251
136
100
145
if($self->normalize){
252
100
80
$scale = $letter->[1];
253
} else {
254
36
38
$scale = $letter->[1] * ($bits / MAXBITS);
255
}
256
257
136
100
147
if($count == 1){
258
34
100
35
if($self->normalize){
259
25
22
$y_trans = 0;
260
} else {
261
9
11
$y_trans = (1 - ($bits / MAXBITS)) * $size;
262
}
263
}
264
else {
265
102
82
$y_trans += $prev_size;
266
}
267
$pos_group->text('id'=> uc($letter->[0]).$bp,height=>$height,
268
'width'=>$width,x=>$x,y=>$size,
269
'transform'=>"translate(0,$y_trans),scale(1,$scale)",
270
'style'=>{"font-size"=>$fontsize,
271
'text-anchor'=>'middle',
272
'font-family'=>'Verdana',
273
136
100
1011
'fill'=>$color->{uc $letter->[0]}})->cdata(uc $letter->[0]) if $scale > 0;
274
275
136
6261
$prev_size = $scale * $size;
276
136
131
$count++;
277
}
278
#plot the bit if required
279
34
50
47
if($self->plot_bits){
280
34
92
$seq_grp->text('x'=>$x,
281
'y'=>$bit_y,
282
'style'=>{"font-size"=>'10',
283
'text-anchor'=>'middle',
284
'font-family'=>'Verdana',
285
'fill'=>'black'})->cdata($bits);
286
}
287
34
1235
$bp++;
288
34
67
$x+=$width;
289
}
290
291
#plot the tags
292
2
50
5
$seq_grp->text(x=>int($width/2),y=>$bit_y,style=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Bits:") if $self->plot_bits;
293
294
2
78
$seq_grp->text(x=>int($width/2),y=>$pos_y,style=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Position:");
295
296
#plot the base positions
297
2
82
$x = 45+$size/2-int($width/2);
298
2
8
foreach my $nbr(1..($bp-1)){
299
34
90
$seq_grp->text(x=>$x+int($width/2),y=>$pos_y,style=>{"font-size"=>'10','text-anchor'=>'left','font-family'=>'Verdana','fill'=>'black'})->cdata($nbr);
300
34
1113
$x+=$width;
301
}
302
303
304
# $seq_grp->transform("scale(2,2)");
305
306
2
6
return $self->svg_obj($svg);
307
}
308
309
sub _make_pwm_from_site_matrix{
310
1
1
2
my ($self,$matrix) = @_;
311
1
3
my $bgd = $self->background;
312
1
2
my @pwm;
313
1
6
my $consensus = $matrix->consensus;
314
1
4
foreach my $i(1..length($consensus)){
315
25
42
my %base = $matrix->next_pos;
316
25
28
my $bits;
317
25
39
$bits+=($base{pA} * log2($base{pA}/$bgd->{'A'}));
318
25
36
$bits+=($base{pC} * log2($base{pC}/$bgd->{'C'}));
319
25
30
$bits+=($base{pG} * log2($base{pG}/$bgd->{'G'}));
320
25
31
$bits+=($base{pT} * log2($base{pT}/$bgd->{'T'}));
321
25
150
push @pwm, [$base{pA},$base{pC},$base{pG},$base{pT},abs(sprintf("%.3f",$bits))];
322
}
323
1
6
return @pwm;
324
}
325
326
sub _make_pwm {
327
1
1
1
my ($self,$input) = @_;
328
1
1
my $count = 1;
329
1
1
my %hash;
330
1
2
my $bgd = $self->background;
331
#sum up the frequencies at each base pair
332
1
2
foreach my $seq(@$input){
333
14
39
my $string = $seq->seq;
334
14
11
$string = uc $string;
335
14
22
my @motif = split('',$string);
336
14
9
my $pos = 1;
337
14
15
foreach my $t(@motif){
338
126
88
$hash{$pos}{$t}++;
339
126
93
$pos++;
340
}
341
14
19
$count++;
342
}
343
344
#calculate relative freq
345
1
2
my @pwm;
346
347
#decrement last count
348
1
2
$count--;
349
1
6
foreach my $pos(sort{$a<=>$b} keys %hash){
20
17
350
9
5
my @array;
351
9
100
23
push @array,($hash{$pos}{'A'}||0)/$count;
352
9
100
16
push @array,($hash{$pos}{'C'}||0)/$count;
353
9
100
16
push @array,($hash{$pos}{'G'}||0)/$count;
354
9
100
17
push @array,($hash{$pos}{'T'}||0)/$count;
355
356
#calculate bits
357
# relative entropy (RelEnt) or Kullback-Liebler distance
358
# relent = sum fk * log2(fk/gk) where fk is frequency of nucleotide k and
359
# gk the background frequency of nucleotide k
360
361
9
6
my $bits;
362
9
100
40
$bits+=(($hash{$pos}{'A'}||0) / $count) * log2((($hash{$pos}{'A'}||0)/$count) / ($bgd->{'A'}));
100
363
9
100
29
$bits+=(($hash{$pos}{'C'}||0) / $count) * log2((($hash{$pos}{'C'}||0)/$count) / ($bgd->{'C'}));
100
364
9
100
29
$bits+=(($hash{$pos}{'G'}||0) / $count) * log2((($hash{$pos}{'G'}||0)/$count) / ($bgd->{'G'}));
100
365
9
100
35
$bits+=(($hash{$pos}{'T'}||0) / $count) * log2((($hash{$pos}{'T'}||0)/$count) / ($bgd->{'T'}));
100
366
9
43
push @array, abs(sprintf("%.3f",$bits));
367
368
9
14
push @pwm,\@array;
369
}
370
1
4
return $self->pwm(\@pwm);
371
}
372
373
374
###various get/sets
375
376
=head2 fontsize
377
378
Title : fontsize
379
Usage : $picto->fontsize();
380
Function: get/set for fontsize
381
Returns : int
382
Arguments: int
383
384
=cut
385
386
sub fontsize {
387
4
4
1
5
my ($self,$obj) = @_;
388
4
100
9
if($obj){
389
2
4
$self->{'_fontsize'} = $obj;
390
}
391
4
5
return $self->{'_fontsize'};
392
}
393
394
=head2 color
395
396
Title : color
397
Usage : $picto->color();
398
Function: get/set for color
399
Returns : a hash reference
400
Arguments: a hash reference
401
402
=cut
403
404
sub color {
405
4
4
1
4
my ($self,$obj) = @_;
406
4
100
11
if($obj){
407
2
3
$self->{'_color'} = $obj;
408
}
409
4
5
return $self->{'_color'};
410
}
411
412
=head2 svg_obj
413
414
Title : svg_obj
415
Usage : $picto->svg_obj();
416
Function: get/set for svg_obj
417
Returns : L
418
Arguments: L
419
420
=cut
421
422
sub svg_obj {
423
6
6
1
7
my ($self,$obj) = @_;
424
6
100
15
if($obj){
425
4
7
$self->{'_svg_obj'} = $obj;
426
}
427
6
16
return $self->{'_svg_obj'};
428
}
429
430
=head2 plot_bits
431
432
Title : plot_bits
433
Usage : $picto->plot_bits();
434
Function: get/set for plot_bits to indicate whether to plot
435
information content at each base position
436
Returns :1/0
437
Arguments: 1/0
438
439
=cut
440
441
sub plot_bits {
442
38
38
1
36
my ($self,$obj) = @_;
443
38
100
49
if($obj){
444
2
4
$self->{'_plot_bits'} = $obj;
445
}
446
38
73
return $self->{'_plot_bits'};
447
}
448
449
=head2 normalize
450
451
Title : normalize
452
Usage : $picto->normalize($newval)
453
Function: get/set to make all columns the same height.
454
default is to scale height with information
455
content.
456
Returns : value of normalize (a scalar)
457
Args : on set, new value (a scalar or undef, optional)
458
459
460
=cut
461
462
sub normalize{
463
171
171
1
106
my $self = shift;
464
465
171
100
230
return $self->{'normalize'} = shift if @_;
466
170
205
return $self->{'normalize'};
467
}
468
469
=head2 background
470
471
Title : background
472
Usage : $picto->background();
473
Function: get/set for hash reference of nucleodtide bgd frequencies
474
Returns : hash reference
475
Arguments: hash reference
476
477
=cut
478
479
sub background {
480
4
4
1
4
my ($self,$obj) = @_;
481
4
100
7
if($obj){
482
2
5
$self->{'_background'} = $obj;
483
}
484
4
6
return $self->{'_background'};
485
}
486
487
=head2 pwm
488
489
Title : pwm
490
Usage : $picto->pwm();
491
Function: get/set for pwm
492
Returns : int
493
Arguments: int
494
495
=cut
496
497
sub pwm {
498
1
1
1
2
my ($self,$pwm) = @_;
499
1
50
3
if($pwm){
500
1
2
$self->{'_pwm'} = $pwm;
501
}
502
1
6
return $self->{'_pwm'};
503
}
504
505
#utility method for returning log 2
506
sub log2 {
507
136
136
0
94
my ($val) = @_;
508
136
100
162
return 0 if $val==0;
509
121
133
return log($val)/log(2);
510
}
511
512
513
1;