line
stmt
bran
cond
sub
pod
time
code
1
# BioPerl module for Bio::Draw::Pictogram
2
#
3
# Please direct questions and support issues to
4
#
5
# Cared for by Shawn Hoon
6
#
7
# Copyright Shawn Hoon
8
#
9
# You may distribute this module under the same terms as perl itself
10
11
# POD documentation - main docs before the code
12
13
=head1 NAME
14
15
Bio::Draw::Pictogram - generate SVG output of Pictogram display for consensus motifs
16
17
=head1 SYNOPSIS
18
19
use Bio::Draw::Pictogram;
20
use Bio::SeqIO;
21
22
my $sio = Bio::SeqIO->new(-file=>$ARGV[0],-format=>'fasta');
23
my @seq;
24
while(my $seq = $sio->next_seq){
25
push @seq, $seq;
26
}
27
28
my $picto = Bio::Draw::Pictogram->new(-width=>"800",
29
-height=>"500",
30
-fontsize=>"60",
31
-plot_bits=>1,
32
-background=>{
33
'A'=>0.25,
34
'C'=>0.18,
35
'T'=>0.32,
36
'G'=>0.25},
37
-color=>{'A'=>'red',
38
'G'=>'blue',
39
'C'=>'green',
40
'T'=>'magenta'});
41
42
my $svg = $picto->make_svg(\@seq);
43
44
print $svg->xmlify."\n";
45
46
#Support for Bio::Matrix::PSM::SiteMatrix now included
47
48
use Bio::Matrix::PSM::IO;
49
50
my $picto = Bio::Draw::Pictogram->new(-width=>"800",
51
-height=>"500",
52
-fontsize=>"60",
53
-plot_bits=>1,
54
-background=>{
55
'A'=>0.25,
56
'C'=>0.18,
57
'T'=>0.32,
58
'G'=>0.25},
59
-color=>{'A'=>'red',
60
'G'=>'blue',
61
'C'=>'green',
62
'T'=>'magenta'});
63
64
my $psm = $psmIO->next_psm;
65
my $svg = $picto->make_svg($psm);
66
print $svg->xmlify;
67
68
=head1 DESCRIPTION
69
70
A module for generating SVG output of Pictogram display for consensus
71
motifs. This method of representation was describe by Burge and
72
colleagues: (Burge, C.B.,Tuschl, T., Sharp, P.A. in The RNA world II,
73
525-560, CSHL press, 1999)
74
75
This is a simple module that takes in an array of sequences (assuming
76
equal lengths) and calculates relative base frequencies where the
77
height of each letter reflects the frequency of each nucleotide at a
78
given position. It can also plot the information content at each
79
position scaled by the background frequencies of each nucleotide.
80
81
It requires the SVG-2.26 or later module by Ronan Oger available at
82
http://www.cpan.org
83
84
Recommended viewing of the SVG is the plugin available at Adobe:
85
http://www.adobe.com/svg
86
87
=head1 FEEDBACK
88
89
90
=head2 Mailing Lists
91
92
User feedback is an integral part of the evolution of this and other
93
Bioperl modules. Send your comments and suggestions preferably to one
94
of the Bioperl mailing lists. Your participation is much appreciated.
95
96
bioperl-l@bioperl.org - General discussion
97
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
98
99
=head2 Support
100
101
Please direct usage questions or support issues to the mailing list:
102
103
I
104
105
rather than to the module maintainer directly. Many experienced and
106
reponsive experts will be able look at the problem and quickly
107
address it. Please include a thorough description of the problem
108
with code and data examples if at all possible.
109
110
=head2 Reporting Bugs
111
112
Report bugs to the Bioperl bug tracking system to help us keep track
113
the bugs and their resolution. Bug reports can be submitted via the
114
web:
115
116
https://github.com/bioperl/bioperl-live/issues
117
118
=head1 AUTHOR - Shawn Hoon
119
120
Email shawnh@fugu-sg.org
121
122
=head1 APPENDIX
123
124
The rest of the documentation details each of the object
125
methods. Internal methods are usually preceded with a "_".
126
127
=cut
128
129
package Bio::Draw::Pictogram;
130
1
1
447
use strict;
1
1
1
37
131
1
1
3
use SVG 2.26;
1
21
1
4
132
1
1
947
use Bio::SeqIO;
1
2
1
28
133
1
1
4
use base qw(Bio::Root::Root);
1
1
1
57
134
135
1
1
4
use constant MAXBITS => 2;
1
1
1
1285
136
137
=head2 new
138
139
Title : new
140
Usage : my $picto = Bio::Draw::Pictogram->new(-width=>"800",
141
-height=>"500",
142
-fontsize=>"60",
143
-plot_bits=>1,
144
-background=>{
145
'A'=>0.25,
146
'C'=>0.18,
147
'T'=>0.32,
148
'G'=>0.25},
149
-color=>{'A'=>'red',
150
'G'=>'blue',
151
'C'=>'green',
152
'T'=>'magenta'});
153
Function: Constructor for Pictogram Object
154
Returns : L
155
156
=cut
157
158
sub new {
159
2
2
1
33
my ($caller,@args) = @_;
160
2
10
my $self = $caller->SUPER::new(@args);
161
2
15
my ($width,$height,$fontsize,$color,$background,$bit,$normalize) = $self->_rearrange([qw(WIDTH HEIGHT FONTSIZE COLOR BACKGROUND PLOT_BITS NORMALIZE)],@args);
162
2
50
7
$width||=800;
163
2
50
8
$height||=600;
164
2
16
my $svg = SVG->new(width=>$width,height=>$height);
165
2
444
$self->svg_obj($svg);
166
2
50
3
$fontsize ||= 80;
167
2
50
29
$self->fontsize($fontsize) if $fontsize;
168
2
50
5
$color = $color || {'T'=>'black','C'=>'blue','G'=>'green','A'=>'red'};
169
2
7
$self->color($color);
170
2
50
13
$background = $background || {'T'=>0.25,'C'=>0.25,'G'=>0.25,'A'=>0.25};
171
2
6
$self->background($background);
172
2
50
6
$self->plot_bits($bit) if $bit;
173
2
100
6
$self->normalize($normalize) if $normalize;
174
175
2
6
return $self;
176
}
177
178
=head2 make_svg
179
180
Title : make_svg
181
Usage : $picto->make_svg();
182
Function: make the SVG object
183
Returns : L
184
Arguments: A fasta file or array ref of L objects or a L
185
186
=cut
187
188
sub make_svg {
189
2
2
1
674
my ($self,$input) = @_;
190
2
4
my $fontsize = $self->fontsize;
191
2
6
my $size = $fontsize * 0.75;
192
2
3
my $width= $size;
193
2
4
my $height= $size+40;
194
2
3
my $color = $self->color;
195
196
#starting x coordinate for pictogram
197
2
4
my $x = 45+$size/2;
198
2
3
my $pos_y = $size * 2;
199
2
3
my $bit_y = $pos_y+40;
200
2
1
my @pwm;
201
202
2
3
my $bp = 1;
203
204
#input can be file or array ref of sequences
205
2
100
33
16
if(ref($input) eq 'ARRAY'){
50
206
1
1
@pwm = @{$self->_make_pwm($input)};
1
8
207
}
208
elsif(ref($input) && $input->isa("Bio::Matrix::PSM::SiteMatrixI")){
209
1
9
@pwm = $self->_make_pwm_from_site_matrix($input);
210
}
211
else {
212
0
0
my $sio = Bio::SeqIO->new(-file=>$input,-format=>"fasta");
213
0
0
my @seq;
214
0
0
while (my $seq = $sio->next_seq){
215
0
0
push @seq, $seq;
216
}
217
0
0
@pwm = @{$self->_make_pwm(\@seq)};
0
0
218
}
219
220
221
2
6
my $svg = $self->svg_obj;
222
2
6
my $seq_length = scalar(@pwm + 1) * $width + $x + $x;
223
2
1
my $seq_grp;
224
225
#scale the svg if length greater than svg width
226
2
100
8
if($seq_length > $svg->{-document}->{'width'}){
227
1
3
my $ratio = $svg->{-document}->{'width'}/($seq_length);
228
1
9
$seq_grp = $svg->group(transform=>"scale($ratio,1)");
229
}
230
else {
231
1
9
$seq_grp= $svg->group();
232
}
233
234
#do the drawing, each set is a base position
235
2
77
foreach my $set(@pwm){
236
34
53
my ($A,$C,$G,$T,$bits) = @$set;
237
34
23
my @array;
238
34
42
push @array, ['a',($A)];
239
34
27
push @array, ['g',($G)];
240
34
32
push @array, ['c',($C)];
241
34
36
push @array, ['t',($T)];
242
34
69
@array = sort {$b->[1]<=>$a->[1]}@array;
158
134
243
34
25
my $count = 1;
244
34
73
my $pos_group = $seq_grp->group(id=>"bp $bp");
245
34
1067
my $prev_size;
246
my $y_trans;
247
248
#draw each letter at each position
249
34
38
foreach my $letter(@array){
250
136
77
my $scale;
251
136
100
152
if($self->normalize){
252
100
86
$scale = $letter->[1];
253
} else {
254
36
34
$scale = $letter->[1] * ($bits / MAXBITS);
255
}
256
257
136
100
142
if($count == 1){
258
34
100
36
if($self->normalize){
259
25
20
$y_trans = 0;
260
} else {
261
9
11
$y_trans = (1 - ($bits / MAXBITS)) * $size;
262
}
263
}
264
else {
265
102
77
$y_trans += $prev_size;
266
}
267
$pos_group->text('id'=> uc($letter->[0]).$bp,height=>$height,
268
'width'=>$width,x=>$x,y=>$size,
269
'transform'=>"translate(0,$y_trans),scale(1,$scale)",
270
'style'=>{"font-size"=>$fontsize,
271
'text-anchor'=>'middle',
272
'font-family'=>'Verdana',
273
136
100
1024
'fill'=>$color->{uc $letter->[0]}})->cdata(uc $letter->[0]) if $scale > 0;
274
275
136
6331
$prev_size = $scale * $size;
276
136
120
$count++;
277
}
278
#plot the bit if required
279
34
50
45
if($self->plot_bits){
280
34
89
$seq_grp->text('x'=>$x,
281
'y'=>$bit_y,
282
'style'=>{"font-size"=>'10',
283
'text-anchor'=>'middle',
284
'font-family'=>'Verdana',
285
'fill'=>'black'})->cdata($bits);
286
}
287
34
1231
$bp++;
288
34
65
$x+=$width;
289
}
290
291
#plot the tags
292
2
50
5
$seq_grp->text(x=>int($width/2),y=>$bit_y,style=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Bits:") if $self->plot_bits;
293
294
2
83
$seq_grp->text(x=>int($width/2),y=>$pos_y,style=>{"font-size"=>'10','text-anchor'=>'middle','font-family'=>'Verdana','fill'=>'black'})->cdata("Position:");
295
296
#plot the base positions
297
2
76
$x = 45+$size/2-int($width/2);
298
2
5
foreach my $nbr(1..($bp-1)){
299
34
87
$seq_grp->text(x=>$x+int($width/2),y=>$pos_y,style=>{"font-size"=>'10','text-anchor'=>'left','font-family'=>'Verdana','fill'=>'black'})->cdata($nbr);
300
34
1153
$x+=$width;
301
}
302
303
304
# $seq_grp->transform("scale(2,2)");
305
306
2
5
return $self->svg_obj($svg);
307
}
308
309
sub _make_pwm_from_site_matrix{
310
1
1
2
my ($self,$matrix) = @_;
311
1
3
my $bgd = $self->background;
312
1
1
my @pwm;
313
1
5
my $consensus = $matrix->consensus;
314
1
4
foreach my $i(1..length($consensus)){
315
25
39
my %base = $matrix->next_pos;
316
25
28
my $bits;
317
25
31
$bits+=($base{pA} * log2($base{pA}/$bgd->{'A'}));
318
25
28
$bits+=($base{pC} * log2($base{pC}/$bgd->{'C'}));
319
25
31
$bits+=($base{pG} * log2($base{pG}/$bgd->{'G'}));
320
25
36
$bits+=($base{pT} * log2($base{pT}/$bgd->{'T'}));
321
25
133
push @pwm, [$base{pA},$base{pC},$base{pG},$base{pT},abs(sprintf("%.3f",$bits))];
322
}
323
1
5
return @pwm;
324
}
325
326
sub _make_pwm {
327
1
1
1
my ($self,$input) = @_;
328
1
2
my $count = 1;
329
1
1
my %hash;
330
1
2
my $bgd = $self->background;
331
#sum up the frequencies at each base pair
332
1
2
foreach my $seq(@$input){
333
14
30
my $string = $seq->seq;
334
14
11
$string = uc $string;
335
14
25
my @motif = split('',$string);
336
14
10
my $pos = 1;
337
14
8
foreach my $t(@motif){
338
126
92
$hash{$pos}{$t}++;
339
126
81
$pos++;
340
}
341
14
16
$count++;
342
}
343
344
#calculate relative freq
345
1
2
my @pwm;
346
347
#decrement last count
348
1
1
$count--;
349
1
4
foreach my $pos(sort{$a<=>$b} keys %hash){
19
15
350
9
5
my @array;
351
9
100
22
push @array,($hash{$pos}{'A'}||0)/$count;
352
9
100
15
push @array,($hash{$pos}{'C'}||0)/$count;
353
9
100
21
push @array,($hash{$pos}{'G'}||0)/$count;
354
9
100
16
push @array,($hash{$pos}{'T'}||0)/$count;
355
356
#calculate bits
357
# relative entropy (RelEnt) or Kullback-Liebler distance
358
# relent = sum fk * log2(fk/gk) where fk is frequency of nucleotide k and
359
# gk the background frequency of nucleotide k
360
361
9
7
my $bits;
362
9
100
32
$bits+=(($hash{$pos}{'A'}||0) / $count) * log2((($hash{$pos}{'A'}||0)/$count) / ($bgd->{'A'}));
100
363
9
100
29
$bits+=(($hash{$pos}{'C'}||0) / $count) * log2((($hash{$pos}{'C'}||0)/$count) / ($bgd->{'C'}));
100
364
9
100
28
$bits+=(($hash{$pos}{'G'}||0) / $count) * log2((($hash{$pos}{'G'}||0)/$count) / ($bgd->{'G'}));
100
365
9
100
37
$bits+=(($hash{$pos}{'T'}||0) / $count) * log2((($hash{$pos}{'T'}||0)/$count) / ($bgd->{'T'}));
100
366
9
38
push @array, abs(sprintf("%.3f",$bits));
367
368
9
15
push @pwm,\@array;
369
}
370
1
3
return $self->pwm(\@pwm);
371
}
372
373
374
###various get/sets
375
376
=head2 fontsize
377
378
Title : fontsize
379
Usage : $picto->fontsize();
380
Function: get/set for fontsize
381
Returns : int
382
Arguments: int
383
384
=cut
385
386
sub fontsize {
387
4
4
1
6
my ($self,$obj) = @_;
388
4
100
9
if($obj){
389
2
4
$self->{'_fontsize'} = $obj;
390
}
391
4
7
return $self->{'_fontsize'};
392
}
393
394
=head2 color
395
396
Title : color
397
Usage : $picto->color();
398
Function: get/set for color
399
Returns : a hash reference
400
Arguments: a hash reference
401
402
=cut
403
404
sub color {
405
4
4
1
5
my ($self,$obj) = @_;
406
4
100
7
if($obj){
407
2
4
$self->{'_color'} = $obj;
408
}
409
4
5
return $self->{'_color'};
410
}
411
412
=head2 svg_obj
413
414
Title : svg_obj
415
Usage : $picto->svg_obj();
416
Function: get/set for svg_obj
417
Returns : L
418
Arguments: L
419
420
=cut
421
422
sub svg_obj {
423
6
6
1
8
my ($self,$obj) = @_;
424
6
100
13
if($obj){
425
4
8
$self->{'_svg_obj'} = $obj;
426
}
427
6
11
return $self->{'_svg_obj'};
428
}
429
430
=head2 plot_bits
431
432
Title : plot_bits
433
Usage : $picto->plot_bits();
434
Function: get/set for plot_bits to indicate whether to plot
435
information content at each base position
436
Returns :1/0
437
Arguments: 1/0
438
439
=cut
440
441
sub plot_bits {
442
38
38
1
34
my ($self,$obj) = @_;
443
38
100
52
if($obj){
444
2
4
$self->{'_plot_bits'} = $obj;
445
}
446
38
71
return $self->{'_plot_bits'};
447
}
448
449
=head2 normalize
450
451
Title : normalize
452
Usage : $picto->normalize($newval)
453
Function: get/set to make all columns the same height.
454
default is to scale height with information
455
content.
456
Returns : value of normalize (a scalar)
457
Args : on set, new value (a scalar or undef, optional)
458
459
460
=cut
461
462
sub normalize{
463
171
171
1
109
my $self = shift;
464
465
171
100
244
return $self->{'normalize'} = shift if @_;
466
170
217
return $self->{'normalize'};
467
}
468
469
=head2 background
470
471
Title : background
472
Usage : $picto->background();
473
Function: get/set for hash reference of nucleodtide bgd frequencies
474
Returns : hash reference
475
Arguments: hash reference
476
477
=cut
478
479
sub background {
480
4
4
1
4
my ($self,$obj) = @_;
481
4
100
8
if($obj){
482
2
3
$self->{'_background'} = $obj;
483
}
484
4
6
return $self->{'_background'};
485
}
486
487
=head2 pwm
488
489
Title : pwm
490
Usage : $picto->pwm();
491
Function: get/set for pwm
492
Returns : int
493
Arguments: int
494
495
=cut
496
497
sub pwm {
498
1
1
1
2
my ($self,$pwm) = @_;
499
1
50
2
if($pwm){
500
1
2
$self->{'_pwm'} = $pwm;
501
}
502
1
5
return $self->{'_pwm'};
503
}
504
505
#utility method for returning log 2
506
sub log2 {
507
136
136
0
87
my ($val) = @_;
508
136
100
157
return 0 if $val==0;
509
121
124
return log($val)/log(2);
510
}
511
512
513
1;