File Coverage

Bio/Structure/IO/pdb.pm

Criterion	Covered	Total	%
statement	592	751	78.8
branch	231	398	58.0
condition	79	165	47.8
subroutine	17	18	94.4
pod	2	2	100.0
total	921	1334	69.0

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# BioPerl module for Bio::Structure::IO::pdb
3							#
4							# Please direct questions and support issues to
5							#
6							# Cared for by Kris Boulez
7							#
8							# Copyright 2001, 2002 Kris Boulez
9							#
10							# Framework is a copy of Bio::SeqIO::embl.pm
11							#
12							# You may distribute this module under the same terms as perl itself
13
14							# POD documentation - main docs before the code
15
16							=head1 NAME
17
18							Bio::Structure::IO::pdb - PDB input/output stream
19
20							=head1 SYNOPSIS
21
22							It is probably best not to use this object directly, but
23							rather go through the Bio::Structure::IO handler system. Go:
24
25							$stream = Bio::Structure::IO->new(-file => $filename,
26							-format => 'PDB');
27
28							while (my $structure = $stream->next_structure) {
29							# do something with $structure
30							}
31
32							=head1 DESCRIPTION
33
34							This object can transform Bio::Structure objects to and from PDB flat
35							file databases. The working is similar to that of the Bio::SeqIO handlers.
36
37							=head1 FEEDBACK
38
39							=head2 Mailing Lists
40
41							User feedback is an integral part of the evolution of this and other
42							Bioperl modules. Send your comments and suggestions preferably to one
43							of the Bioperl mailing lists. Your participation is much appreciated.
44
45							bioperl-l@bioperl.org - General discussion
46							http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47
48							=head2 Support
49
50							Please direct usage questions or support issues to the mailing list:
51
52							I
53
54							rather than to the module maintainer directly. Many experienced and
55							reponsive experts will be able look at the problem and quickly
56							address it. Please include a thorough description of the problem
57							with code and data examples if at all possible.
58
59							=head2 Reporting Bugs
60
61							Report bugs to the Bioperl bug tracking system to help us keep track
62							the bugs and their resolution. Bug reports can be submitted via the
63							web:
64
65							https://github.com/bioperl/bioperl-live/issues
66
67							=head1 AUTHOR - Kris Boulez
68
69							Email kris.boulez@algonomics.com
70
71							=head1 APPENDIX
72
73							The rest of the documentation details each of the object methods.
74							Internal methods are usually preceded with a _
75
76							=cut
77
78
79							# Let the code begin...
80
81
82							package Bio::Structure::IO::pdb;
83	1			1		3	use strict;
	1					2
	1					23
84	1			1		310	use Bio::Structure::Entry;
	1					1
	1					24
85							#use Bio::Structure::Model;
86							#use Bio::Structure::Chain;
87							#use Bio::Structure::Residue;
88	1			1		306	use Bio::Structure::Atom;
	1					2
	1					18
89	1			1		359	use Bio::SeqFeature::Generic;
	1					1
	1					25
90	1			1		298	use Bio::Annotation::Reference;
	1					2
	1					25
91
92	1			1		4	use base qw(Bio::Structure::IO);
	1					1
	1					5604
93
94							sub _initialize {
95	3			3		5	my($self,@args) = @_;
96
97	3					14	$self->SUPER::_initialize(@args);
98
99	3					10	my ($noheader, $noatom) =
100							$self->_rearrange([qw(
101							NOHEADER
102							NOATOM
103							)],
104							@args);
105	3	50				8	$noheader && $self->_noheader($noheader);
106	3	50				8	$noatom && $self->_noatom($noatom);
107							}
108
109
110							=head2 next_structure;
111
112							Title : next_structure
113							Usage : $struc = $stream->next_structure()
114							Function: returns the next structure in the stream
115							Returns : Bio::Structure object
116							Args :
117
118							=cut
119
120							sub next_structure {
121	2			2	1	4	my ($self,@args) = @_;
122	2					2	my ($line);
123	2					2	my ($obslte, $title, $caveat, $compnd, $source, $keywds,
124							$expdta, $author, %revdat, $revdat, $sprsde, $jrnl, %remark, $dbref,
125							$turn, $ssbond, $link, $hydbnd, $sltbrg, $cispep,
126							$site, $cryst1, $tvect,);
127	2					15	my $struc = Bio::Structure::Entry->new(-id => 'created from pdb.pm');
128	2					6	my $all_headers = ( !$self->_noheader ); # we'll parse all headers and store as annotation
129	2					2	my %header; # stores all header RECORDs an is stored as annotations when ATOM is reached
130
131
132	2					8	$line = $self->_readline; # This needs to be before the first eof() test
133
134	2	50				4	if( !defined $line ) {
135	0					0	return; # no throws - end of file
136							}
137
138	2	50				9	if( $line =~ /^\s+$/ ) {
139	0					0	while( defined ($line = $self->_readline) ) {
140	0	0				0	$line =~/\S/ && last;
141							}
142							}
143	2	50				3	if( !defined $line ) {
144	0					0	return; # end of file
145							}
146	2	50				8	$line =~ /^HEADER\s+\S+/ \|\| $self->throw("PDB stream with no HEADER. Not pdb in my book");
147	2					8	my($header_line) = unpack "x10 a56", $line;
148	2					3	$header{'header'} = $header_line;
149	2					6	my($class, $depdate, $idcode) = unpack "x10 a40 a9 x3 a4", $line;
150	2					8	$idcode =~ s/^\s(\S+)\s$/$1/;
151	2					4	$struc->id($idcode);
152	2					11	$self->debug("PBD c $class d $depdate id $idcode\n"); # XXX KB
153
154	2					2	my $buffer = $line;
155
156							BEFORE_COORDINATES :
157	2					5	until( !defined $buffer ) {
158	292					205	$_ = $buffer;
159
160							# Exit at start of coordinate section
161	292	100				508	last if /^(MODEL\|ATOM\|HETATM)/;
162
163							# OBSLTE line(s)
164	290	50	33			389	if (/^OBSLTE / && $all_headers) {
165	0					0	$obslte = $self->_read_PDB_singlecontline("OBSLTE","12-70",\$buffer);
166	0					0	$header{'obslte'} = $obslte;
167							}
168
169							# TITLE line(s)
170	290	100	66			393	if (/^TITLE / && $all_headers) {
171	1					4	$title = $self->_read_PDB_singlecontline("TITLE","11-70",\$buffer);
172	1					2	$header{'title'} = $title;
173							}
174
175							# CAVEAT line(s)
176	290	50	33			373	if (/^CAVEAT / && $all_headers) {
177	0					0	$caveat = $self->_read_PDB_singlecontline("CAVEAT","12-70",\$buffer);
178	0					0	$header{'caveat'} = $caveat;
179							}
180
181							# COMPND line(s)
182	290	100	66			364	if (/^COMPND / && $all_headers) {
183	2					4	$compnd = $self->_read_PDB_singlecontline("COMPND","11-70",\$buffer);
184	2					3	$header{'compnd'} = $compnd;
185	2					23	$self->debug("get COMPND $compnd\n");
186							}
187
188							# SOURCE line(s)
189	290	100	66			398	if (/^SOURCE / && $all_headers) {
190	2					4	$source = $self->_read_PDB_singlecontline("SOURCE","11-70",\$buffer);
191	2					4	$header{'source'} = $source;
192							}
193
194							# KEYWDS line(s)
195	290	100	66			381	if (/^KEYWDS / && $all_headers) {
196	1					4	$keywds = $self->_read_PDB_singlecontline("KEYWDS","11-70",\$buffer);
197	1					2	$header{'keywds'} = $keywds;
198							}
199
200							# EXPDTA line(s)
201	290	100	66			396	if (/^EXPDTA / && $all_headers) {
202	1					3	$expdta = $self->_read_PDB_singlecontline("EXPDTA","11-70",\$buffer);
203	1					3	$header{'expdta'} = $expdta;
204							}
205
206							# AUTHOR line(s)
207	290	100	66			384	if (/^AUTHOR / && $all_headers) {
208	2					4	$author = $self->_read_PDB_singlecontline("AUTHOR","11-70",\$buffer);
209	2					3	$header{'author'} = $author;
210							}
211
212							# REVDAT line(s)
213							# a bit more elaborate as we also store the modification number
214	290	100	66			378	if (/^REVDAT / && $all_headers) {
215							##my($modnum,$rol) = unpack "x7 A3 x3 A53", $_;
216							##$modnum =~ s/\s+//; # remove spaces
217							##$revdat{$modnum} .= $rol;
218	3					6	my ($rol) = unpack "x7 a59", $_;
219	3					6	$revdat .= $rol;
220	3					4	$header{'revdat'} = $revdat;
221							}
222
223							# SPRSDE line(s)
224	290	50	33			401	if (/^SPRSDE / && $all_headers) {
225	0					0	$sprsde = $self->_read_PDB_singlecontline("SPRSDE","12-70",\$buffer);
226	0					0	$header{'sprsde'} = $sprsde;
227							}
228
229							# jRNL line(s)
230	290	100	66			385	if (/^JRNL / && $all_headers) {
231	2					5	$jrnl = $self->_read_PDB_jrnl(\$buffer);
232	2					7	$struc->annotation->add_Annotation('reference',$jrnl);
233	2					2	$header{'jrnl'} = 1; # when writing out, we need a way to check there was a JRNL record (not mandatory)
234							}
235
236							# REMARK line(s)
237							# we only parse the "REMARK 1" lines (additional references)
238							# thre rest is stored in %remark (indexed on remarkNum) (pack does space-padding)
239	290	100	66			896	if (/^REMARK\s+(\d+)\s*/ && $all_headers) {
240	240					259	my $remark_num = $1;
241	240	100				349	if ($remark_num == 1) {
242	2					6	my @refs = $self->_read_PDB_remark_1(\$buffer);
243							# How can we find the primary reference when writing (JRNL record) XXX KB
244	2					4	foreach my $ref (@refs) {
245	8					10	$struc->annotation->add_Annotation('reference', $ref);
246							}
247							# $_ still holds the REMARK_1 line, $buffer now contains the first non
248							# REMARK_1 line. We need to parse it in this pass (so no else block)
249	2					3	$_ = $buffer;
250							}
251							# for the moment I don't see a better solution (other then using goto)
252	240	50				423	if (/^REMARK\s+(\d+)\s*/) {
253	240					197	my $r_num = $1;
254	240	50				269	if ($r_num != 1) { # other remarks, we store literlly at the moment
255	240					391	my ($rol) = unpack "x11 a59", $_;
256	240					406	$remark{$r_num} .= $rol;
257							}
258							}
259							} # REMARK
260
261							# DBREF line(s)
262							# references to sequences in other databases
263							# we store as 'dblink' annotations and whole line as simple annotation (round-trip)
264	290	100	66			429	if (/^DBREF / && $all_headers) {
265	3					8	my ($rol) = unpack "x7 a61", $_;
266	3					5	$dbref .= $rol;
267	3					4	$header{'dbref'} = $dbref;
268	3					7	my ($db, $acc) = unpack "x26 a6 x1 a8", $_;
269	3					8	$db =~ s/\s*$//;
270	3					7	$acc =~ s/\s*$//;
271	3					10	my $link = Bio::Annotation::DBLink->new;
272	3					6	$link->database($db);
273	3					5	$link->primary_id($acc);
274	3					6	$struc->annotation->add_Annotation('dblink', $link);
275							} # DBREF
276
277							# SEQADV line(s)
278	290	50	33			396	if (/^SEQADV / && $all_headers) {
279	0					0	my ($rol) = unpack "x7 a63", $_;
280	0					0	$header{'seqadv'} .= $rol;
281							} # SEQADV
282
283							# SEQRES line(s)
284							# this is (I think) the sequence of macromolecule that was analysed
285							# this will be returned when doing $struc->seq
286	290	100	66			434	if (/^SEQRES / && $all_headers) {
287	8					16	my ($rol) = unpack "x8 a62", $_;
288	8					11	$header{'seqres'} .= $rol;
289							} # SEQRES
290
291							# MODRES line(s)
292	290	50	33			381	if (/^MODRES / && $all_headers) {
293	0					0	my ($rol) = unpack "x7 a63", $_;
294	0					0	$header{'modres'} .= $rol;
295							} # MODRES
296
297							# HET line(s)
298	290	100	66			404	if (/^HET / && $all_headers) {
299	3					6	my ($rol) = unpack "x7 a63", $_;
300	3					10	$header{'het'} .= $rol;
301							} # HET
302
303							# HETNAM line(s)
304	290	100	66			373	if (/^HETNAM / && $all_headers) {
305	1					2	my ($rol) = unpack "x8 a62", $_;
306	1					3	$header{'hetnam'} .= $rol;
307							} # HETNAM
308
309							# HETSYN line(s)
310	290	50	33			384	if (/^HETSYN / && $all_headers) {
311	0					0	my ($rol) = unpack "x8 a62", $_;
312	0					0	$header{'hetsyn'} .= $rol;
313							} # HETSYN
314
315							# FORMUL line(s)
316	290	100	66			364	if (/^FORMUL / && $all_headers) {
317	4					8	my ($rol) = unpack "x8 a62", $_;
318	4					6	$header{'formul'} .= $rol;
319							} # FORMUL
320
321							# HELIX line(s)
322							# store as specific object ??
323	290	100	66			420	if (/^HELIX / && $all_headers) {
324	2					4	my ($rol) = unpack "x7 a69", $_;
325	2					3	$header{'helix'} .= $rol;
326							} # HELIX
327
328							# SHEET line(s)
329							# store as specific object ??
330	290	100	66			374	if (/^SHEET / && $all_headers) {
331	3					5	my ($rol) = unpack "x7 a63", $_;
332	3					4	$header{'sheet'} .= $rol;
333							} # SHEET
334
335							# TURN line(s)
336							# store as specific object ??
337	290	50	33			403	if (/^TURN / && $all_headers) {
338	0					0	my ($rol) = unpack "x7 a63", $_;
339	0					0	$turn .= $rol;
340	0					0	$header{'turn'} = $turn;
341							} # TURN
342
343							# SSBOND line(s)
344							# store in connection-like object (see parsing of CONECT record)
345	290	100	66			413	if (/^SSBOND / && $all_headers) {
346	3					5	my ($rol) = unpack "x7 a65", $_;
347	3					5	$ssbond .= $rol;
348	3					3	$header{'ssbond'} = $ssbond;
349							} # SSBOND
350
351							# LINK
352							# store like SSBOND ?
353	290	100	66			431	if (/^LINK / && $all_headers) {
354	3					5	my ($rol) = unpack "x12 a60", $_;
355	3					6	$link .= $rol;
356	3					4	$header{'link'} = $link;
357							} # LINK
358
359							# HYDBND
360							# store like SSBOND
361	290	50	33			386	if (/^HYDBND / && $all_headers) {
362	0					0	my ($rol) = unpack "x12 a60", $_;
363	0					0	$hydbnd .= $rol;
364	0					0	$header{'hydbnd'} = $hydbnd;
365							} # HYDBND
366
367							# SLTBRG
368							# store like SSBOND ?
369	290	50	33			378	if (/^SLTBRG / && $all_headers) {
370	0					0	my ($rol) = unpack "x12 a60",$_;
371	0					0	$sltbrg .= $rol;
372	0					0	$header{'sltbrg'} = $sltbrg;
373							} # SLTBRG
374
375							# CISPEP
376							# store like SSBOND ?
377	290	50	33			424	if (/^CISPEP / && $all_headers) {
378	0					0	my ($rol) = unpack "x7 a52", $_;
379	0					0	$cispep .= $rol;
380	0					0	$header{'cispep'} = $cispep;
381							}
382
383							# SITE line(s)
384	290	50	33			384	if (/^SITE / && $all_headers) {
385	0					0	my ($rol) = unpack "x7 a54", $_;
386	0					0	$site .= $rol;
387	0					0	$header{'site'} = $site;
388							} # SITE
389
390							# CRYST1 line
391							# store in some crystallographic subobject ?
392	290	100	66			412	if (/^CRYST1/ && $all_headers) {
393	2					5	my ($rol) = unpack "x6 a64", $_;
394	2					3	$cryst1 .= $rol;
395	2					3	$header{'cryst1'} = $cryst1;
396							} # CRYST1
397
398							# ORIGXn line(s) (n=1,2,3)
399	290	100	66			410	if (/^(ORIGX\d) / && $all_headers) {
400	6					10	my $origxn = lc($1);
401	6					12	my ($rol) = unpack "x10 a45", $_;
402	6					12	$header{$origxn} .= $rol;
403							} # ORIGXn
404
405							# SCALEn line(s) (n=1,2,3)
406	290	100	66			389	if (/^(SCALE\d) / && $all_headers) {
407	6					9	my $scalen = lc($1);
408	6					12	my ($rol) = unpack "x10 a45", $_;
409	6					13	$header{$scalen} .= $rol;
410							} # SCALEn
411
412							# MTRIXn line(s) (n=1,2,3)
413	290	50	33			392	if (/^(MTRIX\d) / && $all_headers) {
414	0					0	my $mtrixn = lc($1);
415	0					0	my ($rol) = unpack "x7 a53", $_;
416	0					0	$header{$mtrixn} .= $rol;
417							} # MTRIXn
418
419							# TVECT line(s)
420	290	100	66			408	if (/^TVECT / && $all_headers) {
421	1					2	my ($rol) = unpack "x7 a63", $_;
422	1					2	$tvect .= $rol;
423	1					1	$header{'tvect'} = $tvect;
424							}
425
426							# Get next line.
427	290					398	$buffer = $self->_readline;
428							}
429
430							# store %header entries a annotations
431	2	50				5	if (%header) {
432	2					13	for my $record (keys %header) {
433	42					58	my $sim = Bio::Annotation::SimpleValue->new();
434	42					59	$sim->value($header{$record});
435	42					49	$struc->annotation->add_Annotation($record, $sim);
436							}
437							}
438							# store %remark entries as annotations
439	2	50				6	if (%remark) {
440	2					5	for my $remark_num (keys %remark) {
441	17					23	my $sim = Bio::Annotation::SimpleValue->new();
442	17					21	$sim->value($remark{$remark_num});
443	17					21	$struc->annotation->add_Annotation("remark_$remark_num", $sim);
444							}
445							}
446
447							# Coordinate section, the real meat
448							#
449							# $_ contains a line beginning with (ATOM\|MODEL)
450
451	2					3	$buffer = $_;
452
453
454	2	50	33			15	if (defined($buffer) && $buffer =~ /^(ATOM \|MODEL \|HETATM)/ ) { # can you have an entry without ATOM ?
455	2					5	while( defined ($buffer) ) { # (yes : 1a7z )
456							# read in one model at a time
457	2					5	my $model = $self->_read_PDB_coordinate_section(\$buffer, $struc);
458							# add this to $struc
459	2					9	$struc->add_model($struc, $model);
460
461	2	50	33			16	if ($buffer && $buffer !~ /^MODEL /) { # if we get here we have multiple MODELs
462	2					5	last;
463							}
464							}
465							}
466							else {
467	0					0	$self->throw("Could not find a coordinate section in this record\n");
468							}
469
470
471	2					5	until( !defined $buffer ) {
472	23					16	$_ = $buffer;
473
474							# CONNECT records
475	23	100				50	if (/^CONECT/) {
476							# do not differentiate between different type of connect (column dependant)
477	19					17	my $conect_unpack = "x6 a5 a5 a5 a5 a5 a5 a5 a5 a5 a5 a5";
478	19					71	my (@conect) = unpack $conect_unpack, $_;
479	19					34	for my $k (0 .. $#conect) {
480	209					396	$conect[$k] =~ s/\s//g;
481							}
482	19					16	my $source = shift @conect;
483	19					11	my $type;
484	19					20	for my $k (0 .. 9) {
485	190	100				261	next unless ($conect[$k] =~ /^\d+$/);
486							# 0..3 bond
487	32	50	0			33	if( $k <= 3 ) {
		0	0
		0	0
			0
488	32					27	$type = "bond";
489							}
490							# 4..5,7..8 hydrogen bonded
491							elsif( ($k >= 4 && $k <= 5) \|\| ($k >= 7 && $k <= 8) ) {
492	0					0	$type = "hydrogen";
493							}
494							# 6, 9 salt bridged
495							elsif( $k == 6 \|\| $k == 9 ) {
496	0					0	$type = "saltbridged";
497							} else {
498	0					0	$self->throw("k has impossible value ($k), check brain");
499							}
500	32					47	$struc->conect($source, $conect[$k], $type);
501							}
502							}
503
504							# MASTER record
505	23	100				30	if (/^MASTER /) {
506							# the numbers in here a checksums, we should use them :)
507	2					5	my ($rol) = unpack "x10 a60", $_;
508	2					9	$struc->master($rol);
509							}
510
511	23	100				39	if (/^END/) {
512							# this it the end ...
513							}
514
515	23					39	$buffer = $self->_readline;
516							}
517
518
519	2					38	return $struc;
520							}
521
522							=head2 write_structure
523
524							Title : write_structure
525							Usage : $stream->write_structure($struc)
526							Function: writes the $struc object (must be a Bio::Structure) to the stream
527							Returns : 1 for success and 0 for error
528							Args : Bio::Structure object
529
530							=cut
531
532							sub write_structure {
533	1			1	1	5	my ($self, $struc) = @_;
534	1	50				4	if( !defined $struc ) {
535	0					0	$self->throw("Attempting to write with no structure!");
536							}
537
538	1	50	33			9	if( ! ref $struc \|\| ! $struc->isa('Bio::Structure::StructureI') ) {
539	0					0	$self->throw(" $struc is not a StructureI compliant module.");
540							}
541	1					2	my ($ann, $string, $output_string, $key);
542							# HEADER
543	1					3	($ann) = $struc->annotation->get_Annotations("header");
544	1	50				3	if (defined $ann) {
545	1					4	$string = $ann->as_text;
546	1					4	$string =~ s/^Value: //;
547	1					10	$output_string = pack ("A10 A56", "HEADER", $string);
548							} else { # not read in via read_structure, create HEADER line
549	0					0	my $id = $struc->id;
550	0	0				0	if (!$id) {
551	0					0	$id = "UNK1";
552							}
553	0	0				0	if (length($id) > 4) {
554	0					0	$id = substr($id,0,4);
555							}
556	0					0	my $classification = "DEFAULT CLASSIFICATION";
557	0					0	my $dep_date = "24-JAN-70";
558	0					0	$output_string = pack ("A10 A40 A12 A4", "HEADER", $classification, $dep_date, $id);
559							}
560	1					4	$output_string .= " " x (80 - length($output_string) );
561	1					17	$self->_print("$output_string\n");
562
563	1					1	my (%header);
564	1					4	for $key ($struc->annotation->get_all_annotation_keys) {
565	38					34	$header{$key} = 1;;
566							}
567
568	1	50				4	exists $header{'obslte'} && $self->_write_PDB_simple_record(-name => "OBSLTE ", -cont => "9-10",
569							-annotation => $struc->annotation->get_Annotations("obslte"), -rol => "11-70");
570
571	1	50				6	exists $header{'title'} && $self->_write_PDB_simple_record(-name => "TITLE ", -cont => "9-10",
572							-annotation => $struc->annotation->get_Annotations("title"), -rol => "11-70");
573
574	1	50				4	exists $header{'caveat'} && $self->_write_PDB_simple_record(-name => "CAVEAT ", -cont => "9-10",
575							-annotation => $struc->annotation->get_Annotations("caveat"), -rol => "12-70");
576
577	1	50				7	exists $header{'compnd'} && $self->_write_PDB_simple_record(-name => "COMPND ", -cont => "9-10",
578							-annotation => $struc->annotation->get_Annotations("compnd"), -rol => "11-70");
579
580	1	50				7	exists $header{'source'} && $self->_write_PDB_simple_record(-name => "SOURCE ", -cont => "9-10",
581							-annotation => $struc->annotation->get_Annotations("source"), -rol => "11-70");
582
583	1	50				7	exists $header{'keywds'} && $self->_write_PDB_simple_record(-name => "KEYWDS ", -cont => "9-10",
584							-annotation => $struc->annotation->get_Annotations("keywds"), -rol => "11-70");
585
586	1	50				7	exists $header{'expdta'} && $self->_write_PDB_simple_record(-name => "EXPDTA ", -cont => "9-10",
587							-annotation => $struc->annotation->get_Annotations("expdta"), -rol => "11-70");
588
589	1	50				8	exists $header{'author'} && $self->_write_PDB_simple_record(-name => "AUTHOR ", -cont => "9-10",
590							-annotation => $struc->annotation->get_Annotations("author"), -rol => "11-70");
591
592	1	50				8	exists $header{'revdat'} && $self->_write_PDB_simple_record(-name => "REVDAT ",
593							-annotation => $struc->annotation->get_Annotations("revdat"), -rol => "8-66");
594
595	1	50				4	exists $header{'sprsde'} && $self->_write_PDB_simple_record(-name => "SPRSDE ", -cont => "9-10",
596							-annotation => $struc->annotation->get_Annotations("sprsde"), -rol => "12-70");
597
598							# JRNL en REMARK 1
599	1					2	my ($jrnl_done, $remark_1_counter);
600	1	50				4	if ( !exists $header{'jrnl'} ) {
601	0					0	$jrnl_done = 1;
602							}
603	1					4	foreach my $ref ($struc->annotation->get_Annotations('reference') ) {
604	1	50				3	if( !$jrnl_done ) { # JRNL record
605	1	50				5	$ref->authors && $self->_write_PDB_simple_record(-name => "JRNL AUTH",
606							-cont => "17-18", -rol => "20-70", -string => $ref->authors );
607	1	50				5	$ref->title && $self->_write_PDB_simple_record(-name => "JRNL TITL",
608							-cont => "17-18", -rol => "20-70", -string => $ref->title );
609	1	50				4	$ref->editors && $self->_write_PDB_simple_record(-name => "JRNL EDIT",
610							-cont => "17-18", -rol => "20-70", -string => $ref->editors );
611	1	50				5	$ref->location && $self->_write_PDB_simple_record(-name => "JRNL REF ",
612							-cont => "17-18", -rol => "20-70", -string => $ref->location );
613	1	50				3	$ref->editors && $self->_write_PDB_simple_record(-name => "JRNL EDIT",
614							-cont => "17-18", -rol => "20-70", -string => $ref->editors );
615	1	50				44	$ref->encoded_ref && $self->_write_PDB_simple_record(-name => "JRNL REFN",
616							-cont => "17-18", -rol => "20-70", -string => $ref->encoded_ref );
617	1					4	$jrnl_done = 1;
618							} else { # REMARK 1
619	0	0				0	if (!$remark_1_counter) { # header line
620	0					0	my $remark_1_header_line = "REMARK 1" . " " x 70;
621	0					0	$self->_print("$remark_1_header_line\n");
622	0					0	$remark_1_counter = 1;
623							}
624							# per reference header
625	0					0	my $rem_line = "REMARK 1 REFERENCE " . $remark_1_counter;
626	0					0	$rem_line .= " " x (80 - length($rem_line) );
627	0					0	$self->_print($rem_line,"\n");
628	0	0				0	$ref->authors && $self->_write_PDB_simple_record(-name => "REMARK 1 AUTH",
629							-cont => "17-18", -rol => "20-70", -string => $ref->authors );
630	0	0				0	$ref->title && $self->_write_PDB_simple_record(-name => "REMARK 1 TITL",
631							-cont => "17-18", -rol => "20-70", -string => $ref->title );
632	0	0				0	$ref->editors && $self->_write_PDB_simple_record(-name => "REMARK 1 EDIT",
633							-cont => "17-18", -rol => "20-70", -string => $ref->editors );
634	0	0				0	$ref->location && $self->_write_PDB_simple_record(-name => "REMARK 1 REF ",
635							-cont => "17-18", -rol => "20-70", -string => $ref->location );
636	0	0				0	$ref->editors && $self->_write_PDB_simple_record(-name => "REMARK 1 EDIT",
637							-cont => "17-18", -rol => "20-70", -string => $ref->editors );
638	0	0				0	$ref->encoded_ref && $self->_write_PDB_simple_record(-name => "REMARK 1 REFN",
639							-cont => "17-18", -rol => "20-70", -string => $ref->encoded_ref );
640	0					0	$remark_1_counter++;
641							}
642							}
643	1	50				3	if (! defined $remark_1_counter ) { # no remark 1 record written yet
644	1					2	my $remark_1_header_line = "REMARK 1" . " " x 70;
645	1					3	$self->_print("$remark_1_header_line\n"); # write dummy (we need this line)
646							}
647
648							# REMARK's (not 1 at the moment, references)
649	1					1	my (%remarks, $remark_num);
650	1					6	for $key (keys %header) {
651	38	100				51	next unless ($key =~ /^remark_(\d+)$/);
652	13	50				20	next if ($1 == 1);
653	13					15	$remarks{$1} = 1;
654							}
655	1					7	for $remark_num (sort {$a <=> $b} keys %remarks) {
	32					23
656	13					17	$self->_write_PDB_remark_record($struc, $remark_num);
657							}
658
659	1	50				8	exists $header{'dbref'} && $self->_write_PDB_simple_record(-name => "DBREF ",
660							-annotation => $struc->annotation->get_Annotations("dbref"), -rol => "8-68");
661	1	50				4	exists $header{'seqadv'} && $self->_write_PDB_simple_record(-name => "SEQADV ",
662							-annotation => $struc->annotation->get_Annotations("seqadv"), -rol => "8-70");
663	1	50				6	exists $header{'seqres'} && $self->_write_PDB_simple_record(-name => "SEQRES ",
664							-annotation => $struc->annotation->get_Annotations("seqres"), -rol => "9-70");
665	1	50				4	exists $header{'modres'} && $self->_write_PDB_simple_record(-name => "MODRES ",
666							-annotation => $struc->annotation->get_Annotations("modres"), -rol => "8-70");
667	1	50				6	exists $header{'het'} && $self->_write_PDB_simple_record(-name => "HET ",
668							-annotation => $struc->annotation->get_Annotations("het"), -rol => "8-70");
669	1	50				7	exists $header{'hetnam'} && $self->_write_PDB_simple_record(-name => "HETNAM ",
670							-annotation => $struc->annotation->get_Annotations("hetnam"), -rol => "9-70");
671	1	50				4	exists $header{'hetsyn'} && $self->_write_PDB_simple_record(-name => "HETSYN ",
672							-annotation => $struc->annotation->get_Annotations("hetsyn"), -rol => "9-70");
673	1	50				7	exists $header{'formul'} && $self->_write_PDB_simple_record(-name => "FORMUL ",
674							-annotation => $struc->annotation->get_Annotations("formul"), -rol => "9-70");
675	1	50				5	exists $header{'helix'} && $self->_write_PDB_simple_record(-name => "HELIX ",
676							-annotation => $struc->annotation->get_Annotations("helix"), -rol => "8-76");
677	1	50				4	exists $header{'sheet'} && $self->_write_PDB_simple_record(-name => "SHEET ",
678							-annotation => $struc->annotation->get_Annotations("sheet"), -rol => "8-70");
679	1	50				4	exists $header{'turn'} && $self->_write_PDB_simple_record(-name => "TURN ",
680							-annotation => $struc->annotation->get_Annotations("turn"), -rol => "8-70");
681	1	50				3	exists $header{'ssbond'} && $self->_write_PDB_simple_record(-name => "SSBOND ",
682							-annotation => $struc->annotation->get_Annotations("ssbond"), -rol => "8-72");
683	1	50				6	exists $header{'link'} && $self->_write_PDB_simple_record(-name => "LINK ",
684							-annotation => $struc->annotation->get_Annotations("link"), -rol => "13-72");
685	1	50				4	exists $header{'hydbnd'} && $self->_write_PDB_simple_record(-name => "HYDBND ",
686							-annotation => $struc->annotation->get_Annotations("hydbnd"), -rol => "13-72");
687	1	50				3	exists $header{'sltbrg'} && $self->_write_PDB_simple_record(-name => "SLTBRG ",
688							-annotation => $struc->annotation->get_Annotations("sltbrg"), -rol => "13-72");
689	1	50				4	exists $header{'cispep'} && $self->_write_PDB_simple_record(-name => "CISPEP ",
690							-annotation => $struc->annotation->get_Annotations("cispep"), -rol => "8-59");
691	1	50				3	exists $header{'site'} && $self->_write_PDB_simple_record(-name => "SITE ",
692							-annotation => $struc->annotation->get_Annotations("site"), -rol => "8-61");
693	1	50				6	exists $header{'cryst1'} && $self->_write_PDB_simple_record(-name => "CRYST1",
694							-annotation => $struc->annotation->get_Annotations("cryst1"), -rol => "7-70");
695	1					4	for my $k (1..3) {
696	3					4	my $origxn = "origx".$k;
697	3					5	my $ORIGXN = uc($origxn)." ";
698	3	50				12	exists $header{$origxn} && $self->_write_PDB_simple_record(-name => $ORIGXN,
699							-annotation => $struc->annotation->get_Annotations($origxn), -rol => "11-55");
700							}
701	1					3	for my $k (1..3) {
702	3					5	my $scalen = "scale".$k;
703	3					4	my $SCALEN = uc($scalen)." ";
704	3	50				12	exists $header{$scalen} && $self->_write_PDB_simple_record(-name => $SCALEN,
705							-annotation => $struc->annotation->get_Annotations($scalen), -rol => "11-55");
706							}
707	1					3	for my $k (1..3) {
708	3					2	my $mtrixn = "mtrix".$k;
709	3					4	my $MTRIXN = uc($mtrixn)." ";
710	3	50				7	exists $header{$mtrixn} && $self->_write_PDB_simple_record(-name => $MTRIXN,
711							-annotation => $struc->annotation->get_Annotations($mtrixn), -rol => "8-60");
712							}
713	1	50				7	exists $header{'tvect'} && $self->_write_PDB_simple_record(-name => "TVECT ",
714							-annotation => $struc->annotation->get_Annotations("tvect"), -rol => "8-70");
715
716							# write out coordinate section
717							#
718	1					1	my %het_res; # hetero residues
719	1					3	$het_res{'HOH'} = 1; # water is default
720	1	50				5	if (exists $header{'het'}) {
721	1					3	my ($het_line) = ($struc->annotation->get_Annotations("het"))[0]->as_text;
722	1					4	$het_line =~ s/^Value: //;
723	1					4	for ( my $k = 0; $k <= length $het_line ; $k += 63) {
724	3					4	my $l = substr $het_line, $k, 63;
725	3					8	$l =~ s/^\s(\S+)\s+.$/$1/;
726	3					7	$het_res{$l} = 1;
727							}
728							}
729	1					4	for my $model ($struc->get_models) {
730							# more then one model ?
731	1	50				2	if ($struc->get_models > 1) {
732	0					0	my $model_line = sprintf("MODEL %4d", $model->id);
733	0					0	$model_line .= " " x (80 - length($model_line) );
734	0					0	$self->_print($model_line, "\n");
735							}
736	1					3	for my $chain ($struc->get_chains($model)) {
737	4					4	my ($residue, $atom, $resname, $resnum, $atom_line, $atom_serial, $atom_icode, $chain_id);
738	0					0	my ($prev_resname, $prev_resnum, $prev_atomicode); # need these for TER record
739	4					5	my $last_record = ""; # Used to spot an ATOM -> HETATM change within a chain
740	4					10	$chain_id = $chain->id;
741	4	100				9	if ( $chain_id eq "default" ) {
742	1					2	$chain_id = " ";
743							}
744	4					20	$self->debug("model_id: $model->id chain_id: $chain_id\n");
745	4					10	for $residue ($struc->get_residues($chain)) {
746	60					340	($resname, $resnum) = split /-/, $residue->id;
747	60					117	for $atom ($struc->get_atoms($residue)) {
748	171	100				205	if ($het_res{$resname}) { # HETATM
749	45	50	66			89	if ( $resname ne "HOH" && $last_record eq "ATOM " ) {
750							# going from ATOM -> HETATM, we have to write TER
751	0					0	my $ter_line = "TER ";
752	0					0	$ter_line .= sprintf("%5d", $atom_serial + 1);
753	0					0	$ter_line .= " ";
754	0					0	$ter_line .= sprintf("%3s ", $prev_resname);
755	0					0	$ter_line .= $chain_id;
756	0					0	$ter_line .= sprintf("%4d", $prev_resnum);
757	0	0				0	$ter_line .= $atom_icode ? $prev_atomicode : " "; # 27
758	0					0	$ter_line .= " " x (80 - length $ter_line); # extend to 80 chars
759	0					0	$self->_print($ter_line,"\n");
760							}
761	45					60	$atom_line = "HETATM";
762							} else {
763	126					108	$atom_line = "ATOM ";
764							}
765	171					113	$last_record = $atom_line;
766	171					246	$atom_line .= sprintf("%5d ", $atom->serial);
767	171					218	$atom_serial = $atom->serial; # we need it for TER record
768	171					196	$atom_icode = $atom->icode;
769							# remember some stuff if next iteration needs writing TER
770	171					124	$prev_resname = $resname;
771	171					119	$prev_resnum = $resnum;
772	171					104	$prev_atomicode = $atom_icode;
773							# getting the name of the atom correct is subtrivial
774	171					215	my $atom_id = $atom->id;
775							# is pdb_atomname set, then use this (most probably set when
776							# reading in the PDB record)
777	171					209	my $pdb_atomname = $atom->pdb_atomname;
778	171	50				193	if( defined $pdb_atomname ) {
779	171					191	$atom_line .= sprintf("%-4s", $pdb_atomname);
780							} else {
781							# start (educated) guessing
782	0					0	my $element = $atom->element;
783	0	0	0			0	if( defined $element && $element ne "H") {
784							# element should be at first two positions (right justified)
785							# ie. Calcium should be "CA "
786							# C alpha should be " CA "
787	0	0				0	if( length($element) == 2 ) {
788	0					0	$atom_line .= sprintf("%-4s", $atom->id);
789							} else {
790	0					0	$atom_line .= sprintf(" %-3s", $atom->id);
791							}
792							} else { # old behaviour do a best guess
793	0	0				0	if ($atom->id =~ /^\dH/) { # H: four positions, left justified
		0
794	0					0	$atom_line .= sprintf("%-4s", $atom->id);
795							} elsif (length($atom_id) == 4) {
796	0	0				0	if ($atom_id =~ /^(H\d\d)(\d)$/) { # turn H123 into 3H12
797	0					0	$atom_line .= $2.$1;
798							} else { # no more guesses, no more alternatives
799	0					0	$atom_line .= $atom_id;
800							}
801							} else { # if we get here and it is not correct let me know
802	0					0	$atom_line .= sprintf(" %-3s", $atom->id);
803							}
804							}
805							}
806							# we don't do alternate location at this moment
807	171					134	$atom_line .= " "; # 17
808	171					126	$atom_line .= sprintf("%3s",$resname); # 18-20
809	171					120	$atom_line .= " ".$chain_id; # 21, 22
810	171					146	$atom_line .= sprintf("%4d", $resnum); # 23-26
811	171	50				198	$atom_line .= $atom->icode ? $atom->icode : " "; # 27
812	171					110	$atom_line .= " "; # 28-30
813	171					217	$atom_line .= sprintf("%8.3f", $atom->x); # 31-38
814	171					238	$atom_line .= sprintf("%8.3f", $atom->y); # 39-46
815	171					256	$atom_line .= sprintf("%8.3f", $atom->z); # 47-54
816	171					243	$atom_line .= sprintf("%6.2f", $atom->occupancy); # 55-60
817	171					244	$atom_line .= sprintf("%6.2f", $atom->tempfactor); # 61-66
818	171					150	$atom_line .= " "; # 67-72
819	171	50				214	$atom_line .= $atom->segID ? # segID 73-76
820							sprintf("%-4s", $atom->segID) :
821							" ";
822	171	50				197	$atom_line .= $atom->element ?
823							sprintf("%2s", $atom->element) :
824							" ";
825	171	50				232	$atom_line .= $atom->charge ?
826							sprintf("%2s", $atom->charge) :
827							" ";
828
829	171					252	$self->_print($atom_line,"\n");
830							}
831							}
832							# write out TER record
833	4	100				17	if ( $resname ne "HOH" ) {
834	3					4	my $ter_line = "TER ";
835	3					7	$ter_line .= sprintf("%5d", $atom_serial + 1);
836	3					4	$ter_line .= " ";
837	3					4	$ter_line .= sprintf("%3s ", $resname);
838	3					4	$ter_line .= $chain_id;
839	3					4	$ter_line .= sprintf("%4d", $resnum);
840	3	50				5	$ter_line .= $atom_icode ? $atom_icode : " "; # 27
841	3					9	$ter_line .= " " x (80 - length $ter_line); # extend to 80 chars
842	3					7	$self->_print($ter_line,"\n");
843							}
844							}
845	1	50				9	if ($struc->get_models > 1) { # we need ENDMDL
846	0					0	my $endmdl_line = "ENDMDL" . " " x 74;
847	0					0	$self->_print($endmdl_line, "\n");
848							}
849							} # for my $model
850
851							# CONECT
852	1					7	my @sources = $struc->get_all_conect_source;
853	1					2	my ($conect_line,@conect, @bond, @hydbond, @saltbridge, $to, $type);
854	1					3	for my $source (@sources) {
855							# get all conect's
856	8					15	my @conect = $struc->conect($source);
857							# classify
858	8					9	for my $con (@conect) {
859	12					20	($to, $type) = split /_/, $con;
860	12	50				18	if($type eq "bond") {
		0
		0
861	12					15	push @bond, $to;
862							} elsif($type eq "hydrogenbonded") {
863	0					0	push @hydbond, $to;
864							} elsif($type eq "saltbridged") {
865	0					0	push @saltbridge, $to;
866							} else {
867	0					0	$self->throw("type $type is unknown for conect");
868							}
869							}
870							# and write out CONECT lines as long as there is something
871							# in one of the arrays
872	8		66			17	while ( @bond \|\| @hydbond \|\| @saltbridge) {
			66
873	8					7	my ($b, $hb, $sb);
874	8					12	$conect_line = "CONECT". sprintf("%5d", $source);
875	8					10	for my $k (0..3) {
876	32					25	$b = shift @bond;
877	32	100				39	$conect_line .= $b ? sprintf("%5d", $b) : " ";
878							}
879	8					9	for my $k (4..5) {
880	16					11	$hb = shift @hydbond;
881	16	50				26	$conect_line .= $hb ? sprintf("%5d", $hb) : " ";
882							}
883	8					5	$sb = shift @saltbridge;
884	8	50				10	$conect_line .= $sb ? sprintf("%5d", $sb) : " ";
885	8					10	for my $k (7..8) {
886	16					9	$hb = shift @hydbond;
887	16	50				19	$conect_line .= $hb ? sprintf("%5d", $hb) : " ";
888							}
889	8					8	$sb = shift @saltbridge;
890	8	50				7	$conect_line .= $sb ? sprintf("%5d", $sb) : " ";
891
892	8					10	$conect_line .= " " x (80 - length($conect_line) );
893	8					16	$self->_print($conect_line, "\n");
894							}
895							}
896
897							# MASTER line contains checksums, we should calculate them of course :)
898	1					4	my $master_line = "MASTER " . $struc->master;
899	1					4	$master_line .= " " x (80 - length($master_line) );
900	1					3	$self->_print($master_line, "\n");
901
902	1					1	my $end_line = "END" . " " x 77;
903	1					3	$self->_print($end_line,"\n");
904
905							}
906
907							=head2 _filehandle
908
909							Title : _filehandle
910							Usage : $obj->_filehandle($newval)
911							Function:
912							Example :
913							Returns : value of _filehandle
914							Args : newvalue (optional)
915
916							=cut
917
918							sub _filehandle{
919	0			0		0	my ($obj,$value) = @_;
920	0	0				0	if( defined $value) {
921	0					0	$obj->{'_filehandle'} = $value;
922							}
923	0					0	return $obj->{'_filehandle'};
924
925							}
926
927							=head2 _noatom
928
929							Title : _noatom
930							Usage : $obj->_noatom($newval)
931							Function:
932							Example :
933							Returns : value of _noatom
934							Args : newvalue (optional)
935
936
937							=cut
938
939							sub _noatom{
940	2			2		2	my ($obj,$value) = @_;
941	2	50				5	if( defined $value) {
942	0					0	$obj->{'_noatom'} = $value;
943							}
944	2					4	return $obj->{'_noatom'};
945
946							}
947
948							=head2 _noheader
949
950							Title : _noheader
951							Usage : $obj->_noheader($newval)
952							Function:
953							Example :
954							Returns : value of _noheader
955							Args : newvalue (optional)
956
957
958							=cut
959
960							sub _noheader{
961	2			2		2	my ($obj,$value) = @_;
962	2	50				4	if( defined $value) {
963	0					0	$obj->{'_noheader'} = $value;
964							}
965	2					4	return $obj->{'_noheader'};
966
967							}
968
969							=head2 _read_PDB_singlecontline
970
971							Title : _read_PDB_singlecontline
972							Usage : $obj->_read_PDB_singlecontline($record, $fromto, $buffer))
973							Function: read single continued record from PDB
974							Returns : concatenated record entry (between $fromto columns)
975							Args : record, colunm delimiters, buffer
976
977							=cut
978
979							sub _read_PDB_singlecontline {
980	9			9		11	my ($self, $record, $fromto, $buffer) = @_;
981	9					9	my $concat_line;
982
983	9					55	my ($begin, $end) = (split (/-/, $fromto));
984	9					9	my $unpack_string = "x8 a2 ";
985	9	50				19	if($begin == 12) { # one additional space
986	0					0	$unpack_string .= "x1 a59";
987							} else {
988	9					8	$unpack_string .= "a60";
989							}
990	9					8	$_ = $$buffer;
991	9		66			18	while (defined( $_ \|\|= $self->_readline ) ) {
992	25	100				117	if ( /^$record/ ) {
993	16					32	my($cont, $rol) = unpack $unpack_string, $_;
994	16	100	66			47	if($cont =~ /\d$/ && $begin == 11) { # continuation line
995							# and text normally at pos 11
996	7					10	$rol =~ s/^\s//; # strip leading space
997							}
998							## no space (store litteraly) $concat_line .= $rol . " ";
999	16					22	$concat_line .= $rol;
1000							} else {
1001	9					9	last;
1002							}
1003
1004	16					46	$_ = undef;
1005							}
1006	9					18	$concat_line =~ s/\s$//; # remove trailing space
1007	9					9	$$buffer = $_;
1008
1009	9					15	return $concat_line;
1010							}
1011
1012
1013							=head2 _read_PDB_jrnl
1014
1015							Title : _read_PDB_jrnl
1016							Usage : $obj->_read_PDB_jrnl($\buffer))
1017							Function: read jrnl record from PDB
1018							Returns : Bio::Annotation::Reference object
1019							Args :
1020
1021							=cut
1022
1023							sub _read_PDB_jrnl {
1024	2			2		3	my ($self, $buffer) = @_;
1025
1026	2					2	$_ = $$buffer;
1027	2					3	my ($auth, $titl,$edit,$ref,$publ,$refn, $pmid, $doi);
1028	2		66			6	while (defined( $_ \|\|= $self->_readline )) {
1029	15	100				30	if (/^JRNL /) {
1030							# this code belgons in a seperate method (shared with
1031							# remark 1 parsing)
1032	13					33	my ($rec, $subr, $cont, $rol) = unpack "A6 x6 A4 A2 x1 A51", $_;
1033	13	100				30	$auth = $self->_concatenate_lines($auth,$rol) if ($subr eq "AUTH");
1034	13	100				20	$titl = $self->_concatenate_lines($titl,$rol) if ($subr eq "TITL");
1035	13	50				15	$edit = $self->_concatenate_lines($edit,$rol) if ($subr eq "EDIT");
1036	13	100				18	$ref = $self->_concatenate_lines($ref ,$rol) if ($subr eq "REF");
1037	13	50				15	$publ = $self->_concatenate_lines($publ,$rol) if ($subr eq "PUBL");
1038	13	100				16	$refn = $self->_concatenate_lines($refn,$rol) if ($subr eq "REFN");
1039	13	50				15	$pmid = $self->_concatenate_lines($pmid,$rol) if ($subr eq "PMID");
1040	13	50				14	$doi = $self->_concatenate_lines($doi,$rol) if ($subr eq "DOI");
1041							} else {
1042	2					3	last;
1043							}
1044
1045	13					29	$_ = undef; # trigger reading of next line
1046							} # while
1047
1048	2					2	$$buffer = $_;
1049	2					12	my $jrnl_ref = Bio::Annotation::Reference->new;
1050
1051	2					5	$jrnl_ref->authors($auth);
1052	2					4	$jrnl_ref->title($titl);
1053	2					4	$jrnl_ref->location($ref);
1054	2					5	$jrnl_ref->publisher($publ);
1055	2					3	$jrnl_ref->editors($edit);
1056	2					4	$jrnl_ref->encoded_ref($refn);
1057	2					4	$jrnl_ref->pubmed($pmid);
1058	2					4	$jrnl_ref->doi($doi);
1059
1060	2					5	return $jrnl_ref;
1061							} # sub _read_PDB_jrnl
1062
1063
1064							=head2 _read_PDB_remark_1
1065
1066							Title : _read_PDB_remark_1
1067							Usage : $obj->_read_PDB_remark_1($\buffer))
1068							Function: read "remark 1" record from PDB
1069							Returns : array of Bio::Annotation::Reference objects
1070							Args :
1071
1072							=cut
1073
1074							sub _read_PDB_remark_1 {
1075	2			2		2	my ($self, $buffer) = @_;
1076
1077	2					3	$_ = $$buffer;
1078	2					2	my ($auth, $titl,$edit,$ref,$publ,$refn,$refnum,$pmid, $doi);
1079	0					0	my @refs;
1080
1081	2		66			9	while (defined( $_ \|\|= $self->_readline )) {
1082	59	100				111	if (/^REMARK 1 /) {
1083	57	100				88	if (/^REMARK 1\s+REFERENCE\s+(\d+)\s*/) {
1084	8					11	$refnum = $1;
1085	8	100				14	if ($refnum != 1) { # this is first line of a reference
1086	7					10	my $rref = Bio::Annotation::Reference->new;
1087	7					13	$rref->authors($auth);
1088	7					7	$rref->title($titl);
1089	7					8	$rref->location($ref);
1090	7					8	$rref->publisher($publ);
1091	7					9	$rref->editors($edit);
1092	7					12	$rref->encoded_ref($refn);
1093	7					9	$rref->pubmed($pmid);
1094	7					7	$rref->doi($doi);
1095	7					7	$auth = $titl = $edit = $ref = $publ = $refn = undef;
1096	7					7	push @refs, $rref;
1097							}
1098							} else {
1099							# this code belgons in a seperate method (shared with
1100							# remark 1 parsing)
1101	49					107	my ($rec, $subr, $cont, $rol) = unpack "A6 x6 A4 A2 x1 A51", $_;
1102	49	100				70	$auth = $self->_concatenate_lines($auth,$rol) if ($subr eq "AUTH");
1103	49	100				73	$titl = $self->_concatenate_lines($titl,$rol) if ($subr eq "TITL");
1104	49	50				55	$edit = $self->_concatenate_lines($edit,$rol) if ($subr eq "EDIT");
1105	49	100				61	$ref = $self->_concatenate_lines($ref ,$rol) if ($subr eq "REF");
1106	49	50				51	$publ = $self->_concatenate_lines($publ,$rol) if ($subr eq "PUBL");
1107	49	100				56	$refn = $self->_concatenate_lines($refn,$rol) if ($subr eq "REFN");
1108	49	50				52	$pmid = $self->_concatenate_lines($pmid,$rol) if ($subr eq "PMID");
1109	49	50				62	$doi = $self->_concatenate_lines($doi,$rol) if ($subr eq "DOI");
1110							}
1111							} else {
1112							# have we seen any reference at all (could be single REMARK 1 line
1113	2	100				5	if ( ! defined ($refnum) ) {
1114	1					2	last; # get out of while()
1115							}
1116
1117							# create last reference
1118	1					2	my $rref = Bio::Annotation::Reference->new;
1119	1					2	$rref->authors($auth);
1120	1					2	$rref->title($titl);
1121	1					2	$rref->location($ref);
1122	1					2	$rref->publisher($publ);
1123	1					1	$rref->editors($edit);
1124	1					2	$rref->encoded_ref($refn);
1125	1					2	$rref->pubmed($pmid);
1126	1					2	$rref->doi($doi);
1127	1					1	push @refs, $rref;
1128	1					2	last;
1129							}
1130
1131	57					111	$_ = undef; # trigger reading of next line
1132							} # while
1133
1134	2					3	$$buffer = $_;
1135
1136	2					5	return @refs;
1137							} # sub _read_PDB_jrnl
1138
1139
1140							=head2 _read_PDB_coordinate_section
1141
1142							Title : _read_PDB_coordinate_section
1143							Usage : $obj->_read_PDB_coordinate_section($\buffer))
1144							Function: read one model from a PDB
1145							Returns : Bio::Structure::Model object
1146							Args :
1147
1148							=cut
1149
1150							sub _read_PDB_coordinate_section {
1151	2			2		4	my ($self, $buffer, $struc) = @_;
1152	2					2	my ($model_num, $chain_name, $residue_name, $atom_name); # to keep track of state
1153	2					2	$model_num = "";
1154	2					2	$chain_name = "";
1155	2					2	$residue_name = "";
1156	2					1	$atom_name = "";
1157
1158	2					2	my $atom_unpack = "x6 a5 x1 a4 a1 a3 x1 a1 a4 a1 x3 a8 a8 a8 a6 a6 x6 a4 a2 a2";
1159	2					1	my $anisou_unpack = "x6 a5 x1 a4 a1 a3 x1 a1 a4 a1 x1 a7 a7 a7 a7 a7 a7 a4 a2 a2";
1160
1161	2					13	my $model = Bio::Structure::Model->new;
1162	2					5	$model->id('default');
1163	2					3	my $noatom = $self->_noatom;
1164	2					2	my ($chain, $residue, $atom, $old);
1165	0					0	my (%_ch_in_model); # which chains are already in this model
1166
1167	2					3	$_ = $$buffer;
1168	2		66			5	while (defined( $_ \|\|= $self->_readline )) {
1169							# start of a new model
1170	659	50				837	if (/^MODEL\s+(\d+)/) {
1171	0					0	$model_num = $1;
1172	0					0	$self->debug("_read_PDB_coor: parsing model $model_num\n");
1173	0					0	$model->id($model_num);
1174	0	0				0	if (/^MODEL\s+\d+\s+\S+/) { # old format (pre 2.1)
1175	0					0	$old = 1;
1176							}
1177							}
1178							# old hier ook setten XXX
1179							# ATOM lines, if first set chain
1180	659	100				1449	if (/^(ATOM \|HETATM\|SIGATM)/) {
1181	653					3466	my @line_elements = unpack $atom_unpack, $_;
1182	653					685	my $pdb_atomname = $line_elements[1]; # need to get this before removing spaces
1183	653					1058	for my $k (0 .. $#line_elements) {
1184	9795					10642	$line_elements[$k] =~ s/^\s+//; # remove leading space
1185	9795					7472	$line_elements[$k] =~ s/\s+$//; # remove trailing space
1186	9795	100				13758	$line_elements[$k] = undef if ($line_elements[$k] =~ /^\s*$/);
1187							}
1188	653					1036	my ($serial, $atomname, $altloc, $resname, $chainID, $resseq, $icode, $x, $y, $z,
1189							$occupancy, $tempfactor, $segID, $element, $charge) = @line_elements;
1190	653	100				851	$chainID = 'default' if ( !defined $chainID );
1191	653	100				780	if ($chainID ne $chain_name) { # possibly a new chain
1192							# fix for bug #1187
1193							# we can have ATOM/HETATM of an already defined chain (A B A B)
1194							# e.g. 1abm
1195
1196	5	50				6	if (exists $_ch_in_model{$chainID} ) { # we have already seen this chain in this model
1197	0					0	$chain = $_ch_in_model{$chainID};
1198							} else { # we create a new chain
1199	5					19	$chain = Bio::Structure::Chain->new;
1200	5					10	$struc->add_chain($model,$chain);
1201	5					8	$chain->id($chainID);
1202	5					6	$_ch_in_model{$chainID} = $chain;
1203							}
1204	5					7	$chain_name = $chain->id;
1205							}
1206							#my $res_name_num = $resname."-".$resseq;
1207	653					718	my $res_name_num = $resname."-".$resseq;
1208	653	50				697	$res_name_num .= '.'.$icode if $icode;
1209	653	100				730	if ($res_name_num ne $residue_name) { # new residue
1210	157					286	$residue = Bio::Structure::Residue->new;
1211	157					296	$struc->add_residue($chain,$residue);
1212	157					213	$residue->id($res_name_num);
1213	157					112	$residue_name = $res_name_num;
1214	157					119	$atom_name = ""; # only needed inside a residue
1215							}
1216							# get out of here if we don't want the atom objects
1217	653	50				803	if ($noatom) {
1218	0					0	$_ = undef;
1219	0					0	next;
1220							}
1221							# alternative location: only take first one
1222	653	0	33			931	if ( $altloc && ($altloc =~ /\S+/) && ($atomname eq $atom_name) ) {
			33
1223	0					0	$_ = undef; # trigger reading next line
1224	0					0	next;
1225							}
1226	653	50				1193	if (/^(ATOM \|HETATM)/) { # ATOM / HETATM
1227	653					490	$atom_name = $atomname;
1228	653					1143	$atom = Bio::Structure::Atom->new;
1229	653					1103	$struc->add_atom($residue,$atom);
1230	653					1049	$atom->id($atomname);
1231	653					896	$atom->pdb_atomname($pdb_atomname); # store away PDB atomname for writing out
1232	653					806	$atom->serial($serial);
1233	653					726	$atom->icode($icode);
1234	653					787	$atom->x($x);
1235	653					756	$atom->y($y);
1236	653					669	$atom->z($z);
1237	653					785	$atom->occupancy($occupancy);
1238	653					671	$atom->tempfactor($tempfactor);
1239	653					816	$atom->segID($segID); # deprecated but used by people
1240	653	50				753	if (! $old ) {
1241	653					690	$atom->element($element);
1242	653					738	$atom->charge($charge);
1243							}
1244							}
1245							else { # SIGATM
1246	0					0	my $sigx = $x;
1247	0					0	my $sigy = $y;
1248	0					0	my $sigz = $z;
1249	0					0	my $sigocc = $occupancy;
1250	0					0	my $sigtemp = $tempfactor;
1251	0	0				0	if ($atom_name ne $atomname) { # something wrong with PDB file
1252	0					0	$self->throw("A SIGATM record should have the same $atomname as the previous record $atom_name\n");
1253							}
1254	0					0	$atom->sigx($sigx);
1255	0					0	$atom->sigy($sigy);
1256	0					0	$atom->sigz($sigz);
1257	0					0	$atom->sigocc($sigocc);
1258	0					0	$atom->sigtemp($sigtemp);
1259
1260							}
1261							} # ATOM\|HETARM\|SIGATM
1262
1263							# ANISOU \| SIGUIJ lines
1264	659	50				1279	if (/^(ANISOU\|SIGUIJ)/) {
1265	0	0				0	if ($noatom) {
1266	0					0	$_ = undef;
1267	0					0	next;
1268							}
1269	0					0	my @line_elements = unpack $anisou_unpack, $_;
1270	0					0	for my $k (0 .. $#line_elements) {
1271	0					0	$line_elements[$k] =~ s/^\s+//; # remove leading space
1272	0					0	$line_elements[$k] =~ s/\s+$//; # remove trailing space
1273	0	0				0	$line_elements[$k] = undef if ($line_elements[$k] =~ /^\s*$/);
1274							}
1275	0					0	my ($serial, $atomname, $altloc, $resname, $chainID, $resseq, $icode,
1276							$u11,$u22, $u33, $u12, $u13, $u23, $segID, $element, $charge) = @line_elements;
1277	0					0	$self->debug("read_PDB_coor: parsing ANISOU record: $serial $atomname\n");
1278	0	0	0			0	if ( $altloc && ($altloc =~ /\S+/) && ($atomname eq $atom_name) ) {
			0
1279	0					0	$_ = undef;
1280	0					0	next;
1281							}
1282	0	0				0	if (/^ANISOU/) {
1283	0	0				0	if ($atom_name ne $atomname) { # something wrong with PDB file
1284	0					0	$self->throw("A ANISOU record should have the same $atomname as the previous record $atom_name\n");
1285							}
1286	0					0	$atom->aniso("u11",$u11);
1287	0					0	$atom->aniso("u22",$u22);
1288	0					0	$atom->aniso("u33",$u33);
1289	0					0	$atom->aniso("u12",$u12);
1290	0					0	$atom->aniso("u13",$u13);
1291	0					0	$atom->aniso("u23",$u23);
1292							}
1293							else { # SIGUIJ
1294	0	0				0	if ($atom_name ne $atomname) { # something wrong with PDB file
1295	0					0	$self->throw("A SIGUIJ record should have the same $atomname as the previous record $atom_name\n");
1296							}
1297							# could use different variable names, but hey ...
1298	0					0	$atom->aniso("sigu11",$u11);
1299	0					0	$atom->aniso("sigu22",$u22);
1300	0					0	$atom->aniso("sigu33",$u33);
1301	0					0	$atom->aniso("sigu12",$u12);
1302	0					0	$atom->aniso("sigu13",$u13);
1303	0					0	$atom->aniso("sigu23",$u23);
1304							}
1305							} # ANISOU \| SIGUIJ
1306
1307	659	100				842	if (/^TER /) {
1308	4					6	$_ = undef;
1309	4					13	next;
1310							}
1311
1312	655	50				724	if (/^ENDMDL/) {
1313	0					0	$_ = $self->_readline;
1314	0					0	last;
1315							}
1316
1317	655	100				903	if (/^(CONECT\|MASTER)/) { # get out of here
1318							# current line is OK
1319	2					4	last;
1320							}
1321	653					1707	$_ = undef;
1322
1323							} # while
1324
1325	2					4	$$buffer = $_;
1326
1327	2					9	return $model;
1328							} # _read_PDB_coordinate_section
1329
1330
1331							sub _write_PDB_simple_record {
1332	38			38		65	my ($self, @args) = @_;
1333	38					91	my ($name, $cont , $annotation, $rol, $string) =
1334							$self->_rearrange([qw(
1335							NAME
1336							CONT
1337							ANNOTATION
1338							ROL
1339							STRING
1340							)],
1341							@args);
1342	38	50	66			99	if (defined $string && defined $annotation) {
1343	0					0	$self->throw("you can only supply one of -annoation or -string");
1344							}
1345	38					25	my ($output_string, $ann_string, $t_string);
1346	38					109	my ($rol_begin, $rol_end) = $rol =~ /^(\d+)-(\d+)$/;
1347	38					59	my $rol_length = $rol_end - $rol_begin +1;
1348	38	100				52	if ($string) {
1349	4	100				6	if (length $string > $rol_length) {
1350							# we might need to split $string in multiple lines
1351	2					4	while (length $string > $rol_length) {
1352							# other option might be to go for a bunch of substr's
1353	3					26	my @c = split//,$string;
1354	3					3	my $t = $rol_length; # index into @c
1355	3					8	while ($c[$t] ne " ") { # find first space, going backwards
1356	21					40	$self->debug("c[t]: $c[$t] $t\n");
1357	21					18	$t--;
1358	21	50				36	if ($t == 0) { $self->throw("Found no space for $string\n"); }
	0					0
1359							}
1360	3					8	$self->debug("t: $t rol_length: $rol_length\n");
1361	3					5	$ann_string .= substr($string, 0, $t);
1362	3					5	$self->debug("ann_string: $ann_string\n");
1363	3					4	$ann_string .= " " x ($rol_length - $t );
1364	3					46	$string = substr($string, $t+1);
1365	3					8	$string =~ s/^\s+//;
1366	3					9	$self->debug("ann_string: $ann_string~~\nstring: $string~~\n");
1367							}
1368	2					3	$ann_string .= $string;
1369							} else {
1370	2					3	$ann_string = $string;
1371							}
1372							} else {
1373	34					51	$ann_string = $annotation->as_text;
1374	34					85	$ann_string =~ s/^Value: //;
1375							}
1376							# ann_string contains the thing to write out, writing out happens below
1377	38					33	my $ann_length = length $ann_string;
1378
1379	38					97	$self->debug("ann_string: $ann_string\n");
1380	38	100				49	if ($cont) {
1381	10					27	my ($c_begin, $c_end) = $cont =~ /^(\d+)-(\d+)$/;
1382	10	100				17	if ( $ann_length > $rol_length ) { # we need to continuation lines
1383	6					4	my $first_line = 1;
1384	6					5	my $cont_number = 2;
1385	6					6	my $out_line;
1386	6					2	my $num_pos = $rol_length;
1387	6					5	my $i = 0;
1388	6					11	while( $i < $ann_length ) {
1389	15					17	$t_string = substr($ann_string, $i, $num_pos);
1390	15					29	$self->debug("t_string: $t_string~~$i $num_pos\n");
1391	15	100				18	if ($first_line) {
1392	6					14	$out_line = $name . " " x ($rol_begin - $c_begin) . $t_string;
1393	6					10	$out_line .= " " x (80 - length($out_line) ) . "\n";
1394	6					4	$first_line = 0;
1395	6					5	$output_string = $out_line;
1396	6					5	$i += $num_pos; # first do counter
1397	6	100				11	if ($rol_begin - $c_end == 1) { # next line one character less
1398	4					7	$num_pos--;
1399							}
1400							} else {
1401	9					16	$out_line = $name . sprintf("%2d",$cont_number);
1402							# a space after continuation number
1403	9	100				11	if ($rol_begin - $c_end == 1) { # one space after cont number
1404	6					4	$out_line .= " ";
1405	6					4	$out_line .= $t_string;
1406							} else {
1407	3					6	$out_line .= " " x ($rol_begin - $c_end - 1) . $t_string;
1408							}
1409	9					12	$out_line .= " " x (80 -length($out_line) ) . "\n";
1410	9					7	$cont_number++;
1411	9					9	$output_string .= $out_line;
1412	9					14	$i += $num_pos;
1413							}
1414							}
1415							} else { # no continuation
1416	4					4	my $spaces = $rol_begin - $c_begin; # number of spaces need to insert
1417	4					7	$output_string = $name . " " x $spaces . $ann_string;
1418	4					6	$output_string .= " " x (80 - length($output_string) );
1419							}
1420							} else { # no contintuation lines
1421	28	50				40	if ($ann_length < $rol_length) {
1422	0					0	$output_string = $name . $ann_string;
1423	0					0	$output_string .= " " x (80 - length($output_string) );
1424							} else {
1425	28					39	for (my $i = 0; $i < $ann_length; $i += $rol_length) {
1426	249					131	my $out_line;
1427	249					171	$t_string = substr($ann_string, $i, $rol_length);
1428	249					198	$out_line = $name . $t_string;
1429	249					185	$out_line .= " " x (80 -length($out_line) ) . "\n";
1430	249					335	$output_string .= $out_line;
1431							}
1432							}
1433							}
1434	38					90	$output_string =~ s/\n$//; # remove trailing newline
1435	38					93	$self->_print("$output_string\n");
1436
1437							}
1438
1439							sub _write_PDB_remark_record {
1440	13			13		12	my ($self, $struc, $remark_num) = @_;
1441	13					24	my ($ann) = $struc->annotation->get_Annotations("remark_$remark_num");
1442	13					26	my $name = sprintf("REMARK %3d ",$remark_num);
1443	13					28	$self->_write_PDB_simple_record(-name => $name, -annotation => $ann, -rol => "12-70");
1444							}
1445
1446							1;