1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
|
# Copyright 2004-2008 Nanorex, Inc. See LICENSE file for details.
"""
DnaGenHelper.py -- DNA generator helper classes, based on empirical data.
WARNING: this file has been mostly superseded by DnaDuplex.py.
It is used only in DnaGenerator.py, also superseded.
@author: Will Ware
@version: $Id$
@copyright: 2004-2008 Nanorex, Inc. See LICENSE file for details.
History:
Jeff 2007-06-13 (created)
- Transfered class Dna, A_Dna, A_Dna_PAM5, B_Dna, B_Dna_PAM5,
Z_Dna abd Z_Dna_PAM5 from Will Ware's DNAGenerator.py.
- Added accessor methods getBaseRise/setBaseRise
Mark 2007-08-17:
- make() creates a duplex structure only (no single strands).
- Created standard directory structure for DNA MMP files.
- PAM5 MMP files reworked and simplfied.
- A single PAM5 base-pair duplex is made correctly.
- Moved Dna constants to Dna_Constants.py.
- Changed default base to "X" (unassigned) from "N" (aNy base).
- Implemented Standard IUB Codes (see Dna_Constants.py)
Mark 2007-10-18:
- Did a major rewrite of this module, superseding it --
DnaDuplex.py.
Bruce 2008-1-1:
- renamed this from Dna.py to DnaGenHelper.py so it's
not in a way of a proposed "dna" package.
"""
# To do:
#
# 1) Atomistic and PAM5 generated models should have exact orientation
# (i.e. rotational origin).
import foundation.env as env
import os
import re
from math import sin, cos, pi
from utilities.debug import print_compact_traceback
from platform_dependent.PlatformDependent import find_plugin_dir
from files.mmp.files_mmp import readmmp
from geometry.VQT import V
from commands.Fuse.fusechunksMode import fusechunksBase
from utilities.Log import orangemsg
from utilities.exception_classes import PluginBug
from dna.model.Dna_Constants import basesDict, dnaDict
atompat = re.compile("atom (\d+) \((\d+)\) \((-?\d+), (-?\d+), (-?\d+)\)")
numberPattern = re.compile(r"^\s*(\d+)\s*$")
basepath_ok, basepath = find_plugin_dir("DNA")
if not basepath_ok:
env.history.message(orangemsg("The cad/plugins/DNA directory is missing."))
RIGHT_HANDED = -1
LEFT_HANDED = 1
class Dna:
"""
Dna base class. It is inherited by B_Dna and Z_Dna subclasses.
@ivar baseRise: The rise (spacing) between base-pairs along the helical
(Z) axis.
@type baseRise: float
@ivar handedness: Right-handed (B and A forms) or left-handed (Z form).
@type handedness: int
@ivar model: The model representation, where:
- "Atomistic" = atomistic model
- "PAM5" = PAM5 reduced model.
@type model: str
@ivar sequence: The sequence (of strand A of the duplex).
@type sequence: str
"""
def make(self, group, inSequence, basesPerTurn, position = V(0, 0, 0)):
"""
Makes a DNA duplex with the sequence I{inSequence}.
The duplex is oriented with its central axis coincident
to the Z axis.
@param assy: The assembly (part).
@type assy: L{assembly}
@param group: The group node object containing the DNA. The caller
is responsible for creating an empty group and passing
it here. When finished, this group will contain the DNA
model.
@type group: L{Group}
@param basesPerTurn: The number of bases per helical turn.
@type basesPerTurn: float
@param position: The position in 3d model space at which to create
the DNA strand. This should always be 0, 0, 0.
@type position: position
"""
self.sequence = inSequence
assy = group.assy
baseList = []
def insertBaseFromMmp(filename, subgroup, tfm, position = position):
"""
Insert the atoms for a nucleic acid base from an MMP file into
a single chunk.
- If atomistic, the atoms for each base are in a separate chunk.
- If PAM5, the pseudo atoms for each base-pair are together in a
chunk.
@param filename: The mmp filename containing the base
(or base-pair).
@type filename: str
@param subgroup: The part group to add the atoms to.
@type subgroup: L{Group}
@param tfm: Transform applied to all new base atoms.
@type tfm: V
@param position: The origin in space of the DNA duplex, where the
3' end of strand A is 0, 0, 0.
@type position: L{V}
"""
try:
ok, grouplist = readmmp(assy, filename, isInsert = True)
except IOError:
raise PluginBug("Cannot read file: " + filename)
if not grouplist:
raise PluginBug("No atoms in DNA base? " + filename)
viewdata, mainpart, shelf = grouplist
for member in mainpart.members:
# 'member' is a chunk containing a set of base-pair atoms.
for atm in member.atoms.values():
atm._posn = tfm(atm._posn) + position
if atm.element.symbol in ('Se3', 'Ss3', 'Ss5'):
if atm.getDnaBaseName() == "a":
baseLetter = currentBaseLetter
else:
try:
baseLetter = \
basesDict[currentBaseLetter]['Complement']
except:
# If complement not found, just assign same
# letter.
baseLetter = currentBaseLetter
if 0:
print "Ss(%r) being set to %r." \
% (atm.getDnaBaseName(), baseLetter)
atm.setDnaBaseName(baseLetter)
member.name = currentBaseLetter
subgroup.addchild(member)
baseList.append(member)
# Clean up.
del viewdata
shelf.kill()
def rotateTranslateXYZ(inXYZ, theta, z):
"""
Returns the new XYZ coordinate rotated by I{theta} and
translated by I{z}.
@param inXYZ: The original XYZ coordinate.
@type inXYZ: V
@param theta: The base twist angle.
@type theta: float
@param z: The base rise.
@type z: float
@return: The new XYZ coordinate.
@rtype: V
"""
c, s = cos(theta), sin(theta)
x = c * inXYZ[0] + s * inXYZ[1]
y = -s * inXYZ[0] + c * inXYZ[1]
return V(x, y, inXYZ[2] + z)
# Begin making DNA.
subgroup = group
subgroup.open = False
# Calculate the twist per base in radians.
twistPerBase = (self.handedness * 2 * pi) / basesPerTurn
theta = 0.0
z = 0.5 * self.baseRise * (len(self.sequence) - 1)
# Create PAM5 or PAM3 duplex.
for i in range(len(self.sequence)):
currentBaseLetter = self.sequence[i]
basefile, zoffset, thetaOffset = \
self._strandAinfo(currentBaseLetter, i)
def tfm(v, theta = theta + thetaOffset, z1 = z + zoffset):
return rotateTranslateXYZ(v, theta, z1)
insertBaseFromMmp(basefile, subgroup, tfm)
theta -= twistPerBase
z -= self.baseRise
# Fuse the base chunks together into continuous strands.
fcb = fusechunksBase()
fcb.tol = 1.5
for i in range(len(baseList) - 1):
fcb.find_bondable_pairs([baseList[i]], [baseList[i + 1]])
fcb.make_bonds(assy)
try:
self._postProcess(baseList)
except:
if env.debug():
print_compact_traceback(
"debug: exception in %r._postProcess(baseList = %r) \
(reraising): " % (self, baseList,))
raise
return
def _postProcess(self, baseList):
return
def _baseFileName(self, basename):
"""
Returns the full pathname to the mmp file containing the atoms
of a nucleic acid base (or base-pair).
Example: If I{basename} is "MidBasePair" and this is a PAM5 model of
B-DNA, this returns:
- "C:$HOME\cad\plugins\DNA\B-DNA\PAM5-bases\MidBasePair.mmp"
@param basename: The basename of the mmp file without the extention
(i.e. "adenine", "MidBasePair", etc.).
@type basename: str
@return: The full pathname to the mmp file.
@rtype: str
"""
form = self.form # A-DNA, B-DNA or Z-DNA
model = self.model + '-bases' # Atomistic or PAM5
return os.path.join(basepath, form, model, '%s.mmp' % basename)
def getBaseRise( self ):
"""
Get the base rise (spacing) between base-pairs.
"""
return float( self.baseRise )
def setBaseRise( self, inBaseRise ):
"""
Set the base rise (spacing) between base-pairs.
@param inBaseRise: The base rise in Angstroms.
@type inBaseRise: float
"""
self.baseRise = inBaseRise
pass
class A_Dna(Dna):
"""
Provides an atomistic model of the A form of DNA.
The geometry for A-DNA is very twisty and funky. We need to to research
the A form since it's not a simple helix (like B) or an alternating helix
(like Z).
@attention: This class is not implemented yet.
"""
form = "A-DNA"
model = "Atomistic"
baseRise = dnaDict['A-DNA']['DuplexRise']
handedness = RIGHT_HANDED
def _strandAinfo(self, baseLetter, index):
"""
Raise exception since A-DNA is not support.
"""
raise PluginBug("A-DNA is not yet implemented.")
def _strandBinfo(self, sequence, index):
"""
Raise exception since A-DNA is not support.
"""
raise PluginBug("A-DNA is not yet implemented.")
pass
class A_Dna_Atomistic(A_Dna):
"""
Provides an atomistic model of the A form of DNA.
@attention: This class is not implemented yet.
"""
pass
class A_Dna_PAM5(A_Dna):
"""
Provides a PAM5 reduced model of the B form of DNA.
@attention: This class is not implemented yet.
"""
pass
class B_Dna(Dna):
"""
Provides an atomistic model of the B form of DNA.
"""
form = "B-DNA"
baseRise = dnaDict['B-DNA']['DuplexRise']
handedness = RIGHT_HANDED
pass
class B_Dna_Atomistic(B_Dna):
"""
Provides an atomistic model of the B form of DNA.
"""
model = "Atomistic"
def _strandAinfo(self, baseLetter, index):
"""
Returns parameters needed to add a base to strand A.
@param baseLetter: The base letter.
@type baseLetter: str
@param index: Index in base sequence. This is unused.
@type index: int
"""
zoffset = 0.0
thetaOffset = 0.0
basename = basesDict[baseLetter]['Name']
basefile = self._baseFileName(basename)
return (basefile, zoffset, thetaOffset)
def _strandBinfo(self, baseLetter, index):
"""
Returns parameters needed to add a base to strand B.
@param baseLetter: The base letter.
@type baseLetter: str
@param index: Index in base sequence. This is unused.
@type index: int
"""
zoffset = 0.0
thetaOffset = 210 * (pi / 180)
basename = basesDict[baseLetter]['Name']
basefile = self._baseFileName(basename)
return (basefile, zoffset, thetaOffset)
pass
class B_Dna_PAM5(B_Dna):
"""
Provides a PAM5 reduced model of the B form of DNA.
"""
model = "PAM5"
def _isStartPosition(self, index):
"""
Returns True if I{index} points the first base position (5') of
self's sequence.
@param index: Index in base sequence.
@type index: int
@return: True if index is zero.
@rtype : bool
"""
if index == 0:
return True
else:
return False
def _isEndPosition(self, index):
"""
Returns True if I{index} points the last base position (3') of self's
sequence.
@param index: Index in base sequence.
@type index: int
@return: True if index is zero.
@rtype : bool
"""
if index == len(self.sequence) - 1:
return True
else:
return False
def _strandAinfo(self, baseLetter, index):
"""
Returns parameters needed to add a base to strand A.
@param baseLetter: The base letter.
@type baseLetter: str
@param index: Index in base sequence.
@type index: int
"""
zoffset = 0.0
thetaOffset = 0.0
basename = "MiddleBasePair"
if self._isStartPosition(index):
basename = "StartBasePair"
if self._isEndPosition(index):
basename = "EndBasePair"
if len(self.sequence) == 1:
basename = "SingleBasePair"
basefile = self._baseFileName(basename)
return (basefile, zoffset, thetaOffset)
def _postProcess(self, baseList): # bruce 070414
"""
Set bond direction on the backbone bonds.
@param baseList: List of basepair chunks that make up the duplex.
@type baseList: list
"""
# This implem depends on the specifics of how the end-representations
# are terminated. If that's changed, it might stop working or it might
# start giving wrong results. In the current representation,
# baseList[0] (a chunk) has two bonds whose directions we must set,
# which will determine the directions of their strands:
# Ss5 -> Sh5, and Ss5 <- Pe5.
# Just find those bonds and set the strand directions (until such time
# as they can be present to start with in the end1 mmp file).
# (If we were instead passed all the atoms, we could be correct if we
# just did this to the first Pe5 and Sh5 we saw, or to both of each if
# setting the same direction twice is allowed.)
atoms = baseList[0].atoms.values()
Pe_list = filter( lambda atom: atom.element.symbol in ('Pe5'), atoms)
Sh_list = filter( lambda atom: atom.element.symbol in ('Sh5'), atoms)
if len(Pe_list) == len(Sh_list) == 1:
for atom in Pe_list:
assert len(atom.bonds) == 1
atom.bonds[0].set_bond_direction_from(atom, 1, propogate = True)
for atom in Sh_list:
assert len(atom.bonds) == 1
atom.bonds[0].set_bond_direction_from(atom, -1, propogate = True)
else:
#bruce 070604 mitigate bug in above code when number of bases == 1
# by not raising an exception when it fails.
msg = "Warning: strand not terminated, bond direction not set \
(too short)"
env.history.message( orangemsg( msg))
# Note: It turns out this bug is caused by a bug in the rest of the
# generator (which I didn't try to diagnose) -- for number of
# bases == 1 it doesn't terminate the strands, so the above code
# can't find the termination atoms (which is how it figures out
# what to do without depending on intimate knowledge of the base
# mmp file contents).
# print "baseList = %r, its len = %r, atoms in [0] = %r" % \
# (baseList, len(baseList), atoms)
## baseList = [<molecule 'unknown' (11 atoms) at 0xb3d6f58>],
## its len = 1, atoms in [0] = [Ax1, X2, X3, Ss4, Pl5, X6, X7, Ss8, Pl9, X10, X11]
# It would be a mistake to fix this here (by giving it that
# intimate knowledge) -- instead we need to find and fix the bug
# in the rest of generator when number of bases == 1.
return
pass
class B_Dna_PAM3(B_Dna_PAM5):
"""
Provides a PAM3 reduced model of the B form of DNA.
"""
model = "PAM3"
def _postProcess(self, baseList): # bruce 070414
"""
Set bond direction on the backbone bonds.
@param baseList: List of basepair chunks that make up the duplex.
@type baseList: list
@note: baseList must contain at least two base-pair chunks.
"""
# This implem depends on the specifics of how the end-representations
# are terminated. If that's changed, it might stop working or it might
# start giving wrong results. In the current representation,
# baseList[0] (a chunk) is the starting end of the duplex. It has two
# bonds whose directions we should set, which will determine the
# directions of their strands: Se3 -> Ss3, and Sh3 <- Ss3.
# Just find those bonds and set the strand directions.
assert len(baseList) >= 2
basepair1_atoms = baseList[0].atoms.values() # StartBasePair.MMP
basepair2_atoms = baseList[1].atoms.values() # MiddleBasePair or EndBasePair.MMP
Se3_list = filter( lambda atom: atom.element.symbol in ('Se3'), basepair1_atoms)
Sh3_list = filter( lambda atom: atom.element.symbol in ('Sh3'), basepair1_atoms)
# To set the direction of the Se3 -> Ss3 bond, we need the Ss3 atom
# from the second base-pair (i.e. baseList[1]) that is connected to
# the Se3 atom in the first base-pair.
# Ss3_list will have only one Ss3 atom if the duplex is only two
# base-pairs long. Otherwise, Ss3_list will have two Ss3 atoms.
Ss3_list = filter( lambda atom: atom.element.symbol in ('Ss3'), basepair2_atoms)
if len(Se3_list) == len(Sh3_list) == 1:
for atom in Se3_list:
assert len(atom.bonds) == 2
# This is fragile since it is dependent on the bond order in the
# PAM3 StartBasePair.MMP file. If the order gets changed in that
# file, this will fail.
# A more robust approach would be to check for the Se3 -> Ss3
# bond, which is the one we want here. Mark 2007-09-27.
#atom.bonds[1].set_bond_direction_from(atom, 1, propogate = True)
# This implements the more robust stategy mentioned
# above. I'd like Bruce to review it and confirm it is good.
# Mark 2007-09-27.
#atom.bonds[1].set_bond_direction_from(Ss3_list[0], -1, propogate = True)
try: # Try the first Ss3 atom in Ss3_list.
atom.bonds[1].set_bond_direction_from(Ss3_list[0], -1, propogate = True)
except: # That wasn't it, so it must be the second Ss3 atom.
atom.bonds[1].set_bond_direction_from(Ss3_list[1], -1, propogate = True)
for atom in Sh3_list:
assert len(atom.bonds) == 1
atom.bonds[0].set_bond_direction_from(atom, -1, propogate = True)
else:
#bruce 070604 mitigate bug in above code when number of bases == 1
# by not raising an exception when it fails.
msg = "Warning: strand not terminated, bond direction not set \
(too short)"
env.history.message( orangemsg( msg))
return
class Z_Dna(Dna):
"""
Provides an atomistic model of the Z form of DNA.
"""
form = "Z-DNA"
baseRise = dnaDict['Z-DNA']['DuplexRise']
handedness = LEFT_HANDED
class Z_Dna_PAM5(Z_Dna):
"""
Provides an atomistic model of the Z form of DNA.
@attention: This class is not implemented yet.
"""
model = "Atomistic"
def _strandAinfo(self, baseLetter, index):
"""
Returns parameters needed to add a base to strand A.
@param baseLetter: The base letter.
@type baseLetter: str
@param index: Index in base sequence.
@type index: int
"""
thetaOffset = 0.0
basename = basesDict[baseLetter]['Name']
if (index & 1) != 0:
# Index is odd.
basename += "-outer"
zoffset = 2.045
else:
# Index is even.
basename += '-inner'
zoffset = 0.0
basefile = self._baseFileName(basename)
return (basefile, zoffset, thetaOffset)
def _strandBinfo(self, baseLetter, index):
"""
Returns parameters needed to add a base to strand B.
@param baseLetter: The base letter.
@type baseLetter: str
@param index: Index in base sequence. This is unused.
@type index: int
"""
thetaOffset = 0.5 * pi
basename = basesDict[baseLetter]['Name']
if (index & 1) != 0:
basename += '-inner'
zoffset = -0.055
else:
basename += "-outer"
zoffset = -2.1
basefile = self._baseFileName(basename)
return (basefile, zoffset, thetaOffset)
class Z_Dna_PAM5(Z_Dna):
"""
Provides a PAM5 reduced model of the Z form of DNA.
@attention: This class is not implemented yet.
"""
pass
|