1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
|
# Copyright 2005-2008 Nanorex, Inc. See LICENSE file for details.
"""
Dna_Constants.py -- constants for Dna.
Note: these are used both by the newer DnaDuplex.py,
and the older DnaGenHelper.py which it supersedes
(and their associated files).
@author: Mark Sims
@version: $Id$
@copyright: 2005-2008 Nanorex, Inc. See LICENSE file for details.
@see: References:
- U{The Standard IUB codes used in NanoEngineer-1
<http://www.idtdna.com/InstantKB/article.aspx?id=13763>}
- U{http://en.wikipedia.org/wiki/DNA}
- U{http://en.wikipedia.org/wiki/Image:Dna_pairing_aa.gif}
History:
2007-08-19 - Started out as part of DnaGenHelper.py
"""
import foundation.env as env
from utilities.constants import purple, brass, steelblue, lightgreen, darkgray, lightblue
from utilities.constants import darkorange, violet, copper, olive, gray
from utilities.prefs_constants import adnaBasesPerTurn_prefs_key, adnaRise_prefs_key
from utilities.prefs_constants import bdnaBasesPerTurn_prefs_key, bdnaRise_prefs_key
from utilities.prefs_constants import zdnaBasesPerTurn_prefs_key, zdnaRise_prefs_key
from PyQt4.Qt import QString
# Table of the one-letter base codes accepted in sequences. Each code maps to:
#   'Name'       - a human-readable description of the code,
#   'Complement' - the code letter of the complementary base(s),
#   'Color'      - a color name (string) associated with the code.
# The ambiguity codes follow the standard IUB/IUPAC nucleotide notation.
# 'S' (G or C) and 'W' (A or T) are their own complements: complementing
# {G, C} gives {C, G} and complementing {A, T} gives {T, A}.
basesDict = {
    'A': {'Name': 'Adenine', 'Complement': 'T', 'Color': 'darkorange'},
    'C': {'Name': 'Cytosine', 'Complement': 'G', 'Color': 'cyan'},
    'G': {'Name': 'Guanine', 'Complement': 'C', 'Color': 'green'},
    'T': {'Name': 'Thymine', 'Complement': 'A', 'Color': 'teal'},
    'U': {'Name': 'Uracil', 'Complement': 'A', 'Color': 'darkblue'},
    'X': {'Name': 'Undefined', 'Complement': 'X', 'Color': 'darkred'},
    'N': {'Name': 'aNy base', 'Complement': 'N', 'Color': 'orchid'},
    'B': {'Name': 'C,G or T', 'Complement': 'V', 'Color': 'dimgrey'},
    'V': {'Name': 'A,C or G', 'Complement': 'B', 'Color': 'dimgrey'},
    'D': {'Name': 'A,G or T', 'Complement': 'H', 'Color': 'dimgrey'},
    'H': {'Name': 'A,C or T', 'Complement': 'D', 'Color': 'dimgrey'},
    'R': {'Name': 'A or G (puRine)', 'Complement': 'Y', 'Color': 'dimgrey'},
    'Y': {'Name': 'C or T (pYrimidine)', 'Complement': 'R', 'Color': 'dimgrey'},
    'K': {'Name': 'G or T (Keto)', 'Complement': 'M', 'Color': 'dimgrey'},
    'M': {'Name': 'A or C (aMino)', 'Complement': 'K', 'Color': 'dimgrey'},
    'S': {'Name': 'G or C (Strong - 3H bonds)', 'Complement': 'S', 'Color': 'dimgrey'},
    'W': {'Name': 'A or T (Weak - 2H bonds)', 'Complement': 'W', 'Color': 'dimgrey'},
}
# Helical parameters for each supported DNA conformation, keyed by the
# conformation name. The values are looked up in env.prefs once, when this
# module is imported.
#
# Mark (2008-01-31) would like to rename the key 'DuplexRise' to 'Rise';
# this needs to be run by Bruce and Ninad first.
dnaDict = {
    'A-DNA': {
        'BasesPerTurn': env.prefs[adnaBasesPerTurn_prefs_key],
        'DuplexRise': env.prefs[adnaRise_prefs_key],
    },
    'B-DNA': {
        'BasesPerTurn': env.prefs[bdnaBasesPerTurn_prefs_key],
        'DuplexRise': env.prefs[bdnaRise_prefs_key],
    },
    'Z-DNA': {
        'BasesPerTurn': env.prefs[zdnaBasesPerTurn_prefs_key],
        'DuplexRise': env.prefs[zdnaRise_prefs_key],
    },
}
# If the given strand atom doesn't have a complementary strand base atom,
# the sequence editor will show a specific character in the 'complement
# sequence' text field (i.e. in self.sequenceTextEdit_mate) indicating that
# the complement is missing.
# @see: DnaSequenceEditor._determine_complementSequence()
# @see: DnaStrand.getStrandSequenceAndItsComplement()
MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL = '*'
# Common DNA helper functions. ######################################
# for getNextStrandColor:
# _strandColorList is used for assigning a color to a new strand created
# by breaking an existing strand.
# Do not use the following colors in _strandColorList:
# - white/lightgray (reserved for axis)
# - black (reserved as a default color for scaffold strand;
#          also used for dna updater duplex errors (subject to change)
#          [bruce 080206])
# - yellow (used for hover highlighting)
# - red (used as delete highlight color)
# - green (reserved for selection color)
# - orange (reserved for dna updater errors on atoms and bonds
#           [bruce 080206] (subject to change))
#   (update [bruce 080406]: orange is no longer used that way, but might
#    still be, or in future be, used for other warnings, so is still
#    left out of this list)
_strandColorList = [ purple, brass, steelblue, lightgreen, darkgray, lightblue,
darkorange, violet, copper, olive, gray]
# Running count of the colors handed out so far by getNextStrandColor, which
# uses it (modulo the list length) as the index of the next color to return.
_strand_color_counter = 0
def getNextStrandColor(currentColor = None):
    """
    Hand out the next color from _strandColorList, cycling through the list,
    while making sure the result differs from I{currentColor} (typically the
    color the strand has right now).

    Every color that is looked at, including one that is skipped because it
    equals I{currentColor}, advances the module-level counter
    _strand_color_counter.

    @param currentColor: The color that must not be returned,
                         or None if any color is acceptable.
    @type  currentColor: RGB tuple

    @return: The newly chosen color.
    @rtype:  RGB tuple
    """
    global _strand_color_counter

    paletteSize = len(_strandColorList)
    candidate = _strandColorList[_strand_color_counter % paletteSize]
    _strand_color_counter += 1

    if candidate == currentColor:
        # Skip to the following entry. It is taken unconditionally: it can't
        # equal currentColor, since successive colors in _strandColorList
        # are always different.
        candidate = _strandColorList[_strand_color_counter % paletteSize]
        _strand_color_counter += 1

    return candidate
def getDuplexBasesPerTurn(conformation):
    """
    Look up the number of U{bases per turn} recorded in dnaDict (which is
    filled from the user preferences) for the given conformation.

    @param conformation: "A-DNA", "B-DNA", or "Z-DNA"
    @type  conformation: str

    @return: The number of bases per turn.
    @rtype:  float
    """
    assert conformation in ("A-DNA", "B-DNA", "Z-DNA")
    conformationSettings = dnaDict[str(conformation)]
    return conformationSettings['BasesPerTurn']
def getDuplexRise(conformation):
    """
    Look up the duplex U{rise} recorded in dnaDict (which is filled from the
    user preferences) for the given conformation.

    @param conformation: "A-DNA", "B-DNA", or "Z-DNA"
    @type  conformation: str

    @return: The rise in Angstroms.
    @rtype:  float
    """
    assert conformation in ("A-DNA", "B-DNA", "Z-DNA")
    conformationSettings = dnaDict[str(conformation)]
    return conformationSettings['DuplexRise']
def getDuplexLength(conformation, numberOfBases, duplexRise = 0):
    """
    Returns the duplex length (in Angstroms) given the conformation
    and number of bases.

    The length spans the (numberOfBases - 1) steps between consecutive
    base-pairs, so a single base-pair has length 0. An empty duplex
    (numberOfBases == 0) also has length 0.

    @param conformation: "A-DNA", "B-DNA", or "Z-DNA"
    @type  conformation: str

    @param numberOfBases: The number of base-pairs in the duplex.
    @type  numberOfBases: int

    @param duplexRise: The duplex rise (in Angstroms). If not provided
                       (or zero), the rise returned by getDuplexRise for
                       the conformation is used.
    @type  duplexRise: float

    @return: The length of the duplex in Angstroms.
    @rtype:  float
    """
    assert conformation in ("A-DNA", "B-DNA", "Z-DNA")
    assert numberOfBases >= 0
    assert duplexRise >= 0

    if numberOfBases == 0:
        # Without this guard the formula below would yield a negative
        # length (-rise) for an empty duplex.
        return 0.0

    # A zero (i.e. unspecified) rise falls back to the stored rise.
    rise = duplexRise or getDuplexRise(conformation)
    return rise * (numberOfBases - 1)
def getNumberOfBasePairsFromDuplexLength(conformation, duplexLength, duplexRise = 0):
    """
    Compute how many base-pairs fit in a duplex of the given length, for the
    given conformation and (optionally) an explicit duplex rise.

    The result is truncated, NOT rounded to the nearest integer, and this is
    intentional. Example: while drawing a dna line, when the user clicks on
    the screen to complete the second endpoint, the actual dna axis endpoint
    might be trailing the clicked point because the total dna length is not
    sufficient to complete the 'next step'. By not rounding, the dna consists
    of exactly the same number of bases as displayed by the rubberband line.
    (The dna rubberband line gives enough visual indication about this;
    see draweRibbons.drawDnaRibbons() for more details.)

    @param conformation: "A-DNA", "B-DNA", or "Z-DNA"
    @type  conformation: str

    @param duplexLength: The duplex length (in Angstroms).
    @type  duplexLength: float

    @param duplexRise: The duplex rise (in Angstroms). If not provided
                       (or zero), the rise returned by getDuplexRise for
                       the conformation is used.
    @type  duplexRise: float

    @return: The number of base-pairs in the duplex.
    @rtype:  int
    """
    assert conformation in ("A-DNA", "B-DNA", "Z-DNA")
    assert duplexLength >= 0
    assert duplexRise >= 0

    # A zero (i.e. unspecified) rise falls back to the stored rise.
    rise = duplexRise or getDuplexRise(conformation)
    numberOfSteps = duplexLength / rise

    # Why 1.0005 rather than 1: as of 2008-03-05 a bug was observed when
    # adding exactly 1. The sum would print as, say, 5.0, yet int() of it
    # returned 4 -- presumably the value was really 4.9999... and only its
    # printed form was rounded (this is a guess, but print statements did
    # show it happening). The extra tolerance of 0.0005 works around that
    # and is unlikely to have any user visible effect.
    return int(1.0005 + numberOfSteps)
def getDuplexRiseFromNumberOfBasePairs(numberOfBasePairs, duplexLength):
    """
    Returns the duplex rise from the number of base pairs and the duplex
    length, i.e. the length divided by the (numberOfBasePairs - 1) steps
    between consecutive base pairs.

    @param numberOfBasePairs: number of base pairs in the duplex
    @type  numberOfBasePairs: int

    @param duplexLength: The length of duplex.
    @type  duplexLength: double

    @return: The duplex rise.
    @rtype:  double

    @raise ZeroDivisionError: if numberOfBasePairs is 1 (there is no step
                              to measure a rise over).
    """
    numberOfSteps = numberOfBasePairs - 1
    # Force floating point division, so that an integer duplexLength is not
    # silently truncated by Python 2 integer division.
    return float(duplexLength) / numberOfSteps
def getComplementSequence(inSequence):
    """
    Returns the complement of the DNA sequence I{inSequence}.

    Each letter found in basesDict is replaced by its 'Complement' entry.
    A space or a tab is copied through unchanged (it is not converted to an
    'N' base); this is needed in B{DnaSequenceEditor} where, if the user
    enters an empty space in the 'Strand' sequence, its 'Mate' also gets an
    empty space. Any other character becomes 'N'.

    @param inSequence: The original DNA sequence. Must be a plain str
                       (this is asserted).
    @type  inSequence: str

    @return: The complement DNA sequence (same length as I{inSequence}).
    @rtype:  str
    """
    assert isinstance(inSequence, str)

    complementLetters = []
    for baseLetter in inSequence:
        if baseLetter in basesDict:
            complementLetters.append(basesDict[baseLetter]['Complement'])
        elif baseLetter in (' ', '\t'):
            # Whitespace is preserved as is.
            complementLetters.append(baseLetter)
        else:
            # Unrecognized character.
            complementLetters.append("N")

    return "".join(complementLetters)
def getReverseSequence(inSequence):
    """
    Produce the DNA sequence I{inSequence} with its letters in reverse order.

    @param inSequence: The original DNA sequence.
    @type  inSequence: str

    @return: The reversed sequence.
    @rtype:  str
    """
    assert isinstance(inSequence, str)
    # A slice with step -1 walks the string from its last letter to its first.
    return inSequence[::-1]
def replaceUnrecognized(inSequence, replaceBase = "N"):
    """
    Replaces every unrecognized/invalid character (alphanumeric or symbolic)
    in the DNA sequence with I{replaceBase}. A character is recognized if it
    is one of the keys of basesDict.

    This can also be used to remove all unrecognized bases by setting
    I{replaceBase} to an empty string.

    @param inSequence: The original DNA sequence.
    @type  inSequence: str

    @param replaceBase: The base letter to put in place of an unrecognized
                        base. The default is "N".
    @type  replaceBase: str

    @return: The sequence with the unrecognized characters replaced.
    @rtype:  str
    """
    assert isinstance(inSequence, str)
    assert isinstance(replaceBase, str)

    cleanedLetters = []
    for baseLetter in inSequence:
        if baseLetter in basesDict:
            cleanedLetters.append(baseLetter)
        else:
            cleanedLetters.append(replaceBase)

    return "".join(cleanedLetters)
|