cad/src/dna/model/Dna_Constants.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320

# Copyright 2005-2008 Nanorex, Inc.  See LICENSE file for details. 
"""
Dna_Constants.py -- constants for Dna.

Note: these are used both by the newer DnaDuplex.py,
and the older DnaGenHelper.py which it supersedes
(and their associated files).

@author: Mark Sims
@version: $Id$
@copyright: 2005-2008 Nanorex, Inc.  See LICENSE file for details.

@see: References:
      - U{The Standard IUB codes used in NanoEngineer-1
        <http://www.idtdna.com/InstantKB/article.aspx?id=13763>}
      - U{http://en.wikipedia.org/wiki/DNA}
      - U{http://en.wikipedia.org/wiki/Image:Dna_pairing_aa.gif}

History:

2007-08-19 - Started out as part of DnaGenHelper.py
"""

import foundation.env as env
from utilities.constants import purple, brass, steelblue, lightgreen, darkgray, lightblue
from utilities.constants import darkorange, violet, copper, olive, gray
from utilities.prefs_constants import adnaBasesPerTurn_prefs_key, adnaRise_prefs_key
from utilities.prefs_constants import bdnaBasesPerTurn_prefs_key, bdnaRise_prefs_key
from utilities.prefs_constants import zdnaBasesPerTurn_prefs_key, zdnaRise_prefs_key
from PyQt4.Qt import QString

# Table of the single-letter base codes recognized by this module (see the
# IUB codes reference in the module docstring). Each symbol maps to a dict
# holding its display 'Name', the symbol of its 'Complement', and a 'Color'
# name string.
# Note: 'U' lists 'A' as its complement, while 'A' lists 'T'.
basesDict = dict(
    (symbol, {'Name': name, 'Complement': complement, 'Color': color})
    for (symbol, name, complement, color) in (
        # Specific bases.
        ('A', 'Adenine',  'T', 'darkorange'),
        ('C', 'Cytosine', 'G', 'cyan'),
        ('G', 'Guanine',  'C', 'green'),
        ('T', 'Thymine',  'A', 'teal'),
        ('U', 'Uracil',   'A', 'darkblue'),

        # Undefined / wildcard.
        ('X', 'Undefined', 'X', 'darkred'),
        ('N', 'aNy base',  'N', 'orchid'),

        # Three-way ambiguity codes.
        ('B', 'C,G or T', 'V', 'dimgrey'),
        ('V', 'A,C or G', 'B', 'dimgrey'),
        ('D', 'A,G or T', 'H', 'dimgrey'),
        ('H', 'A,C or T', 'D', 'dimgrey'),

        # Two-way ambiguity codes.
        ('R', 'A or G (puRine)',     'Y', 'dimgrey'),
        ('Y', 'C or T (pYrimidine)', 'R', 'dimgrey'),
        ('K', 'G or T (Keto)',       'M', 'dimgrey'),
        ('M', 'A or C (aMino)',      'K', 'dimgrey'),

        ('S', 'G or C (Strong - 3H bonds)', 'W', 'dimgrey'),
        ('W', 'A or T (Weak - 2H bonds)',   'S', 'dimgrey'),
    )
)

# Duplex geometry for each supported DNA conformation, keyed by conformation
# name. Each entry holds 'BasesPerTurn' and 'DuplexRise', both read from
# env.prefs at the moment this module-level statement executes.
# NOTE(review): because the values are read once here, later preference
# changes presumably are not reflected unless other code updates dnaDict --
# confirm against callers.
#
# I'd like to suggest that we change the name of key 'DuplexRise' to 'Rise'.
# Need to run this by Bruce and Ninad first. Mark 2008-01-31.
dnaDict = dict(
    (conformationName,
     {'BasesPerTurn': env.prefs[basesPerTurnKey],
      'DuplexRise':   env.prefs[riseKey]})
    for (conformationName, basesPerTurnKey, riseKey) in (
        ('A-DNA', adnaBasesPerTurn_prefs_key, adnaRise_prefs_key),
        ('B-DNA', bdnaBasesPerTurn_prefs_key, bdnaRise_prefs_key),
        ('Z-DNA', zdnaBasesPerTurn_prefs_key, zdnaRise_prefs_key),
    )
)

# If the given strand atom doesn't have a complementary strand base atom,
# the sequence editor will show a specific character in the 'complement
# sequence' text field (i.e. in self.sequenceTextEdit_mate) indicating that
# the complement is missing.
# @see: DnaSequenceEditor._determine_complementSequence()
# @see: DnaStrand.getStrandSequenceAndItsComplement()
MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL = '*'


# Common DNA helper functions. ######################################

# Support data for getNextStrandColor:

# _strandColorList holds the colors that are assigned to a new strand
# created by breaking an existing strand.
# The following colors must be kept out of _strandColorList:
#   - white/lightgray (reserved for axis)
#   - black (reserved as a default color for scaffold strand;
#      also used for dna updater duplex errors (subject to change)
#      [bruce 080206])
#   - yellow (used for hover highlighting)
#   - red (used as delete highlight color)
#   - green (reserved for selection color)
#   - orange (reserved for dna updater errors on atoms and bonds
#      [bruce 080206] (subject to change))
#     (update [bruce 080406]: orange is no longer used that way, but might
#      still be, or in future be, used for other warnings, so is still
#      left out of this list)
# Successive entries should stay distinct: getNextStrandColor relies on
# that to avoid handing back the color it was asked to avoid.
_strandColorList = [
    purple,
    brass,
    steelblue,
    lightgreen,
    darkgray,
    lightblue,
    darkorange,
    violet,
    copper,
    olive,
    gray,
]

# Running index into _strandColorList (used modulo its length); it is
# advanced each time getNextStrandColor draws a color.
_strand_color_counter = 0

def getNextStrandColor(currentColor = None):
    """
    Return the next color from the module's strand color list, skipping
    one entry if it equals I{currentColor}.

    Each color drawn advances the module-level counter, so repeated calls
    cycle through the list.

    @param currentColor: The color to avoid returning (typically the
                         strand's current color), or None if the next
                         color is ok.
    @type  currentColor: RGB tuple

    @return: New color. It differs from I{currentColor} as long as
             successive entries of the color list are different.
    @rtype: RGB tuple
    """
    global _strand_color_counter
    paletteSize = len(_strandColorList)

    # First draw.
    candidate = _strandColorList[_strand_color_counter % paletteSize]
    _strand_color_counter += 1
    if candidate != currentColor:
        return candidate

    # The first draw matched the color to avoid: take the following entry
    # unconditionally. It won't equal currentColor, since successive colors
    # in _strandColorList are always different.
    candidate = _strandColorList[_strand_color_counter % paletteSize]
    _strand_color_counter += 1
    return candidate

def getDuplexBasesPerTurn(conformation):
    """
    Look up the U{bases per turn} value stored in dnaDict (which is filled
    from the user preferences) for the given conformation.

    @param conformation: "A-DNA", "B-DNA", or "Z-DNA"
    @type  conformation: str

    @return: The number of bases per turn.
    @rtype: float
    """
    supportedConformations = ("A-DNA", "B-DNA", "Z-DNA")
    assert conformation in supportedConformations
    # str() normalizes the key before the dictionary lookup.
    conformationParams = dnaDict[str(conformation)]
    return conformationParams['BasesPerTurn']

def getDuplexRise(conformation):
    """
    Look up the duplex U{rise} stored in dnaDict (which is filled from the
    user preferences) for the given conformation.

    @param conformation: "A-DNA", "B-DNA", or "Z-DNA"
    @type  conformation: str

    @return: The rise in Angstroms.
    @rtype: float
    """
    supportedConformations = ("A-DNA", "B-DNA", "Z-DNA")
    assert conformation in supportedConformations
    # str() normalizes the key before the dictionary lookup.
    conformationParams = dnaDict[str(conformation)]
    return conformationParams['DuplexRise']

def getDuplexLength(conformation, numberOfBases, duplexRise = 0):
    """
    Returns the duplex length (in Angstroms) given the conformation
    and number of bases.

    The length is the rise multiplied by the number of steps between
    consecutive base-pairs, i.e. (numberOfBases - 1). A duplex with zero
    or one base-pair therefore has length 0 (it is never negative).

    @param conformation: "A-DNA", "B-DNA", or "Z-DNA"
    @type  conformation: str

    @param numberOfBases: The number of base-pairs in the duplex.
    @type  numberOfBases: int

    @param duplexRise: The duplex rise (in Angstroms). If not provided
                       (or 0), the value from getDuplexRise(conformation)
                       is used.
    @type  duplexRise: float

    @return: The length of the duplex in Angstroms.
    @rtype: float
    """
    assert conformation in ("A-DNA", "B-DNA", "Z-DNA")
    assert numberOfBases >= 0
    assert duplexRise >= 0

    if not duplexRise:
        # No explicit rise given: fall back to the stored rise for this
        # conformation.
        duplexRise = getDuplexRise(conformation)

    # numberOfBases == 0 is accepted by the assertion above; clamp the step
    # count so that case yields 0 rather than a negative length.
    numberOfSteps = max(numberOfBases - 1, 0)
    return duplexRise * numberOfSteps

def getNumberOfBasePairsFromDuplexLength(conformation, duplexLength, duplexRise = 0):
    """
    Compute how many base-pairs fit in a duplex of the given length, for
    the given conformation and (optionally) an explicit duplex rise.

    The result is NOT rounded to the nearest integer; it is truncated by
    int(). This is intentional. Example: While drawing a dna line, when the
    user clicks on the screen to complete the second endpoint, the actual
    dna axis endpoint might be trailing the clicked point because the total
    dna length is not sufficient to complete the 'next step'. By not
    rounding, the dna consists of exactly the same number of bases as
    displayed by the rubberband line. (The dna rubberband line gives enough
    visual indication about this; see draweRibbons.drawDnaRibbons() for
    more details on the visual indication.)

    @param conformation: "A-DNA", "B-DNA", or "Z-DNA"
    @type  conformation: str

    @param duplexLength: The duplex length (in Angstroms).
    @type  duplexLength: float

    @param duplexRise: The duplex rise (in Angstroms). If not provided
                       (or 0), the value from getDuplexRise(conformation)
                       is used.
    @type  duplexRise: float

    @return: The number of base-pairs in the duplex.
    @rtype: int
    """
    assert conformation in ("A-DNA", "B-DNA", "Z-DNA")
    assert duplexLength >= 0
    assert duplexRise >= 0

    # Pick the rise to divide by: the caller's value when given, otherwise
    # the stored value for this conformation.
    if duplexRise:
        risePerBasePair = duplexRise
    else:
        risePerBasePair = getDuplexRise(conformation)

    # Why 1.0005 is added rather than 1:
    # The count is truncated, not rounded (see the docstring). As of
    # 2008-03-05 a bug was observed when just '1' was added: a print
    # statement would show the computed value as 5.0, yet int() of it
    # returned 4 and not 5. The guess is that in those cases the value was
    # really something like 4.99999...N, which is displayed as 5.0 but
    # truncates to 4. The workaround is a tolerance of 0.0005 on top of 1;
    # this is unlikely to have any user visible effect.
    basePairCount = 1.0005 + (duplexLength / risePerBasePair)
    return int(basePairCount)


def getDuplexRiseFromNumberOfBasePairs(numberOfBasePairs, duplexLength):
    """
    Returns the duplex rise from the number of base pairs and the duplex
    length, i.e. the length divided by the number of steps between
    consecutive base pairs (numberOfBasePairs - 1).

    @param numberOfBasePairs: number of base pairs in the duplex
    @type numberOfBasePairs: int
    @param duplexLength: The length of duplex.
    @type duplexLength: double
    @return: The duplex rise.
    @rtype: double
    @raise ZeroDivisionError: if numberOfBasePairs is 1 (a single base pair
                              has no step from which a rise can be derived).
    """
    # Convert the divisor to float so that integer arguments are not
    # truncated by Python 2's integer division.
    numberOfSteps = float(numberOfBasePairs - 1)
    return duplexLength / numberOfSteps


def getComplementSequence(inSequence):
    """
    Returns the complement of the DNA sequence I{inSequence}.

    Each letter found in basesDict is replaced by its 'Complement' entry.
    Space and tab characters are copied through unchanged (they are not
    converted to 'N'); this is needed in B{DnaSequenceEditor} where, if the
    user enters an empty space in the 'Strand' sequence, its 'Mate' also
    gets an empty space. Every other character is replaced by 'N'. The
    lookup is case-sensitive, so lowercase letters count as unrecognized.

    @param inSequence: The original DNA sequence.
    @type  inSequence: str (enforced by an assertion)

    @return: The complement DNA sequence, with the same length as
             I{inSequence}.
    @rtype:  str
    """
    assert isinstance(inSequence, str)

    # Characters that are not bases but are passed through as-is.
    passThroughCharacters = (' ', '\t')

    complementLetters = []
    for baseLetter in inSequence:
        if baseLetter in basesDict:
            complementLetters.append(basesDict[baseLetter]['Complement'])
        elif baseLetter in passThroughCharacters:
            complementLetters.append(baseLetter)
        else:
            # Unrecognized character: treat it as 'aNy base'.
            complementLetters.append("N")
    # Join once at the end instead of growing a string per character.
    return "".join(complementLetters)
    
def getReverseSequence(inSequence):
    """
    Returns the DNA sequence I{inSequence} with its letters in reverse
    order.

    @param inSequence: The original DNA sequence.
    @type  inSequence: str (enforced by an assertion)

    @return: The reversed sequence.
    @rtype:  str
    """
    assert isinstance(inSequence, str)
    # A slice with step -1 walks the string from its last letter to its
    # first.
    return inSequence[::-1]

def replaceUnrecognized(inSequence, replaceBase = "N"):
    """
    Replaces every character of the DNA sequence that is not a key of
    basesDict (alphanumeric or symbolic, including spaces and tabs) with
    I{replaceBase}.

    This can also be used to remove all unrecognized bases by setting
    I{replaceBase} to an empty string. The lookup is case-sensitive.

    @param inSequence: The original DNA sequence.
    @type  inSequence: str

    @param replaceBase: The base letter to put in place of an unrecognized
                        base. The default is "N".
    @type  replaceBase: str

    @return: The sequence with unrecognized characters replaced.
    @rtype:  str
    """
    assert isinstance(inSequence, str)
    assert isinstance(replaceBase, str)

    outputLetters = []
    for baseLetter in inSequence:
        if baseLetter in basesDict:
            outputLetters.append(baseLetter)
        else:
            outputLetters.append(replaceBase)
    # Join once at the end instead of growing a string per character.
    return "".join(outputLetters)