# Copyright 2005-2008 Nanorex, Inc.  See LICENSE file for details.
"""
Dna_Constants.py -- constants for Dna.

Note: these are used both by the newer DnaDuplex.py,
and the older DnaGenHelper.py which it supersedes
(and their associated files).

@author: Mark Sims
@version: $Id$
@copyright: 2005-2008 Nanorex, Inc.  See LICENSE file for details.

@see: References:
      - U{The Standard IUB codes used in NanoEngineer-1 }
      - U{http://en.wikipedia.org/wiki/DNA}
      - U{http://en.wikipedia.org/wiki/Image:Dna_pairing_aa.gif}

History:

2007-08-19 - Started out as part of DnaGenHelper.py
"""

import foundation.env as env

from utilities.constants import purple, brass, steelblue, lightgreen, darkgray, lightblue
from utilities.constants import darkorange, violet, copper, olive, gray

from utilities.prefs_constants import adnaBasesPerTurn_prefs_key, adnaRise_prefs_key
from utilities.prefs_constants import bdnaBasesPerTurn_prefs_key, bdnaRise_prefs_key
from utilities.prefs_constants import zdnaBasesPerTurn_prefs_key, zdnaRise_prefs_key

from PyQt4.Qt import QString

# Table of base letters (the four DNA bases, uracil, and the ambiguity codes).
# Each entry maps a one-letter code to:
#   'Name'       - human-readable description of the code
#   'Complement' - the one-letter code of the complementary base
#   'Color'      - a color name string associated with the base
basesDict = {
    'A': {'Name': 'Adenine',
          'Complement': 'T',
          'Color': 'darkorange'},
    'C': {'Name': 'Cytosine',
          'Complement': 'G',
          'Color': 'cyan'},
    'G': {'Name': 'Guanine',
          'Complement': 'C',
          'Color': 'green'},
    'T': {'Name': 'Thymine',
          'Complement': 'A',
          'Color': 'teal'},
    'U': {'Name': 'Uracil',
          'Complement': 'A',
          'Color': 'darkblue'},

    'X': {'Name': 'Undefined',
          'Complement': 'X',
          'Color': 'darkred'},
    'N': {'Name': 'aNy base',
          'Complement': 'N',
          'Color': 'orchid'},

    'B': {'Name': 'C,G or T',
          'Complement': 'V',
          'Color': 'dimgrey'},
    'V': {'Name': 'A,C or G',
          'Complement': 'B',
          'Color': 'dimgrey'},
    'D': {'Name': 'A,G or T',
          'Complement': 'H',
          'Color': 'dimgrey'},
    'H': {'Name': 'A,C or T',
          'Complement': 'D',
          'Color': 'dimgrey'},

    'R': {'Name': 'A or G (puRine)',
          'Complement': 'Y',
          'Color': 'dimgrey'},
    'Y': {'Name': 'C or T (pYrimidine)',
          'Complement': 'R',
          'Color': 'dimgrey'},
    'K': {'Name': 'G or T (Keto)',
          'Complement': 'M',
          'Color': 'dimgrey'},
    'M': {'Name': 'A or C (aMino)',
          'Complement': 'K',
          'Color': 'dimgrey'},

    'S': {'Name': 'G or C (Strong - 3H bonds)',
          'Complement': 'W',
          'Color': 'dimgrey'},
    'W': {'Name': 'A or T (Weak - 2H bonds)',
          'Complement': 'S',
          'Color': 'dimgrey'},
}

# I'd like to suggest that we change the name of key 'DuplexRise' to 'Rise'.
# Need to run this by Bruce and Ninad first. Mark 2008-01-31.
#
# Per-conformation duplex parameters, looked up in the user preferences.
# NOTE: the env.prefs values are read once, when this module is imported,
# and stored in the dict; nothing in this file refreshes them afterwards.
dnaDict = {
    'A-DNA': {'BasesPerTurn': env.prefs[adnaBasesPerTurn_prefs_key],
              'DuplexRise': env.prefs[adnaRise_prefs_key]},
    'B-DNA': {'BasesPerTurn': env.prefs[bdnaBasesPerTurn_prefs_key],
              'DuplexRise': env.prefs[bdnaRise_prefs_key]},
    'Z-DNA': {'BasesPerTurn': env.prefs[zdnaBasesPerTurn_prefs_key],
              'DuplexRise': env.prefs[zdnaRise_prefs_key]},
}

# If the given strand atom doesn't have a complementary strand base atom,
# the sequence editor will show a specific character in the 'complement
# sequence' text field (i.e. in self.sequenceTextEdit_mate) indicating that
# the complement is missing.
#@see: DnaSequenceEditor._determine_complementSequence()
#@see: DnaStrand.getStrandSequenceAndItsComplement()
MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL = '*'

# Common DNA helper functions. ######################################

# for getNextStrandColor:

# _strandColorList is used for assigning a color to a new strand created
# by breaking an existing strand.
# Do not use the following colors in _strandColorList:
# - white/lightgray (reserved for axis)
# - black (reserved as a default color for scaffold strand;
#          also used for dna updater duplex errors (subject to change)
#          [bruce 080206])
# - yellow (used for hover highlighting)
# - red (used as delete highlight color)
# - green (reserved for selection color)
# - orange (reserved for dna updater errors on atoms and bonds [bruce 080206]
#           (subject to change))
#   (update [bruce 080406]: orange is no longer used that way, but might
#    still be, or in future be, used for other warnings, so is still
#    left out of this list)
_strandColorList = [ purple, brass, steelblue, lightgreen, darkgray, lightblue,
                     darkorange, violet, copper, olive, gray]

# Index (taken modulo the list length) of the next color to hand out.
_strand_color_counter = 0

def getNextStrandColor(currentColor = None):
    """
    Return a color to assign to a strand which is guaranteed to be
    different than currentColor (which is typically that strand's current
    color).

    Colors are handed out round-robin from _strandColorList; every call
    advances the module-level counter (twice, if the first candidate had
    to be skipped because it equals currentColor).

    @param currentColor: The color to avoid returning, or None if the next
                         color is ok.
    @type  currentColor: RGB tuple

    @return: New color.
    @rtype:  RGB tuple
    """
    global _strand_color_counter
    newColor = _strandColorList[_strand_color_counter % len(_strandColorList)]
    _strand_color_counter += 1
    if newColor == currentColor:
        # Skip to the following color. It can't equal currentColor as well,
        # since successive colors in _strandColorList are always different.
        newColor = _strandColorList[_strand_color_counter % len(_strandColorList)]
        _strand_color_counter += 1
    return newColor

def getDuplexBasesPerTurn(conformation):
    """
    Returns the number of U{bases per turn} specified in the user preferences.

    The value comes from the module-level dnaDict table.

    @param conformation: "A-DNA", "B-DNA", or "Z-DNA"
    @type  conformation: str

    @return: The number of bases per turn.
    @rtype:  float
    """
    assert conformation in ("A-DNA", "B-DNA", "Z-DNA")
    return dnaDict[str(conformation)]['BasesPerTurn']

def getDuplexRise(conformation):
    """
    Returns the duplex U{rise} specified in the user preferences.

    The value comes from the module-level dnaDict table.

    @param conformation: "A-DNA", "B-DNA", or "Z-DNA"
    @type  conformation: str

    @return: The rise in Angstroms.
    @rtype:  float
    """
    assert conformation in ("A-DNA", "B-DNA", "Z-DNA")
    return dnaDict[str(conformation)]['DuplexRise']

def getDuplexLength(conformation, numberOfBases, duplexRise = 0):
    """
    Returns the duplex length (in Angstroms) given the conformation
    and number of bases.

    The length is the rise multiplied by (numberOfBases - 1). A duplex with
    zero bases or a single base has length 0 (never a negative length).

    @param conformation: "A-DNA", "B-DNA", or "Z-DNA"
    @type  conformation: str

    @param numberOfBases: The number of base-pairs in the duplex.
    @type  numberOfBases: int

    @param duplexRise: The duplex rise (in Angstroms). If not provided (or
                       zero), the user preference for DNA rise is used.
    @type  duplexRise: float

    @return: The length of the duplex in Angstroms.
    @rtype:  float
    """
    assert conformation in ("A-DNA", "B-DNA", "Z-DNA")
    assert numberOfBases >= 0
    assert duplexRise >= 0
    if not duplexRise:
        # A rise of 0 means "not specified": fall back to the preference.
        duplexRise = getDuplexRise(conformation)
    # N bases are separated by N - 1 rises. Clamp so that an empty duplex
    # (numberOfBases == 0, which the assert above permits) yields a zero
    # length instead of -duplexRise.
    return duplexRise * max(numberOfBases - 1, 0)

def getNumberOfBasePairsFromDuplexLength(conformation, duplexLength, duplexRise = 0):
    """
    Returns the number of base-pairs in the duplex given the conformation,
    duplex length and duplex rise (optional).

    The number of base-pairs returned is NOT rounded to the nearest integer;
    it is truncated by int(). The rounding is intentionally not done.
    Example: While drawing a dna line, when user clicks on the screen to
    complete the second endpoint, the actual dna axis endpoint might be
    trailing the clicked point because the total dna length is not sufficient
    to complete the 'next step'. Thus, by not rounding the number of bases,
    we make sure that the dna consists of exactly same number of bases as
    displayed by the rubberband line ( The dna rubberband line gives enough
    visual indication about this. see draweRibbons.drawDnaRibbons() for more
    details on the visual indication )

    @param conformation: "A-DNA", "B-DNA", or "Z-DNA"
    @type  conformation: str

    @param duplexLength: The duplex length (in Angstroms).
    @type  duplexLength: float

    @param duplexRise: The duplex rise (in Angstroms). If not provided (or
                       zero), the user preference for DNA rise is used.
    @type  duplexRise: float

    @return: The number of base-pairs in the duplex.
    @rtype:  int
    """
    assert conformation in ("A-DNA", "B-DNA", "Z-DNA")
    assert duplexLength >= 0
    assert duplexRise >= 0
    if duplexRise:
        numberOfBasePairs = 1.0005 + (duplexLength / duplexRise)
    else:
        numberOfBasePairs = 1.0005 + (duplexLength / getDuplexRise(conformation))

    #Explanation on adding '1.0005':
    #The number of base-pairs returned is NOT rounded to the nearest integer.
    #See why it's not done in this method's docstring. But why do we add
    #1.0005 instead of '1' while computing the number of basepairs? As of
    #2008-03-05 there is a bug observed in the number this method returns if
    #we just add '1'. Suppose a print statement shows the numberOfBasePairs
    #computed above as 5.0. But int(numberOfBasePairs) returns 4 and not 5!
    #This happens sometimes. I am not sure if in those cases the number of
    #basepairs is something like 4.99999......N which python rounds off to
    #5.0, but int of that number actually returns 4. This is just a guess.
    #But some print statements do show this happening! So a workaround is to
    #add some tolerance of 0.0005 to 1. This addition is unlikely to have any
    #user visible effect.
    return int(numberOfBasePairs)

def getDuplexRiseFromNumberOfBasePairs(numberOfBasePairs, duplexLength):
    """
    Returns the duplex rise from the number of base pairs and the duplex
    length.

    @param numberOfBasePairs: number of base pairs in the duplex
    @type  numberOfBasePairs: int

    @param duplexLength: The length of duplex.
    @type  duplexLength: double

    @return: The duplex rise, i.e. duplexLength / (numberOfBasePairs - 1).
    @rtype:  double

    @raise ZeroDivisionError: if numberOfBasePairs is 1 (a single base pair
                              has no rise to measure).
    """
    # float() forces true division: with two int arguments the '/' operator
    # of Python 2 would otherwise silently truncate the rise.
    return float(duplexLength) / (numberOfBasePairs - 1)

def getComplementSequence(inSequence):
    """
    Returns the complement of the DNA sequence I{inSequence}.

    Each letter found in basesDict is replaced by its 'Complement' entry.
    Spaces and tabs are copied through unchanged; any other character
    (including lowercase letters) becomes 'N'.

    @param inSequence: The original DNA sequence.
    @type  inSequence: str

    @return: The complement DNA sequence.
    @rtype:  str
    """
    #If user enters an empty 'space' or 'tab key', treat it as an empty space
    #in the complement sequence. (don't convert it to 'N' base)
    #This is needed in B{DnaSequenceEditor} where, if user enters an empty
    #space in the 'Strand' Sequence, its 'Mate' also enters an empty space.
    whitespaceSymbols = (' ', '\t')

    assert isinstance(inSequence, str)
    complementLetters = []
    for baseLetter in inSequence:
        if baseLetter in basesDict:
            complementLetters.append(basesDict[baseLetter]['Complement'])
        elif baseLetter in whitespaceSymbols:
            complementLetters.append(baseLetter)
        else:
            complementLetters.append("N")
    return ''.join(complementLetters)

def getReverseSequence(inSequence):
    """
    Returns the reverse order of the DNA sequence I{inSequence}.

    @param inSequence: The original DNA sequence.
    @type  inSequence: str

    @return: The reversed sequence.
    @rtype:  str
    """
    assert isinstance(inSequence, str)
    return inSequence[::-1]

def replaceUnrecognized(inSequence, replaceBase = "N"):
    """
    Replaces any unrecognized/invalid characters (alphanumeric or
    symbolic) from the DNA sequence and replaces them with I{replaceBase}.

    A character is recognized only if it is a key of basesDict, so
    whitespace and lowercase letters are replaced too.

    This can also be used to remove all unrecognized bases by setting
    I{replaceBase} to an empty string.

    @param inSequence: The original DNA sequence.
    @type  inSequence: str

    @param replaceBase: The base letter to put in place of an unrecognized
                        base. The default is "N".
    @type  replaceBase: str

    @return: The sequence.
    @rtype:  str
    """
    assert isinstance(inSequence, str)
    assert isinstance(replaceBase, str)

    outLetters = []
    for baseLetter in inSequence:
        if baseLetter in basesDict:
            outLetters.append(baseLetter)
        else:
            outLetters.append(replaceBase)
    return ''.join(outLetters)