# Copyright 2008 Nanorex, Inc. See LICENSE file for details. """ files_ios.py - provides functions to export a NE-1 model into IOS format as well as import optimized sequences into NE-1 @author: Urmi @version: $Id$ @copyright: 2008 Nanorex, Inc. See LICENSE file for details. Note: This is only applicable to DNA/ RNA models (so is IOS) """ from xml.dom.minidom import DOMImplementation from xml.dom import EMPTY_NAMESPACE, XML_NAMESPACE, XMLNS_NAMESPACE from dna.model.DnaLadderRailChunk import DnaStrandChunk from dna.model.DnaLadder import DnaLadder from printFunc import PrettyPrint import os, string, sys from xml.dom.minidom import parse from xml.parsers.expat import ExpatError from dna.model.DnaStrand import DnaStrand from PyQt4.Qt import QMessageBox def getAllDnaStrands(assy): """ get all the DNA strands from the NE-1 part to figure out strand info @param assy: the NE1 assy. @type assy: L{assembly} @return: a list of DNA strands """ dnaStrandList = [] def func(node): if isinstance(node, assy.DnaStrand): dnaStrandList.append(node) assy.part.topnode.apply2all(func) return dnaStrandList def createTokenLibrary(doc,elemDoc): """ create Token library in the IOS file @param: doc @type: DOM Document @param: elemDoc @type: root element """ elemTokenLibrary = doc.createElement('TokenLibrary') elemAtomicToken = doc.createElement('AtomicTokens') #create element name and child text from key value pair elemAtomicTokenA = doc.createElement('AtomicToken') elemAtomicTokenA.appendChild(doc.createTextNode('A')) elemAtomicToken.appendChild(elemAtomicTokenA) elemAtomicTokenC = doc.createElement('AtomicToken') elemAtomicTokenC.appendChild(doc.createTextNode('C')) elemAtomicToken.appendChild(elemAtomicTokenC) elemAtomicTokenG = doc.createElement('AtomicToken') elemAtomicTokenG.appendChild(doc.createTextNode('G')) elemAtomicToken.appendChild(elemAtomicTokenG) elemAtomicTokenT = doc.createElement('AtomicToken') elemAtomicTokenT.appendChild(doc.createTextNode('T')) elemAtomicToken.appendChild(elemAtomicTokenT) elemTokenLibrary.appendChild(elemAtomicToken) # create wild card token elemWildcardTokens = doc.createElement('WildcardTokens') elemWildcardToken = doc.createElement('WildcardToken') elemTokenT = doc.createElement('Token') elemTokenT.appendChild(doc.createTextNode('N')) elemWildcardToken.appendChild(elemTokenT) elemAtomicEquivalentA = doc.createElement('AtomicEquivalent') elemAtomicEquivalentA.appendChild(doc.createTextNode('A')) elemWildcardToken.appendChild(elemAtomicEquivalentA) elemAtomicEquivalentT = doc.createElement('AtomicEquivalent') elemAtomicEquivalentT.appendChild(doc.createTextNode('T')) elemWildcardToken.appendChild(elemAtomicEquivalentT) elemAtomicEquivalentG = doc.createElement('AtomicEquivalent') elemAtomicEquivalentG.appendChild(doc.createTextNode('G')) elemWildcardToken.appendChild(elemAtomicEquivalentG) elemAtomicEquivalentC = doc.createElement('AtomicEquivalent') elemAtomicEquivalentC.appendChild(doc.createTextNode('C')) elemWildcardToken.appendChild(elemAtomicEquivalentC) elemWildcardTokens.appendChild(elemWildcardToken) elemTokenLibrary.appendChild(elemWildcardTokens) #append token library to the iso file elemDoc.appendChild(elemTokenLibrary) return def createMappingLibrary(doc,elemDoc): """ create mapping library section for the NE-1 model file in the ios file @param: doc @type: DOM Document @param: elemDoc @type: root element """ elemMappingLibrary = doc.createElement('MappingLibrary') elemMapping = doc.createElement('Mapping') elemMapping.setAttribute('id', 'complement') # A to T elemTokenT = doc.createElement('Token') elemFrom = doc.createElement('From') elemFrom.appendChild(doc.createTextNode('A')) elemTokenT.appendChild(elemFrom) elemTo = doc.createElement('To') elemTo.appendChild(doc.createTextNode('T')) elemTokenT.appendChild(elemTo) elemMapping.appendChild(elemTokenT) # T to A elemTokenT = doc.createElement('Token') elemFrom = doc.createElement('From') elemFrom.appendChild(doc.createTextNode('T')) elemTokenT.appendChild(elemFrom) elemTo = doc.createElement('To') elemTo.appendChild(doc.createTextNode('A')) elemTokenT.appendChild(elemTo) elemMapping.appendChild(elemTokenT) # C to G elemTokenT = doc.createElement('Token') elemFrom = doc.createElement('From') elemFrom.appendChild(doc.createTextNode('C')) elemTokenT.appendChild(elemFrom) elemTo = doc.createElement('To') elemTo.appendChild(doc.createTextNode('G')) elemTokenT.appendChild(elemTo) elemMapping.appendChild(elemTokenT) # G to C elemTokenT = doc.createElement('Token') elemFrom = doc.createElement('From') elemFrom.appendChild(doc.createTextNode('G')) elemTokenT.appendChild(elemFrom) elemTo = doc.createElement('To') elemTo.appendChild(doc.createTextNode('C')) elemTokenT.appendChild(elemTo) elemMapping.appendChild(elemTokenT) elemMappingLibrary.appendChild(elemMapping) elemDoc.appendChild(elemMappingLibrary) return def createMapping(startIndex, endIndex): dictionary = dict() if startIndex < endIndex: j=0 i = startIndex while i <= endIndex : dictionary[i] = j j = j + 1 i = i + 1 else: j=0 i = startIndex while i >= endIndex : dictionary[i] = j j = j + 1 i = i - 1 return dictionary def createComplementaryChunkInformation(strandList, chunkNameListInOrder, indexTupleListInOrder): chunkAndComplementDict = dict() #visited array for all strands so that chunk info do not get written twice indexMappingList = [] visitedArray = [] for i in range(len(strandList)): strand = strandList[i] startIndex = indexTupleListInOrder[i][0][0] endIndex = indexTupleListInOrder[i][len(indexTupleListInOrder[i])-1][1] seqLen = len(strand.getStrandSequence()) indexMappingList.append(createMapping(startIndex, endIndex)) tempList = [] for j in range(seqLen): tempList.append(0) visitedArray.append(tempList) #create complementary info for strand in strandList: strand_wholechain = strand.get_strand_wholechain() for rail in strand_wholechain.rails(): atom = rail.baseatoms[0] atomMate = atom.get_strand_atom_mate() # do this only for double stranded DNA if atomMate is not None: #check visited array to see if complementary info has already been written baseIndices = strand_wholechain.wholechain_baseindex_range_for_rail(rail) index = strandList.index(strand) tempList = [] tempList = [x[0] for x in indexTupleListInOrder[index]] try: index1 = tempList.index(baseIndices[0]) startIndex = baseIndices[0] endIndex = baseIndices[1] except ValueError: index1 = tempList.index(baseIndices[1]) startIndex = baseIndices[1] endIndex = baseIndices[0] startIndexInVA = indexMappingList[index][startIndex] endIndexInVA = indexMappingList[index][endIndex] exist = 0 m = startIndexInVA while m <= endIndexInVA: if visitedArray[index][m] == 0: exist = 0 break else: exist = 1 m = m + 1 if exist == 1: #entry already exists continue else: #need to create chunk and complementary chunk info chunkName = chunkNameListInOrder[index][index1 + 1] #mark visited array to be 1 m = startIndexInVA while m <= endIndexInVA: visitedArray[index][m] = 1 m = m + 1 #find complementary chunk Name atomMateParent = atomMate.getDnaStrand() strandRails = atom.molecule.ladder.strand_rails assert len(strandRails) == 2 if rail == strandRails[0]: complementaryRail = strandRails[1] else: complementaryRail = strandRails[0] baseIndicesForComp = atomMateParent.get_strand_wholechain().wholechain_baseindex_range_for_rail(complementaryRail) tempList = [] indexComp = strandList.index(atomMateParent) tempList = [x[0] for x in indexTupleListInOrder[indexComp]] try: index1 = tempList.index(baseIndicesForComp[0]) startIndex = baseIndicesForComp[0] endIndex = baseIndicesForComp[1] except ValueError: index1 = tempList.index(baseIndicesForComp[1]) startIndex = baseIndicesForComp[1] endIndex = baseIndicesForComp[0] startIndexInVA = indexMappingList[indexComp][startIndex] endIndexInVA = indexMappingList[indexComp][endIndex] compChunkName = chunkNameListInOrder[indexComp][index1 + 1] m = startIndexInVA while m <= endIndexInVA: visitedArray[indexComp][m] = 1 m = m + 1 chunkAndComplementDict[chunkName] = compChunkName return chunkAndComplementDict def railImplementation(assy): strandList = getAllDnaStrands(assy) baseStringListInOrder = [] indexTupleListInOrder = [] chunkNameListInOrder = [] #initialization for i in range(len(strandList)): baseStringListInOrder.append([]) indexTupleListInOrder.append([]) chunkNameListInOrder.append([]) for strand in strandList: strandID = strand.name strandIndex = strandList.index(strand) #wholechain_baseindex_range_for_rail(rail) can return sequences either #in 3' or 5' sequences. Hence the final sequence should be compared with # that of atoms in the bond direction and their corresponding basenames # to figure out its directionality. strand_wholechain = strand.get_strand_wholechain() someList = [] if strand_wholechain: for rail in strand_wholechain.rails(): baseList = [] for a in rail.baseatoms: bases = a.getDnaBaseName() aComp = a.get_strand_atom_mate() parent = a.getDnaStrand() if bases == 'X': bases = 'N' baseList.append(bases) baseIndices = strand_wholechain.wholechain_baseindex_range_for_rail(rail) baseString = ''.join(baseList) if baseIndices[1] < baseIndices[0]: baseStringFinal = baseString[::-1] indexTuple = [baseIndices[1], baseIndices[0]] else: baseStringFinal = baseString indexTuple = [baseIndices[0], baseIndices[1]] someList.append( (indexTuple, baseStringFinal) ) someList.sort() indexTupleListInOrder[strandIndex] = [x[0] for x in someList] baseStringListInOrder[strandIndex] = [x[1] for x in someList] checkStrandSequence = ''.join(baseStringListInOrder[strandIndex]) strandSeq = strand.getStrandSequence() if strandSeq == checkStrandSequence[::-1]: baseStringListInOrder[strandIndex].reverse() indexTupleListInOrder[strandIndex].reverse() #we also need to flip the order of the individual element in the tuple for l in range(len(indexTupleListInOrder[strandIndex])): indexTupleListInOrder[strandIndex][l].reverse() #create names for each chunk within each strand someList = [] for l in range(len(baseStringListInOrder[strandIndex])): chunkName = strandID + '_chunk_' + str(l) someList.append(chunkName) chunkNameListInOrder[strandIndex] = someList chunkNameListInOrder[strandIndex].insert(0, strandID) chunkAndComplementDict = createComplementaryChunkInformation(strandList, chunkNameListInOrder, indexTupleListInOrder) return chunkNameListInOrder, baseStringListInOrder, chunkAndComplementDict def createStrands(doc,elemDoc, assy): """ create strand section for the NE-1 model file in the ios file @param: doc @type: DOM Document @param: elemDoc @type: root element @param assy: the NE1 assy. @type assy: L{assembly} """ chunkNameListInOrder, baseStringListInOrder, chunkAndComplementDict = railImplementation(assy) #write the strands to the IOS export file elemStrands = doc.createElement('Strands') i = 0 while i < len(chunkNameListInOrder): strandID = chunkNameListInOrder[i][0] elemStrand = doc.createElement('Strand') elemStrand.setAttribute('id',strandID) for j in range(0, len(chunkNameListInOrder[i])-1): chunkID = chunkNameListInOrder[i][j+1] baseString = baseStringListInOrder[i][j] baseString.replace('X','N') elemRegion = doc.createElement('Region') elemRegion.setAttribute('id', chunkID) elemRegion.appendChild(doc.createTextNode(baseString)) elemStrand.appendChild(elemRegion) i = i + 1 elemStrands.appendChild(elemStrand) elemDoc.appendChild(elemStrands) return chunkAndComplementDict def createConstraints(doc,elemDoc, assy, compInfoDict): """ create constraints section for the NE-1 model file in the ios file @param: doc @type: DOM Document @param: elemDoc @type: root element @param assy: the NE1 assy. @type assy: L{assembly} """ # write the constraints elemConstraints = doc.createElement('Constraints') elemConstraintGroup = doc.createElement('ios:ConstraintGroup') elemConstraintGroup.setAttribute('strict', '1') for key in compInfoDict: elemMatch = doc.createElement('ios:Match') elemMatch.setAttribute('mapping', 'complement') elemConstraintRegion = doc.createElement('Region') elemConstraintRegion.setAttribute('ref',key) elemMatch.appendChild(elemConstraintRegion) elemConstraintRegion = doc.createElement('Region') elemConstraintRegion.setAttribute('ref',compInfoDict[key]) elemConstraintRegion.setAttribute('reverse', '1') elemMatch.appendChild(elemConstraintRegion) elemConstraintGroup.appendChild(elemMatch) elemConstraints.appendChild(elemConstraintGroup) elemDoc.appendChild(elemConstraints) return #export to IOS format def exportToIOSFormat(assy, fileName): """ Writes the IOS file @param assy: the NE1 assy. @type assy: L{assembly} @param: IOS output file in XML @type: string """ if fileName == '': print "No file selected to export" return d = DOMImplementation() #create doctype doctype = DOMImplementation.createDocumentType(d,'ios', None, None) #create empty DOM Document and get root element doc = DOMImplementation.createDocument(d, EMPTY_NAMESPACE,'ios', doctype) elemDoc = doc.documentElement elemDoc.setAttributeNS(XMLNS_NAMESPACE, "xmlns:ios", "http://www.parabon.com/namespaces/inSeqioOptimizationSpecification") elemDoc.setAttributeNS(XMLNS_NAMESPACE, "xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance") createTokenLibrary(doc, elemDoc) createMappingLibrary(doc,elemDoc) compInfoDict = createStrands(doc, elemDoc, assy) createConstraints(doc, elemDoc, assy, compInfoDict) #print doc to file f = open(fileName,'w') PrettyPrint(doc,f) f.close() # don't know how to set the IOS prefix, so processing text to # include that f = open(fileName,'r') allLines=f.readlines() allLines[1] = "\n" allLines[len(allLines)-1] = "\n" f.close() #write the document all over to reflect the changes f = open(fileName,'w') f.writelines(allLines) f.close() return # UM 20080618: IOS IMPORT FUNCTIONS def importFromIOSFile(assy, fileName1): """ Imports optimized sequences to NE-1 from IOS file @param assy: the NE1 assy. @type assy: L{assembly} @param fileName1: IOS Import file @type fileName1: string @return: Returns True or False based on whether import was successful @note: Since DNA Strand Chunks do not get stored in the mmp file, there's no way, chunk by chunk info can be verified between the structure on the NE-1 window and that in the IOS file. The most that can be done is to verify the name of the strand Name info and their lengths. For instance if two NE-1 structures have the same name and number of strands,each of same length, but their pairing info is different, there's no way to check that and the sequences will get imported anyways. There IOS import happens at the user's risk. """ strandsOnScreen = checkStrandsOnNE_1Window(assy) if strandsOnScreen == False: msg = "Cannot import since currently IOS import is supported only for DNA strands and there are no DNA strands on the screen. There is also no support for importing into clipboard." QMessageBox.warning(assy.win, "Warning!", msg) return False fileName2 = doInitialProcessingOnXMLFile(fileName1) strandNameSeqDict = getHybridizationInfo(fileName2) if strandNameSeqDict is None: # Can remove the temp file if os.path.exists(fileName2): os.remove(fileName2) return False infoCorrect = verifyStructureInfo(assy, strandNameSeqDict) if infoCorrect: #import optimized bases from the IOS file importBases(assy, strandNameSeqDict) else: if os.path.exists(fileName2): os.remove(fileName2) return False if os.path.exists(fileName2): os.remove(fileName2) return True def checkStrandsOnNE_1Window(assy): """ Checks to see if at least one DNA strand exists on the NE-1 window @param part: the NE1 part. @type part: L{assembly} @return: True or False depending on whether there are DNA strands on the NE-1 window """ count = 0 part = assy.part if hasattr(part.topnode, 'members'): for node in part.topnode.members: if hasattr(node,'members'): if node.members is None: return False for nodeChild in node.members: if isinstance(nodeChild, assy.DnaStrand): count = count +1 else: if isinstance(node, assy.DnaStrand): count = count +1 if count >= 1: return True else: return False def importBases(assy, strandNameSeqDict): """ Imports optimized bases, currently stored in strandNameSeqDict dictionary @param assy: the NE1 assy. @type assy: L{assembly} @param strandNameSeqDict: the dictionary containing the strand names and sequences from the IOS import file @type strandNameSeqDict: dict """ def func(node): if isinstance(node, assy.DnaStrand): #retrive its name and see if it exists in the dictionary, if yes # then assign the base sequence try: seq = strandNameSeqDict[node.name] node.setStrandSequence(seq, False) for node in assy.part.topnode.members: for nodeChild in node.members: if isinstance(nodeChild, assy.DnaStrand): seq = nodeChild.getStrandSequence() except KeyError: msg = "Cannot import IOS file since strand %s does not exist in the IOS file" % node.name QMessageBox.warning(assy.win, "Warning!", msg) return assy.part.topnode.apply2all(func) #if we are in the Build DNA mode, update the LineEdit that displays the # sequences win = assy.win if win.commandSequencer.currentCommand.commandName == 'DNA_STRAND': win.commandSequencer.currentCommand.updateSequence() return def getStrandsBaseInfoFromNE_1(assy): """ Obtains the strand chunk names and their corresponding base string of the NE-1 part @param part: the NE1 part. @type part: L{assembly} @return: strand list and basestring list from NE-1 """ strandList = getAllDnaStrands(assy) strandListFromNE_1 = [] baseStringListFromNE_1 = [] for strand in strandList: strandID = strand.name #just get the name of the strand strandListFromNE_1.append(strandID) baseString = strand.getStrandSequence() baseStringListFromNE_1.append(baseString) return strandListFromNE_1, baseStringListFromNE_1 def verifyStructureInfo(assy, iosSeqNameDict): """ Verify that the structure info in the IOS file matches with that of the NE-1 part. @param part: the NE1 part. @type part: L{assembly} @param iosSeqNameDict: dictionary containing strand and basestring from the IOS file @type compInfoDict: dict @return: True or False based on if the structure in the IOS file matches up with the structure in the NE-1 window. """ strandListFromNE_1, baseStringListFromNE_1 = getStrandsBaseInfoFromNE_1(assy) #check their lengths first dictLength = len(iosSeqNameDict) strandListFromNE_1Length = len(strandListFromNE_1) if dictLength != strandListFromNE_1Length: msg = "IOS import aborted since the number of strands in the IOS file "\ "does not equal the number of strands in the current model." QMessageBox.warning(assy.win, "Warning!", msg) return False for strand in iosSeqNameDict: baseString = iosSeqNameDict[strand] try: index = strandListFromNE_1.index(strand) baseStringFromNE_1 = baseStringListFromNE_1[index] except ValueError: msg = "IOS import aborted since strand '%s' in the IOS file does"\ "not exist in the current model." % strand QMessageBox.warning(assy.win, "Warning!", msg) return False if len(baseStringFromNE_1) != len(baseString): msg = "IOS import aborted since the length of strand '%s' "\ "(%s bases) in the current model does not match the length "\ "of strand '%s' found in the IOS file (%d bases)." % \ (strandListFromNE_1[index], len(baseStringFromNE_1), strand, len(baseString)) QMessageBox.warning(assy.win, "Warning!", msg) return False return True def doInitialProcessingOnXMLFile(fileName1): """ do initial preprocessing on the file so that its acceptable by the parser from xml.dom.minidom @param fileName2: IOS import file @type fileName2: string @retun: Temporary file that is read by the xml.dom.minidom """ #its wierd, sometimes even with the prefix, the ExpatError exception does not #show up. Do n't know what's going on! Anyways the prefix ios is not needed #for any of the NE-1 processing and so it's better to be on the safe side! f1 = open(fileName1, 'r') allLines=f1.readlines() f1.close() #create a temporary file with the prefixes removed, make sure that you remove #this file at the end of processing fileName2 = "temp.xml" f2 = open(fileName2, 'w') for line in allLines: if line.find("