diff options
author | Urmi Majumder <urmi@nanorex.com> | 2008-06-19 16:37:41 +0000 |
---|---|---|
committer | Urmi Majumder <urmi@nanorex.com> | 2008-06-19 16:37:41 +0000 |
commit | 64fe0f7cd8d10565eeb6432c4b619418d9e77282 (patch) | |
tree | 48f3cb2a1103552af34c2ba04d3652b11e669966 | |
parent | 83911a71c90187086c2defb5d80e84b40498c1dd (diff) | |
download | nanoengineer-theirix-64fe0f7cd8d10565eeb6432c4b619418d9e77282.tar.gz nanoengineer-theirix-64fe0f7cd8d10565eeb6432c4b619418d9e77282.zip |
Implemented IOS import
-rw-r--r-- | cad/src/files/ios/files_ios.py | 453 |
1 files changed, 426 insertions, 27 deletions
diff --git a/cad/src/files/ios/files_ios.py b/cad/src/files/ios/files_ios.py index b39d15bd0..4685b5ca5 100644 --- a/cad/src/files/ios/files_ios.py +++ b/cad/src/files/ios/files_ios.py @@ -1,5 +1,6 @@ """ -files_ios.py - provides functions to export a NE-1 model into IOS format +files_ios.py - provides functions to export a NE-1 model into IOS format as well +as import optimized sequences into NE-1 @version: @copyright: 2004-2008 Nanorex, Inc. See LICENSE file for details. @@ -10,26 +11,26 @@ from xml.dom.minidom import DOMImplementation from xml.dom import EMPTY_NAMESPACE, XML_NAMESPACE, XMLNS_NAMESPACE from dna.model.DnaLadderRailChunk import DnaStrandChunk from dna.model.DnaLadder import DnaLadder -from dna.model.DnaSegment import DnaSegment -from dna.model.DnaStrand import DnaStrand from printFunc import PrettyPrint +import os, string +from xml.dom.minidom import parse +from xml.parsers.expat import ExpatError - -def getAllLadders(part): +def getAllLadders(assy): """ get all the DNA ladders from the screen to figure out strand pairing info - @param: part - @type: + @param assy: the NE1 assy. + @type assy: L{assembly} @return: a list of DNA ladders """ dnaSegmentList = [] def func(node): - if isinstance(node, DnaSegment): + if isinstance(node, assy.DnaSegment): dnaSegmentList.append(node) - part.topnode.apply2all(func) + assy.part.topnode.apply2all(func) #get all ladders for each segment dnaLadderList = [] for seg in dnaSegmentList: @@ -40,20 +41,20 @@ def getAllLadders(part): return dnaLadderList -def getAllDnaStrands(part): +def getAllDnaStrands(assy): """ get all the DNA strands from the screen to figure out strand info - @param: part - @type: + @param assy: the NE1 assy. + @type assy: L{assembly} @return: a list of DNA strands """ dnaStrandList = [] def func(node): - if isinstance(node, DnaStrand): + if isinstance(node, assy.DnaStrand): dnaStrandList.append(node) - part.topnode.apply2all(func) + assy.part.topnode.apply2all(func) return dnaStrandList @@ -182,19 +183,20 @@ def createMappingLibrary(doc,elemDoc): return -def createStrands(doc,elemDoc, part): +def createStrands(doc,elemDoc, assy): """ create strand section for the NE-1 model file in the ios file @param: doc @type: DOM Document @param: elemDoc @type: root element - + @param assy: the NE1 assy. + @type assy: L{assembly} """ #create strands elemStrands = doc.createElement('Strands') - strandList = getAllDnaStrands(part) + strandList = getAllDnaStrands(assy) for strand in strandList: strandID = strand.name elemStrand = doc.createElement('Strand') @@ -222,13 +224,15 @@ def createStrands(doc,elemDoc, part): elemDoc.appendChild(elemStrands) return -def createConstraints(doc,elemDoc, part): +def createConstraints(doc,elemDoc, assy): """ create constraints section for the NE-1 model file in the ios file @param: doc @type: DOM Document @param: elemDoc @type: root element + @param assy: the NE1 assy. + @type assy: L{assembly} """ # write the constraints @@ -236,7 +240,7 @@ def createConstraints(doc,elemDoc, part): elemConstraintGroup = doc.createElement('ios:ConstraintGroup') elemConstraintGroup.setAttribute('strict', '1') - ladderList = getAllLadders(part) + ladderList = getAllLadders(assy) for ladder in ladderList: strandChunks = ladder.strand_chunks() @@ -264,16 +268,15 @@ def createConstraints(doc,elemDoc, part): return #export to IOS format -def exportToIOSFormat(part, fileName): +def exportToIOSFormat(assy, fileName): """ Writes the IOS file - @param: part - @type: - + @param assy: the NE1 assy. + @type assy: L{assembly} @param: IOS output file in XML @type: string """ - + if fileName == '': print "No file selected to export" return @@ -290,8 +293,8 @@ def exportToIOSFormat(part, fileName): createTokenLibrary(doc, elemDoc) createMappingLibrary(doc,elemDoc) - createStrands(doc, elemDoc, part) - createConstraints(doc, elemDoc, part) + createStrands(doc, elemDoc, assy) + createConstraints(doc, elemDoc, assy) #print doc to file f = open(fileName,'w') @@ -301,7 +304,6 @@ def exportToIOSFormat(part, fileName): # include that f = open(fileName,'r') allLines=f.readlines() - print allLines[1], allLines[len(allLines)-1] allLines[1] = "<ios:IOS xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:ios='http://www.parabon.com/namespaces/inSeqioOptimizationSpecification'>\n" allLines[len(allLines)-1] = "</ios:IOS>\n" f.close() @@ -309,4 +311,401 @@ def exportToIOSFormat(part, fileName): f = open(fileName,'w') f.writelines(allLines) f.close() + return + + + + + + +# UM 20080618: IOS IMPORT FUNCTIONS + +def importFromIOSFile(assy, fileName1): + """ + Imports optimized sequences to NE-1 from IOS file + @param assy: the NE1 assy. + @type assy: L{assembly} + + @param fileName1: IOS Import file + @type fileName1: string + @return: Returns True or False based on whether import was successful + """ + part = assy.part + strandsOnScreen = checkStrandsOnNE_1Window(assy) + if strandsOnScreen == False: + print "Cannot import since currently IOS import is supported only for \ + DNA strands and there are no DNA strands on the screen" + return False + + fileName2 = doInitialProcessingOnXMLFile(fileName1) + strandBasesDict, compInfoDict, strandNameSeqDict = getHybridizationInfo(fileName2) + if strandBasesDict is None or strandNameSeqDict is None: + # Can remove the temp file + if os.path.exists(fileName2): + os.remove(fileName2) + return False + if compInfoDict is None: + print "All single strands to import" + + #make sure that the file you are reading into the system has information + #that corresponds to the current structure in the NE-1 window. + infoCorrect = verifyStructureInfo(assy, strandBasesDict, compInfoDict) + + if infoCorrect: + #import optimized bases from the IOS file + importBases(assy, strandNameSeqDict) + else: + if os.path.exists(fileName2): + os.remove(fileName2) + return False + + if os.path.exists(fileName2): + os.remove(fileName2) + + return True + +def checkStrandsOnNE_1Window(assy): + """ + Checks to see if at least one DNA strand exists on the NE-1 window + @param part: the NE1 part. + @type part: L{assembly} + @return: True or False depending on whether there are DNA strands on the + NE-1 window + """ + + count = 0 + part = assy.part + if hasattr(part.topnode, 'members'): + for node in part.topnode.members: + + if hasattr(node,'members'): + for nodeChild in node.members: + if isinstance(nodeChild, assy.DnaStrand): + count = count +1 + else: + if isinstance(node, DnaStrand): + count = count +1 + + if count >= 1: + return True + else: + return False + +def importBases(assy, strandNameSeqDict): + """ + Imports optimized bases, currently stored in strandNameSeqDict dictionary + @param assy: the NE1 assy. + @type assy: L{assembly} + + @param strandNameSeqDict: the dictionary containing the strand names and + sequences from the IOS import file + + @type strandNameSeqDict: dict + """ + part = assy.part + + def func(node): + if isinstance(node, assy.DnaStrand): + #retrive its name and see if it exists in the dictionary, if yes + # then assign the base sequence + try: + seq = strandNameSeqDict[node.name] + node.setStrandSequence(seq) + except KeyError: + print "Cannot import since strand %s does not exist \ + in the IOS file" % node.name + return + + assy.part.topnode.apply2all(func) + win = assy.win + #if we are in the Build DNA mode, update the LineEdit that displays the + # sequences + if win.commandSequencer.currentCommand.commandName == 'DNA_STRAND': + win.commandSequencer.currentCommand.updateSequence() + return + +def getStrandsBaseInfoFromNE_1(assy): + """ + Obtains the strand chunk names and their corresponding base string of the + NE-1 part + @param part: the NE1 part. + @type part: L{assembly} + @return: strand list and basestring list from NE-1 + """ + strandList = getAllDnaStrands(assy) + strandChunkListFromNE_1 = [] + baseStringListFromNE_1 = [] + + for strand in strandList: + strandChunkList = strand.getStrandChunks() + for chunk in strandChunkList: + chunkID = chunk.name + #just get the name of the strand + strandChunkListFromNE_1.append(chunkID) + atoms = chunk.get_baseatoms() + baseList = [] + for a in atoms: + bases = a.getDnaBaseName() + baseList.append(bases) + baseString = ''.join(baseList) + # base string needed for length info + baseStringListFromNE_1.append(baseString) + return strandChunkListFromNE_1, baseStringListFromNE_1 + +def verifyStrandInfo(assy, strandBasesDict): + """ + Verify strand info from NE-1 part and IOS file match + @param part: the NE1 part. + @type part: L{assembly} + @param strandBasesDict: the dictionary containing the strand names and + sequences from the IOS import file + + @type strandBasesDict: dict + @return: True or False based on whether strands in the NE-1 and that in the + IOS file match up. + """ + strandChunkListFromNE_1, baseStringListFromNE_1 = getStrandsBaseInfoFromNE_1(assy) + + # Once strand chunks name and base string info have been obtained from NE-1 + #check first of all such a chunk exists in the strand dictionary and if yes + # the base string lengths match + + #check their lengths first + dictLength = len(strandBasesDict) + strandChunkListFromNE_1Length = len(strandChunkListFromNE_1) + if dictLength != strandChunkListFromNE_1Length: + print "Cannot import IOS file since number of strand chunks in the import file and \ + one in NE-1 window does not match" + return False + + #check if all the strand chunks exist + for chunks in strandChunkListFromNE_1: + try: + baseString = strandBasesDict[chunks] + except KeyError: + print "Cannot import IOS file since strand chunk %s in the NE-1 window \ + cannot be found in the IOS file" % (chunks) + return False + + #check if all the basestring lengths match or not + k = 0 + for chunks in strandChunkListFromNE_1: + baseString = strandBasesDict[chunks] + baseStringFromNE_1 = baseStringListFromNE_1[k] + + #print baseStringFromNE_1, baseString, k, len(baseStringFromNE_1),len(baseString) + if len(baseStringFromNE_1) != len(baseString): + print "Cannot import IOS file since base string length %d of \ + chunk %s in the NE-1 window does not match with the one found in \ + the IOS file %d" % (len(baseStringFromNE_1), chunks, len(baseString)) + return False + k = k + 1 + + + return True + +def getChunkAndComplFromNE_1(assy): + """ + Obtain strand chunk and their corresponding complementary chunk for NE-1 part. + @param part: the NE1 part. + @type part: L{assembly} + @return: Two lists containing strand chunks and their complements respectively + """ + ladderList = getAllLadders(assy) + strandChunkList = [] + strandChunkComplList = [] + for ladder in ladderList: + strandChunks = ladder.strand_chunks() + if ladder.num_strands() == 2: + chunk1 = strandChunks[0].name + chunk2 = strandChunks[1].name + strandChunkList.append(chunk1) + strandChunkComplList.append(chunk2) + + return strandChunkList, strandChunkComplList + +def verifyComplementInfo(assy, compInfoDict): + """ + Verify that the complementary strand info from the IOS file macthes with + that from the NE-1 part. + + @param part: the NE1 part. + @type part: L{assembly} + @param compInfoDict: dictionary containing strand and their complementary + strands from the IOS file + @type compInfoDict: dict + @return: True or False based on whether the pairing info in the IOS file matches + with that in the NE-1 assembly + """ + strandChunkList, strandChunkComplList = getChunkAndComplFromNE_1(assy) + if strandChunkList != '' and compInfoDict is None: + print "Cannot import IOS file since no pairing info in the IOS file, but\ + NE-1 structure has doublestranded regions" + return False + if strandChunkList == '' and compInfoDict is None: + #no harm having single strands info, so long they match + return True + + # as with strands verify the number of double stranded regions are equal + if len(strandChunkList) != len(compInfoDict): + print "Cannot do IOS import since number of double stranded regions \ + are not equal" + return False + + #verify complementary strands are same in IOS file and in NE-1 window + k = 0 + for chunk in strandChunkList: + # no need to check if the chunk exists in the dictionary since if it + # does not, it has already returned False from the verifyStrandInfo() + # function + + complFromIOS = compInfoDict[chunk] + if complFromIOS != strandChunkComplList[k]: + print "Cannot import IOS file since matching info for %s not found" % chunk + return False + k = k + 1 + return True + +def verifyStructureInfo(assy, strandBasesDict, compInfoDict): + """ + Verify that the structure info in the IOS file matches with that of the NE-1 part. + @param part: the NE1 part. + @type part: L{assembly} + @param strandBasesDict: dictionary containing strand and their corresponding + base string from IOS file + @type strandBasesDict: dict + @param compInfoDict: dictionary containing strand and their complementary + strands from the IOS file + @type compInfoDict: dict + @return: True or False based on if the structure in the IOS file matches up + with the structure in the NE-1 window. + """ + strandInfoCorrect = verifyStrandInfo(assy, strandBasesDict) + if strandInfoCorrect == False: + return False + compInfoCorrect = verifyComplementInfo(assy, compInfoDict) + if compInfoCorrect: + return True + else: + return False + +def doInitialProcessingOnXMLFile(fileName1): + """ + do initial preprocessing on the file so that its acceptable by the parser + from xml.dom.minidom + @param fileName2: IOS import file + @type fileName2: string + @retun: Temporary file that is read by the xml.dom.minidom + """ + #its wierd, sometimes even with the prefix, the ExpatError exception does not + #show up. Do n't know what's going on! Anyways the prefix ios is not needed + #for any of the NE-1 processing and so it's better to be on the safe side! + + f1 = open(fileName1, 'r') + allLines=f1.readlines() + f1.close() + #create a temporary file with the prefixes removed, make sure that you remove + #this file at the end of processing + fileName2 = "temp.xml" + f2 = open(fileName2, 'w') + + + for line in allLines: + if line.find("<ios:")!= -1: + line = line.replace("<ios:","<") + if line.find("</ios:")!= -1: + line = line.replace("</ios:","</") + f2.writelines(line) + f2.close() + + return fileName2 + +def getHybridizationInfo(fileName2): + """ + Process this temporary file for strand chunk info. At the same time, we + check whether its a proper IOS file. + + @param fileName2: IOS import file + @type fileName2: string + @return: 3 dictionaries containing (strand, Bases), (strand, complements), + and (strand, sequence) + """ + + try: + doc = parse(fileName2) + except ExpatError: + print "Cannot import IOS file, since its not in correct XML format" + return None, None, None + + #need to distinguish between regions for mapping and simple strand regions + # hence get strand + strandList = doc.getElementsByTagName("Strand") + if len(strandList) == 0: + print "Cannot import IOS file since no strands to import" + return None, None, None + + strandNameList = [] + strandChunkList = [] + basesForStrandChunkList = [] + strandSeqList = [] + + #Within each strand access all regions + for i in range(len(strandList)): + strandNameList.append(str(strandList.item(i).getAttribute("id"))) + regionList = strandList.item(i).getElementsByTagName("Region") + + #each strand needs to have at least one region and so if one of them + #does not have it, then it is not a correct IOS file and you should return + # without bothering to process the rest of the file. + # So far IOS file format is concerned, single strand chunks do not neeed + # to be in a Region node. However, without the ID of the region, we have + # no way to read it into NE-1 and hence this import is invalid. + + if len(regionList) == 0: + print "Cannot import IOS file: strand does not have any region, not \ + a correct IOS file format" + return None, None, None + + tempStrandSeq = '' + for j in range(len(regionList)): + #get list of strand chunks + strandChunkList.append(str(regionList.item(j).getAttribute("id"))) + #get new base sequence after IOS optimization + tempBaseString = '' + if regionList.item(j).childNodes.item(0) is not None: + tempBaseString = str(regionList.item(j).childNodes.item(0).toxml()) + + #if the base string is empty, there's no point of analyzing any + #further either + if tempBaseString== '': + print "Cannot import IOS file: strand chunk does not have any bases, \ + not a correct IOS file format" + return None, None, None + + basesForStrandChunkList.append(tempBaseString) + tempStrandSeq = tempStrandSeq + tempBaseString + strandSeqList.append(tempStrandSeq) + + strandBasesDict = dict(zip(strandChunkList, basesForStrandChunkList)) + strandNameSeqDict = dict(zip(strandNameList, strandSeqList)) + + #would also need the constraints region to verify the pairing info + strand1List = [] + strand2List = [] + + matchingList = doc.getElementsByTagName("Match") + if len(matchingList) == 0: + return strandBasesDict, None, strandNameSeqDict + + #Within each matching access the pairs individually and store them in a tuple + for i in range(len(matchingList)): + compPairs = matchingList.item(i).getElementsByTagName("Region") + + #get the names of complementary chunks + strand1List.append(str(compPairs.item(0).getAttribute("ref"))) + strand2List.append(str(compPairs.item(1).getAttribute("ref"))) + + compInfoDict = dict(zip(strand1List, strand2List)) + + return strandBasesDict, compInfoDict, strandNameSeqDict
\ No newline at end of file |