summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUrmi Majumder <urmi@nanorex.com>2008-06-19 16:37:41 +0000
committerUrmi Majumder <urmi@nanorex.com>2008-06-19 16:37:41 +0000
commit64fe0f7cd8d10565eeb6432c4b619418d9e77282 (patch)
tree48f3cb2a1103552af34c2ba04d3652b11e669966
parent83911a71c90187086c2defb5d80e84b40498c1dd (diff)
downloadnanoengineer-theirix-64fe0f7cd8d10565eeb6432c4b619418d9e77282.tar.gz
nanoengineer-theirix-64fe0f7cd8d10565eeb6432c4b619418d9e77282.zip
Implemented IOS import
-rw-r--r--cad/src/files/ios/files_ios.py453
1 files changed, 426 insertions, 27 deletions
diff --git a/cad/src/files/ios/files_ios.py b/cad/src/files/ios/files_ios.py
index b39d15bd0..4685b5ca5 100644
--- a/cad/src/files/ios/files_ios.py
+++ b/cad/src/files/ios/files_ios.py
@@ -1,5 +1,6 @@
"""
-files_ios.py - provides functions to export a NE-1 model into IOS format
+files_ios.py - provides functions to export a NE-1 model into IOS format as well
+as import optimized sequences into NE-1
@version:
@copyright: 2004-2008 Nanorex, Inc. See LICENSE file for details.
@@ -10,26 +11,26 @@ from xml.dom.minidom import DOMImplementation
from xml.dom import EMPTY_NAMESPACE, XML_NAMESPACE, XMLNS_NAMESPACE
from dna.model.DnaLadderRailChunk import DnaStrandChunk
from dna.model.DnaLadder import DnaLadder
-from dna.model.DnaSegment import DnaSegment
-from dna.model.DnaStrand import DnaStrand
from printFunc import PrettyPrint
+import os, string
+from xml.dom.minidom import parse
+from xml.parsers.expat import ExpatError
-
-def getAllLadders(part):
+def getAllLadders(assy):
"""
get all the DNA ladders from the screen to figure out strand pairing info
- @param: part
- @type:
+ @param assy: the NE1 assy.
+ @type assy: L{assembly}
@return: a list of DNA ladders
"""
dnaSegmentList = []
def func(node):
- if isinstance(node, DnaSegment):
+ if isinstance(node, assy.DnaSegment):
dnaSegmentList.append(node)
- part.topnode.apply2all(func)
+ assy.part.topnode.apply2all(func)
#get all ladders for each segment
dnaLadderList = []
for seg in dnaSegmentList:
@@ -40,20 +41,20 @@ def getAllLadders(part):
return dnaLadderList
-def getAllDnaStrands(part):
+def getAllDnaStrands(assy):
"""
get all the DNA strands from the screen to figure out strand info
- @param: part
- @type:
+ @param assy: the NE1 assy.
+ @type assy: L{assembly}
@return: a list of DNA strands
"""
dnaStrandList = []
def func(node):
- if isinstance(node, DnaStrand):
+ if isinstance(node, assy.DnaStrand):
dnaStrandList.append(node)
- part.topnode.apply2all(func)
+ assy.part.topnode.apply2all(func)
return dnaStrandList
@@ -182,19 +183,20 @@ def createMappingLibrary(doc,elemDoc):
return
-def createStrands(doc,elemDoc, part):
+def createStrands(doc,elemDoc, assy):
"""
create strand section for the NE-1 model file in the ios file
@param: doc
@type: DOM Document
@param: elemDoc
@type: root element
-
+ @param assy: the NE1 assy.
+ @type assy: L{assembly}
"""
#create strands
elemStrands = doc.createElement('Strands')
- strandList = getAllDnaStrands(part)
+ strandList = getAllDnaStrands(assy)
for strand in strandList:
strandID = strand.name
elemStrand = doc.createElement('Strand')
@@ -222,13 +224,15 @@ def createStrands(doc,elemDoc, part):
elemDoc.appendChild(elemStrands)
return
-def createConstraints(doc,elemDoc, part):
+def createConstraints(doc,elemDoc, assy):
"""
create constraints section for the NE-1 model file in the ios file
@param: doc
@type: DOM Document
@param: elemDoc
@type: root element
+ @param assy: the NE1 assy.
+ @type assy: L{assembly}
"""
# write the constraints
@@ -236,7 +240,7 @@ def createConstraints(doc,elemDoc, part):
elemConstraintGroup = doc.createElement('ios:ConstraintGroup')
elemConstraintGroup.setAttribute('strict', '1')
- ladderList = getAllLadders(part)
+ ladderList = getAllLadders(assy)
for ladder in ladderList:
strandChunks = ladder.strand_chunks()
@@ -264,16 +268,15 @@ def createConstraints(doc,elemDoc, part):
return
#export to IOS format
-def exportToIOSFormat(part, fileName):
+def exportToIOSFormat(assy, fileName):
"""
Writes the IOS file
- @param: part
- @type:
-
+ @param assy: the NE1 assy.
+ @type assy: L{assembly}
@param: IOS output file in XML
@type: string
"""
-
+
if fileName == '':
print "No file selected to export"
return
@@ -290,8 +293,8 @@ def exportToIOSFormat(part, fileName):
createTokenLibrary(doc, elemDoc)
createMappingLibrary(doc,elemDoc)
- createStrands(doc, elemDoc, part)
- createConstraints(doc, elemDoc, part)
+ createStrands(doc, elemDoc, assy)
+ createConstraints(doc, elemDoc, assy)
#print doc to file
f = open(fileName,'w')
@@ -301,7 +304,6 @@ def exportToIOSFormat(part, fileName):
# include that
f = open(fileName,'r')
allLines=f.readlines()
- print allLines[1], allLines[len(allLines)-1]
allLines[1] = "<ios:IOS xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:ios='http://www.parabon.com/namespaces/inSeqioOptimizationSpecification'>\n"
allLines[len(allLines)-1] = "</ios:IOS>\n"
f.close()
@@ -309,4 +311,401 @@ def exportToIOSFormat(part, fileName):
f = open(fileName,'w')
f.writelines(allLines)
f.close()
+
return
+
+
+
+
+
+
+# UM 20080618: IOS IMPORT FUNCTIONS
+
+def importFromIOSFile(assy, fileName1):
+ """
+ Imports optimized sequences to NE-1 from IOS file
+ @param assy: the NE1 assy.
+ @type assy: L{assembly}
+
+ @param fileName1: IOS Import file
+ @type fileName1: string
+ @return: Returns True or False based on whether import was successful
+ """
+ part = assy.part
+ strandsOnScreen = checkStrandsOnNE_1Window(assy)
+ if strandsOnScreen == False:
+ print "Cannot import since currently IOS import is supported only for \
+ DNA strands and there are no DNA strands on the screen"
+ return False
+
+ fileName2 = doInitialProcessingOnXMLFile(fileName1)
+ strandBasesDict, compInfoDict, strandNameSeqDict = getHybridizationInfo(fileName2)
+ if strandBasesDict is None or strandNameSeqDict is None:
+ # Can remove the temp file
+ if os.path.exists(fileName2):
+ os.remove(fileName2)
+ return False
+ if compInfoDict is None:
+ print "All single strands to import"
+
+ #make sure that the file you are reading into the system has information
+ #that corresponds to the current structure in the NE-1 window.
+ infoCorrect = verifyStructureInfo(assy, strandBasesDict, compInfoDict)
+
+ if infoCorrect:
+ #import optimized bases from the IOS file
+ importBases(assy, strandNameSeqDict)
+ else:
+ if os.path.exists(fileName2):
+ os.remove(fileName2)
+ return False
+
+ if os.path.exists(fileName2):
+ os.remove(fileName2)
+
+ return True
+
+def checkStrandsOnNE_1Window(assy):
+ """
+ Checks to see if at least one DNA strand exists on the NE-1 window
+ @param part: the NE1 part.
+ @type part: L{assembly}
+ @return: True or False depending on whether there are DNA strands on the
+ NE-1 window
+ """
+
+ count = 0
+ part = assy.part
+ if hasattr(part.topnode, 'members'):
+ for node in part.topnode.members:
+
+ if hasattr(node,'members'):
+ for nodeChild in node.members:
+ if isinstance(nodeChild, assy.DnaStrand):
+ count = count +1
+ else:
+ if isinstance(node, DnaStrand):
+ count = count +1
+
+ if count >= 1:
+ return True
+ else:
+ return False
+
+def importBases(assy, strandNameSeqDict):
+ """
+ Imports optimized bases, currently stored in strandNameSeqDict dictionary
+ @param assy: the NE1 assy.
+ @type assy: L{assembly}
+
+ @param strandNameSeqDict: the dictionary containing the strand names and
+ sequences from the IOS import file
+
+ @type strandNameSeqDict: dict
+ """
+ part = assy.part
+
+ def func(node):
+ if isinstance(node, assy.DnaStrand):
+ #retrive its name and see if it exists in the dictionary, if yes
+ # then assign the base sequence
+ try:
+ seq = strandNameSeqDict[node.name]
+ node.setStrandSequence(seq)
+ except KeyError:
+ print "Cannot import since strand %s does not exist \
+ in the IOS file" % node.name
+ return
+
+ assy.part.topnode.apply2all(func)
+ win = assy.win
+ #if we are in the Build DNA mode, update the LineEdit that displays the
+ # sequences
+ if win.commandSequencer.currentCommand.commandName == 'DNA_STRAND':
+ win.commandSequencer.currentCommand.updateSequence()
+ return
+
+def getStrandsBaseInfoFromNE_1(assy):
+ """
+ Obtains the strand chunk names and their corresponding base string of the
+ NE-1 part
+ @param part: the NE1 part.
+ @type part: L{assembly}
+ @return: strand list and basestring list from NE-1
+ """
+ strandList = getAllDnaStrands(assy)
+ strandChunkListFromNE_1 = []
+ baseStringListFromNE_1 = []
+
+ for strand in strandList:
+ strandChunkList = strand.getStrandChunks()
+ for chunk in strandChunkList:
+ chunkID = chunk.name
+ #just get the name of the strand
+ strandChunkListFromNE_1.append(chunkID)
+ atoms = chunk.get_baseatoms()
+ baseList = []
+ for a in atoms:
+ bases = a.getDnaBaseName()
+ baseList.append(bases)
+ baseString = ''.join(baseList)
+ # base string needed for length info
+ baseStringListFromNE_1.append(baseString)
+ return strandChunkListFromNE_1, baseStringListFromNE_1
+
+def verifyStrandInfo(assy, strandBasesDict):
+ """
+ Verify strand info from NE-1 part and IOS file match
+ @param part: the NE1 part.
+ @type part: L{assembly}
+ @param strandBasesDict: the dictionary containing the strand names and
+ sequences from the IOS import file
+
+ @type strandBasesDict: dict
+ @return: True or False based on whether strands in the NE-1 and that in the
+ IOS file match up.
+ """
+ strandChunkListFromNE_1, baseStringListFromNE_1 = getStrandsBaseInfoFromNE_1(assy)
+
+ # Once strand chunks name and base string info have been obtained from NE-1
+ #check first of all such a chunk exists in the strand dictionary and if yes
+ # the base string lengths match
+
+ #check their lengths first
+ dictLength = len(strandBasesDict)
+ strandChunkListFromNE_1Length = len(strandChunkListFromNE_1)
+ if dictLength != strandChunkListFromNE_1Length:
+ print "Cannot import IOS file since number of strand chunks in the import file and \
+ one in NE-1 window does not match"
+ return False
+
+ #check if all the strand chunks exist
+ for chunks in strandChunkListFromNE_1:
+ try:
+ baseString = strandBasesDict[chunks]
+ except KeyError:
+ print "Cannot import IOS file since strand chunk %s in the NE-1 window \
+ cannot be found in the IOS file" % (chunks)
+ return False
+
+ #check if all the basestring lengths match or not
+ k = 0
+ for chunks in strandChunkListFromNE_1:
+ baseString = strandBasesDict[chunks]
+ baseStringFromNE_1 = baseStringListFromNE_1[k]
+
+ #print baseStringFromNE_1, baseString, k, len(baseStringFromNE_1),len(baseString)
+ if len(baseStringFromNE_1) != len(baseString):
+ print "Cannot import IOS file since base string length %d of \
+ chunk %s in the NE-1 window does not match with the one found in \
+ the IOS file %d" % (len(baseStringFromNE_1), chunks, len(baseString))
+ return False
+ k = k + 1
+
+
+ return True
+
+def getChunkAndComplFromNE_1(assy):
+ """
+ Obtain strand chunk and their corresponding complementary chunk for NE-1 part.
+ @param part: the NE1 part.
+ @type part: L{assembly}
+ @return: Two lists containing strand chunks and their complements respectively
+ """
+ ladderList = getAllLadders(assy)
+ strandChunkList = []
+ strandChunkComplList = []
+ for ladder in ladderList:
+ strandChunks = ladder.strand_chunks()
+ if ladder.num_strands() == 2:
+ chunk1 = strandChunks[0].name
+ chunk2 = strandChunks[1].name
+ strandChunkList.append(chunk1)
+ strandChunkComplList.append(chunk2)
+
+ return strandChunkList, strandChunkComplList
+
+def verifyComplementInfo(assy, compInfoDict):
+ """
+ Verify that the complementary strand info from the IOS file macthes with
+ that from the NE-1 part.
+
+ @param part: the NE1 part.
+ @type part: L{assembly}
+ @param compInfoDict: dictionary containing strand and their complementary
+ strands from the IOS file
+ @type compInfoDict: dict
+ @return: True or False based on whether the pairing info in the IOS file matches
+ with that in the NE-1 assembly
+ """
+ strandChunkList, strandChunkComplList = getChunkAndComplFromNE_1(assy)
+ if strandChunkList != '' and compInfoDict is None:
+ print "Cannot import IOS file since no pairing info in the IOS file, but\
+ NE-1 structure has doublestranded regions"
+ return False
+ if strandChunkList == '' and compInfoDict is None:
+ #no harm having single strands info, so long they match
+ return True
+
+ # as with strands verify the number of double stranded regions are equal
+ if len(strandChunkList) != len(compInfoDict):
+ print "Cannot do IOS import since number of double stranded regions \
+ are not equal"
+ return False
+
+ #verify complementary strands are same in IOS file and in NE-1 window
+ k = 0
+ for chunk in strandChunkList:
+ # no need to check if the chunk exists in the dictionary since if it
+ # does not, it has already returned False from the verifyStrandInfo()
+ # function
+
+ complFromIOS = compInfoDict[chunk]
+ if complFromIOS != strandChunkComplList[k]:
+ print "Cannot import IOS file since matching info for %s not found" % chunk
+ return False
+ k = k + 1
+ return True
+
+def verifyStructureInfo(assy, strandBasesDict, compInfoDict):
+ """
+ Verify that the structure info in the IOS file matches with that of the NE-1 part.
+ @param part: the NE1 part.
+ @type part: L{assembly}
+ @param strandBasesDict: dictionary containing strand and their corresponding
+ base string from IOS file
+ @type strandBasesDict: dict
+ @param compInfoDict: dictionary containing strand and their complementary
+ strands from the IOS file
+ @type compInfoDict: dict
+ @return: True or False based on if the structure in the IOS file matches up
+ with the structure in the NE-1 window.
+ """
+ strandInfoCorrect = verifyStrandInfo(assy, strandBasesDict)
+ if strandInfoCorrect == False:
+ return False
+ compInfoCorrect = verifyComplementInfo(assy, compInfoDict)
+ if compInfoCorrect:
+ return True
+ else:
+ return False
+
+def doInitialProcessingOnXMLFile(fileName1):
+ """
+ do initial preprocessing on the file so that its acceptable by the parser
+ from xml.dom.minidom
+ @param fileName2: IOS import file
+ @type fileName2: string
+ @retun: Temporary file that is read by the xml.dom.minidom
+ """
+ #its wierd, sometimes even with the prefix, the ExpatError exception does not
+ #show up. Do n't know what's going on! Anyways the prefix ios is not needed
+ #for any of the NE-1 processing and so it's better to be on the safe side!
+
+ f1 = open(fileName1, 'r')
+ allLines=f1.readlines()
+ f1.close()
+ #create a temporary file with the prefixes removed, make sure that you remove
+ #this file at the end of processing
+ fileName2 = "temp.xml"
+ f2 = open(fileName2, 'w')
+
+
+ for line in allLines:
+ if line.find("<ios:")!= -1:
+ line = line.replace("<ios:","<")
+ if line.find("</ios:")!= -1:
+ line = line.replace("</ios:","</")
+ f2.writelines(line)
+ f2.close()
+
+ return fileName2
+
+def getHybridizationInfo(fileName2):
+ """
+ Process this temporary file for strand chunk info. At the same time, we
+ check whether its a proper IOS file.
+
+ @param fileName2: IOS import file
+ @type fileName2: string
+ @return: 3 dictionaries containing (strand, Bases), (strand, complements),
+ and (strand, sequence)
+ """
+
+ try:
+ doc = parse(fileName2)
+ except ExpatError:
+ print "Cannot import IOS file, since its not in correct XML format"
+ return None, None, None
+
+ #need to distinguish between regions for mapping and simple strand regions
+ # hence get strand
+ strandList = doc.getElementsByTagName("Strand")
+ if len(strandList) == 0:
+ print "Cannot import IOS file since no strands to import"
+ return None, None, None
+
+ strandNameList = []
+ strandChunkList = []
+ basesForStrandChunkList = []
+ strandSeqList = []
+
+ #Within each strand access all regions
+ for i in range(len(strandList)):
+ strandNameList.append(str(strandList.item(i).getAttribute("id")))
+ regionList = strandList.item(i).getElementsByTagName("Region")
+
+ #each strand needs to have at least one region and so if one of them
+ #does not have it, then it is not a correct IOS file and you should return
+ # without bothering to process the rest of the file.
+ # So far IOS file format is concerned, single strand chunks do not neeed
+ # to be in a Region node. However, without the ID of the region, we have
+ # no way to read it into NE-1 and hence this import is invalid.
+
+ if len(regionList) == 0:
+ print "Cannot import IOS file: strand does not have any region, not \
+ a correct IOS file format"
+ return None, None, None
+
+ tempStrandSeq = ''
+ for j in range(len(regionList)):
+ #get list of strand chunks
+ strandChunkList.append(str(regionList.item(j).getAttribute("id")))
+ #get new base sequence after IOS optimization
+ tempBaseString = ''
+ if regionList.item(j).childNodes.item(0) is not None:
+ tempBaseString = str(regionList.item(j).childNodes.item(0).toxml())
+
+ #if the base string is empty, there's no point of analyzing any
+ #further either
+ if tempBaseString== '':
+ print "Cannot import IOS file: strand chunk does not have any bases, \
+ not a correct IOS file format"
+ return None, None, None
+
+ basesForStrandChunkList.append(tempBaseString)
+ tempStrandSeq = tempStrandSeq + tempBaseString
+ strandSeqList.append(tempStrandSeq)
+
+ strandBasesDict = dict(zip(strandChunkList, basesForStrandChunkList))
+ strandNameSeqDict = dict(zip(strandNameList, strandSeqList))
+
+ #would also need the constraints region to verify the pairing info
+ strand1List = []
+ strand2List = []
+
+ matchingList = doc.getElementsByTagName("Match")
+ if len(matchingList) == 0:
+ return strandBasesDict, None, strandNameSeqDict
+
+ #Within each matching access the pairs individually and store them in a tuple
+ for i in range(len(matchingList)):
+ compPairs = matchingList.item(i).getElementsByTagName("Region")
+
+ #get the names of complementary chunks
+ strand1List.append(str(compPairs.item(0).getAttribute("ref")))
+ strand2List.append(str(compPairs.item(1).getAttribute("ref")))
+
+ compInfoDict = dict(zip(strand1List, strand2List))
+
+ return strandBasesDict, compInfoDict, strandNameSeqDict \ No newline at end of file