"""
The xml_simple_parser.py script is an xml parser that can parse a line separated xml text.
This xml parser will read a line separated xml text and produce a tree of the xml with a root element. Each element can have an attribute table, children, a class name, parents, text and a link to the root element.
This example gets an xml tree for the xml file boolean.xml. This example is run in a terminal in the folder which contains boolean.xml and xml_simple_parser.py.
> python
Python 2.5.1 (r251:54863, Sep 22 2007, 01:43:31)
[GCC 4.2.1 (SUSE Linux)] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> file = open( 'boolean.xml', 'r' )
>>> xmlText = file.read()
>>> file.close()
>>> from xml_simple_parser import XMLSimpleParser
>>> xmlParser = XMLSimpleParser( xmlText )
>>> print( xmlParser )
?xml, {'version': '1.0'}
ArtOfIllusion, {'xmlns:bf': '//babelfiche/codec', 'version': '2.0', 'fileversion': '3'}
Scene, {'bf:id': 'theScene'}
materials, {'bf:elem-type': 'java.lang.Object', 'bf:list': 'collection', 'bf:id': '1', 'bf:type': 'java.util.Vector'}
..
many more lines of the xml tree
..
"""
from __future__ import absolute_import
#Init has to be imported first because it has code to workaround the python bug where relative imports don't work if the module is imported as a main module.
import __init__
from skeinforge_tools.skeinforge_utilities import euclidean
from skeinforge_tools.skeinforge_utilities import gcodec
# Module metadata: author, credits, date and license strings.
__author__ = "Enrique Perez (perez_enrique@yahoo.com)"
__credits__ = 'Nophead \nArt of Illusion '
__date__ = "$Date: 2008/21/04 $"
__license__ = "GPL 3.0"
class XMLElement:
    """An xml element, a node in the tree built from line separated xml text.

    Attributes:
        attributeTable: dictionary mapping attribute names to their unquoted values.
        children: list of the child elements, in the order they were parsed.
        className: the tag name of the element.
        parents: list of the ancestor elements, outermost first.
        rootElement: link to the root element of the tree, None until it is assigned.
        text: the inline text of a one line element, '' if there is none.
    """

    def __init__(self):
        "Add empty lists."
        self.attributeTable = {}
        self.children = []
        self.className = ''
        self.parents = []
        self.rootElement = None
        self.text = ''

    def __repr__(self):
        "Get the string representation of this XML element and of its children."
        # Each element is indented by one space per ancestor.
        indentation = ' ' * len(self.parents)
        representationLines = ['%s%s, %s' % (indentation, self.className, self.attributeTable)]
        if self.text:
            representationLines.append('%s%s' % (indentation, self.text))
        for child in self.children:
            representationLines.append(str(child))
        return '\n'.join(representationLines)

    def addAttribute(self, word):
        """Add the attribute in a key="value" word to the attribute table.

        The first and last characters after the equal sign are taken to be the
        quotes and are dropped.  A word without an equal sign is not an
        attribute, for example a word of inline text, so it is ignored.
        """
        indexOfEqualSign = word.find('=')
        if indexOfEqualSign < 0:
            return
        key = word[:indexOfEqualSign]
        quotedValue = word[indexOfEqualSign + 1:].lstrip()
        self.attributeTable[key] = quotedValue[1:-1]

    def getChildrenWithClassName(self, className):
        "Get the list of direct children which have the given class name."
        return [child for child in self.children if className == child.className]

    def getFirstChildWithClassName(self, className):
        "Get the first direct child which has the given class name, or None if there is none."
        for child in self.children:
            if className == child.className:
                return child
        return None

    def getSubChildWithID(self, idReference):
        "Get the first descendant, searching depth first, whose 'bf:id' attribute is idReference, or None."
        for child in self.children:
            if 'bf:id' in child.attributeTable:
                if child.attributeTable['bf:id'] == idReference:
                    return child
            subChildWithID = child.getSubChildWithID(idReference)
            if subChildWithID is not None:
                return subChildWithID
        return None

    def parseReplacedLine(self, line, parents):
        """Parse one left stripped xml line and update the stack of open elements.

        line is a single xml line with no leading whitespace.  parents is the
        stack of currently open elements, it is modified in place:
          - a closing tag line pops the innermost open element,
          - a self closing line or a one line element with inline text is
            added to the innermost open element and the stack is unchanged,
          - any other begin tag is added to the innermost open element and
            then pushed on the stack.
        Attributes are split on whitespace, so attribute values which contain
        whitespace are not supported.
        """
        if line.startswith('</'):
            # A closing tag with no open element is ignored instead of raising an IndexError.
            if parents:
                del parents[-1]
            return
        self.className = line[1:line.replace('>', ' ').find(' ')]
        indexOfEndOfTheBeginTag = -1
        lastWord = line[-2:]
        # Separate the '>' which ends a begin tag with attributes so that it becomes its own word.
        splitLine = line.replace('">', '" > ').split()
        if lastWord == '/>':
            indexOfEndOfTheBeginTag = len(splitLine) - 1
        elif '>' in splitLine:
            indexOfEndOfTheBeginTag = splitLine.index('>')
        for word in splitLine[1:indexOfEndOfTheBeginTag]:
            self.addAttribute(word)
        # Keep a snapshot of the ancestors, the stack itself keeps changing while parsing goes on.
        self.parents = list(parents)
        if parents:
            parents[-1].children.append(self)
        if lastWord == '/>':
            return
        tagEnd = '</%s>' % self.className
        if line.endswith(tagEnd):
            # One line element, the text is everything between the begin tag and the end tag.
            untilTagEnd = line[:-len(tagEnd)]
            lastGreaterThanIndex = untilTagEnd.rfind('>')
            self.text = untilTagEnd[lastGreaterThanIndex + 1:]
            return
        parents.append(self)
class XMLSimpleParser:
    """A simple xml parser for line separated xml text.

    The text is split into lines and each line is parsed as one xml tag.  The
    first element which is parsed becomes the root element, and
    str( parser ) gives the string representation of that root element.

    Attributes:
        isInComment: True while the lines being read are inside an xml comment.
        lines: the lines of the xml text, as returned by gcodec.getTextLines.
        parents: the stack of the elements which are open at the current line.
        rootElement: the first parsed element, None if no element was parsed.
    """

    def __init__(self, xmlText):
        "Split the xml text into lines and parse each line."
        self.isInComment = False
        self.parents = []
        self.rootElement = None
        self.lines = gcodec.getTextLines(xmlText)
        for line in self.lines:
            self.parseLine(line)

    def __repr__(self):
        "Get the string representation of this parser."
        return str(self.rootElement)

    def parseLine(self, line):
        """Parse an xml line and add its element to the tree.

        Blank lines are skipped.  A line which starts with '<!--' begins a
        comment, every line is skipped until and including the line which
        contains '-->'.
        """
        lineStripped = line.lstrip()
        if not lineStripped:
            return
        if lineStripped.startswith('<!--'):
            self.isInComment = True
        if self.isInComment:
            # The line which closes the comment is itself still part of the comment.
            if '-->' in lineStripped:
                self.isInComment = False
            return
        xmlElement = XMLElement()
        xmlElement.parseReplacedLine(lineStripped, self.parents)
        if self.rootElement is None:
            self.rootElement = xmlElement
        xmlElement.rootElement = self.rootElement