trunk/reprap/miscellaneous/python-beanshell-scripts/skeinforge_tools/skeinforge_utilities/xml_simple_parser.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159

"""
The xml_simple_parser.py script is an xml parser that can parse a line separated xml text.

This xml parser will read a line separated xml text and produce a tree of the xml with a root element.  Each element can have an attribute table, children, a class name, parents, text and a link to the root element.

This example gets an xml tree for the xml file boolean.xml.  This example is run in a terminal in the folder which contains boolean.xml and xml_simple_parser.py.


> python
Python 2.5.1 (r251:54863, Sep 22 2007, 01:43:31)
[GCC 4.2.1 (SUSE Linux)] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> file = open( 'boolean.xml', 'r' )
>>> xmlText = file.read()
>>> file.close()
>>> from xml_simple_parser import XMLSimpleParser
>>> xmlParser = XMLSimpleParser( xmlText )
>>> print( xmlParser )
  ?xml, {'version': '1.0'}
  ArtOfIllusion, {'xmlns:bf': '//babelfiche/codec', 'version': '2.0', 'fileversion': '3'}
  Scene, {'bf:id': 'theScene'}
  materials, {'bf:elem-type': 'java.lang.Object', 'bf:list': 'collection', 'bf:id': '1', 'bf:type': 'java.util.Vector'}
..
many more lines of the xml tree
..

"""


from __future__ import absolute_import
#Init has to be imported first because it has code to workaround the python bug where relative imports don't work if the module is imported as a main module.
import __init__

from skeinforge_tools.skeinforge_utilities import euclidean
from skeinforge_tools.skeinforge_utilities import gcodec

# Module metadata: author, credits, date stamp and license of this script.
__author__ = "Enrique Perez (perez_enrique@yahoo.com)"
__credits__ = 'Nophead <http://hydraraptor.blogspot.com/>\nArt of Illusion <http://www.artofillusion.org/>'
# NOTE(review): the date stamp looks like year/day/month order rather than year/month/day - confirm before relying on it.
__date__ = "$Date: 2008/21/04 $"
__license__ = "GPL 3.0"


class XMLElement:
	"""An element of the xml tree which is built from a line separated xml text.

	Attributes:
		attributeTable: dictionary of attribute name to attribute value.
		children: list of the child elements, in the order they were parsed.
		className: the tag name of the element.
		parents: list of the ancestor elements, outermost first.
		rootElement: link to the root element, None until whoever builds the tree sets it.
		text: the text between the begin tag and the end tag when both are on the same line.
	"""
	def __init__( self ):
		"Start with an empty attribute table, no children, no parents and no text."
		self.attributeTable = {}
		self.children = []
		self.className = ''
		self.parents = []
		self.rootElement = None
		self.text = ''

	def __repr__( self ):
		"Get the string representation of this XML element and its descendants, indented two spaces per ancestor."
		indentation = '  ' * len( self.parents )
		stringRepresentation = '%s%s, %s' % ( indentation, self.className, self.attributeTable )
		if len( self.text ) > 0:
			stringRepresentation += '\n%s%s' % ( indentation, self.text )
		for child in self.children:
			stringRepresentation += '\n%s' % child
		return stringRepresentation

	def addAttribute( self, word ):
		"""Add the attribute in a key="value" word to the attribute table.

		The first and last characters after the equal sign are taken to be the quotes and are dropped.
		A word without an equal sign is not an attribute, so it is ignored instead of being stored as a mangled key.
		"""
		indexOfEqualSign = word.find( '=' )
		if indexOfEqualSign < 0:
			return
		key = word[ : indexOfEqualSign ]
		afterEqualSign = word[ indexOfEqualSign + 1 : ].lstrip()
		self.attributeTable[ key ] = afterEqualSign[ 1 : - 1 ]

	def getChildrenWithClassName( self, className ):
		"Get the list of the direct children which have the given class name."
		return [ child for child in self.children if child.className == className ]

	def getFirstChildWithClassName( self, className ):
		"Get the first direct child which has the given class name, or None if there is no such child."
		for child in self.children:
			if child.className == className:
				return child
		return None

	def getSubChildWithID( self, idReference ):
		"Get the first descendant, searching depth first, whose bf:id attribute is the idReference, or None if there is none."
		for child in self.children:
			if 'bf:id' in child.attributeTable and child.attributeTable[ 'bf:id' ] == idReference:
				return child
			subChildWithID = child.getSubChildWithID( idReference )
			if subChildWithID is not None:
				return subChildWithID
		return None

	def parseReplacedLine( self, line, parents ):
		"""Parse one xml line into this element and update the stack of open elements.

		line: the xml line without leading whitespace.
		parents: the stack of the currently open elements, outermost first.  It is shared between calls and is changed in place; an end tag pops it and a begin tag which is not closed on the same line pushes this element.
		"""
		if line.startswith( '</' ):
			# An end tag closes the innermost open element; a stray end tag is ignored rather than raising an IndexError.
			if len( parents ) > 0:
				del parents[ - 1 ]
			return
		self.className = line[ 1 : line.replace( '>', ' ' ).find( ' ' ) ]
		isSelfClosing = line.endswith( '/>' )
		# Put the '>' which follows a quoted attribute value in its own word, so the end of the begin tag can be found.
		splitLine = line.replace( '">', '" > ' ).split()
		# When the end of the begin tag is not found, the - 1 makes the slice below skip the last word.
		indexOfEndOfTheBeginTag = - 1
		if isSelfClosing:
			indexOfEndOfTheBeginTag = len( splitLine ) - 1
		elif '>' in splitLine:
			indexOfEndOfTheBeginTag = splitLine.index( '>' )
		for word in splitLine[ 1 : indexOfEndOfTheBeginTag ]:
			self.addAttribute( word )
		# Keep a snapshot of the ancestors, because the parents stack keeps changing while the rest of the text is parsed.
		self.parents = parents[ : ]
		if len( parents ) > 0:
			parents[ - 1 ].children.append( self )
		if isSelfClosing:
			return
		tagEnd = '</%s>' % self.className
		if line.endswith( tagEnd ):
			# The begin tag and the end tag are on the same line, so what is between them is the text.
			untilTagEnd = line[ : - len( tagEnd ) ]
			self.text = untilTagEnd[ untilTagEnd.rfind( '>' ) + 1 : ]
			return
		# The element stays open, so the following lines are its descendants until its end tag.
		parents.append( self )


class XMLSimpleParser:
	"A parser which builds a tree of xml elements from a line separated xml text."
	def __init__( self, xmlText ):
		"Get the lines of the xml text from gcodec.getTextLines and parse them in order."
		self.isInComment = False
		self.parents = []
		self.rootElement = None
		self.lines = gcodec.getTextLines( xmlText )
		for xmlLine in self.lines:
			self.parseLine( xmlLine )

	def __repr__( self ):
		"Get the string representation of the root element, which is 'None' if nothing was parsed."
		return str( self.rootElement )

	def parseLine( self, line ):
		"""Parse one line of the xml text and add its element to the tree.

		Blank lines and comment lines are skipped.  A comment starts on a line which begins with '<!--' and lasts up to and including the first line which contains '-->'.
		The first element which is parsed becomes the root element, and every element gets a link to it.
		"""
		strippedLine = line.lstrip()
		if not strippedLine:
			return
		if strippedLine.startswith( '<!--' ):
			self.isInComment = True
		if self.isInComment:
			# The line which ends the comment is itself still part of the comment, so it is skipped as well.
			if '-->' in strippedLine:
				self.isInComment = False
			return
		element = XMLElement()
		element.parseReplacedLine( strippedLine, self.parents )
		if self.rootElement is None:
			self.rootElement = element
		element.rootElement = self.rootElement