XML examples needed
Georg Lohrer
GeorgLohrer at gmx.de
Mon Jun 4 00:26:35 EDT 2001
On Sun, 03 Jun 2001 20:59:12 GMT, xyzmats at laplaza.org (Mats Wichmann)
wrote:
>Sigh. Sometimes all this stuff makes me feel really stupid.
>
>I'm trying to figure out some really basic usage of XML in Python. I
>have a document that I'd like to structure as XML and use Python to
>generate an output file from it: the XML is for maintainability of the
>"raw data", but it needs to be turned into something much more complex
>eventually (a web page with embedded javascript).
A fortnight ago I was in the same situation. Once I discovered that
there are two fundamentally different approaches, and many different
implementations of each, things became much clearer.
First of all, there are two different ways of accessing XML data in
Python. One is to access it via the SAX interface (Simple API for XML).
This interface calls back methods that you supply: one when an element
starts, one when it ends and, of course, one for the characters
between the two.
The other is the DOM (Document Object Model). It is also built on top
of SAX, but it provides a higher-level representation of the
XML data. A DOM data structure is built up of 'nodes'. Every node
can have siblings and children; the bottom-most children contain the
actual data. As an example, here is a code snippet of my facade class
for accessing XML:
class XMLFacade:
    """Build an in-memory representation of an interfaces description file (XML).

    The parsed data is available through the read-only ``interfaces``
    attribute::

        interfaces = xml_facade.interfaces

    The structure is simple (for more details have a look at the DTD)::

        interfaces[key]   = (corba_servant, corba_client,
                             originate_message, answer_message)
        key               = string
        corba_servant     = (object_id, implementation, ns_context_list)
        object_id         = string
        implementation    = string
        corba_client      = (narrow_id, ns_context_list)
        narrow_id         = string
        ns_context_list   = [string]
        originate_message = string
        answer_message    = string

    An example of usage can be run by starting XMLFacade.py stand-alone
    with the XML file name as first parameter::

        python ./XMLFacade.py ./interfaces.xml

    NOTE(review): ``Sax2``, ``Node`` and ``sys`` are used here but their
    imports are not part of this snippet -- presumably the PyXML/4DOM
    ``Sax2`` reader, ``xml.dom.Node`` and the standard ``sys`` module;
    confirm against the full file.
    """

    def __init__(self, xml_file_path=None):
        """Initialize internal variables and optionally load `xml_file_path`.

        When a path is given the file is parsed and extracted immediately,
        so ``interfaces`` is populated as soon as construction returns.
        """
        # A parenthesised single-argument print produces the same output
        # as the print statement on Python 2 and is also valid on Python 3.
        print("c'tor of XMLFacade()")
        self.__interfaces = {}
        self.__valid = 0
        self.__reader = Sax2.Reader(validate=1)  # validate with DTD
        if xml_file_path:
            xml_dom_object = self.parse(xml_file_path)
            self.extract(xml_dom_object)

    def __del__(self):
        print("d'tor of XMLFacade()")

    #------------------------------------------------------------------------
    # Access methods
    # The access could be done by executing "instance.member_var"
    def _get_interfaces(self):
        """Return the dictionary of extracted interfaces."""
        return self.__get_interfaces()

    def __get_interfaces(self):
        """Private member method"""
        return self.__interfaces

    # Makes "instance.interfaces" work, as the class docstring and the
    # callers in this file expect; without it only _get_interfaces() exists.
    interfaces = property(_get_interfaces)

    #------------------------------------------------------------------------
    # Public methods
    def getText(self, node):
        """Return the concatenated text data of the element `node`.

        Every child of `node` must be a text node; an element without
        children yields the empty string.
        """
        assert node.nodeType == Node.ELEMENT_NODE
        if not node.hasChildNodes():
            return ""
        children = node.childNodes
        parts = []
        for i in range(children.length):
            text_node = children.item(i)
            assert text_node.nodeType == Node.TEXT_NODE
            parts.append(text_node.data)
        return "".join(parts)

    def parse(self, xml_file_path=None):
        """Parse the given XML file and check it against its DTD.

        Returns the DOM document object, or None when no path is given.
        If the parser reports a SAXParseException, a message is printed
        and the process exits with status 1.
        """
        if not xml_file_path:
            # Previously this fell through to "return xml_dom_object" with
            # the name unbound; extract() already copes with None.
            return None
        from xml.sax import SAXParseException
        try:
            return self.__reader.fromUri(xml_file_path)
        except SAXParseException:
            # sys.exc_info() avoids the version-specific "except X, e" /
            # "except X as e" spellings.
            ex = sys.exc_info()[1]
            print("XML not valid (checked against DTD)!\n%s" % ex)
            sys.exit(1)

    def extract(self, xml_dom_object=None):
        """Extract interfaces out of the given DOM document object.

        Any previously extracted interfaces are discarded first; passing
        None (or nothing) simply leaves the facade empty.
        """
        self.__interfaces = {}
        if xml_dom_object:
            self.parseXML(xml_dom_object)

    def parseXML(self, xml_dom_object):
        """Fill the internal interfaces dictionary from a DOM document.

        Each ENTRY element contributes one item keyed by the text of its
        KEY element. Structural expectations are checked with assertions.
        """
        assert xml_dom_object.nodeType == Node.DOCUMENT_NODE
        entry_list = xml_dom_object.getElementsByTagName("ENTRY")
        for i in range(entry_list.length):  # process all 'Entry'-keys
            entry_node = entry_list.item(i)
            assert entry_node.nodeType == Node.ELEMENT_NODE

            # only one KEY-element accepted/expected
            key_list = entry_node.getElementsByTagName("KEY")
            key = self.getText(key_list.item(0))

            #- SERVANT ------------------------------------------------------
            servant_list = entry_node.getElementsByTagName("CORBA_SERVANT")
            assert servant_list.length > 0
            servant_node = servant_list.item(0)  # only the first is used
            object_id = self.__single_text(servant_node, "OBJECT_ID")
            implementation = self.__single_text(servant_node, "IMPLEMENTATION")
            # NOTE(review): the DTD posted with this code declares
            # NS_CONTEXT* for CORBA_SERVANT, yet the original comment
            # demands at least one entry; the comment is followed here.
            nscs_list = self.__ns_context_list(servant_node)

            #- CLIENT -------------------------------------------------------
            client_list = entry_node.getElementsByTagName("CORBA_CLIENT")
            assert client_list.length > 0
            client_node = client_list.item(0)  # only the first is used
            narrow_id = self.__single_text(client_node, "NARROW_ID")
            nscc_list = self.__ns_context_list(client_node)

            #- ORIGINATE_MESSAGE / ANSWER_MESSAGE ----------------------------
            originate_message = self.__optional_text(entry_node,
                                                     "ORIGINATE_MESSAGE")
            answer_message = self.__optional_text(entry_node, "ANSWER_MESSAGE")

            self.__interfaces[key] = (
                (object_id, implementation, nscs_list),
                (narrow_id, nscc_list),
                originate_message,
                answer_message,
            )

    #------------------------------------------------------------------------
    # Private helpers
    def __single_text(self, parent_node, tag_name):
        """Return the text of the one `tag_name` element below `parent_node`."""
        nodes = parent_node.getElementsByTagName(tag_name)
        assert nodes.length == 1  # there must be exactly one
        return self.getText(nodes.item(0))

    def __optional_text(self, parent_node, tag_name):
        """Return the text of the first `tag_name` element, or "" if absent."""
        nodes = parent_node.getElementsByTagName(tag_name)
        if nodes.length > 0:
            return self.getText(nodes.item(0))
        return ""

    def __ns_context_list(self, parent_node):
        """Return the texts of all NS_CONTEXT elements below `parent_node`."""
        nodes = parent_node.getElementsByTagName("NS_CONTEXT")
        # "At least one entry": the original compared with "> 1", which
        # wrongly rejected a single NS_CONTEXT.
        assert nodes.length >= 1
        return [self.getText(nodes.item(j)) for j in range(nodes.length)]
#==============================================================================
def _print_interfaces(interfaces=None):
    """Write a human-readable dump of `interfaces` to standard output.

    `interfaces` is indexed as
    ``interfaces[key] = ((object_id, implementation, servant_ns_contexts),
    (narrow_id, client_ns_contexts), originate_message, answer_message)``.
    None or a mapping without keys produces the "No keys found!" message.
    """
    print("Interfaces read:")
    if not interfaces or len(interfaces.keys()) == 0:
        print("No keys found!")
        return

    for key in interfaces.keys():
        entry = interfaces[key]
        # Pull every field out first so that a malformed entry fails
        # before any of its lines has been printed.
        servant = entry[0]
        servant_fields = (servant[0], servant[1], servant[2])
        client = entry[1]
        client_fields = (client[0], client[1])
        messages = (entry[2], entry[3])

        print("Key: %s" % key)
        print(" Object-ID:%s" % servant_fields[0])
        print(" Implementation:%s" % servant_fields[1])
        # "label value" pairs: one separating space, exactly what a
        # two-item print statement emits.
        labelled = (
            (" Servant's NS_Context:", servant_fields[2]),
            (" Client's narrow() Id:", client_fields[0]),
            (" Client's NS_Context :", client_fields[1]),
            (" Originate message :", messages[0]),
            (" Answer message :", messages[1]),
        )
        for label, value in labelled:
            print("%s %s" % (label, value))
        print("")  # blank line between entries
#==============================================================================
def _test():
    """Command-line self-test for XMLFacade.

    Expects the path of an XML file in ``sys.argv[1]``; prints a usage
    note and exits with status 1 when it is missing or not a file. The
    file is loaded twice: once step by step (parse, then extract) and
    once through the constructor.
    """
    import os

    argv = sys.argv
    if len(argv) < 2:
        print("Usage:")
        # Three print items joined by single spaces, hence the doubled
        # spaces around the script name in the output.
        print("%s %s %s" % ("python ", argv[0], " <xml-file>"))
        print("")
        sys.exit(1)

    print("Extract interfaces out of given xml-file\n")
    if not os.path.isfile(argv[1]):
        print("Give xml-file(-path) as first parameter")
        sys.exit(1)

    # Variant 1: explicit parse() followed by extract().
    xml_facade = XMLFacade()
    dom_document = xml_facade.parse(argv[1])
    xml_facade.extract(dom_document)
    _print_interfaces(xml_facade.interfaces)
    # Drop the only reference before the next message is printed
    # (on CPython this is where the facade's __del__ message shows up).
    del xml_facade

    # Variant 2: let the constructor do both steps.
    print("\n\nDo the same with c'tor setup")
    xml_facade = XMLFacade(argv[1])
    _print_interfaces(xml_facade.interfaces)
#==============================================================================
# Run the self-test only when this file is executed as a script.
if __name__ == "__main__":
    _test()
The XML will look like this:
<?xml version = "1.0"?>
<!DOCTYPE INTERFACES SYSTEM "interfaces.dtd">
<INTERFACES>
<ENTRY ID="BEcho"
DESC="Short demo message (split into CORBA part and internal
message)">
<KEY>BBEcho</KEY>
<CORBA_SERVANT>
<OBJECT_ID>BBEechoObjectID</OBJECT_ID>
<IMPLEMENTATION>BBEcho_i</IMPLEMENTATION>
<NS_CONTEXT>callProcessing</NS_CONTEXT>
<NS_CONTEXT>CellConfiguration</NS_CONTEXT>
<NS_CONTEXT>CellConfigCh</NS_CONTEXT>
</CORBA_SERVANT>
<CORBA_CLIENT>
<NARROW_ID>Example.BBEchoResponse</NARROW_ID>
<NS_CONTEXT>callProcessing</NS_CONTEXT>
<NS_CONTEXT>CellConfiguration</NS_CONTEXT>
<NS_CONTEXT>CellConfigChResponseHandler</NS_CONTEXT>
</CORBA_CLIENT>
<ORIGINATE_MESSAGE>MsgBBEcho</ORIGINATE_MESSAGE>
<ANSWER_MESSAGE>MsgBBEchoResponse</ANSWER_MESSAGE>
</ENTRY>
</INTERFACES>
The DTD looks like this:
<!ELEMENT INTERFACES ((ENTRY)*)>
<!ELEMENT ENTRY (KEY, CORBA_SERVANT, CORBA_CLIENT, ORIGINATE_MESSAGE,
ANSWER_MESSAGE)>
<!ATTLIST ENTRY
ID ID #REQUIRED
DESC CDATA #IMPLIED
>
<!ELEMENT KEY (#PCDATA)>
<!ELEMENT CORBA_SERVANT (OBJECT_ID+, IMPLEMENTATION+, NS_CONTEXT*)>
<!ELEMENT CORBA_CLIENT (NARROW_ID+, NS_CONTEXT+)>
<!ELEMENT NS_CONTEXT (#PCDATA)>
<!ELEMENT OBJECT_ID (#PCDATA)>
<!ELEMENT IMPLEMENTATION (#PCDATA)>
<!ELEMENT NARROW_ID (#PCDATA)>
<!ELEMENT ORIGINATE_MESSAGE (#PCDATA)>
<!ELEMENT ANSWER_MESSAGE (#PCDATA)>
All of the work is done in XMLFacade.parseXML(). It extracts the data
out of the XML DOM and fills lists with the different items. These lists
are used in the next steps of the whole application.
It's pretty easy to start with a simple XML/DTD and do everything 'by hand'
directly in the Python interpreter. You'll get a deeper
understanding if you can see the different data structures and
data items during processing.
Ciao, Georg
More information about the Python-list
mailing list