[XML-SIG] Parse MULTIPLE XML files in a directory

amitesh kumar amitesh116 at gmail.com
Fri Aug 10 08:58:29 CEST 2007


Hi,

Please review the following code and help me.

Here is what I'm trying to do:
1. Read each XML file in a folder.
2. Parse the file.
3. Store some of the tags' values as key-value pairs in a map.
4. Similarly, maintain another collection that stores one list per file.

-- 
With Regards
Amitesh K.
9850638640
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mail.python.org/pipermail/xml-sig/attachments/20070810/d386d52a/attachment.htm 
-------------- next part --------------
import sys
import xml.parsers.expat
import dircache

# Mutable state shared by the expat callbacks and the driver loop.
# cnt  - incremented once per processed file; used to build 'ORDER-<n>' keys.
# cnt2 - incremented when a 'shipto' element closes, reset when an 'order'
#        element closes; used to build 'OLI-<n>' keys.
cnt = cnt2 = 0
# Name of the tracked element whose text is currently being read.  The
# initial '*' is not a member of any tag set, so text is ignored until a
# tracked element opens.
ordtagname = '*'
# rec holds values for tags in ordtags/omptags plus the 'OLI-<n>' entries;
# rec2 holds values for tags in shptags.
rec, rec2 = dict(), dict()
# Maps 'ORDER-<n>' to the record stored after each file is parsed.
recList = dict()
# Not referenced by the code shown here.
oli = dict()

# Element names whose text the callbacks capture.  Text of tags in ordtags
# and omptags is stored in `rec`; text of tags in shptags is stored in `rec2`.
ordtags = set([
    'orrfnbr',
    'afidlog',
    'orprtot',
    'ortxtot',
    'orshtot',
    'orcpcur',
    'orpstmp',
    'orustmp',
    'orappstat',
    'orappdt',
])

shptags = set([
    'strfnbr',
    'stprnbr',
    'stvdnbr',
    'stprice',
    'stquant',
    'stpstmp',
    'stustmp',
    'starwbll',
    'stdspchstat',
    'stlogistics',
    'stentrydt',
    'stcpprice',
    'stlstprice',
])

omptags = set(['ompaymthd', 'ommaxaamt'])


def start_element(name, attrs):
    """Remember `name` as the current element if it is a tracked tag.

    Installed as the parser's StartElementHandler.  When `name` belongs to
    ordtags, shptags or omptags, the module-level `ordtagname` is set to it
    so that char_data knows where to file the following text; otherwise
    `ordtagname` is left unchanged.

    name  -- name of the element that was just opened.
    attrs -- the element's attributes; not used.
    """
    global ordtagname
    if any(name in tracked for tracked in (ordtags, shptags, omptags)):
        ordtagname = name
    sys.stdout.flush()

def end_element(name):
    """Update the shared parser state when an element closes.

    Installed as the parser's EndElementHandler.

    * Closing a tracked tag (a member of ordtags, shptags or omptags) clears
      `ordtagname`, so text that follows the element is not captured.
    * Closing 'shipto' advances the line-item counter `cnt2` and rebinds
      `rec2` to a fresh dict.  char_data stores `rec2` itself (not a copy)
      under rec['OLI-<cnt2>'], so without a fresh dict every line item
      would share one dict and be overwritten by the next line item.
    * Closing 'order' resets `cnt2` to 0.

    name -- name of the element that was just closed.
    """
    global ordtagname, rec2, cnt2
    if name in ordtags or name in shptags or name in omptags:
        ordtagname = ''
    if name == 'shipto':
        cnt2 = cnt2 + 1
        # The finished line item keeps the old dict; the next one starts clean.
        rec2 = {}
    elif name == 'order':
        cnt2 = 0
    sys.stdout.flush()
	
def char_data(data):
    """Store element text under the name of the currently open tracked tag.

    Installed as the parser's CharacterDataHandler.  The destination depends
    on the module-level `ordtagname`:

    * a tag in shptags   -> rec2[ordtagname] = data, and rec2 is registered
                            in rec under 'OLI-<cnt2>';
    * a tag in ordtags or omptags -> rec[ordtagname] = data;
    * anything else      -> the text is ignored.

    Names and values are stored exactly as received.  The earlier
    repr(...).strip('u\\'') idiom removed every leading/trailing "u" and "'"
    character (so 'usd' became 'sd') and escaped non-ASCII text.

    The 'OLI-<cnt2>' entry is only written when line-item text is seen, so
    order-level text no longer creates a line-item entry.

    Each call overwrites the previous value for the same tag, so if the
    parser delivers one element's text in several pieces only the last piece
    is kept; setting the parser's buffer_text attribute avoids fragmented
    delivery.

    data -- text reported by the parser.
    """
    if data is not None:
        if ordtagname in shptags:
            rec2[ordtagname] = data
            # rec2 is stored by reference, so later fields of the same line
            # item show up under this key as well.
            rec['OLI-' + str(cnt2)] = rec2
        elif ordtagname in ordtags or ordtagname in omptags:
            rec[ordtagname] = data
    sys.stdout.flush()

# Parse every file in ./xmls/ and collect one record per file in recList
# under the key 'ORDER-<n>'.
for f in dircache.listdir('./xmls/'):
    print(f)
    # Start each file with fresh dicts.  recList stores `rec` by reference,
    # so reusing one dict would make every 'ORDER-<n>' entry point at the
    # same (last) record.
    rec = {}
    rec2 = {}
    cnt2 = 0
    p = xml.parsers.expat.ParserCreate()
    # Let expat coalesce adjacent text so an element's text is not handed to
    # the character-data handler in fragments, which overwrite one another.
    p.buffer_text = True
    p.StartElementHandler = start_element
    p.CharacterDataHandler = char_data
    p.EndElementHandler = end_element
    # Binary mode hands the raw bytes to expat, which honours the encoding
    # declared in the document itself.
    g = open('./xmls/' + f, 'rb')
    try:
        p.ParseFile(g)
    finally:
        # Close the handle even when the document is malformed.
        g.close()
    recList['ORDER-' + str(cnt)] = rec
    print(recList)
    cnt = cnt + 1


More information about the XML-SIG mailing list