Windows file paths, again

Dan Guido dguido at gmail.com
Wed Oct 21 14:13:19 EDT 2009


I'm trying to write a few methods that normalize Windows file paths.
I've gotten it to work in 99% of the cases, but it seems like my code
still chokes on '\x'. I've pasted my code below, can someone help me
figure out a better way to write this? This seems overly complicated
for such a simple problem...


# returns normalized filepath with arguments removed
def remove_arguments(filepath):
	#print "removing args from: " + filepath
	(head, tail) = os.path.split(filepath)
	pathext = os.environ['PATHEXT'].split(";")
	
	while(tail != ''):
		#print "trying: " + os.path.join(head,tail)
		
		# does it just work?
		if os.path.isfile(os.path.join(head, tail)):
			#print "it just worked"
			return os.path.join(head, tail)
		
		# try every extension
		for ext in pathext:
			if os.path.isfile(os.path.join(head, tail) + ext):
				return os.path.join(head, tail) + ext
	
		# remove the last word, try again
		tail = tail.split()[:-1]
		tail = " ".join(tail)
	
	return None
	
escape_dict={'\a':r'\a',
           '\b':r'\b',
           '\c':r'\c',
           '\f':r'\f',
           '\n':r'\n',
           '\r':r'\r',
           '\t':r'\t',
           '\v':r'\v',
           '\'':r'\'',
           #'\"':r'\"',
           '\0':r'\0',
           '\1':r'\1',
           '\2':r'\2',
           '\3':r'\3',
           '\4':r'\4',
           '\5':r'\5',
           '\6':r'\6',
           '\7':r'\a', #i have no idea
           '\8':r'\8',
           '\9':r'\9'}

def raw(text):
	"""Returns a raw string representation of text"""
	new_string=''
	for char in text:
		try:
			new_string+=escape_dict[char]
			#print "escaped"
		except KeyError:
			new_string+=char
			#print "keyerror"
		#print new_string
	return new_string

# returns the normalized path to a file if it exists
# returns None if it doesn't exist
def normalize_path(path):
	#print "not normal: " + path
	
	# make sure it's not blank
	if(path == ""):
		return None

	# get rid of mistakenly escaped bytes
	path = raw(path)
	#print "step1: " + path

	# remove quotes
	path = path.replace('"', '')
	#print "step2: " + path
	
	#convert to lowercase
	lower = path.lower()
	#print "step3: " + lower
	
	# expand all the normally formed environ variables
	expanded = os.path.expandvars(lower)
	#print "step4: " + expanded
	
	# chop off \??\
	if expanded[:4] == "\\??\\":
		expanded = expanded[4:]
	#print "step5: " + expanded
	
	# strip a leading '/'
	if expanded[:1] == "\\":
		expanded = expanded[1:]
	#print "step7: " + expanded
	
	systemroot = os.environ['SYSTEMROOT']
	
	# sometimes systemroot won't have %
	r = re.compile('systemroot', re.IGNORECASE)
	expanded = r.sub(systemroot, expanded)
	#print "step8: " + expanded
	
	# prepend the %systemroot% if its missing
	if expanded[:8] == "system32" or "syswow64":
		expanded = os.path.join(systemroot, expanded)
	#print "step9: " + expanded
	
	stripped = remove_arguments(expanded.lower())
	
	# just in case you're running as LUA
	# this is a race condition but you can suck it
	if(stripped):
		if os.access(stripped, os.R_OK):
			return stripped
	
	return None
	
def test_normalize():
	test1 = "\??\C:\WINDOWS\system32\Drivers\CVPNDRVA.sys"
	test2 = "C:\WINDOWS\system32\msdtc.exe"
	test3 = "%SystemRoot%\system32\svchost.exe -k netsvcs"
	test4 = "\SystemRoot\System32\drivers\vga.sys"
	test5 = "system32\DRIVERS\compbatt.sys"
	test6 = "C:\Program Files\ABC\DEC Windows Services\Client Services.exe"
	test7 = "c:\Program Files\Common Files\Symantec Shared\SNDSrvc.exe"
	test8 = "C:\WINDOWS\system32\svchost -k dcomlaunch"
	test9 = ""
	test10 = "SysWow64\drivers\AsIO.sys"
	test11 = "\SystemRoot\system32\DRIVERS\amdsbs.sys"
	test12 = "C:\windows\system32\xeuwhatever.sys" #this breaks everything

	print normalize_path(test1)
	print normalize_path(test2)
	print normalize_path(test3)
	print normalize_path(test4)
	print normalize_path(test5)
	print normalize_path(test6)
	print normalize_path(test7)
	print normalize_path(test8)
	print normalize_path(test9)
	print normalize_path(test10)
	print normalize_path(test11)
	print normalize_path(test12)

--
Dan Guido



More information about the Python-list mailing list