Turning greek-iso filenames => utf-8 iso

Νικόλαος Κούρας support at superhost.gr
Wed Jun 12 06:40:20 EDT 2013


Thanks Steven, I made some alterations to the variable names and, at 
the end, to the way that I check a database filename against an HDD 
filename. Here is the code:

# =================================================================================================================
# Convert wrongly encoded filenames to utf-8
# =================================================================================================================
# Directory whose entries get renamed to UTF-8.  It is a bytes path so that
# os.listdir() hands back the raw on-disk names as bytes, undecoded.
path = b'/home/nikos/public_html/data/apps/'
filenames = os.listdir(path)

# Full paths (UTF-8 encoded bytes) of every entry once it has been processed.
utf8_filenames = []

for filename in filenames:
	# Compute 'path/to/filename'
	filename_bytes = path + filename
	# guess_encoding() is defined elsewhere; the branches below expect it to
	# return 'utf-8', some other codec name, or None.
	encoding = guess_encoding(filename_bytes)

	if encoding == 'utf-8':
		# File name is valid UTF-8, so we can skip to the next file.
		utf8_filenames.append(filename_bytes)
		continue
	elif encoding is None:
		# No idea what the encoding is. Hit it with a hammer until it stops
		# moving.  'xmlcharrefreplace' only works when *encoding* and raises
		# TypeError when decoding, so use 'replace', which substitutes
		# U+FFFD for every undecodable byte.
		# NOTE(review): two different broken names can decode to the same
		# text, in which case the rename below overwrites one of them.
		filename = filename_bytes.decode('utf-8', 'replace')
	else:
		filename = filename_bytes.decode(encoding)

	# Rename the file to something which ought to be UTF-8 clean.
	newname_bytes = filename.encode('utf-8')
	os.rename(filename_bytes, newname_bytes)
	utf8_filenames.append(newname_bytes)

	# Once we get here, the file ought to be UTF-8 clean and the Unicode
	# name ought to exist.
	# NOTE(review): looking the name up as str relies on the filesystem
	# encoding being UTF-8 -- confirm for the CGI environment.
	assert os.path.exists(newname_bytes.decode('utf-8'))


# Turn every UTF-8 encoded bytes path into its unicode (str) form.
filenames = [raw_path.decode('utf-8') for raw_path in utf8_filenames]

# Check every database record against the files found on disk and delete
# the record if its file doesn't exist.
#
# NOTE(review): `filenames` is built from path + name, i.e. it holds full
# paths.  Confirm that files.url stores the same form; if it stores bare
# file names, nothing will match and every record is still deleted.
cur.execute('''SELECT url FROM files''')
data = cur.fetchall()

# A set gives constant-time membership tests inside the loop.
existing = set(filenames)

for row in data:
	# fetchall() returns one sequence per row even for a single-column
	# SELECT.  Comparing the whole row against the file names can never
	# match, which deleted every record; compare the column value instead.
	url = row[0]
	if url not in existing:
		# Delete spurious
		cur.execute('''DELETE FROM files WHERE url = %s''', (url,))
=========================

Now 'http://superhost.gr/?page=files.py' is not erroring out at all, but 
it also doesn't display the big filename table for users to download from.

Here is how I try to print the filenames, each with a button, for the users:

# =================================================================================================================
# Display ALL files, each with its own download button
# =================================================================================================================
# Render one table row per record in `files`, most recently visited first.
# Each row carries a submit button whose value is the file name.
import html

print('''<body background='/data/images/star.jpg'>
		 <center><img src='/data/images/download.gif'><br><br>
		 <table border=5 cellpadding=5 bgcolor=green>
''')

try:
	cur.execute('''SELECT * FROM files ORDER BY lastvisit DESC''')
	data = cur.fetchall()

	for row in data:
		# The unpacking fixes the column order SELECT * must return.
		(filename, hits, host, lastvisit) = row
		lastvisit = lastvisit.strftime('%A %e %b, %H:%M')

		# Values from the database (file names included) are not trusted
		# markup: escape them so characters such as '"', '<' or '&' cannot
		# break the attribute value or inject HTML.
		cells = tuple(
			html.escape(str(value))
			for value in (filename, hits, host, lastvisit)
		)

		print('''
		<form method="get" action="/cgi-bin/files.py">
			<tr>
				<td> <center> <input type="submit" name="filename" value="%s"> </td>
				<td> <center> <font color=yellow size=5> %s </td>
				<td> <center> <font color=orange size=4> %s </td>
				<td> <center> <font color=silver size=4> %s </td>
			</tr>
		</form>
		''' % cells)
	print('''</table><br><br>''')
except pymysql.ProgrammingError as e:
	# The error text ends up inside the HTML page, so escape it as well.
	print(html.escape(repr(e)))



More information about the Python-list mailing list