[Python-Dev] Unicode decode exception

wxjmfauth at gmail.com wxjmfauth at gmail.com
Tue Dec 2 02:28:27 EST 2014


>>> 
>>> # input
>>> source_ascii = b'\x61\x62\x63'
>>> source_greek = b'\xe1\xe2\xe3'
>>> source_cyrillic = b'\xd0\xd1\xd2'
>>> buffer = ''
>>> buffer  = buffer + source_ascii.decode('ascii')
>>> buffer  = buffer + source_greek.decode('cp1253')
>>> buffer  = buffer + source_cyrillic.decode('iso-8859-5')
>>> len(buffer)
9
>>> # output
>>> sys.stdout.sethostencoding('cp1252')
>>> print(buffer.encode(sys.stdout.encoding, 'replace').\
...     decode(sys.stdout.encoding))
abc??????
>>> sys.stdout.sethostencoding('mac-roman')
>>> print(buffer.encode(sys.stdout.encoding, 'replace').\
...     decode(sys.stdout.encoding))
abc??????
>>> sys.stdout.sethostencoding('ascii')
>>> print(buffer.encode(sys.stdout.encoding, 'replace').\
...     decode(sys.stdout.encoding))
abc??????
>>> sys.stdout.sethostencoding('cp1253')
>>> print(buffer.encode(sys.stdout.encoding, 'replace').\
...     decode(sys.stdout.encoding))
abcαβγ???
>>> sys.stdout.sethostencoding('iso-8859-5')
>>> print(buffer.encode(sys.stdout.encoding, 'replace').\
...     decode(sys.stdout.encoding))
abc???абв
>>> sys.stdout.sethostencoding('utf-32-be')
>>> print(buffer.encode(sys.stdout.encoding, 'replace').\
...     decode(sys.stdout.encoding))
abcαβγабв
>>> 
>>> # typical decode error
>>> source_greek.decode('ascii')
Traceback (most recent call last):
  File "<eta last command>", line 1, in <module>
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe1 in position 0: ordinal not in range(128)

From a *Py32* GUI interactive interpreter mimicking
different platforms.

jmf



More information about the Python-list mailing list