urllib2.unquote() vs unicode

Tue Mar 18 00:20:01 EDT 2008

I've been hit by a urllib2.unquote() issue. Consider the following
unit test:

import unittest
import urllib2

# Compares a unicode string and a byte string holding the same
# percent-escaped text, both directly and after urllib2.unquote().
class UnquoteUnitTest(unittest.TestCase):

   # Build the two fixtures: the same ASCII-only text '%C4%99' (the
   # percent-encoded UTF-8 bytes of U+0119), once as a unicode literal and
   # once as a plain str literal.
   def setUp(self):
       self.utxt = u'%C4%99'
       self.stxt = '%C4%99'

   # The unicode and str fixtures are compared as-is.
   def testEq(self):
       self.assertEqual(
               self.utxt,
               self.stxt)

   # Both fixtures are converted with str() before comparing.
   def testStrEq(self):
       self.assertEqual(
               str(self.utxt),
               str(self.stxt))

   # Both fixtures are converted with unicode() before comparing.
   def testUnicodeEq(self):
       self.assertEqual(
               unicode(self.utxt),
               unicode(self.stxt))

   # unquote() is applied to each fixture in its original type.
   # This is the test reported as ERROR in the accompanying run: the
   # equality check inside assertEqual raised UnicodeDecodeError ('ascii'
   # codec, byte 0xc4) on Python 2.4.
   # NOTE(review): presumably unquote() yields a unicode result for the
   # unicode input and a non-ASCII byte str for the str input, so == has
   # to decode the latter implicitly -- confirm against urllib's unquote().
   def testUnquote(self):
       self.assertEqual(
               urllib2.unquote(self.utxt),
               urllib2.unquote(self.stxt))

   # Both fixtures are converted to str first, so unquote() receives the
   # same type on both sides.
   def testUnquoteStr(self):
       self.assertEqual(
               urllib2.unquote(str(self.utxt)),
               urllib2.unquote(str(self.stxt)))

   # Both fixtures are converted to unicode first, so unquote() receives
   # the same type on both sides.
   def testUnquoteUnicode(self):
       self.assertEqual(
               urllib2.unquote(unicode(self.utxt)),
               urllib2.unquote(unicode(self.stxt)))

# Run the test case when this file is executed as a script.
if __name__ == '__main__':
   unittest.main()

The three test*Eq() tests confirm that the two values are equal, both
as-is and when both are cast to str or to unicode. The tests that call
unquote() with utxt and stxt both cast to str, or both cast to unicode,
also succeed. However...

...E..
======================================================================
ERROR: testUnquote (__main__.UnquoteUnitTest)
----------------------------------------------------------------------
Traceback (most recent call last):
 File "unquote.py", line 28, in testUnquote
   urllib2.unquote(self.stxt))
 File "/usr/lib/python2.4/unittest.py", line 332, in failUnlessEqual
   if not first == second:
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc4 in position
0: ordinal not in range(128)

----------------------------------------------------------------------
Ran 6 tests in 0.001s

FAILED (errors=1)

Why does this test fail while others are successful? Any ideas?

Regards,
Maciej