Webpy and UnicodeDecodeError

Oscar Del Ben thehcdreamer at gmail.com
Fri Dec 18 10:51:29 EST 2009


On Dec 18, 4:43 pm, Dave Angel <da... at ieee.org> wrote:
> Oscar Del Ben wrote:
> > So I'm trying to send a file through webpy and urllib2 but I can't get
> > around these UnicodeErrors. Here's the code:
>
> > # controller
>
> > x = web.input(video_original={})
> > params = {'foo': x['foo']}
>
> > files = (('video[original]', 'test', x['video_original'].file.read
> > ()),)
> > client.upload(upload_url, params, files, access_token())
>
> > # client library
>
> > def __encodeMultipart(self, fields, files):
> >         """
> >         fields is a sequence of (name, value) elements for regular
> > form fields.
> >         files is a sequence of (name, filename, value) elements for
> > data to be uploaded as files
> >         Return (content_type, body) ready for httplib.HTTP instance
> >         """
> >         boundary = mimetools.choose_boundary()
> >         crlf = '\r\n'
>
> >         l = []
> >         for k, v in fields.iteritems():
> >             l.append('--' + boundary)
> >             l.append('Content-Disposition: form-data; name="%s"' % k)
> >             l.append('')
> >             l.append(v)
> >         for (k, f, v) in files:
> >             l.append('--' + boundary)
> >             l.append('Content-Disposition: form-data; name="%s";
> > filename="%s"' % (k, f))
> >             l.append('Content-Type: %s' % self.__getContentType(f))
> >             l.append('')
> >             l.append(v)
> >         l.append('--' + boundary + '--')
> >         l.append('')
> >         body = crlf.join(l)
>
> >         return boundary, body
>
> >     def __getContentType(self, filename):
> >         return mimetypes.guess_type(filename)[0] or 'application/octet-
> > stream'
>
> >     def upload(self, path, post_params, files, token=None):
>
> >       if token:
> >         token = oauth.OAuthToken.from_string(token)
>
> >       url = "http://%s%s" % (self.authority, path)
>
> >       (boundary, body) = self.__encodeMultipart(post_params, files)
>
> >       headers = {'Content-Type': 'multipart/form-data; boundary=%s' %
> > boundary,
> >           'Content-Length': str(len(body))
> >           }
>
> >       request = oauth.OAuthRequest.from_consumer_and_token(
> >         self.consumer,
> >         token,
> >         http_method='POST',
> >         http_url=url,
> >         parameters=post_params
> >       )
>
> >       request.sign_request(oauth.OAuthSignatureMethod_HMAC_SHA1(),
> > self.consumer, token)
>
> >       request = urllib2.Request(request.http_url, postdata=body,
> > headers=headers)
> >       request.get_method = lambda: 'POST'
>
> >       return urllib2.urlopen(request)
>
> > Unfortunately I get two kinds of unicode error, the first one in the
> > crlf.join(l):
>
> > Traceback (most recent call last):
> >   File "/Users/oscar/projects/work/whitelabel/web/application.py",
> > line 242, in process
> >     return self.handle()
> >   File "/Users/oscar/projects/work/whitelabel/web/application.py",
> > line 233, in handle
> >     return self._delegate(fn, self.fvars, args)
> >   File "/Users/oscar/projects/work/whitelabel/web/application.py",
> > line 412, in _delegate
> >     return handle_class(cls)
> >   File "/Users/oscar/projects/work/whitelabel/web/application.py",
> > line 387, in handle_class
> >     return tocall(*args)
> >   File "/Users/oscar/projects/work/whitelabel/code.py", line 328, in
> > POST
> >     return simplejson.load(client.upload(upload_url, params, files,
> > access_token()))
> >   File "/Users/oscar/projects/work/whitelabel/oauth_client.py", line
> > 131, in upload
> >     (boundary, body) = self.__encodeMultipart(post_params, files)
> >   File "/Users/oscar/projects/work/whitelabel/oauth_client.py", line
> > 111, in __encodeMultipart
> >     body = crlf.join(l)
> > UnicodeDecodeError: 'ascii' codec can't decode byte 0xb7 in position
> > 42: ordinal not in range(128)
>
> > And here's another one:
>
> > Traceback (most recent call last):
> >   File "/Users/oscar/projects/work/whitelabel/web/application.py",
> > line 242, in process
> >     return self.handle()
> >   File "/Users/oscar/projects/work/whitelabel/web/application.py",
> > line 233, in handle
> >     return self._delegate(fn, self.fvars, args)
> >   File "/Users/oscar/projects/work/whitelabel/web/application.py",
> > line 412, in _delegate
> >     return handle_class(cls)
> >   File "/Users/oscar/projects/work/whitelabel/web/application.py",
> > line 387, in handle_class
> >     return tocall(*args)
> >   File "/Users/oscar/projects/work/whitelabel/code.py", line 328, in
> > POST
> >     return simplejson.load(client.upload(upload_url, params, files,
> > access_token()))
> >   File "/Users/oscar/projects/work/whitelabel/oauth_client.py", line
> > 131, in upload
> >     (boundary, body) = self.__encodeMultipart(post_params, files)
> >   File "/Users/oscar/projects/work/whitelabel/oauth_client.py", line
> > 111, in __encodeMultipart
> >     body = crlf.join(l)
> > UnicodeDecodeError: 'ascii' codec can't decode byte 0xb7 in position
> > 42: ordinal not in range(128)
>
> > Does anyone know why these errors happen and what I should do to
> > prevent them? Many thanks.
>
> > Oscar
>
> I did a short test to demonstrate the likely problem, without all the
> other libraries and complexity.
>
> lst = ["abc"]
> lst.append("def")
> lst.append(u"abc")
> lst.append("g\x48\x82\x94i")
> print lst
> print "**".join(lst)
>
> That fragment of code generates (in Python 2.6) the following output and
> traceback:
>
> ['abc', 'def', u'abc', 'gH\x82\x94i']
> Traceback (most recent call last):
>   File "M:\Programming\Python\sources\dummy\stuff2.py", line 10, in <module>
>     print "**".join(lst)
> UnicodeDecodeError: 'ascii' codec can't decode byte 0x82 in position 2:
> ordinal not in range(128)
>
> You'll notice that one of the strings is a unicode one, and another one
> has the character 0x82 in it.  Once join() discovers Unicode, it needs
> to produce a Unicode string, so it implicitly decodes the byte strings,
> and by default it uses the ASCII codec to do so, which fails on any
> byte above 0x7f.
>
> If you print your 'l' list (bad name, by the way, looks too much like a
> '1'), you can see which element is Unicode, and which one has the \xb7
> in position 42.  You'll have to decide which is the problem, and solve
> it accordingly.  Was the fact that one of the strings is unicode an
> oversight?  Or did you think that all characters would be 0x7f or less?  
> Or do you want to handle all possible characters, and if so, with what
> encoding?
>
> DaveA

Thanks for your reply DaveA.

Since I'm dealing with file uploads, I guess I should only care about
those. I understand that I'm trying to concatenate a unicode string
with a binary (byte) string, but I don't know how to deal with this.
Perhaps the uploaded file should be encoded in some way? I don't think
this is the case though.



More information about the Python-list mailing list