How to print first (national) char from unicode string encoded in utf-8?

Tue Sep 2 04:17:21 EDT 2008

On 2 Wrz, 06:05, "Mark Tolonen" <M8R-yft... at mailinator.com> wrote:
> "Marco Bizzarri" <marco.bizza... at gmail.com> wrote in message
>
> news:mailman.331.1220276398.3487.python-list at python.org...
>
>
>
> > On Mon, Sep 1, 2008 at 3:25 PM,  <sni... at gmail.com> wrote:
>
> >> When I do ${urllib.unquote(c.user.firstName)} without encoding to
> >> latin-1 I get different chars than I expect: not Łukasz but Å ukasz
> >> --
> >>http://mail.python.org/mailman/listinfo/python-list
>
> > That's crazy. "string".encode('latin1') gives you a latin1 encoded
> > string; latin1 is a single byte encoding, therefore taking the first
> > byte should be no problem.
>
> > Have you tried:
>
> > urllib.unquote(c.user.firstName)[0].encode('latin1') or
>
> > urllib.unquote(c.user.firstName)[0].encode('utf8')
>
> > I'm assuming here that the urlib.unquote(c.user.firstName) returns an
> > encodable string (which I'm absolutely not sure), but if it does, this
> > should take the first 'character'.
>
> The OP stated that the original string was "encoded in UTF-8 and
> urllib.quote()", so after urllib.unquote the string is in UTF-8 format.
> This must be decoded into a Unicode string before removing the first
> character:
>
>     urllib.unquote(c.user.firstName).decode('utf-8')[0]
>
> The next problem is that the character in the OP's example string 'Ł' is not
> present in the latin-1 encoding, but using utf-8 encoding demonstrates that
> the full two-byte UTF-8 encoded character is collected:
>
>     >>> import urllib
>     >>> name = urllib.quote(u'Łukasz'.encode('utf-8'))
>     >>> name
>     '%C5%81ukasz'
>     >>> urllib.unquote(name).decode('utf-8')[0].encode('utf-8')
>     '\xc5\x81'
>
> -Mark

@Mark, when I tried urllib.unquote(c.user.firstName).decode('utf-8')
[0].encode('utf-8'), I received this message:

>>  return render('/reports/create_report_step2.mako')
Module pylons.templating:344 in render
<<                                      **cache_args)
        return pylons.buffet.render(template_name=template,
fragment=fragment,
                                    format=format, namespace=kargs,
**cache_args)

    >>  format=format, namespace=kargs, **cache_args)
Module pylons.templating:229 in render
<<          log.debug("Rendering template %s with engine %s",
full_path, engine_name)
            return engine_config['engine'].render(namespace,
template=full_path,
                **options)>>  **options)
Module mako.ext.turbogears:49 in render
<<              info.update(self.extra_vars_func())

            return template.render(**info)
    >>  return template.render(**info)
Module mako.template:114 in render
<<          declared by this template's internal rendering method are
also pulled from the given *args, **data
            members.        members."""
            return runtime._render(self, self.callable_, args, data)

        def render_unicode(self, *args, **data):>>  return
runtime._render(self, self.callable_, args, data)
Module mako.runtime:287 in _render
<<      context = Context(buf, **data)
        context._with_template = template
        _render_context(template, callable_, context, *args,
**_kwargs_for_callable(callable_, data))
        return context.pop_buffer().getvalue()>>
_render_context(template, callable_, context, *args,
**_kwargs_for_callable(callable_, data))
Module mako.runtime:304 in _render_context
<<          # if main render method, call from the base of the
inheritance stack
            (inherit, lclcontext) = _populate_self_namespace(context,
tmpl)
            _exec_template(inherit, lclcontext, args=args,
kwargs=kwargs)
        else:
            # otherwise, call the actual rendering method specified>>
_exec_template(inherit, lclcontext, args=args, kwargs=kwargs)
Module mako.runtime:337 in _exec_template
<<                  error_template.render_context(context,
error=error)
        else:
            callable_(context, *args, **kwargs)>>  callable_(context,
*args, **kwargs)
Module _reports_create_report_step2_mako:57 in render_body
<<
context.write(filters.decode.utf8(urllib.unquote(str(c.period.end))))
                context.write(u' + ')
 
context.write(filters.decode.utf8(urllib.unquote(c.user.firstName).decode('utf-8')
[0].encode('utf-8')))
 
context.write(filters.decode.utf8(urllib.unquote(str(c.user.secondName)
[0:1])))
                context.write(u'</h3>\r\n        <input type="hidden"
name="works[]" value="')>>
context.write(filters.decode.utf8(urllib.unquote(c.user.firstName).decode('utf-8')
[0].encode('utf-8')))
Module encodings.utf_8:16 in decode
<<
    def decode(input, errors='strict'):
        return codecs.utf_8_decode(input, errors, True)

    class IncrementalEncoder(codecs.IncrementalEncoder):>>  return
codecs.utf_8_decode(input, errors, True)
<type 'exceptions.UnicodeEncodeError'>: 'ascii' codec can't encode
characters in position 0-1: ordinal not in range(128)