[pypy-svn] r79019 - in pypy/branch/fast-forward/pypy/module/pyexpat: . test
afa at codespeak.net
afa at codespeak.net
Thu Nov 11 23:53:21 CET 2010
Author: afa
Date: Thu Nov 11 23:53:20 2010
New Revision: 79019
Modified:
pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py
pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py
Log:
pyexpat: correctly parse strings with non-utf8 encoding.
Modified: pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py
==============================================================================
--- pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py (original)
+++ pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py Thu Nov 11 23:53:20 2010
@@ -278,10 +278,7 @@
def __init__(self, encoding, namespace_separator, w_intern,
_from_external_entity=False):
- if encoding:
- self.encoding = encoding
- else:
- self.encoding = 'utf-8'
+ self.encoding = encoding
self.namespace_separator = namespace_separator
self.w_intern = w_intern
@@ -339,10 +336,9 @@
def w_convert(self, space, s):
if self.returns_unicode:
- return space.call_function(
- space.getattr(space.wrap(s), space.wrap("decode")),
- space.wrap(self.encoding),
- space.wrap("strict"))
+ from pypy.rlib.runicode import str_decode_utf_8
+ return space.wrap(str_decode_utf_8(
+ s, len(s), "strict")[0])
else:
return space.wrap(s)
Modified: pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py
==============================================================================
--- pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py (original)
+++ pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py Thu Nov 11 23:53:20 2010
@@ -31,3 +31,25 @@
p.buffer_size = 150
assert p.buffer_size == 150
raises(TypeError, setattr, p, 'buffer_size', sys.maxint + 1)
+
+ def test_encoding(self):
+ # use one of the few encodings built-in in expat
+ xml = "<?xml version='1.0' encoding='iso-8859-1'?><s>caf\xe9</s>"
+ import pyexpat
+ p = pyexpat.ParserCreate()
+ def gotText(text):
+ assert text == u"caf\xe9"
+ p.CharacterDataHandler = gotText
+ assert p.returns_unicode
+ p.Parse(xml)
+
+ def test_explicit_encoding(self):
+ xml = "<?xml version='1.0'?><s>caf\xe9</s>"
+ import pyexpat
+ p = pyexpat.ParserCreate(encoding='iso-8859-1')
+ def gotText(text):
+ assert text == u"caf\xe9"
+ p.CharacterDataHandler = gotText
+ assert p.returns_unicode
+ p.Parse(xml)
+
More information about the Pypy-commit mailing list