[pypy-svn] r61947 - in pypy/trunk/pypy/module: _codecs _codecs/test unicodedata
afa at codespeak.net
afa at codespeak.net
Mon Feb 16 13:47:29 CET 2009
Author: afa
Date: Mon Feb 16 13:47:28 2009
New Revision: 61947
Modified:
pypy/trunk/pypy/module/_codecs/app_codecs.py
pypy/trunk/pypy/module/_codecs/test/test_codecs.py
pypy/trunk/pypy/module/unicodedata/__init__.py
pypy/trunk/pypy/module/unicodedata/interp_ucd.py
Log:
Allow u'\N{NAME}' to return characters beyond the BMP on narrow unicode builds
(such characters are emitted as a surrogate pair).
Test and fix; this should fix test_unicodedata on Windows.
Python 2 is inconsistent here: on a narrow build it allows
u'\N{CJK UNIFIED IDEOGRAPH-20000}' but not unichr(0x20000).
Things will be better with Python 3.
Modified: pypy/trunk/pypy/module/_codecs/app_codecs.py
==============================================================================
--- pypy/trunk/pypy/module/_codecs/app_codecs.py (original)
+++ pypy/trunk/pypy/module/_codecs/app_codecs.py Mon Feb 16 13:47:28 2009
@@ -749,13 +749,19 @@
message = "unknown Unicode character name"
st = s[pos+1:look]
try:
- chr = unicodedata.lookup("%s" % st)
+ ch = unicodedata._get_code("%s" % st)
except KeyError, e:
x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, look+1)
+ p += x[0]
+ pos = x[1]
else:
- x = chr, look + 1
- p += x[0]
- pos = x[1]
+ pos = look + 1
+ if ch <= sys.maxunicode:
+ p += unichr(ch)
+ else:
+ ch -= 0x10000L
+ p += unichr(0xD800 + (ch >> 10))
+ p += unichr(0xDC00 + (ch & 0x03FF))
else:
x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, look+1)
else:
Modified: pypy/trunk/pypy/module/_codecs/test/test_codecs.py
==============================================================================
--- pypy/trunk/pypy/module/_codecs/test/test_codecs.py (original)
+++ pypy/trunk/pypy/module/_codecs/test/test_codecs.py Mon Feb 16 13:47:28 2009
@@ -44,6 +44,7 @@
assert unicode('\\N{SPACE}\\N{SPACE}','unicode-escape') == u" "
assert unicode('\\N{SPACE}a\\N{SPACE}','unicode-escape') == u" a "
assert "\\N{foo}xx".decode("unicode-escape", "ignore") == u"xx"
+ assert 1 <= len(u"\N{CJK UNIFIED IDEOGRAPH-20000}") <= 2
def test_literals(self):
raises(UnicodeError, eval, 'u\'\\Uffffffff\'')
Modified: pypy/trunk/pypy/module/unicodedata/__init__.py
==============================================================================
--- pypy/trunk/pypy/module/unicodedata/__init__.py (original)
+++ pypy/trunk/pypy/module/unicodedata/__init__.py Mon Feb 16 13:47:28 2009
@@ -13,6 +13,6 @@
}
for name in '''lookup name decimal digit numeric category bidirectional
east_asian_width combining mirrored decomposition
- normalize'''.split():
+ normalize _get_code'''.split():
interpleveldefs[name] = '''space.getattr(space.wrap(interp_ucd.ucd),
space.wrap("%s"))''' % name
Modified: pypy/trunk/pypy/module/unicodedata/interp_ucd.py
==============================================================================
--- pypy/trunk/pypy/module/unicodedata/interp_ucd.py (original)
+++ pypy/trunk/pypy/module/unicodedata/interp_ucd.py Mon Feb 16 13:47:28 2009
@@ -48,17 +48,20 @@
self.version = unicodedb.version
- def lookup(self, space, name):
+ def _get_code(self, space, name):
try:
code = self._lookup(name.upper())
except KeyError:
msg = space.mod(space.wrap("undefined character name '%s'"), space.wrap(name))
raise OperationError(space.w_KeyError, msg)
+ return space.wrap(code)
+ _get_code.unwrap_spec = ['self', ObjSpace, str]
+
+ def lookup(self, space, name):
return space.call_function(space.builtin.get('unichr'),
- space.wrap(code))
-
+ self._get_code(space, name))
lookup.unwrap_spec = ['self', ObjSpace, str]
-
+
def name(self, space, w_unichr, w_default=NoneNotWrapped):
code = unichr_to_code_w(space, w_unichr)
try:
More information about the Pypy-commit
mailing list