[pypy-commit] pypy unicode-utf8: remove most imports of runicode, except mbcs (win32). TBD: cpyext, micronumpy
mattip
pypy.commits at gmail.com
Sat Feb 9 13:57:02 EST 2019
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8
Changeset: r95924:322380023a4a
Date: 2019-02-09 18:21 +0100
http://bitbucket.org/pypy/pypy/changeset/322380023a4a/
Log: remove most imports of runicode, except mbcs (win32). TBD: cpyext,
micronumpy
diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py
--- a/pypy/interpreter/astcompiler/optimize.py
+++ b/pypy/interpreter/astcompiler/optimize.py
@@ -5,7 +5,7 @@
from pypy.tool import stdlib_opcode as ops
from pypy.interpreter.error import OperationError
from rpython.rlib.unroll import unrolling_iterable
-from rpython.rlib.runicode import MAXUNICODE
+from rpython.rlib.rutf8 import MAXUNICODE
from rpython.rlib.objectmodel import specialize
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -3,7 +3,7 @@
from pypy.interpreter.error import OperationError, oefmt
from rpython.rlib.objectmodel import specialize
from rpython.rlib.rstring import StringBuilder
-from rpython.rlib import rutf8, runicode
+from rpython.rlib import rutf8
from rpython.rlib.rarithmetic import r_uint, intmask
from rpython.rtyper.lltypesystem import rffi
from pypy.module.unicodedata import unicodedb
@@ -40,23 +40,6 @@
space.newtext(msg)]))
return raise_unicode_exception_encode
-@specialize.memo()
-def encode_unicode_error_handler(space):
- # Fast version of the "strict" errors handler.
- def raise_unicode_exception_encode(errors, encoding, msg, uni,
- startingpos, endingpos):
- assert isinstance(uni, unicode)
- u_len = len(uni)
- utf8 = runicode.unicode_encode_utf8sp(uni, u_len)
- raise OperationError(space.w_UnicodeEncodeError,
- space.newtuple([space.newtext(encoding),
- space.newtext(utf8, u_len),
- space.newint(startingpos),
- space.newint(endingpos),
- space.newtext(msg)]))
- return u'', None, 0
- return raise_unicode_exception_encode
-
def default_error_encode(
errors, encoding, msg, u, startingpos, endingpos):
"""A default handler, for tests"""
@@ -935,23 +918,45 @@
return result.build()
+@specialize.memo()
+def _encode_unicode_error_handler(space):
+ # Fast version of the "strict" errors handler.
+ from rpython.rlib import runicode
+ def raise_unicode_exception_encode(errors, encoding, msg, uni,
+ startingpos, endingpos):
+ assert isinstance(uni, unicode)
+ u_len = len(uni)
+ utf8 = runicode.unicode_encode_utf8sp(uni, u_len)
+ raise OperationError(space.w_UnicodeEncodeError,
+ space.newtuple([space.newtext(encoding),
+ space.newtext(utf8, u_len),
+ space.newint(startingpos),
+ space.newint(endingpos),
+ space.newtext(msg)]))
+ return u'', None, 0
+ return raise_unicode_exception_encode
+
+
def encode_utf8(space, uni, allow_surrogates=False):
# Note that Python3 tends to forbid *all* surrogates in utf-8.
# If allow_surrogates=True, then revert to the Python 2 behavior
# which never raises UnicodeEncodeError. Surrogate pairs are then
# allowed, either paired or lone. A paired surrogate is considered
# like the non-BMP character it stands for. See also *_utf8sp().
+ from rpython.rlib import runicode
assert isinstance(uni, unicode)
return runicode.unicode_encode_utf_8(
uni, len(uni), "strict",
- errorhandler=encode_unicode_error_handler(space),
+ errorhandler=_encode_unicode_error_handler(space),
allow_surrogates=allow_surrogates)
def encode_utf8sp(space, uni, allow_surrogates=True):
+ xxx
# Surrogate-preserving utf-8 encoding. Any surrogate character
# turns into its 3-bytes encoding, whether it is paired or not.
# This should always be reversible, and the reverse is
# decode_utf8sp().
+ from rpython.rlib import runicode
return runicode.unicode_encode_utf8sp(uni, len(uni))
def decode_utf8sp(space, string):
diff --git a/pypy/module/_codecs/__init__.py b/pypy/module/_codecs/__init__.py
--- a/pypy/module/_codecs/__init__.py
+++ b/pypy/module/_codecs/__init__.py
@@ -1,5 +1,4 @@
from pypy.interpreter.mixedmodule import MixedModule
-from rpython.rlib import runicode
from rpython.rlib.objectmodel import not_rpython
from pypy.module._codecs import interp_codecs
@@ -90,6 +89,7 @@
@not_rpython
def __init__(self, space, *args):
# mbcs codec is Windows specific, and based on rffi.
+ from rpython.rlib import runicode
if (hasattr(runicode, 'str_decode_mbcs')):
self.interpleveldefs['mbcs_encode'] = 'interp_codecs.mbcs_encode'
self.interpleveldefs['mbcs_decode'] = 'interp_codecs.mbcs_decode'
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -1,8 +1,7 @@
from rpython.rlib import jit, rutf8
from rpython.rlib.objectmodel import we_are_translated, not_rpython
from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
-from rpython.rlib import runicode
-from rpython.rlib.runicode import code_to_unichr, MAXUNICODE
+from rpython.rlib.rutf8 import MAXUNICODE
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
@@ -378,7 +377,7 @@
raise oefmt(space.w_TypeError, "handler must be callable")
# ____________________________________________________________
-# delegation to runicode/unicodehelper
+# delegation to unicodehelper
def _find_implementation(impl_name):
func = getattr(unicodehelper, impl_name)
@@ -447,6 +446,7 @@
]:
make_decoder_wrapper(decoder)
+from rpython.rlib import runicode
if hasattr(runicode, 'str_decode_mbcs'):
make_encoder_wrapper('mbcs_encode')
make_decoder_wrapper('mbcs_decode')
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -1,7 +1,7 @@
import sys
from rpython.rlib.rstring import StringBuilder
from rpython.rlib.objectmodel import specialize, always_inline, r_dict
-from rpython.rlib import rfloat, runicode, rutf8
+from rpython.rlib import rfloat, rutf8
from rpython.rtyper.lltypesystem import lltype, rffi
from rpython.rlib.rarithmetic import r_uint
from pypy.interpreter.error import oefmt
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -1,5 +1,5 @@
from rpython.rtyper.lltypesystem import rffi, lltype
-from rpython.rlib import rstring, runicode
+from rpython.rlib import rstring
from rpython.tool.sourcetools import func_renamer
from pypy.interpreter.error import OperationError, oefmt
@@ -192,7 +192,8 @@
@cpython_api([], Py_UNICODE, error=CANNOT_FAIL)
def PyUnicode_GetMax(space):
"""Get the maximum ordinal for a Unicode character."""
- return runicode.UNICHR(runicode.MAXUNICODE)
+ from rpython.rlib import runicode, rutf8
+ return runicode.UNICHR(rutf8.MAXUNICODE)
@cpython_api([rffi.VOIDP], rffi.CCHARP, error=CANNOT_FAIL)
def PyUnicode_AS_DATA(space, ref):
diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py
--- a/pypy/module/sys/vm.py
+++ b/pypy/module/sys/vm.py
@@ -3,7 +3,7 @@
"""
from rpython.rlib import jit
-from rpython.rlib.runicode import MAXUNICODE
+from rpython.rlib.rutf8 import MAXUNICODE
from pypy.interpreter import gateway
from pypy.interpreter.error import oefmt
diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py
--- a/pypy/objspace/std/newformat.py
+++ b/pypy/objspace/std/newformat.py
@@ -5,7 +5,7 @@
import string
from pypy.interpreter.error import OperationError, oefmt
-from rpython.rlib import rstring, runicode, rlocale, rfloat, jit, rutf8
+from rpython.rlib import rstring, rlocale, rfloat, jit, rutf8
from rpython.rlib.objectmodel import specialize
from rpython.rlib.rfloat import formatd
from rpython.rlib.rarithmetic import r_uint, intmask
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -9,7 +9,7 @@
# We always use MAXUNICODE = 0x10ffff when unicode objects use utf8
-if 1 or rffi.sizeof(lltype.UniChar) == 4:
+if rffi.sizeof(lltype.UniChar) == 4:
MAXUNICODE = 0x10ffff
allow_surrogate_by_default = False
else:
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -26,6 +26,9 @@
from rpython.rlib.unicodedata import unicodedb
from rpython.rtyper.lltypesystem import lltype, rffi
+# We always use MAXUNICODE = 0x10ffff when unicode objects use utf8
+MAXUNICODE = 0x10ffff
+allow_surrogate_by_default = False
# we need a way to accept both r_uint and int(nonneg=True)
#@signature(types.int_nonneg(), types.bool(), returns=types.str())
More information about the pypy-commit
mailing list