[pypy-commit] pypy unicode-utf8: remove most imports of runicode, except mbcs (win32). TBD: cpyext, micronumpy

Sat Feb 9 13:57:02 EST 2019

Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8
Changeset: r95924:322380023a4a
Date: 2019-02-09 18:21 +0100
http://bitbucket.org/pypy/pypy/changeset/322380023a4a/

Log:	remove most imports of runicode, except mbcs (win32). TBD: cpyext,
	micronumpy

diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py
--- a/pypy/interpreter/astcompiler/optimize.py
+++ b/pypy/interpreter/astcompiler/optimize.py
@@ -5,7 +5,7 @@
 from pypy.tool import stdlib_opcode as ops
 from pypy.interpreter.error import OperationError
 from rpython.rlib.unroll import unrolling_iterable
-from rpython.rlib.runicode import MAXUNICODE
+from rpython.rlib.rutf8 import MAXUNICODE
 from rpython.rlib.objectmodel import specialize
 
 
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -3,7 +3,7 @@
 from pypy.interpreter.error import OperationError, oefmt
 from rpython.rlib.objectmodel import specialize
 from rpython.rlib.rstring import StringBuilder
-from rpython.rlib import rutf8, runicode
+from rpython.rlib import rutf8
 from rpython.rlib.rarithmetic import r_uint, intmask
 from rpython.rtyper.lltypesystem import rffi
 from pypy.module.unicodedata import unicodedb
@@ -40,23 +40,6 @@
                                              space.newtext(msg)]))
     return raise_unicode_exception_encode
 
- at specialize.memo()
-def encode_unicode_error_handler(space):
-    # Fast version of the "strict" errors handler.
-    def raise_unicode_exception_encode(errors, encoding, msg, uni,
-                                       startingpos, endingpos):
-        assert isinstance(uni, unicode)
-        u_len = len(uni)
-        utf8 = runicode.unicode_encode_utf8sp(uni, u_len)
-        raise OperationError(space.w_UnicodeEncodeError,
-                             space.newtuple([space.newtext(encoding),
-                                             space.newtext(utf8, u_len),
-                                             space.newint(startingpos),
-                                             space.newint(endingpos),
-                                             space.newtext(msg)]))
-        return u'', None, 0
-    return raise_unicode_exception_encode
-
 def default_error_encode(
         errors, encoding, msg, u, startingpos, endingpos):
     """A default handler, for tests"""
@@ -935,23 +918,45 @@
 
     return result.build()
 
+ at specialize.memo()
+def _encode_unicode_error_handler(space):
+    # Fast version of the "strict" errors handler.
+    from rpython.rlib import runicode
+    def raise_unicode_exception_encode(errors, encoding, msg, uni,
+                                       startingpos, endingpos):
+        assert isinstance(uni, unicode)
+        u_len = len(uni)
+        utf8 = runicode.unicode_encode_utf8sp(uni, u_len)
+        raise OperationError(space.w_UnicodeEncodeError,
+                             space.newtuple([space.newtext(encoding),
+                                             space.newtext(utf8, u_len),
+                                             space.newint(startingpos),
+                                             space.newint(endingpos),
+                                             space.newtext(msg)]))
+        return u'', None, 0
+    return raise_unicode_exception_encode
+
+
 def encode_utf8(space, uni, allow_surrogates=False):
     # Note that Python3 tends to forbid *all* surrogates in utf-8.
     # If allow_surrogates=True, then revert to the Python 2 behavior
     # which never raises UnicodeEncodeError.  Surrogate pairs are then
     # allowed, either paired or lone.  A paired surrogate is considered
     # like the non-BMP character it stands for.  See also *_utf8sp().
+    from rpython.rlib import runicode
     assert isinstance(uni, unicode)
     return runicode.unicode_encode_utf_8(
         uni, len(uni), "strict",
-        errorhandler=encode_unicode_error_handler(space),
+        errorhandler=_encode_unicode_error_handler(space),
         allow_surrogates=allow_surrogates)
 
 def encode_utf8sp(space, uni, allow_surrogates=True):
+    xxx
     # Surrogate-preserving utf-8 encoding.  Any surrogate character
     # turns into its 3-bytes encoding, whether it is paired or not.
     # This should always be reversible, and the reverse is
     # decode_utf8sp().
+    from rpython.rlib import runicode
     return runicode.unicode_encode_utf8sp(uni, len(uni))
 
 def decode_utf8sp(space, string):
diff --git a/pypy/module/_codecs/__init__.py b/pypy/module/_codecs/__init__.py
--- a/pypy/module/_codecs/__init__.py
+++ b/pypy/module/_codecs/__init__.py
@@ -1,5 +1,4 @@
 from pypy.interpreter.mixedmodule import MixedModule
-from rpython.rlib import runicode
 from rpython.rlib.objectmodel import not_rpython
 from pypy.module._codecs import interp_codecs
 
@@ -90,6 +89,7 @@
     @not_rpython
     def __init__(self, space, *args):
         # mbcs codec is Windows specific, and based on rffi.
+        from rpython.rlib import runicode
         if (hasattr(runicode, 'str_decode_mbcs')):
             self.interpleveldefs['mbcs_encode'] = 'interp_codecs.mbcs_encode'
             self.interpleveldefs['mbcs_decode'] = 'interp_codecs.mbcs_decode'
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -1,8 +1,7 @@
 from rpython.rlib import jit, rutf8
 from rpython.rlib.objectmodel import we_are_translated, not_rpython
 from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
-from rpython.rlib import runicode
-from rpython.rlib.runicode import code_to_unichr, MAXUNICODE
+from rpython.rlib.rutf8 import MAXUNICODE
 
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
@@ -378,7 +377,7 @@
         raise oefmt(space.w_TypeError, "handler must be callable")
 
 # ____________________________________________________________
-# delegation to runicode/unicodehelper
+# delegation to unicodehelper
 
 def _find_implementation(impl_name):
     func = getattr(unicodehelper, impl_name)
@@ -447,6 +446,7 @@
          ]:
     make_decoder_wrapper(decoder)
 
+from rpython.rlib import runicode
 if hasattr(runicode, 'str_decode_mbcs'):
     make_encoder_wrapper('mbcs_encode')
     make_decoder_wrapper('mbcs_decode')
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -1,7 +1,7 @@
 import sys
 from rpython.rlib.rstring import StringBuilder
 from rpython.rlib.objectmodel import specialize, always_inline, r_dict
-from rpython.rlib import rfloat, runicode, rutf8
+from rpython.rlib import rfloat, rutf8
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.rlib.rarithmetic import r_uint
 from pypy.interpreter.error import oefmt
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -1,5 +1,5 @@
 from rpython.rtyper.lltypesystem import rffi, lltype
-from rpython.rlib import rstring, runicode
+from rpython.rlib import rstring
 from rpython.tool.sourcetools import func_renamer
 
 from pypy.interpreter.error import OperationError, oefmt
@@ -192,7 +192,8 @@
 @cpython_api([], Py_UNICODE, error=CANNOT_FAIL)
 def PyUnicode_GetMax(space):
     """Get the maximum ordinal for a Unicode character."""
-    return runicode.UNICHR(runicode.MAXUNICODE)
+    from rpython.rlib import runicode, rutf8
+    return runicode.UNICHR(rutf8.MAXUNICODE)
 
 @cpython_api([rffi.VOIDP], rffi.CCHARP, error=CANNOT_FAIL)
 def PyUnicode_AS_DATA(space, ref):
diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py
--- a/pypy/module/sys/vm.py
+++ b/pypy/module/sys/vm.py
@@ -3,7 +3,7 @@
 """
 
 from rpython.rlib import jit
-from rpython.rlib.runicode import MAXUNICODE
+from rpython.rlib.rutf8 import MAXUNICODE
 
 from pypy.interpreter import gateway
 from pypy.interpreter.error import oefmt
diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py
--- a/pypy/objspace/std/newformat.py
+++ b/pypy/objspace/std/newformat.py
@@ -5,7 +5,7 @@
 import string
 
 from pypy.interpreter.error import OperationError, oefmt
-from rpython.rlib import rstring, runicode, rlocale, rfloat, jit, rutf8
+from rpython.rlib import rstring, rlocale, rfloat, jit, rutf8
 from rpython.rlib.objectmodel import specialize
 from rpython.rlib.rfloat import formatd
 from rpython.rlib.rarithmetic import r_uint, intmask
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -9,7 +9,7 @@
 
 
 # We always use MAXUNICODE = 0x10ffff when unicode objects use utf8
-if 1 or rffi.sizeof(lltype.UniChar) == 4:
+if rffi.sizeof(lltype.UniChar) == 4:
     MAXUNICODE = 0x10ffff
     allow_surrogate_by_default = False
 else:
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -26,6 +26,9 @@
 from rpython.rlib.unicodedata import unicodedb
 from rpython.rtyper.lltypesystem import lltype, rffi
 
+# We always use MAXUNICODE = 0x10ffff when unicode objects use utf8
+MAXUNICODE = 0x10ffff
+allow_surrogate_by_default = False
 
 # we need a way to accept both r_uint and int(nonneg=True)
 #@signature(types.int_nonneg(), types.bool(), returns=types.str())