[pypy-commit] pypy py3.5: Kill space.identifier_w(), which is almost not used any more. We might need to think about surrogate-forbidding conversions, but later.

Wed Feb 22 10:50:27 EST 2017

Author: Armin Rigo <arigo at tunes.org>
Branch: py3.5
Changeset: r90306:eaf70fb58a3e
Date: 2017-02-22 16:46 +0100
http://bitbucket.org/pypy/pypy/changeset/eaf70fb58a3e/

Log:	Kill space.identifier_w(), which is almost not used any more. We
	might need to think about surrogate-forbidding conversions, but
	later.

diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -595,7 +595,7 @@
                         except IndexError:
                             name = '?'
                         else:
-                            name = space.identifier_w(w_name)
+                            name = space.text_w(w_name)
                     break
         self.kwd_name = name
 
diff --git a/pypy/interpreter/astcompiler/ast.py b/pypy/interpreter/astcompiler/ast.py
--- a/pypy/interpreter/astcompiler/ast.py
+++ b/pypy/interpreter/astcompiler/ast.py
@@ -433,7 +433,7 @@
         w_returns = get_field(space, w_node, 'returns', True)
         w_lineno = get_field(space, w_node, 'lineno', False)
         w_col_offset = get_field(space, w_node, 'col_offset', False)
-        _name = space.identifier_w(w_name)
+        _name = space.text_w(w_name)
         if _name is None:
             raise_required_value(space, w_node, 'name')
         _args = arguments.from_object(space, w_args)
@@ -513,7 +513,7 @@
         w_returns = get_field(space, w_node, 'returns', True)
         w_lineno = get_field(space, w_node, 'lineno', False)
         w_col_offset = get_field(space, w_node, 'col_offset', False)
-        _name = space.identifier_w(w_name)
+        _name = space.text_w(w_name)
         if _name is None:
             raise_required_value(space, w_node, 'name')
         _args = arguments.from_object(space, w_args)
@@ -606,7 +606,7 @@
         w_decorator_list = get_field(space, w_node, 'decorator_list', False)
         w_lineno = get_field(space, w_node, 'lineno', False)
         w_col_offset = get_field(space, w_node, 'col_offset', False)
-        _name = space.identifier_w(w_name)
+        _name = space.text_w(w_name)
         if _name is None:
             raise_required_value(space, w_node, 'name')
         bases_w = space.unpackiterable(w_bases)
@@ -1511,7 +1511,7 @@
         w_lineno = get_field(space, w_node, 'lineno', False)
         w_col_offset = get_field(space, w_node, 'col_offset', False)
         names_w = space.unpackiterable(w_names)
-        _names = [space.identifier_w(w_item) for w_item in names_w]
+        _names = [space.text_w(w_item) for w_item in names_w]
         _lineno = space.int_w(w_lineno)
         _col_offset = space.int_w(w_col_offset)
         return Global(_names, _lineno, _col_offset)
@@ -1551,7 +1551,7 @@
         w_lineno = get_field(space, w_node, 'lineno', False)
         w_col_offset = get_field(space, w_node, 'col_offset', False)
         names_w = space.unpackiterable(w_names)
-        _names = [space.identifier_w(w_item) for w_item in names_w]
+        _names = [space.text_w(w_item) for w_item in names_w]
         _lineno = space.int_w(w_lineno)
         _col_offset = space.int_w(w_col_offset)
         return Nonlocal(_names, _lineno, _col_offset)
@@ -2880,7 +2880,7 @@
         _value = expr.from_object(space, w_value)
         if _value is None:
             raise_required_value(space, w_node, 'value')
-        _attr = space.identifier_w(w_attr)
+        _attr = space.text_w(w_attr)
         if _attr is None:
             raise_required_value(space, w_node, 'attr')
         _ctx = expr_context.from_object(space, w_ctx)
@@ -3022,7 +3022,7 @@
         w_ctx = get_field(space, w_node, 'ctx', False)
         w_lineno = get_field(space, w_node, 'lineno', False)
         w_col_offset = get_field(space, w_node, 'col_offset', False)
-        _id = space.identifier_w(w_id)
+        _id = space.text_w(w_id)
         if _id is None:
             raise_required_value(space, w_node, 'id')
         _ctx = expr_context.from_object(space, w_ctx)
@@ -3911,7 +3911,7 @@
         w_annotation = get_field(space, w_node, 'annotation', True)
         w_lineno = get_field(space, w_node, 'lineno', False)
         w_col_offset = get_field(space, w_node, 'col_offset', False)
-        _arg = space.identifier_w(w_arg)
+        _arg = space.text_w(w_arg)
         if _arg is None:
             raise_required_value(space, w_node, 'arg')
         _annotation = expr.from_object(space, w_annotation)
@@ -3978,7 +3978,7 @@
     def from_object(space, w_node):
         w_name = get_field(space, w_node, 'name', False)
         w_asname = get_field(space, w_node, 'asname', True)
-        _name = space.identifier_w(w_name)
+        _name = space.text_w(w_name)
         if _name is None:
             raise_required_value(space, w_node, 'name')
         _asname = space.text_or_none_w(w_asname)
diff --git a/pypy/interpreter/astcompiler/tools/asdl_py.py b/pypy/interpreter/astcompiler/tools/asdl_py.py
--- a/pypy/interpreter/astcompiler/tools/asdl_py.py
+++ b/pypy/interpreter/astcompiler/tools/asdl_py.py
@@ -162,7 +162,7 @@
         elif field.type in ("identifier",):
             if field.opt:
                 return "space.text_or_none_w(%s)" % (value,)
-            return "space.identifier_w(%s)" % (value,)
+            return "space.text_w(%s)" % (value,)
         elif field.type in ("int",):
             return "space.int_w(%s)" % (value,)
         elif field.type in ("bool",):
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -236,9 +236,6 @@
     def unicode_w(self, space):
         self._typed_unwrap_error(space, "string")
 
-    def identifier_w(self, space):
-        self._typed_unwrap_error(space, "string")
-
     def text_w(self, space):
         self._typed_unwrap_error(space, "string")
 
@@ -1522,8 +1519,8 @@
             # most API in CPython 3.x no longer do.
             if self.isinstance_w(w_obj, self.w_bytes):
                 return StringBuffer(w_obj.bytes_w(self))
-            if self.isinstance_w(w_obj, self.w_unicode):
-                return StringBuffer(w_obj.identifier_w(self))  # no surrogates
+            if self.isinstance_w(w_obj, self.w_unicode):  # NB. CPython forbids
+                return StringBuffer(w_obj.text_w(self))   # surrogates here
             try:
                 return w_obj.buffer_w(self, self.BUF_SIMPLE)
             except BufferInterfaceNotFound:
@@ -1534,8 +1531,8 @@
             # most API in CPython 3.x no longer do.
             if self.isinstance_w(w_obj, self.w_bytes):
                 return w_obj.bytes_w(self)
-            if self.isinstance_w(w_obj, self.w_unicode):
-                return w_obj.identifier_w(self)    # no surrogates (forbidden)
+            if self.isinstance_w(w_obj, self.w_unicode):  # NB. CPython forbids
+                return w_obj.text_w(self)                 # surrogates here
             try:
                 return w_obj.buffer_w(self, self.BUF_SIMPLE).as_str()
             except BufferInterfaceNotFound:
@@ -1590,8 +1587,7 @@
         encoded string). Else, call bytes_w().
 
         We should kill str_w completely and manually substitute it with
-        text_w/identifier_w/bytes_w at all call sites.  It remains for
-        now for tests only.
+        text_w/bytes_w at all call sites.  It remains for now for tests only.
         """
         if self.isinstance_w(w_obj, self.w_unicode):
             return w_obj.text_w(self)
@@ -1680,18 +1676,6 @@
     realtext_w = text_w         # Python 2 compatibility
     realunicode_w = unicode_w
 
-    def identifier_w(self, w_obj):
-        """
-        Unwrap an object which is used as an identifier (i.e. names of
-        variables, methdods, functions, classes etc.). In py3k, identifiers
-        are unicode strings and are unwrapped as UTF-8 encoded byte strings.
-        This differs from space.text_w() because it raises an app-level
-        UnicodeEncodeError if the unicode string contains surrogates.
-        This corresponds exactly to 'str.encode(obj, "utf-8")' at app-level.
-        (XXX check what occurs on narrow builds or kill narrow builds!)
-        """
-        return w_obj.identifier_w(self)
-
     def fsencode(space, w_obj):
         from pypy.interpreter.unicodehelper import fsencode
         return fsencode(space, w_obj)
diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py
--- a/pypy/interpreter/test/test_argument.py
+++ b/pypy/interpreter/test/test_argument.py
@@ -97,17 +97,12 @@
     newtext = wrap
     newunicode = wrap
 
-    def str_w(self, s):
-        return str(s)
     def text_w(self, s):
-        return self.str_w(s)
+        return self.unicode_w(s).encode('utf-8')
 
     def unicode_w(self, s):
         return unicode(s)
 
-    def identifier_w(self, s):
-        return self.unicode_w(s).encode('utf-8')
-
     def len(self, x):
         return len(x)
 
diff --git a/pypy/interpreter/test/test_objspace.py b/pypy/interpreter/test/test_objspace.py
--- a/pypy/interpreter/test/test_objspace.py
+++ b/pypy/interpreter/test/test_objspace.py
@@ -214,11 +214,11 @@
         assert space.unicode0_w(w(u"123")) == u"123"
         exc = space.raises_w(space.w_ValueError, space.unicode0_w, w(u"123\x004"))
 
-    def test_identifier_w(self):
+    def test_text_w(self):
         space = self.space
         x = u'àèì'
         w_name = space.wrap(x)
-        assert space.identifier_w(w_name) == x.encode('utf-8')
+        assert space.text_w(w_name) == x.encode('utf-8')
 
     def test_getindex_w(self):
         w_instance1 = self.space.appexec([], """():
diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py
--- a/pypy/objspace/fake/objspace.py
+++ b/pypy/objspace/fake/objspace.py
@@ -45,7 +45,7 @@
 
     def text_w(self, space):
         return NonConstant("foobar")
-    identifier_w = bytes_w = text_w
+    bytes_w = text_w
 
     def unicode_w(self, space):
         return NonConstant(u"foobar")
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -30,13 +30,11 @@
                 space.w_unicode, "__new__", space.w_unicode, w_uni)
         assert w_new is w_uni
 
-    def test_identifier_or_text_w(self):
+    def test_text_w(self):
         space = self.space
         w_uni = space.wrap(u'abcd')
-        assert space.identifier_w(w_uni) == 'abcd'
         assert space.text_w(w_uni) == 'abcd'
         w_uni = space.wrap(unichr(0xd921) + unichr(0xdddd))
-        space.raises_w(space.w_UnicodeEncodeError, space.identifier_w, w_uni)
         assert space.text_w(w_uni) == '\xed\xa4\xa1\xed\xb7\x9d'
         #                             ^^^ and not the 4-bytes combined character
 
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -8,8 +8,7 @@
 from rpython.rlib.runicode import (
     make_unicode_escape_function, str_decode_ascii, str_decode_utf_8,
     unicode_encode_ascii, unicode_encode_utf_8, fast_str_decode_ascii,
-    unicode_encode_utf8sp, unicode_encode_utf8_forbid_surrogates,
-    SurrogateError)
+    unicode_encode_utf8sp)
 from rpython.rlib import jit
 
 from pypy.interpreter import unicodehelper
@@ -81,32 +80,13 @@
     def unicode_w(self, space):
         return self._value
 
-    def _identifier_or_text_w(self, space, ignore_sg):
-        try:
-            identifier = jit.conditional_call_elidable(
-                                self._utf8, g_encode_utf8, self._value)
-            if not jit.isconstant(self):
-                self._utf8 = identifier
-        except SurrogateError:
-            # If 'ignore_sg' is False, this logic is here only
-            # to get an official app-level UnicodeEncodeError.
-            # If 'ignore_sg' is True, we encode instead using
-            # unicode_encode_utf8sp().
-            u = self._value
-            if ignore_sg:
-                identifier = unicode_encode_utf8sp(u, len(u))
-            else:
-                eh = unicodehelper.encode_error_handler(space)
-                identifier = unicode_encode_utf_8(u, len(u), None,
-                                                  errorhandler=eh)
+    def text_w(self, space):
+        identifier = jit.conditional_call_elidable(
+                            self._utf8, g_encode_utf8, self._value)
+        if not jit.isconstant(self):
+            self._utf8 = identifier
         return identifier
 
-    def text_w(self, space):
-        return self._identifier_or_text_w(space, ignore_sg=True)
-
-    def identifier_w(self, space):
-        return self._identifier_or_text_w(space, ignore_sg=False)
-
     def listview_unicode(self):
         return _create_list_from_unicode(self._value)
 
@@ -1277,7 +1257,7 @@
 @jit.elidable
 def g_encode_utf8(value):
     """This is a global function because of jit.conditional_call_value"""
-    return unicode_encode_utf8_forbid_surrogates(value, len(value))
+    return unicode_encode_utf8sp(value, len(value))
 
 _repr_function, _ = make_unicode_escape_function(
     pass_printable=True, unicode_output=True, quotes=True, prefix='')