[pypy-commit] pypy py3.5: merge unicode-utf8-py3 into py3.5

Wed Feb 13 17:07:27 EST 2019

Author: Matti Picus <matti.picus at gmail.com>
Branch: py3.5
Changeset: r96005:0bbb64dc7f98
Date: 2019-02-13 23:13 +0200
http://bitbucket.org/pypy/pypy/changeset/0bbb64dc7f98/

Log:	merge unicode-utf8-py3 into py3.5

diff too long, truncating to 2000 out of 25065 lines

diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -61,3 +61,9 @@
 9112c8071614108b1042bfef0713915107004d62 release-pypy2.7-v7.0.0
 1f86f25937b6ae6c8b25236c35228fac587678bf release-pypy3.5-v7.0.0
 dab365a465140aa79a5f3ba4db784c4af4d5c195 release-pypy3.6-v7.0.0
+9112c8071614108b1042bfef0713915107004d62 release-pypy2.7-v7.0.0
+c8805ee6d7846ca2722b106eeaa2f128c699aba3 release-pypy2.7-v7.0.0
+1f86f25937b6ae6c8b25236c35228fac587678bf release-pypy3.5-v7.0.0
+928a4f70d3de7d17449456946154c5da6e600162 release-pypy3.5-v7.0.0
+dab365a465140aa79a5f3ba4db784c4af4d5c195 release-pypy3.6-v7.0.0
+fb40f7a5524c77b80e6c468e087d621610137261 release-pypy3.6-v7.0.0
diff --git a/TODO b/TODO
new file mode 100644
--- /dev/null
+++ b/TODO
@@ -0,0 +1,20 @@
+* find a better way to run "find" without creating the index storage, if one
+  if one is not already readily available (understand cost now, improve after merge)
+* improve performance of splitlines (CF)
+* think about cost of utf8 list strategy (CF)
+* revisit why runicode import str_decode_utf_8_impl needed instead of runicode
+  import str_decode_utf_8
+* revisit remaining places in win32 where we do utf8.decode('utf-8'), they should work
+  directly with utf8 (can be converted via runicode.str_decode_utf_8 as well)
+  - rutf8.utf8_encode_mbcs
+  - unicodehelper.fsencode
+  - _winreg.interp_winreg
+* remove 'assert not isinstance(*, unicode)
+* add a flag that prevents support for unicode in rpython and enable it in PyPy (CF, Armin)
+* convert all realunicode_w to unicode_w after we flush out all old uses of
+  unicode_w
+* review all uses of W_Unicode.text_w, right now it is exactly W_Unicode.utf8_w. 
+  It shoud only return valid utf8 (see 0be26dc39a59 which broke translation on
+  win32 and failed tests on linux64). Then we can use it in places like
+  _socket.interp_func.getaddrinfo instead of space.encode_unicode_object(w_port,
+  'utf-8', 'strict')
diff --git a/pypy/TODO b/pypy/TODO
--- a/pypy/TODO
+++ b/pypy/TODO
@@ -1,6 +1,3 @@
-...
-
-
 antocuni's older TODO:
 
 * run coverage against the parser/astbuilder/astcompiler: it's probably full of
@@ -11,3 +8,5 @@
 
 * re-enable BUILD_LIST_FROM_ARG: see the comment in astcompiler/codegen.py in
 ast.ListComp.build_container
+
+* review use of std_decode_utf8, we probably do not want to be using it
diff --git a/pypy/doc/release-v7.0.0.rst b/pypy/doc/release-v7.0.0.rst
--- a/pypy/doc/release-v7.0.0.rst
+++ b/pypy/doc/release-v7.0.0.rst
@@ -39,7 +39,7 @@
 
 The utf8 branch that changes internal representation of unicode to utf8 did not
 make it into the release, so there is still more goodness coming.
-You can download the v6.0 releases here:
+You can download the v7.0 releases here:
 
     http://pypy.org/download.html
 
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -5,6 +5,11 @@
 .. this is a revision shortly after release-pypy-7.0.0
 .. startrev: 481c69f7d81f
 
+.. branch: zlib-copying-third-time-a-charm
+
+Make sure zlib decompressobjs have their streams deallocated immediately
+on flush.
+
 .. branch: zlib-copying-redux
 
 Fix calling copy on already-flushed compressobjs.
@@ -15,7 +20,11 @@
 as they do on CPython.
 
 
-.. math-improvements
+.. branch: math-improvements
 
 Improve performance of long operations where one of the operands fits into
-an int.
\ No newline at end of file
+an int.
+
+.. branch: regalloc-playgrounds
+
+Improve register allocation in the JIT.
diff --git a/pypy/doc/whatsnew-pypy2-5.10.0.rst b/pypy/doc/whatsnew-pypy2-5.10.0.rst
--- a/pypy/doc/whatsnew-pypy2-5.10.0.rst
+++ b/pypy/doc/whatsnew-pypy2-5.10.0.rst
@@ -1,42 +1,42 @@
-==========================
-What's new in PyPy2.7 5.10
-==========================
-
-.. this is a revision shortly after release-pypy2.7-v5.9.0
-.. startrev:d56dadcef996
-
-
-.. branch: cppyy-packaging
-
-Cleanup and improve cppyy packaging
-
-.. branch: docs-osx-brew-openssl
-
-.. branch: keep-debug-symbols
-
-Add a smartstrip tool, which can optionally keep the debug symbols in a
-separate file, instead of just stripping them away. Use it in packaging
-
-.. branch: bsd-patches
-
-Fix failures on FreeBSD, contributed by David Naylor as patches on the issue
-tracker (issues 2694, 2695, 2696, 2697)
-
-.. branch: run-extra-tests
-
-Run extra_tests/ in buildbot
-
-.. branch: vmprof-0.4.10
-
-Upgrade the _vmprof backend to vmprof 0.4.10
-
-.. branch: fix-vmprof-stacklet-switch
-.. branch: fix-vmprof-stacklet-switch-2
-
-Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...)
-
-.. branch: win32-vcvars
-
-.. branch: rdict-fast-hash
-
-Make it possible to declare that the hash function of an r_dict is fast in RPython.
+==========================
+What's new in PyPy2.7 5.10
+==========================
+
+.. this is a revision shortly after release-pypy2.7-v5.9.0
+.. startrev:d56dadcef996
+
+
+.. branch: cppyy-packaging
+
+Cleanup and improve cppyy packaging
+
+.. branch: docs-osx-brew-openssl
+
+.. branch: keep-debug-symbols
+
+Add a smartstrip tool, which can optionally keep the debug symbols in a
+separate file, instead of just stripping them away. Use it in packaging
+
+.. branch: bsd-patches
+
+Fix failures on FreeBSD, contributed by David Naylor as patches on the issue
+tracker (issues 2694, 2695, 2696, 2697)
+
+.. branch: run-extra-tests
+
+Run extra_tests/ in buildbot
+
+.. branch: vmprof-0.4.10
+
+Upgrade the _vmprof backend to vmprof 0.4.10
+
+.. branch: fix-vmprof-stacklet-switch
+.. branch: fix-vmprof-stacklet-switch-2
+
+Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...)
+
+.. branch: win32-vcvars
+
+.. branch: rdict-fast-hash
+
+Make it possible to declare that the hash function of an r_dict is fast in RPython.
diff --git a/pypy/doc/whatsnew-pypy2-6.0.0.rst b/pypy/doc/whatsnew-pypy2-6.0.0.rst
--- a/pypy/doc/whatsnew-pypy2-6.0.0.rst
+++ b/pypy/doc/whatsnew-pypy2-6.0.0.rst
@@ -1,132 +1,128 @@
-===========================
-What's new in PyPy2.7 5.10+
-===========================
-
-.. this is a revision shortly after release-pypy2.7-v5.10.0
-.. startrev: 6b024edd9d12
-
-.. branch: cpyext-avoid-roundtrip
-
-Big refactoring of some cpyext code, which avoids a lot of nonsense when
-calling C from Python and vice-versa: the result is a big speedup in
-function/method calls, up to 6 times faster.
-
-.. branch: cpyext-datetime2
-
-Support ``tzinfo`` field on C-API datetime objects, fixes latest pandas HEAD
-
-
-.. branch: mapdict-size-limit
-
-Fix a corner case of mapdict: When an instance is used like a dict (using
-``setattr`` and ``getattr``, or ``.__dict__``) and a lot of attributes are
-added, then the performance using mapdict is linear in the number of
-attributes. This is now fixed (by switching to a regular dict after 80
-attributes).
-
-
-.. branch: cpyext-faster-arg-passing
-
-When using cpyext, improve the speed of passing certain objects from PyPy to C
-code, most notably None, True, False, types, all instances of C-defined types.
-Before, a dict lookup was needed every time such an object crossed over, now it
-is just a field read.
-
-
-.. branch: 2634_datetime_timedelta_performance
-
-Improve datetime + timedelta performance.
-
-.. branch: memory-accounting
-
-Improve way to describe memory
-
-.. branch: msvc14
-
-Allow compilaiton with Visual Studio 2017 compiler suite on windows
-
-.. branch: winapi
-
-Update _winapi and internal _winbase_cffi (via _winbase_build) for python 3 
-
-.. branch: refactor-slots
-
-Refactor cpyext slots.
-
-
-.. branch: call-loopinvariant-into-bridges
-
-Speed up branchy code that does a lot of function inlining by saving one call
-to read the TLS in most bridges.
-
-.. branch: rpython-sprint
-
-Refactor in rpython signatures
-
-.. branch: cpyext-tls-operror2
-
-Store error state thread-locally in executioncontext, fixes issue #2764
-
-.. branch: cpyext-fast-typecheck
-
-Optimize `Py*_Check` for `Bool`, `Float`, `Set`. Also refactor and simplify
-`W_PyCWrapperObject` which is used to call slots from the C-API, greatly
-improving microbenchmarks in https://github.com/antocuni/cpyext-benchmarks
-
-
-.. branch: fix-sre-problems
-
-Fix two (unrelated) JIT bugs manifesting in the re module:
-
-- green fields are broken and were thus disabled, plus their usage removed from
-  the _sre implementation
-
-- in rare "trace is too long" situations, the JIT could break behaviour
-  arbitrarily.
-
-.. branch: jit-hooks-can-be-disabled
-
-Be more efficient about JIT hooks. Make it possible for the frontend to declare
-that jit hooks are currently not enabled at all. in that case, the list of ops
-does not have to be created in the case of the on_abort hook (which is
-expensive).
-
-
-.. branch: pyparser-improvements
-
-Improve speed of Python parser, improve ParseError messages slightly.
-
-.. branch: ioctl-arg-size
-
-Work around possible bugs in upstream ioctl users, like CPython allocate at
-least 1024 bytes for the arg in calls to ``ioctl(fd, request, arg)``. Fixes
-issue #2776
-
-.. branch: cpyext-subclass-setattr
-
-Fix for python-level classes that inherit from C-API types, previously the
-`w_obj` was not necessarily preserved throughout the lifetime of the `pyobj`
-which led to cases where instance attributes were lost. Fixes issue #2793
-
-
-.. branch: pyparser-improvements-2
-
-Improve line offsets that are reported by SyntaxError. Improve error messages
-for a few situations, including mismatched parenthesis.
-
-.. branch: issue2752
-
-Fix a rare GC bug that was introduced more than one year ago, but was
-not diagnosed before issue #2752.
-
-.. branch: gc-hooks
-
-Introduce GC hooks, as documented in doc/gc_info.rst
-
-.. branch: gc-hook-better-timestamp
-
-Improve GC hooks
-
-.. branch: cppyy-packaging
-
-Update backend to 0.6.0 and support exceptions through wrappers
+===========================
+What's new in PyPy2.7 5.10+
+===========================
+
+.. this is a revision shortly after release-pypy2.7-v5.10.0
+.. startrev: 6b024edd9d12
+
+.. branch: cpyext-avoid-roundtrip
+
+Big refactoring of some cpyext code, which avoids a lot of nonsense when
+calling C from Python and vice-versa: the result is a big speedup in
+function/method calls, up to 6 times faster.
+
+.. branch: cpyext-datetime2
+
+Support ``tzinfo`` field on C-API datetime objects, fixes latest pandas HEAD
+
+
+.. branch: mapdict-size-limit
+
+Fix a corner case of mapdict: When an instance is used like a dict (using
+``setattr`` and ``getattr``, or ``.__dict__``) and a lot of attributes are
+added, then the performance using mapdict is linear in the number of
+attributes. This is now fixed (by switching to a regular dict after 80
+attributes).
+
+
+.. branch: cpyext-faster-arg-passing
+
+When using cpyext, improve the speed of passing certain objects from PyPy to C
+code, most notably None, True, False, types, all instances of C-defined types.
+Before, a dict lookup was needed every time such an object crossed over, now it
+is just a field read.
+
+
+.. branch: 2634_datetime_timedelta_performance
+
+Improve datetime + timedelta performance.
+
+.. branch: memory-accounting
+
+Improve way to describe memory
+
+.. branch: msvc14
+
+Allow compilaiton with Visual Studio 2017 compiler suite on windows
+
+.. branch: refactor-slots
+
+Refactor cpyext slots.
+
+
+.. branch: call-loopinvariant-into-bridges
+
+Speed up branchy code that does a lot of function inlining by saving one call
+to read the TLS in most bridges.
+
+.. branch: rpython-sprint
+
+Refactor in rpython signatures
+
+.. branch: cpyext-tls-operror2
+
+Store error state thread-locally in executioncontext, fixes issue #2764
+
+.. branch: cpyext-fast-typecheck
+
+Optimize `Py*_Check` for `Bool`, `Float`, `Set`. Also refactor and simplify
+`W_PyCWrapperObject` which is used to call slots from the C-API, greatly
+improving microbenchmarks in https://github.com/antocuni/cpyext-benchmarks
+
+
+.. branch: fix-sre-problems
+
+Fix two (unrelated) JIT bugs manifesting in the re module:
+
+- green fields are broken and were thus disabled, plus their usage removed from
+  the _sre implementation
+
+- in rare "trace is too long" situations, the JIT could break behaviour
+  arbitrarily.
+
+.. branch: jit-hooks-can-be-disabled
+
+Be more efficient about JIT hooks. Make it possible for the frontend to declare
+that jit hooks are currently not enabled at all. in that case, the list of ops
+does not have to be created in the case of the on_abort hook (which is
+expensive).
+
+
+.. branch: pyparser-improvements
+
+Improve speed of Python parser, improve ParseError messages slightly.
+
+.. branch: ioctl-arg-size
+
+Work around possible bugs in upstream ioctl users, like CPython allocate at
+least 1024 bytes for the arg in calls to ``ioctl(fd, request, arg)``. Fixes
+issue #2776
+
+.. branch: cpyext-subclass-setattr
+
+Fix for python-level classes that inherit from C-API types, previously the
+`w_obj` was not necessarily preserved throughout the lifetime of the `pyobj`
+which led to cases where instance attributes were lost. Fixes issue #2793
+
+
+.. branch: pyparser-improvements-2
+
+Improve line offsets that are reported by SyntaxError. Improve error messages
+for a few situations, including mismatched parenthesis.
+
+.. branch: issue2752
+
+Fix a rare GC bug that was introduced more than one year ago, but was
+not diagnosed before issue #2752.
+
+.. branch: gc-hooks
+
+Introduce GC hooks, as documented in doc/gc_info.rst
+
+.. branch: gc-hook-better-timestamp
+
+Improve GC hooks
+
+.. branch: cppyy-packaging
+
+Update backend to 0.6.0 and support exceptions through wrappers
diff --git a/pypy/doc/whatsnew-pypy2-7.0.0.rst b/pypy/doc/whatsnew-pypy2-7.0.0.rst
--- a/pypy/doc/whatsnew-pypy2-7.0.0.rst
+++ b/pypy/doc/whatsnew-pypy2-7.0.0.rst
@@ -1,69 +1,69 @@
-==========================
-What's new in PyPy2.7 6.0+
-==========================
-
-.. this is a revision shortly after release-pypy-6.0.0
-.. startrev: e50e11af23f1
-
-.. branch: cppyy-packaging
-
-Main items: vastly better template resolution and improved performance. In
-detail: upgrade to backend 1.4, improved handling of templated methods and
-functions (in particular automatic deduction of types), improved pythonization
-interface, range of compatibility fixes for Python3, free functions now take
-fast libffi path when possible, moves for strings (incl. from Python str),
-easier/faster handling of std::vector by numpy, improved and faster object
-identity preservation
-
-.. branch: socket_default_timeout_blockingness
-
-Make sure 'blocking-ness' of socket is set along with default timeout
-
-.. branch: crypt_h
-
-Include crypt.h for crypt() on Linux
-
-.. branch: gc-more-logging
-
-Log additional gc-minor and gc-collect-step info in the PYPYLOG
-
-.. branch: reverse-debugger
-
-The reverse-debugger branch has been merged.  For more information, see
-https://bitbucket.org/pypy/revdb
-
-
-.. branch: pyparser-improvements-3
-
-Small refactorings in the Python parser.
-
-.. branch: fix-readme-typo
-
-.. branch: avoid_shell_injection_in_shutil
-
-Backport CPython fix for possible shell injection issue in `distutils.spawn`,
-https://bugs.python.org/issue34540
-
-.. branch: cffi_dlopen_unicode
-
-Enable use of unicode file names in `dlopen`
-
-.. branch: rlock-in-rpython
-
-Backport CPython fix for `thread.RLock` 
-
-
-.. branch: expose-gc-time
-
-Make GC hooks measure time in seconds (as opposed to an opaque unit).
-
-.. branch: cleanup-test_lib_pypy
-
-Update most test_lib_pypy/ tests and move them to extra_tests/.
-
-.. branch: gc-disable
-
-Make it possible to manually manage the GC by using a combination of
-gc.disable() and gc.collect_step(). Make sure to write a proper release
-announcement in which we explain that existing programs could leak memory if
-they run for too much time between a gc.disable()/gc.enable()
+==========================
+What's new in PyPy2.7 6.0+
+==========================
+
+.. this is a revision shortly after release-pypy-6.0.0
+.. startrev: e50e11af23f1
+
+.. branch: cppyy-packaging
+
+Main items: vastly better template resolution and improved performance. In
+detail: upgrade to backend 1.4, improved handling of templated methods and
+functions (in particular automatic deduction of types), improved pythonization
+interface, range of compatibility fixes for Python3, free functions now take
+fast libffi path when possible, moves for strings (incl. from Python str),
+easier/faster handling of std::vector by numpy, improved and faster object
+identity preservation
+
+.. branch: socket_default_timeout_blockingness
+
+Make sure 'blocking-ness' of socket is set along with default timeout
+
+.. branch: crypt_h
+
+Include crypt.h for crypt() on Linux
+
+.. branch: gc-more-logging
+
+Log additional gc-minor and gc-collect-step info in the PYPYLOG
+
+.. branch: reverse-debugger
+
+The reverse-debugger branch has been merged.  For more information, see
+https://bitbucket.org/pypy/revdb
+
+
+.. branch: pyparser-improvements-3
+
+Small refactorings in the Python parser.
+
+.. branch: fix-readme-typo
+
+.. branch: avoid_shell_injection_in_shutil
+
+Backport CPython fix for possible shell injection issue in `distutils.spawn`,
+https://bugs.python.org/issue34540
+
+.. branch: cffi_dlopen_unicode
+
+Enable use of unicode file names in `dlopen`
+
+.. branch: rlock-in-rpython
+
+Backport CPython fix for `thread.RLock` 
+
+
+.. branch: expose-gc-time
+
+Make GC hooks measure time in seconds (as opposed to an opaque unit).
+
+.. branch: cleanup-test_lib_pypy
+
+Update most test_lib_pypy/ tests and move them to extra_tests/.
+
+.. branch: gc-disable
+
+Make it possible to manually manage the GC by using a combination of
+gc.disable() and gc.collect_step(). Make sure to write a proper release
+announcement in which we explain that existing programs could leak memory if
+they run for too much time between a gc.disable()/gc.enable()
diff --git a/pypy/doc/whatsnew-pypy3-5.10.0.rst b/pypy/doc/whatsnew-pypy3-5.10.0.rst
--- a/pypy/doc/whatsnew-pypy3-5.10.0.rst
+++ b/pypy/doc/whatsnew-pypy3-5.10.0.rst
@@ -1,21 +1,7 @@
-=========================
-What's new in PyPy3 5.9+
-=========================
-
-.. this is the revision after release-pypy3.5-5.9
-.. startrev: be41e3ac0a29
-
-.. branch: sched_yield
-Add sched_yield posix attribute
-
-.. branch: py3.5-appexec
-Raise if space.is_true(space.appexec()) used in app level tests, fix tests
-that did this
-
-.. branch: py3.5-mac-embedding
-Download and patch dependencies when building cffi-based stdlib modules
-
-.. branch: os_lockf
-
-.. branch: py3.5-xattr
-Add posix.*attr() functions
+========================
+What's new in PyPy3 7.0+
+========================
+
+.. this is the revision after release-pypy3.5-v7.0
+.. startrev: 9d2fa7c63b7c
+
diff --git a/pypy/doc/whatsnew-pypy3-6.0.0.rst b/pypy/doc/whatsnew-pypy3-6.0.0.rst
--- a/pypy/doc/whatsnew-pypy3-6.0.0.rst
+++ b/pypy/doc/whatsnew-pypy3-6.0.0.rst
@@ -1,28 +1,7 @@
-=========================
-What's new in PyPy3 5.10+
-=========================
+========================
+What's new in PyPy3 7.0+
+========================
 
-.. this is the revision after release-pypy3.5-v5.10
-.. startrev: 34c63fba0bba
+.. this is the revision after release-pypy3.5-v7.0
+.. startrev: 9d2fa7c63b7c
 
-.. branch: hroncok/fix-typeerror-str-does-not-support-the-b-1514414905375
-
-Fix for bytestrings in console repl
-
-.. branch: py3-winreg
-
-Update winreg module to use unicode, wide-strings
-
-.. branch: cpyext-py3-instancemethod-attributes
-
-Add missing ``__doc__``, ``__module__``, ``__name__`` attributes to 
-``instancemethod``
-
-.. branch: winapi
-
-Update support for _winapi cffi module for python3
-
-.. branch: py3.5-refactor-slots
-
-Refactor cpyext slots.
-
diff --git a/pypy/doc/whatsnew-pypy3-7.0.0.rst b/pypy/doc/whatsnew-pypy3-7.0.0.rst
--- a/pypy/doc/whatsnew-pypy3-7.0.0.rst
+++ b/pypy/doc/whatsnew-pypy3-7.0.0.rst
@@ -5,15 +5,10 @@
 .. this is the revision after release-pypy3.5-v6.0
 .. startrev: 580e3e26cd32
 
-.. branch: hroncok/fix-multiprocessing-regression-on-newer--1524656522151
+.. branch: unicode-utf8
 
-Fix multiprocessing regression on newer glibcs
+Use utf-8 internally to represent unicode strings
 
-.. branch: py3.5-user-site-impl
+.. branch: unicode-utf8-py3
 
-Use implementation-specific site directories in sysconfig like in Python2
-
-.. branch: py3.5-reverse-debugger
-
-The reverse-debugger branch has been merged.  For more information, see
-https://bitbucket.org/pypy/revdb
+Use utf-8 internally to represent unicode strings
diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py
--- a/pypy/goal/targetpypystandalone.py
+++ b/pypy/goal/targetpypystandalone.py
@@ -83,7 +83,7 @@
             ##    con.interact()
             except OperationError as e:
                 debug("OperationError:")
-                debug(" operror-type: " + e.w_type.getname(space).encode('utf-8'))
+                debug(" operror-type: " + e.w_type.getname(space))
                 debug(" operror-value: " + space.text_w(space.str(e.get_w_value(space))))
                 return 1
         finally:
@@ -91,7 +91,7 @@
                 space.finish()
             except OperationError as e:
                 debug("OperationError:")
-                debug(" operror-type: " + e.w_type.getname(space).encode('utf-8'))
+                debug(" operror-type: " + e.w_type.getname(space))
                 debug(" operror-value: " + space.text_w(space.str(e.get_w_value(space))))
                 return 1
         return exitcode
@@ -148,7 +148,7 @@
         except OperationError as e:
             if verbose:
                 debug("OperationError:")
-                debug(" operror-type: " + e.w_type.getname(space).encode('utf-8'))
+                debug(" operror-type: " + e.w_type.getname(space))
                 debug(" operror-value: " + space.text_w(space.str(e.get_w_value(space))))
             return rffi.cast(rffi.INT, -1)
         finally:
@@ -202,7 +202,7 @@
             """)
         except OperationError as e:
             debug("OperationError:")
-            debug(" operror-type: " + e.w_type.getname(space).encode('utf-8'))
+            debug(" operror-type: " + e.w_type.getname(space))
             debug(" operror-value: " + space.text_w(space.str(e.get_w_value(space))))
             return -1
         return 0
diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -596,6 +596,10 @@
                         except IndexError:
                             name = '?'
                         else:
+                            w_enc = space.newtext(space.sys.defaultencoding)
+                            w_err = space.newtext("replace")
+                            w_name = space.call_method(w_name, "encode", w_enc,
+                                                       w_err)
                             name = space.text_w(w_name)
                     break
         self.kwd_name = name
diff --git a/pypy/interpreter/astcompiler/astbuilder.py b/pypy/interpreter/astcompiler/astbuilder.py
--- a/pypy/interpreter/astcompiler/astbuilder.py
+++ b/pypy/interpreter/astcompiler/astbuilder.py
@@ -58,6 +58,7 @@
         self.space = space
         self.compile_info = compile_info
         self.root_node = n
+        # used in f-strings
         self.recursive_parser = recursive_parser
 
     def build_ast(self):
diff --git a/pypy/interpreter/astcompiler/fstring.py b/pypy/interpreter/astcompiler/fstring.py
--- a/pypy/interpreter/astcompiler/fstring.py
+++ b/pypy/interpreter/astcompiler/fstring.py
@@ -3,6 +3,7 @@
 from pypy.interpreter import error
 from pypy.interpreter import unicodehelper
 from rpython.rlib.rstring import StringBuilder
+from rpython.rlib.rutf8 import codepoints_in_utf8
 
 
 def add_constant_string(astbuilder, joined_pieces, w_string, atom_node):
@@ -21,10 +22,8 @@
     joined_pieces.append(node(w_string, atom_node.get_lineno(),
                                         atom_node.get_column()))
 
-def f_constant_string(astbuilder, joined_pieces, u, atom_node):
-    space = astbuilder.space
-    add_constant_string(astbuilder, joined_pieces, space.newunicode(u),
-                        atom_node)
+def f_constant_string(astbuilder, joined_pieces, w_u, atom_node):
+    add_constant_string(astbuilder, joined_pieces, w_u, atom_node)
 
 def f_string_compile(astbuilder, source, atom_node):
     # Note: a f-string is kept as a single literal up to here.
@@ -259,20 +258,20 @@
         i += 1
 
     fstr.current_index = i
+    space = astbuilder.space
     literal = builder.build()
+    lgt = codepoints_in_utf8(literal)
     if not fstr.raw_mode and '\\' in literal:
-        space = astbuilder.space
         literal = parsestring.decode_unicode_utf8(space, literal, 0,
                                                   len(literal))
-        return unicodehelper.decode_unicode_escape(space, literal)
-    else:
-        return literal.decode('utf-8')
+        literal, lgt, pos = unicodehelper.decode_unicode_escape(space, literal)
+    return space.newtext(literal, lgt)
 
 
 def fstring_find_literal_and_expr(astbuilder, fstr, atom_node, rec):
-    # Return a tuple with the next literal part, and optionally the
+    # Return a tuple with the next literal part as a W_Unicode, and optionally the
     # following expression node.  Updates the current index inside 'fstr'.
-    literal = fstring_find_literal(astbuilder, fstr, atom_node, rec)
+    w_u = fstring_find_literal(astbuilder, fstr, atom_node, rec)
 
     s = fstr.unparsed
     i = fstr.current_index
@@ -284,7 +283,7 @@
         # We must now be the start of an expression, on a '{'.
         assert s[i] == '{'
         expr = fstring_find_expr(astbuilder, fstr, atom_node, rec)
-    return literal, expr
+    return w_u, expr
 
 
 def parse_f_string(astbuilder, joined_pieces, fstr, atom_node, rec=0):
@@ -303,11 +302,11 @@
             "really the case", atom_node)
 
     while True:
-        literal, expr = fstring_find_literal_and_expr(astbuilder, fstr,
+        w_u, expr = fstring_find_literal_and_expr(astbuilder, fstr,
                                                       atom_node, rec)
 
         # add the literal part
-        f_constant_string(astbuilder, joined_pieces, literal, atom_node)
+        f_constant_string(astbuilder, joined_pieces, w_u, atom_node)
 
         if expr is None:
             break         # We're done with this f-string.
diff --git a/pypy/interpreter/astcompiler/misc.py b/pypy/interpreter/astcompiler/misc.py
--- a/pypy/interpreter/astcompiler/misc.py
+++ b/pypy/interpreter/astcompiler/misc.py
@@ -112,7 +112,7 @@
     # only intern identifier-like strings
     from pypy.objspace.std.unicodeobject import _isidentifier
     if (space.is_w(space.type(w_const), space.w_unicode) and
-        _isidentifier(space.unicode_w(w_const))):
+        _isidentifier(space.utf8_w(w_const))):
         return space.new_interned_w_str(w_const)
     return w_const
 
diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py
--- a/pypy/interpreter/astcompiler/optimize.py
+++ b/pypy/interpreter/astcompiler/optimize.py
@@ -5,7 +5,7 @@
 from pypy.tool import stdlib_opcode as ops
 from pypy.interpreter.error import OperationError
 from rpython.rlib.unroll import unrolling_iterable
-from rpython.rlib.runicode import MAXUNICODE
+from rpython.rlib.rutf8 import MAXUNICODE
 from rpython.rlib.objectmodel import specialize
 
 
@@ -326,7 +326,7 @@
                     # produce compatible pycs.
                     if (self.space.isinstance_w(w_obj, self.space.w_unicode) and
                         self.space.isinstance_w(w_const, self.space.w_unicode)):
-                        #unistr = self.space.unicode_w(w_const)
+                        #unistr = self.space.utf8_w(w_const)
                         #if len(unistr) == 1:
                         #    ch = ord(unistr[0])
                         #else:
diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py
--- a/pypy/interpreter/astcompiler/test/test_astbuilder.py
+++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py
@@ -902,7 +902,7 @@
 
     def test_flufl(self):
         source = "x <> y"
-        raises(SyntaxError, self.get_ast, source)
+        py.test.raises(SyntaxError, self.get_ast, source)
         comp = self.get_first_expr(source,
                                    flags=consts.CO_FUTURE_BARRY_AS_BDFL)
         assert isinstance(comp, ast.Compare)
@@ -1130,7 +1130,7 @@
         s = self.get_first_expr("b'hi' b' implicitly' b' extra'")
         assert isinstance(s, ast.Bytes)
         assert space.eq_w(s.s, space.newbytes("hi implicitly extra"))
-        raises(SyntaxError, self.get_first_expr, "b'hello' 'world'")
+        py.test.raises(SyntaxError, self.get_first_expr, "b'hello' 'world'")
         sentence = u"Die Männer ärgern sich!"
         source = u"# coding: utf-7\nstuff = '%s'" % (sentence,)
         info = pyparse.CompileInfo("<test>", "exec")
@@ -1325,8 +1325,8 @@
         assert isinstance(if2, ast.Name)
 
     def test_cpython_issue12983(self):
-        raises(SyntaxError, self.get_ast, r"""b'\x'""")
-        raises(SyntaxError, self.get_ast, r"""b'\x0'""")
+        py.test.raises(SyntaxError, self.get_ast, r"""b'\x'""")
+        py.test.raises(SyntaxError, self.get_ast, r"""b'\x0'""")
 
     def test_matmul(self):
         mod = self.get_ast("a @ b")
diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py
--- a/pypy/interpreter/astcompiler/test/test_compiler.py
+++ b/pypy/interpreter/astcompiler/test/test_compiler.py
@@ -1,5 +1,6 @@
 from __future__ import division
 import py, sys
+from pytest import raises
 from pypy.interpreter.astcompiler import codegen, astbuilder, symtable, optimize
 from pypy.interpreter.pyparser import pyparse
 from pypy.interpreter.pyparser.test import expressions
@@ -76,7 +77,7 @@
         space = self.space
         pyco_expr = PyCode._from_code(space, co_expr)
         w_res = pyco_expr.exec_host_bytecode(w_dict, w_dict)
-        res = space.str_w(space.repr(w_res))
+        res = space.text_w(space.repr(w_res))
         expected_repr = self.get_py3_repr(expected)
         if isinstance(expected, float):
             # Float representation can vary a bit between interpreter
@@ -1249,7 +1250,6 @@
 
     def test_revdb_metavar(self):
         from pypy.interpreter.reverse_debugging import dbstate, setup_revdb
-        self.space.config.translation.reverse_debugger = True
         self.space.reverse_debugging = True
         try:
             setup_revdb(self.space)
@@ -1264,9 +1264,6 @@
 
 class AppTestCompiler:
 
-    def setup_class(cls):
-        cls.w_maxunicode = cls.space.wrap(sys.maxunicode)
-
     def test_docstring_not_loaded(self):
         import io, dis, sys
         ns = {}
@@ -1428,7 +1425,7 @@
             ''', d)
             return d['f'](5)
         """)
-        assert 'generator' in space.str_w(space.repr(w_generator))
+        assert 'generator' in space.text_w(space.repr(w_generator))
 
     def test_folding_of_list_constants(self):
         for source in (
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -3,7 +3,7 @@
 
 from rpython.rlib.cache import Cache
 from rpython.tool.uid import HUGEVAL_BYTES
-from rpython.rlib import jit, types
+from rpython.rlib import jit, types, rutf8
 from rpython.rlib.debug import make_sure_not_resized
 from rpython.rlib.objectmodel import (we_are_translated, newlist_hint,
      compute_unique_id, specialize, not_rpython)
@@ -80,10 +80,10 @@
 
     def getname(self, space):
         try:
-            return space.unicode_w(space.getattr(self, space.newtext('__name__')))
+            return space.utf8_w(space.getattr(self, space.newtext('__name__')))
         except OperationError as e:
             if e.match(space, space.w_TypeError) or e.match(space, space.w_AttributeError):
-                return u'?'
+                return '?'
             raise
 
     def getaddrstring(self, space):
@@ -105,9 +105,9 @@
             w_id = space.rshift(w_id, w_4)
         return ''.join(addrstring)
 
-    def getrepr(self, space, info, moreinfo=u''):
-        addrstring = unicode(self.getaddrstring(space))
-        return space.newunicode(u"<%s at 0x%s%s>" % (info, addrstring, moreinfo))
+    def getrepr(self, space, info, moreinfo=''):
+        addrstring = self.getaddrstring(space)
+        return space.newtext("<%s at 0x%s%s>" % (info, addrstring, moreinfo))
 
     def getslotvalue(self, index):
         raise NotImplementedError
@@ -245,11 +245,14 @@
     def bytes_w(self, space):
         self._typed_unwrap_error(space, "bytes")
 
-    def unicode_w(self, space):
-        self._typed_unwrap_error(space, "string")
+    def text_w(self, space):
+        self._typed_unwrap_error(space, "unicode")
 
-    def text_w(self, space):
-        self._typed_unwrap_error(space, "string")
+    def utf8_w(self, space):
+        self._typed_unwrap_error(space, "unicode")
+
+    def convert_to_w_unicode(self, space):
+        self._typed_unwrap_error(space, "unicode")
 
     def bytearray_list_of_chars_w(self, space):
         self._typed_unwrap_error(space, "bytearray")
@@ -420,7 +423,7 @@
         self.builtin_modules = {}
         self.reloading_modules = {}
 
-        self.interned_strings = make_weak_value_dictionary(self, unicode, W_Root)
+        self.interned_strings = make_weak_value_dictionary(self, str, W_Root)
         self.actionflag = ActionFlag()    # changed by the signal module
         self.check_signal_action = None   # changed by the signal module
         make_finalizer_queue(W_Root, self)
@@ -781,12 +784,12 @@
 
     def setitem_str(self, w_obj, key, w_value):
         # key is a "text", i.e. a byte string (in python3 it
-        # represents a utf-8-encoded unicode)
+        # represents a valid utf-8-encoded unicode)
         return self.setitem(w_obj, self.newtext(key), w_value)
 
     def finditem_str(self, w_obj, key):
         # key is a "text", i.e. a byte string (in python3 it
-        # represents a utf-8-encoded unicode)
+        # represents a valid utf-8-encoded unicode)
         return self.finditem(w_obj, self.newtext(key))
 
     def finditem(self, w_obj, w_key):
@@ -820,9 +823,9 @@
 
     def new_interned_w_str(self, w_u):
         assert isinstance(w_u, W_Root)   # and is not None
-        u = self.unicode_w(w_u)
+        u = self.utf8_w(w_u)
         if not we_are_translated():
-            assert type(u) is unicode
+            assert type(u) is str
         w_u1 = self.interned_strings.get(u)
         if w_u1 is None:
             w_u1 = w_u
@@ -835,12 +838,11 @@
         # returns a "text" object (ie str in python2 and unicode in python3)
         if not we_are_translated():
             assert type(s) is str
-        u = s.decode('utf-8')
-        w_s1 = self.interned_strings.get(u)
+        w_s1 = self.interned_strings.get(s)
         if w_s1 is None:
-            w_s1 = self.newunicode(u)
+            w_s1 = self.newtext(s)
             if self._side_effects_ok():
-                self.interned_strings.set(u, w_s1)
+                self.interned_strings.set(s, w_s1)
         return w_s1
 
     def _revdb_startup(self):
@@ -879,11 +881,7 @@
         # interface for marshal_impl
         if not we_are_translated():
             assert type(s) is str
-        try:
-            u = s.decode('utf-8')
-        except UnicodeDecodeError:
-            return None
-        return self.interned_strings.get(u)   # may be None
+        return self.interned_strings.get(s)   # may be None
 
     @specialize.arg(1)
     def descr_self_interp_w(self, RequiredClass, w_obj):
@@ -1066,7 +1064,7 @@
         """
         return None
 
-    def listview_unicode(self, w_list):
+    def listview_utf8(self, w_list):
         """ Return a list of unwrapped unicode out of a list of unicode. If the
         argument is not a list or does not contain only unicode, return None.
         May return None anyway.
@@ -1096,8 +1094,15 @@
     def newlist_bytes(self, list_s):
         return self.newlist([self.newbytes(s) for s in list_s])
 
-    def newlist_unicode(self, list_u):
-        return self.newlist([self.newunicode(u) for u in list_u])
+    def newlist_utf8(self, list_u, is_ascii):
+        l_w = [None] * len(list_u)
+        for i, item in enumerate(list_u):
+            if not is_ascii:
+                length = rutf8.check_utf8(item, True)
+            else:
+                length = len(item)
+            l_w[i] = self.newutf8(item, length)
+        return self.newlist(l_w)
 
     def newlist_int(self, list_i):
         return self.newlist([self.newint(i) for i in list_i])
@@ -1595,6 +1600,8 @@
         else:
             assert False
 
+        if self.isinstance_w(w_obj, self.w_unicode):
+            return w_obj.charbuf_w(self)
     def text_or_none_w(self, w_obj):
         return None if self.is_none(w_obj) else self.text_w(w_obj)
 
@@ -1617,18 +1624,22 @@
             an utf-8 encoded rpython string.
         """
         assert w_obj is not None
+        if not self.isinstance_w(w_obj, self.w_unicode):
+            w_obj._typed_unwrap_error(self, "unicode")
         return w_obj.text_w(self)
 
     @not_rpython    # tests only; should be replaced with bytes_w or text_w
     def str_w(self, w_obj):
         """
-        if w_obj is unicode, call text_w() (i.e., return the UTF-8-nosg
+        if w_obj is unicode, call utf8_w() (i.e., return the UTF-8-nosg
         encoded string). Else, call bytes_w().
 
         We should kill str_w completely and manually substitute it with
         text_w/bytes_w at all call sites.  It remains for now for tests only.
         """
+        XXX # deprecated, leaving in place for clear errors
         if self.isinstance_w(w_obj, self.w_unicode):
+            # XXX lo text_w, but better to deprecate str_w than to fix this
             return w_obj.text_w(self)
         else:
             return w_obj.bytes_w(self)
@@ -1711,23 +1722,38 @@
         assert w_obj is not None
         return w_obj.float_w(self, allow_conversion)
 
-    @specialize.argtype(1)
-    def unicode_w(self, w_obj):
-        assert w_obj is not None
-        return w_obj.unicode_w(self)
+    def utf8_w(self, w_obj):
+        return w_obj.utf8_w(self)
 
-    def unicode0_w(self, w_obj):
-        "Like unicode_w, but rejects strings with NUL bytes."
+    def utf8_0_w(self, w_obj):
+        "Like utf_w, but rejects strings with NUL bytes."
         from rpython.rlib import rstring
-        result = w_obj.unicode_w(self)
-        if u'\x00' in result:
+        result = w_obj.utf8_w(self)
+        if '\x00' in result:
+            raise oefmt(self.w_TypeError,
+                        "argument must be a string without NUL "
+                        "characters")
+        return rstring.assert_str0(result)
+
+    def convert_to_w_unicode(self, w_obj):
+        return w_obj.convert_to_w_unicode(self)
+
+    def realunicode_w(self, w_obj):
+        from pypy.interpreter.unicodehelper import decode_utf8sp
+        utf8 = self.utf8_w(w_obj)
+        return decode_utf8sp(self, utf8)[0].decode('utf8')
+
+    def utf8_0_w(self, w_obj):
+        "Like utf8_w, but rejects strings with NUL bytes."
+        from rpython.rlib import rstring
+        result = w_obj.utf8_w(self)
+        if '\x00' in result:
             raise oefmt(self.w_ValueError,
-                        "argument must be a unicode string without NUL "
+                        "argument must be a utf8 string without NUL "
                         "characters")
         return rstring.assert_str0(result)
 
     realtext_w = text_w         # Python 2 compatibility
-    realunicode_w = unicode_w
 
     def fsencode(space, w_obj):
         from pypy.interpreter.unicodehelper import fsencode
@@ -1742,6 +1768,27 @@
             w_obj = self.fsencode(w_obj)
         return self.bytesbuf0_w(w_obj)
 
+    def convert_arg_to_w_unicode(self, w_obj, strict=None):
+        # XXX why convert_to_w_unicode does something slightly different?
+        from pypy.objspace.std.unicodeobject import W_UnicodeObject
+        # for z_translation tests
+        if hasattr(self, 'is_fake_objspace'): return self.newtext("foobar")
+        return W_UnicodeObject.convert_arg_to_w_unicode(self, w_obj, strict)
+
+    def utf8_len_w(self, w_obj):
+        w_obj = self.convert_arg_to_w_unicode(w_obj)
+        return w_obj._utf8, w_obj._len()
+
+    def realutf8_w(self, w_obj):
+        # Like utf8_w(), but only works if w_obj is really of type
+        # 'unicode'.  On Python 3 this is the same as utf8_w().
+        from pypy.objspace.std.unicodeobject import W_UnicodeObject
+        # for z_translation tests
+        if hasattr(self, 'is_fake_objspace'): return self.newtext("foobar")
+        if not isinstance(w_obj, W_UnicodeObject):
+            raise oefmt(self.w_TypeError, "argument must be a unicode")
+        return self.utf8_w(w_obj)
+
     def bytesbuf0_w(self, w_obj):
         # Like bytes0_w(), but also accept a read-only buffer.
         from rpython.rlib import rstring
@@ -1759,7 +1806,7 @@
     def fsdecode_w(self, w_obj):
         if self.isinstance_w(w_obj, self.w_bytes):
             w_obj = self.fsdecode(w_obj)
-        return self.unicode0_w(w_obj)
+        return self.utf8_w(w_obj)
 
     def bool_w(self, w_obj):
         # Unwraps a bool, also accepting an int for compatibility.
@@ -2087,7 +2134,7 @@
     'float_w',
     'uint_w',
     'bigint_w',
-    'unicode_w',
+    'utf8_w',
     'unwrap',
     'is_true',
     'is_w',
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -9,8 +9,7 @@
 from rpython.rlib.objectmodel import we_are_translated, specialize
 from rpython.rlib.objectmodel import dont_inline, not_rpython
 from rpython.rlib import rstack, rstackovf
-from rpython.rlib import rwin32
-from rpython.rlib import runicode
+from rpython.rlib import rwin32, rutf8
 
 from pypy.interpreter import debug
 
@@ -21,7 +20,8 @@
 def strerror(errno):
     """Translate an error code to a unicode message string."""
     from pypy.module._codecs.locale import str_decode_locale_surrogateescape
-    return str_decode_locale_surrogateescape(os.strerror(errno))
+    utf8, lgt = str_decode_locale_surrogateescape(os.strerror(errno))
+    return utf8, lgt
 
 class OperationError(Exception):
     """Interpreter-level exception that signals an exception that should be
@@ -72,7 +72,7 @@
         space = getattr(self.w_type, 'space', None)
         if space is not None:
             if self.__class__ is not OperationError and s is None:
-                s = self._compute_value(space)
+                s, lgt = self._compute_value(space)
             try:
                 s = space.text_w(s)
             except Exception:
@@ -306,8 +306,8 @@
     def get_w_value(self, space):
         w_value = self._w_value
         if w_value is None:
-            value = self._compute_value(space)
-            self._w_value = w_value = space.newunicode(value)
+            value, lgt = self._compute_value(space)
+            self._w_value = w_value = space.newtext(value, lgt)
         return w_value
 
     def _compute_value(self, space):
@@ -472,16 +472,7 @@
     assert len(formats) > 0, "unsupported: no % command found"
     return tuple(parts), tuple(formats)
 
-def _decode_utf8(string):
-    # when building the error message, don't crash if the byte string
-    # provided is not valid UTF-8
-    assert isinstance(string, str)
-    result, consumed = runicode.str_decode_utf_8(
-        string, len(string), "replace", final=True)
-    return result
-
 def get_operrcls2(valuefmt):
-    valuefmt = valuefmt.decode('ascii')
     strings, formats = decompose_valuefmt(valuefmt)
     assert len(strings) == len(formats) + 1
     try:
@@ -501,30 +492,49 @@
 
             def _compute_value(self, space):
                 lst = [None] * (len(formats) + len(formats) + 1)
+                lgt = 0
                 for i, fmt, attr in entries:
                     lst[i + i] = self.xstrings[i]
+                    lgt += len(self.xstrings[i])
                     value = getattr(self, attr)
                     if fmt == 'd':
-                        result = str(value).decode('ascii')
+                        result = str(value)
+                        lgt += len(result)
                     elif fmt == 'R':
-                        result = space.unicode_w(space.repr(value))
+                        result = space.utf8_w(space.repr(value))
+                        lgt += len(result)
                     elif fmt == 'S':
-                        result = space.unicode_w(space.str(value))
+                        result = space.utf8_w(space.str(value))
+                        lgt += len(result)
                     elif fmt == 'T':
-                        result = _decode_utf8(space.type(value).name)
+                        result = space.type(value).name
+                        lgt += len(result)
                     elif fmt == 'N':
                         result = value.getname(space)
+                        lgt += len(result)
                     elif fmt == '8':
-                        result = _decode_utf8(value)
+                        # u'str\uxxxx' -> 'str\xXX\xXX' -> u"'str\xXX\xXX'"
+                        from pypy.interpreter import unicodehelper
+                        result, _lgt, pos  = unicodehelper.str_decode_utf8(
+                            value, 'replace', True,
+                            unicodehelper.decode_never_raise, True)
+                        lgt += _lgt
+                    elif isinstance(value, unicode):
+                        # 's'
+                        result = str(value.encode('utf-8'))
+                        lgt += len(value)
                     else:
-                        if isinstance(value, unicode):
-                            result = value
-                        else:
-                            result = _decode_utf8(str(value))
+                        result = str(value)
+                        try:
+                            lgt += rutf8.check_utf8(result, True)
+                        except rutf8.CheckError as e:
+                            lgt -= e.pos
                     lst[i + i + 1] = result
                 lst[-1] = self.xstrings[-1]
-                return u''.join(lst)
-        #
+                lgt += len(self.xstrings[-1])
+                retval = ''.join(lst)
+                return retval, lgt
+
         _fmtcache2[formats] = OpErrFmt
     return OpErrFmt, strings
 
@@ -534,7 +544,7 @@
         self.setup(w_type)
 
     def _compute_value(self, space):
-        return self._value.decode('utf-8')
+        return self._value, len(self._value)
 
     def async(self, space):
         # also matches a RuntimeError("maximum rec.") if the stack is
@@ -565,8 +575,8 @@
 
     %8 - The result of arg.decode('utf-8')
     %N - The result of w_arg.getname(space)
-    %R - The result of space.unicode_w(space.repr(w_arg))
-    %S - The result of space.unicode_w(space.str(w_arg))
+    %R - The result of space.utf8_w(space.repr(w_arg))
+    %S - The result of space.utf8_w(space.str(w_arg))
     %T - The result of space.type(w_arg).name
 
     """
@@ -621,12 +631,13 @@
     if rwin32.WIN32 and isinstance(e, WindowsError):
         winerror = e.winerror
         try:
-            msg = rwin32.FormatErrorW(winerror)
+            msg, lgt = rwin32.FormatErrorW(winerror)
         except ValueError:
-            msg = u'Windows Error %d' % winerror
+            msg = 'Windows Error %d' % winerror
+            lgt = len(msg)
         w_errno = space.w_None
         w_winerror = space.newint(winerror)
-        w_msg = space.newunicode(msg)
+        w_msg = space.newtext(msg, lgt)
     else:
         errno = e.errno
         if errno == EINTR:
@@ -635,12 +646,13 @@
                 return None
 
         try:
-            msg = strerror(errno)
+            msg, lgt = strerror(errno)
         except ValueError:
-            msg = u'error %d' % errno
+            msg = 'error %d' % errno
+            lgt = len(msg)
         w_errno = space.newint(errno)
         w_winerror = space.w_None
-        w_msg = space.newunicode(msg)
+        w_msg = space.newtext(msg, lgt)
 
     if w_filename is None:
         w_filename = space.w_None
@@ -670,9 +682,9 @@
                          eintr_retry=eintr_retry)
 
 def exception_from_errno(space, w_type, errno):
-    msg = strerror(errno)
+    msg, lgt = strerror(errno)
     w_error = space.call_function(w_type, space.newint(errno),
-                                  space.newunicode(msg))
+                                  space.newtext(msg, lgt))
     return OperationError(w_type, w_error)
 
 def exception_from_saved_errno(space, w_type):
diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py
--- a/pypy/interpreter/function.py
+++ b/pypy/interpreter/function.py
@@ -45,7 +45,8 @@
                  closure=None, w_ann=None, forcename=None, qualname=None):
         self.space = space
         self.name = forcename or code.co_name
-        self.qualname = qualname or self.name.decode('utf-8')
+        self.qualname = qualname or self.name
+        assert isinstance(self.qualname, str)
         self.w_doc = None   # lazily read from code.getdocstring()
         self.code = code       # Code instance
         self.w_func_globals = w_globals  # the globals dictionary
@@ -255,7 +256,7 @@
         return self.call_args(__args__)
 
     def descr_function_repr(self):
-        return self.getrepr(self.space, u'function %s' % self.qualname)
+        return self.getrepr(self.space, 'function %s' % self.qualname)
 
 
     def _cleanup_(self):
@@ -313,7 +314,7 @@
         tup_base = []
         tup_state = [
             space.newtext(self.name),
-            space.newunicode(self.qualname),
+            space.newtext(self.qualname),
             w_doc,
             self.code,
             w_func_globals,
@@ -337,7 +338,7 @@
 
         self.space = space
         self.name = space.text_w(w_name)
-        self.qualname = space.unicode_w(w_qualname)
+        self.qualname = space.utf8_w(w_qualname)
         self.code = space.interp_w(Code, w_code)
         if not space.is_w(w_closure, space.w_None):
             from pypy.interpreter.nestedscope import Cell
@@ -430,11 +431,11 @@
                         "__name__ must be set to a string object")
 
     def fget_func_qualname(self, space):
-        return space.newunicode(self.qualname)
+        return space.newtext(self.qualname)
 
     def fset_func_qualname(self, space, w_name):
         try:
-            self.qualname = space.unicode_w(w_name)
+            self.qualname = space.realutf8_w(w_name)
         except OperationError as e:
             if e.match(space, space.w_TypeError):
                 raise oefmt(space.w_TypeError,
@@ -549,14 +550,14 @@
             name = self.w_function.getname(self.space)
         else:
             try:
-                name = space.unicode_w(w_name)
+                name = space.utf8_w(w_name)
             except OperationError as e:
                 if not e.match(space, space.w_TypeError):
                     raise
-                name = u'?'
-        objrepr = space.unicode_w(space.repr(self.w_instance))
-        s = u'<bound method %s of %s>' % (name, objrepr)
-        return space.newunicode(s)
+                name = '?'
+        objrepr = space.utf8_w(space.repr(self.w_instance))
+        s = b'<bound method %s of %s>' % (name, objrepr)
+        return space.newtext(s)
 
     def descr_method_getattribute(self, w_attr):
         space = self.space
@@ -598,7 +599,7 @@
         else:
             w_builtins = space.getbuiltinmodule('builtins')
             new_inst = space.getattr(w_builtins, space.newtext('getattr'))
-            tup = [w_instance, space.newunicode(w_function.getname(space))]
+            tup = [w_instance, space.newtext(w_function.getname(space))]
         return space.newtuple([new_inst, space.newtuple(tup)])
 
 
@@ -699,7 +700,7 @@
         return self.space.newtext('<built-in function %s>' % (self.name,))
 
     def descr__reduce__(self, space):
-        return space.newunicode(self.qualname)
+        return space.newtext(self.qualname)
 
 def is_builtin_code(w_func):
     from pypy.interpreter.gateway import BuiltinCode
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -174,6 +174,9 @@
     def visit_unicode(self, el, app_sig):
         self.checked_space_method(el, app_sig)
 
+    def visit_utf8(self, el, app_sig):
+        self.checked_space_method(el, app_sig)
+
     def visit_fsencode(self, el, app_sig):
         self.checked_space_method(el, app_sig)
 
@@ -324,7 +327,10 @@
         self.run_args.append("space.text0_w(%s)" % (self.scopenext(),))
 
     def visit_unicode(self, typ):
-        self.run_args.append("space.unicode_w(%s)" % (self.scopenext(),))
+        self.run_args.append("space.realunicode_w(%s)" % (self.scopenext(),))
+
+    def visit_utf8(self, typ):
+        self.run_args.append("space.utf8_w(%s)" % (self.scopenext(),))
 
     def visit_fsencode(self, typ):
         self.run_args.append("space.fsencode_w(%s)" % (self.scopenext(),))
@@ -492,11 +498,14 @@
         self.unwrap.append("space.text_w(%s)" % (self.nextarg(),))
 
     def visit_unicode(self, typ):
-        self.unwrap.append("space.unicode_w(%s)" % (self.nextarg(),))
+        self.unwrap.append("space.realunicode_w(%s)" % (self.nextarg(),))
 
     def visit_text0(self, typ):
         self.unwrap.append("space.text0_w(%s)" % (self.nextarg(),))
 
+    def visit_utf8(self, typ):
+        self.unwrap.append("space.utf8_w(%s)" % (self.nextarg(),))
+
     def visit_fsencode(self, typ):
         self.unwrap.append("space.fsencode_w(%s)" % (self.nextarg(),))
 
@@ -567,8 +576,10 @@
     assert typ in (int, str, float, unicode, r_longlong, r_uint, r_ulonglong, bool)
     if typ is r_int is r_longlong:
         return 'gateway_r_longlong_w'
-    elif typ in (str, unicode):
-        return typ.__name__ + '_w'
+    elif typ is str:
+        return 'utf8_w'
+    elif typ is unicode:
+        return 'realunicode_w'
     elif typ is bool:
         # For argument clinic's "bool" specifier: accept any object, and
         # convert it to a boolean value.  If you don't want this
@@ -1113,7 +1124,7 @@
             kw_defs_w = []
             for name, w_def in sorted(alldefs_w.items()):
                 assert name in sig.kwonlyargnames
-                w_name = space.newunicode(name.decode('utf-8'))
+                w_name = space.newtext(name)
                 kw_defs_w.append((w_name, w_def))
 
         return defs_w, kw_defs_w
diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py
--- a/pypy/interpreter/generator.py
+++ b/pypy/interpreter/generator.py
@@ -38,14 +38,12 @@
         # 'qualname' is a unicode string
         if self._qualname is not None:
             return self._qualname
-        return self.get_name().decode('utf-8')
+        return self.get_name()
 
     def descr__repr__(self, space):
         addrstring = self.getaddrstring(space)
-        return space.newunicode(u"<%s object %s at 0x%s>" %
-                          (unicode(self.KIND),
-                           self.get_qualname(),
-                           unicode(addrstring)))
+        return space.newtext("<%s object %s at 0x%s>" %
+                          (self.KIND, self.get_qualname(), addrstring))
 
     def descr_send(self, w_arg):
         """send(arg) -> send 'arg' into generator/coroutine,
@@ -215,7 +213,7 @@
             e2.record_context(space, space.getexecutioncontext())
             raise e2
         else:
-            space.warn(space.newunicode(u"generator '%s' raised StopIteration"
+            space.warn(space.newtext("generator '%s' raised StopIteration"
                                         % self.get_qualname()),
                        space.w_PendingDeprecationWarning)
 
@@ -308,11 +306,11 @@
                         "__name__ must be set to a string object")
 
     def descr__qualname__(self, space):
-        return space.newunicode(self.get_qualname())
+        return space.newtext(self.get_qualname())
 
     def descr_set__qualname__(self, space, w_name):
         try:
-            self._qualname = space.unicode_w(w_name)
+            self._qualname = space.utf8_w(w_name)
         except OperationError as e:
             if e.match(space, space.w_TypeError):
                 raise oefmt(space.w_TypeError,
@@ -399,8 +397,8 @@
            self.frame is not None and \
            self.frame.last_instr == -1:
             space = self.space
-            msg = u"coroutine '%s' was never awaited" % self.get_qualname()
-            space.warn(space.newunicode(msg), space.w_RuntimeWarning)
+            msg = "coroutine '%s' was never awaited" % self.get_qualname()
+            space.warn(space.newtext(msg), space.w_RuntimeWarning)
         GeneratorOrCoroutine._finalize_(self)
 
 
diff --git a/pypy/interpreter/mixedmodule.py b/pypy/interpreter/mixedmodule.py
--- a/pypy/interpreter/mixedmodule.py
+++ b/pypy/interpreter/mixedmodule.py
@@ -130,7 +130,7 @@
                     bltin.w_module = self.w_name
                     func._builtinversion_ = bltin
                     bltin.name = name
-                    bltin.qualname = bltin.name.decode('utf-8')
+                    bltin.qualname = bltin.name
                 w_value = bltin
             space.setitem(self.w_dict, w_name, w_value)
             return w_value
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -206,7 +206,7 @@
         self.co_filename = '<builtin>/%s' % (basename,)
         self.w_filename = self.space.newfilename(self.co_filename)
 
-    co_names = property(lambda self: [self.space.str_w(w_name) for w_name in self.co_names_w]) # for trace
+    co_names = property(lambda self: [self.space.text_w(w_name) for w_name in self.co_names_w]) # for trace
 
     def signature(self):
         return self._signature
@@ -452,8 +452,8 @@
     def repr(self, space):
         space = self.space
         # co_name should be an identifier
-        name = self.co_name.decode('utf-8')
-        fn = space.unicode_w(self.w_filename)
-        return space.newunicode(u'<code object %s at 0x%s, file "%s", line %d>' % (
-            name, unicode(self.getaddrstring(space)), fn,
+        name = self.co_name
+        fn = space.utf8_w(self.w_filename)
+        return space.newtext(b'<code object %s at 0x%s, file "%s", line %d>' % (
+            name, self.getaddrstring(space), fn,
             -1 if self.co_firstlineno == 0 else self.co_firstlineno))
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -1081,8 +1081,8 @@
             try:
                 w_pkgname = space.getattr(
                     w_module, space.newtext('__name__'))
-                w_fullname = space.newunicode(u'%s.%s' %
-                    (space.unicode_w(w_pkgname), space.unicode_w(w_name)))
+                w_fullname = space.newtext(b'%s.%s' %
+                    (space.utf8_w(w_pkgname), space.utf8_w(w_name)))
                 return space.getitem(space.sys.get('modules'), w_fullname)
             except OperationError:
                 raise oefmt(
@@ -1333,7 +1333,7 @@
     def _make_function(self, oparg, freevars=None):
         space = self.space
         w_qualname = self.popvalue()
-        qualname = self.space.unicode_w(w_qualname)
+        qualname = self.space.utf8_w(w_qualname)
         w_codeobj = self.popvalue()
         codeobj = self.space.interp_w(PyCode, w_codeobj)
         if freevars is not None:
@@ -1628,7 +1628,7 @@
         if (oparg & consts.FVS_MASK) == consts.FVS_HAVE_SPEC:
             w_spec = self.popvalue()
         else:
-            w_spec = space.newunicode(u'')
+            w_spec = space.newtext('')
         w_value = self.popvalue()
         #
         conversion = oparg & consts.FVC_MASK
@@ -1649,9 +1649,9 @@
         lst = []
         for i in range(itemcount-1, -1, -1):
             w_item = self.peekvalue(i)
-            lst.append(space.unicode_w(w_item))
+            lst.append(space.utf8_w(w_item))
         self.dropvalues(itemcount)
-        w_res = space.newunicode(u''.join(lst))
+        w_res = space.newtext(''.join(lst))
         self.pushvalue(w_res)
 
     def _revdb_load_var(self, oparg):
diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py
--- a/pypy/interpreter/pyparser/error.py
+++ b/pypy/interpreter/pyparser/error.py
@@ -29,20 +29,24 @@
                     except:  # we can't allow any exceptions here!
                         return None""")
         elif self.text is not None:
-            from rpython.rlib.runicode import str_decode_utf_8
+            from rpython.rlib.runicode import str_decode_utf_8_impl
             # self.text may not be UTF-8 in case of decoding errors.
             # adjust the encoded text offset to a decoded offset
             # XXX do the right thing about continuation lines, which
             # XXX are their own fun, sometimes giving offset >
             # XXX len(self.text) for example (right now, avoid crashing)
+            def replace_error_handler(errors, encoding, msg, s, startpos, endpos):
+                # must return unicode
+                return u'\ufffd', endpos
             if offset > len(self.text):
                 offset = len(self.text)
-            text, _ = str_decode_utf_8(self.text, offset, 'replace')
+            text, _ = str_decode_utf_8_impl(self.text, offset,
+                             'replace', False, replace_error_handler, True)
             offset = len(text)
             if len(self.text) != offset:
-                text, _ = str_decode_utf_8(self.text, len(self.text),
-                                           'replace')
-            w_text = space.newunicode(text)
+                text, _ = str_decode_utf_8_impl(self.text, len(self.text),
+                             'replace', False, replace_error_handler, True)
+            w_text = space.newtext(text.encode('utf8'), len(text))
         return space.newtuple([
             space.newtext(self.msg),
             space.newtuple([
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -1,4 +1,5 @@
 # coding: utf-8
+from rpython.rlib import rutf8
 from pypy.interpreter.baseobjspace import W_Root
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter import unicodehelper
@@ -91,9 +92,11 @@
         if encoding is None:
             substr = s[ps:q]
         else:
+            unicodehelper.check_utf8_or_raise(space, s, ps, q)
             substr = decode_unicode_utf8(space, s, ps, q)
-        v = unicodehelper.decode_unicode_escape(space, substr)
-        return space.newunicode(v)
+        r = unicodehelper.decode_unicode_escape(space, substr)
+        v, length, pos = r
+        return space.newutf8(v, length)
 
     assert 0 <= ps <= q
     substr = s[ps : q]
@@ -111,8 +114,8 @@
         elif saw_f:
             return W_FString(substr, rawmode)
         else:
-            v = unicodehelper.decode_utf8(space, substr)
-            return space.newunicode(v)
+            v = unicodehelper.str_decode_utf8(substr, 'strict', True, None)
+            return space.newtext(*v)
 
     v = PyString_DecodeEscape(space, substr, 'strict', encoding)
     return space.newbytes(v)
@@ -135,15 +138,12 @@
                 # the backslash we just wrote, we emit "\u005c"
                 # instead.
                 lis.append("u005c")
-        if ord(s[ps]) & 0x80: # XXX inefficient
-            w, ps = decode_utf8(space, s, ps, end)
-            for c in w:
-                # The equivalent of %08x, which is not supported by RPython.
-                # 7 zeroes are enough for the unicode range, and the
-                # result still fits in 32-bit.
-                hexa = hex(ord(c) + 0x10000000)
-                lis.append('\\U0')
-                lis.append(hexa[3:])  # Skip 0x and the leading 1
+        if ord(s[ps]) & 0x80:
+            cp = rutf8.codepoint_at_pos(s, ps)
+            hexa = hex(cp + 0x10000000)
+            lis.append('\\U0')
+            lis.append(hexa[3:])  # Skip 0x and the leading 1
+            ps = rutf8.next_codepoint_pos(s, ps)
         else:
             lis.append(s[ps])
             ps += 1
@@ -250,20 +250,29 @@
             ch >= 'A' and ch <= 'F')
 
 
-def decode_utf8(space, s, ps, end):
+def check_utf8(space, s, ps, end):
     assert ps >= 0
     pt = ps
     # while (s < end && *s != '\\') s++; */ /* inefficient for u".."
     while ps < end and ord(s[ps]) & 0x80:
         ps += 1
-    u = unicodehelper.decode_utf8(space, s[pt:ps])
-    return u, ps
+    try:
+        rutf8.check_utf8(s, True, pt, ps)
+    except rutf8.CheckError as e:
+        lgt, flag = rutf8.check_utf8(s, True, pt, e.pos)
+        unicodehelper.decode_error_handler(space)('strict', 'utf8',
+            'invalid utf-8', s, pt + lgt, pt + lgt + 1)
+    return s[pt:ps]
 
 def decode_utf8_recode(space, s, ps, end, recode_encoding):
-    u, ps = decode_utf8(space, s, ps, end)
-    w_v = unicodehelper.encode(space, space.newunicode(u), recode_encoding)
+    p = ps
+    while p < end and ord(s[p]) & 0x80:
+        p += 1
+    lgt = unicodehelper.check_utf8_or_raise(space, s, ps, p)
+    w_v = unicodehelper.encode(space, space.newutf8(s[ps:p], lgt),
+                               recode_encoding)
     v = space.bytes_w(w_v)
-    return v, ps
+    return v, p
 
 def raise_app_valueerror(space, msg):
     raise OperationError(space.w_ValueError, space.newtext(msg))
diff --git a/pypy/interpreter/pyparser/pytokenizer.py b/pypy/interpreter/pyparser/pytokenizer.py
--- a/pypy/interpreter/pyparser/pytokenizer.py
+++ b/pypy/interpreter/pyparser/pytokenizer.py
@@ -6,6 +6,7 @@
 from pypy.interpreter.pyparser.pytokenize import tabsize, alttabsize, whiteSpaceDFA, \
     triple_quoted, endDFAs, single_quoted, pseudoDFA
 from pypy.interpreter.astcompiler import consts
+from rpython.rlib import rutf8
 
 NAMECHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
 NUMCHARS = '0123456789'
@@ -46,14 +47,9 @@
 
 
 def verify_utf8(token):
-    for c in token:
-        if ord(c) >= 0x80:
-            break
-    else:
-        return True
     try:
-        u = token.decode('utf-8')
-    except UnicodeDecodeError:
+        rutf8.check_utf8(token, False)
+    except rutf8.CheckError:
         return False
     return True
 
@@ -69,17 +65,12 @@
 
 def verify_identifier(token):
     # 1=ok; 0=not an identifier; -1=bad utf-8
-    for c in token:
-        if ord(c) >= 0x80:
-            break
-    else:
-        return 1
     try:
-        u = token.decode('utf-8')
-    except UnicodeDecodeError:
+        rutf8.check_utf8(token, False)
+    except rutf8.CheckError:
         return -1
     from pypy.objspace.std.unicodeobject import _isidentifier
-    return _isidentifier(u)
+    return _isidentifier(token)
 
 
 DUMMY_DFA = automata.DFA([], [])
diff --git a/pypy/interpreter/pyparser/test/test_parsestring.py b/pypy/interpreter/pyparser/test/test_parsestring.py
--- a/pypy/interpreter/pyparser/test/test_parsestring.py
+++ b/pypy/interpreter/pyparser/test/test_parsestring.py
@@ -10,7 +10,7 @@
             assert space.bytes_w(w_ret) == value
         elif isinstance(value, unicode):
             assert space.type(w_ret) == space.w_unicode
-            assert space.unicode_w(w_ret) == value
+            assert space.utf8_w(w_ret).decode('utf8') == value
         else:
             assert False
 
@@ -61,7 +61,7 @@
         s = "u'\x81'"
         s = s.decode("koi8-u").encode("utf8")[1:]
         w_ret = parsestring.parsestr(self.space, 'koi8-u', s)
-        ret = space.unwrap(w_ret)
+        ret = w_ret._utf8.decode('utf8')
         assert ret == eval("# -*- coding: koi8-u -*-\nu'\x81'")
 
     def test_unicode_pep414(self):
@@ -112,14 +112,14 @@
         space = self.space
         s = '"""' + '\\' + '\n"""'
         w_ret = parsestring.parsestr(space, None, s)
-        assert space.str_w(w_ret) == ''
+        assert space.text_w(w_ret) == ''
 
     def test_bug1(self):
         space = self.space
         expected = ['x', ' ', chr(0xc3), chr(0xa9), ' ', '\n']
         input = ["'", 'x', ' ', chr(0xc3), chr(0xa9), ' ', chr(92), 'n', "'"]
         w_ret = parsestring.parsestr(space, 'utf8', ''.join(input))
-        assert space.str_w(w_ret) == ''.join(expected)
+        assert space.text_w(w_ret) == ''.join(expected)
 
     def test_wide_unicode_in_source(self):
         if sys.maxunicode == 65535:
@@ -131,7 +131,4 @@
     def test_decode_unicode_utf8(self):
         buf = parsestring.decode_unicode_utf8(self.space,
                                               'u"\xf0\x9f\x92\x8b"', 2, 6)
-        if sys.maxunicode == 65535:
-            assert buf == r"\U0000d83d\U0000dc8b"
-        else:
-            assert buf == r"\U0001f48b"
+        assert buf == r"\U0001f48b"
diff --git a/pypy/interpreter/test/test_appinterp.py b/pypy/interpreter/test/test_appinterp.py
--- a/pypy/interpreter/test/test_appinterp.py
+++ b/pypy/interpreter/test/test_appinterp.py
@@ -155,7 +155,7 @@
         w_mymod2 = MyModule(space2, space2.wrap('mymod'))
 
         w_str = space1.getattr(w_mymod1, space1.wrap("hi"))
-        assert space1.str_w(w_str) == "hello"
+        assert space1.text_w(w_str) == "hello"
 
 class TestMixedModuleUnfreeze:
     spaceconfig = dict(usemodules=('_socket',))
diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py
--- a/pypy/interpreter/test/test_argument.py
+++ b/pypy/interpreter/test/test_argument.py
@@ -55,6 +55,9 @@
     pass
 
 class DummySpace(object):
+    class sys:
+        defaultencoding = 'utf-8'
+
     def newtuple(self, items):
         return tuple(items)
 
@@ -92,16 +95,15 @@
     def getitem(self, obj, key):
         return obj[key]
 
-    def wrap(self, obj):
+    def wrap(self, obj, lgt=-1):
         return obj
     newtext = wrap
-    newunicode = wrap
 
     def text_w(self, s):
-        return self.unicode_w(s).encode('utf-8')
+        return self.utf8_w(s)
 
-    def unicode_w(self, s):
-        return unicode(s)
+    def utf8_w(self, s):
+        return s
 
     def len(self, x):
         return len(x)
@@ -135,7 +137,7 @@
     def type(self, obj):
         class Type:
             def getname(self, space):
-                return unicode(type(obj).__name__)
+                return type(obj).__name__
         return Type()
 
 
@@ -343,14 +345,14 @@
     def test_unwrap_error(self):
         space = DummySpace()
         valuedummy = object()
-        def unicode_w(w):
+        def utf8_w(w):
             if w is None:
                 raise OperationError(TypeError, None)
             if w is valuedummy:
                 raise OperationError(ValueError, None)
-            return str(w)
-        space.unicode_w = unicode_w
-        space.text_w = unicode_w
+            return bytes(w, 'utf-8')
+        space.utf8_w = utf8_w
+        space.text_w = utf8_w
         excinfo = py.test.raises(OperationError, Arguments, space, [],
                                  ["a"], [1], w_starstararg={None: 1})
         assert excinfo.value.w_type is TypeError
@@ -672,14 +674,14 @@
         try:
             Arguments(space, [], w_stararg=space.wrap(42))
         except OperationError as e:
-            msg = space.str_w(space.str(e.get_w_value(space)))
+            msg = space.text_w(space.str(e.get_w_value(space)))
             assert msg == "argument after * must be an iterable, not int"
         else:
             assert 0, "did not raise"
         try:
             Arguments(space, [], w_starstararg=space.wrap(42))
         except OperationError as e:
-            msg = space.str_w(space.str(e.get_w_value(space)))
+            msg = space.text_w(space.str(e.get_w_value(space)))
             assert msg == "argument after ** must be a mapping, not int"
         else:
             assert 0, "did not raise"
@@ -838,7 +840,6 @@