[pypy-commit] pypy length-hint: merge default

pjenvey noreply at buildbot.pypy.org
Thu Sep 27 02:04:31 CEST 2012


Author: Philip Jenvey <pjenvey at underboss.org>
Branch: length-hint
Changeset: r57623:0cd03df08692
Date: 2012-09-26 15:21 -0700
http://bitbucket.org/pypy/pypy/changeset/0cd03df08692/

Log:	merge default

diff --git a/pypy/doc/config/objspace.usemodules._csv.txt b/pypy/doc/config/objspace.usemodules._csv.txt
new file mode 100644
--- /dev/null
+++ b/pypy/doc/config/objspace.usemodules._csv.txt
@@ -0,0 +1,2 @@
+Implementation in RPython for the core of the 'csv' module
+
diff --git a/pypy/jit/metainterp/optimizeopt/util.py b/pypy/jit/metainterp/optimizeopt/util.py
--- a/pypy/jit/metainterp/optimizeopt/util.py
+++ b/pypy/jit/metainterp/optimizeopt/util.py
@@ -2,9 +2,10 @@
 from pypy.rlib.objectmodel import r_dict, compute_identity_hash
 from pypy.rlib.rarithmetic import intmask
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.jit.metainterp import resoperation, history
+from pypy.jit.metainterp import resoperation
 from pypy.rlib.debug import make_sure_not_resized
 from pypy.jit.metainterp.resoperation import rop
+from pypy.rlib.objectmodel import we_are_translated
 
 # ____________________________________________________________
 # Misc. utilities
@@ -28,13 +29,20 @@
 def make_dispatcher_method(Class, name_prefix, op_prefix=None, default=None):
     ops = _findall(Class, name_prefix, op_prefix)
     def dispatch(self, op, *args):
-        opnum = op.getopnum()
-        for value, cls, func in ops:
-            if opnum == value:
-                assert isinstance(op, cls)
+        if we_are_translated():
+            opnum = op.getopnum()
+            for value, cls, func in ops:
+                if opnum == value:
+                    assert isinstance(op, cls)
+                    return func(self, op, *args)
+            if default:
+                return default(self, op, *args)
+        else:
+            func = getattr(Class, name_prefix + op.getopname().upper(), None)
+            if func is not None:
                 return func(self, op, *args)
-        if default:
-            return default(self, op, *args)
+            if default:
+                return default(self, op, *args)
     dispatch.func_name = "dispatch_" + name_prefix
     return dispatch
 
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -2028,6 +2028,7 @@
                 y -= 1
             return res
         def g(x, y):
+            set_param(myjitdriver, 'max_unroll_loops', 5)
             a1 = f(A(x), y)
             a2 = f(A(x), y)
             b1 = f(B(x), y)
diff --git a/pypy/jit/metainterp/test/test_send.py b/pypy/jit/metainterp/test/test_send.py
--- a/pypy/jit/metainterp/test/test_send.py
+++ b/pypy/jit/metainterp/test/test_send.py
@@ -1,5 +1,5 @@
 import py
-from pypy.rlib.jit import JitDriver, promote, elidable
+from pypy.rlib.jit import JitDriver, promote, elidable, set_param
 from pypy.jit.codewriter.policy import StopAtXPolicy
 from pypy.jit.metainterp.test.support import LLJitMixin, OOJitMixin
 
@@ -181,6 +181,7 @@
             def getvalue(self):
                 return self.y
         def f(x, y):
+            set_param(myjitdriver, 'max_unroll_loops', 5)
             if x & 1:
                 w = W1(x)
             else:
@@ -226,6 +227,7 @@
         w2 = W2(20)
 
         def f(x, y):
+            set_param(myjitdriver, 'max_unroll_loops', 5)
             if x & 1:
                 w = w1
             else:
diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py
--- a/pypy/module/_cffi_backend/__init__.py
+++ b/pypy/module/_cffi_backend/__init__.py
@@ -1,11 +1,13 @@
 from pypy.interpreter.mixedmodule import MixedModule
+from pypy.rlib import rdynload
+
 
 class Module(MixedModule):
 
     appleveldefs = {
         }
     interpleveldefs = {
-        '__version__': 'space.wrap("0.3")',
+        '__version__': 'space.wrap("0.4")',
 
         'nonstandard_integer_types': 'misc.nonstandard_integer_types',
 
@@ -42,3 +44,12 @@
         'FFI_DEFAULT_ABI': 'ctypefunc._get_abi(space, "FFI_DEFAULT_ABI")',
         'FFI_CDECL': 'ctypefunc._get_abi(space,"FFI_DEFAULT_ABI")',#win32 name
         }
+
+for _name in ["RTLD_LAZY", "RTLD_NOW", "RTLD_GLOBAL", "RTLD_LOCAL",
+              "RTLD_NODELETE", "RTLD_NOLOAD", "RTLD_DEEPBIND"]:
+    if getattr(rdynload.cConfig, _name) is not None:
+        Module.interpleveldefs[_name] = 'space.wrap(%d)' % (
+            getattr(rdynload.cConfig, _name),)
+
+for _name in ["RTLD_LAZY", "RTLD_NOW", "RTLD_GLOBAL", "RTLD_LOCAL"]:
+    Module.interpleveldefs.setdefault(_name, 'space.wrap(0)')
diff --git a/pypy/module/_cffi_backend/ctypefunc.py b/pypy/module/_cffi_backend/ctypefunc.py
--- a/pypy/module/_cffi_backend/ctypefunc.py
+++ b/pypy/module/_cffi_backend/ctypefunc.py
@@ -286,8 +286,8 @@
         for i, cf in enumerate(ctype.fields_list):
             if cf.is_bitfield():
                 raise OperationError(space.w_NotImplementedError,
-                    space.wrap("cannot pass as argument a struct "
-                               "with bit fields"))
+                    space.wrap("cannot pass as argument or return value "
+                               "a struct with bit fields"))
             ffi_subtype = self.fb_fill_type(cf.ctype, False)
             if elements:
                 elements[i] = ffi_subtype
diff --git a/pypy/module/_cffi_backend/libraryobj.py b/pypy/module/_cffi_backend/libraryobj.py
--- a/pypy/module/_cffi_backend/libraryobj.py
+++ b/pypy/module/_cffi_backend/libraryobj.py
@@ -5,7 +5,6 @@
 from pypy.interpreter.typedef import TypeDef
 from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.rlib.rdynload import DLLHANDLE, dlopen, dlsym, dlclose, DLOpenError
-from pypy.rlib.rdynload import RTLD_GLOBAL
 
 from pypy.module._cffi_backend.cdataobj import W_CData
 from pypy.module._cffi_backend.ctypeobj import W_CType
@@ -15,17 +14,13 @@
     _immutable_ = True
     handle = rffi.cast(DLLHANDLE, 0)
 
-    def __init__(self, space, filename, is_global):
+    def __init__(self, space, filename, flags):
         self.space = space
-        if is_global and RTLD_GLOBAL is not None:
-            mode = RTLD_GLOBAL
-        else:
-            mode = -1     # default value, corresponds to RTLD_LOCAL
         with rffi.scoped_str2charp(filename) as ll_libname:
             if filename is None:
                 filename = "<None>"
             try:
-                self.handle = dlopen(ll_libname, mode)
+                self.handle = dlopen(ll_libname, flags)
             except DLOpenError, e:
                 raise operationerrfmt(space.w_OSError,
                                       "cannot load library %s: %s",
@@ -100,7 +95,7 @@
 W_Library.acceptable_as_base_class = False
 
 
-@unwrap_spec(filename="str_or_None", is_global=int)
-def load_library(space, filename, is_global=0):
-    lib = W_Library(space, filename, is_global)
+@unwrap_spec(filename="str_or_None", flags=int)
+def load_library(space, filename, flags=0):
+    lib = W_Library(space, filename, flags)
     return space.wrap(lib)
diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py
--- a/pypy/module/_cffi_backend/test/_backend_test_c.py
+++ b/pypy/module/_cffi_backend/test/_backend_test_c.py
@@ -42,19 +42,34 @@
     return sizeof(BPtr)
 
 
-def find_and_load_library(name, is_global=0):
+def find_and_load_library(name, flags=RTLD_NOW):
     import ctypes.util
     if name is None:
         path = None
     else:
         path = ctypes.util.find_library(name)
-    return load_library(path, is_global)
+    return load_library(path, flags)
 
 def test_load_library():
     x = find_and_load_library('c')
     assert repr(x).startswith("<clibrary '")
-    x = find_and_load_library('c', 1)
+    x = find_and_load_library('c', RTLD_NOW | RTLD_GLOBAL)
     assert repr(x).startswith("<clibrary '")
+    x = find_and_load_library('c', RTLD_LAZY)
+    assert repr(x).startswith("<clibrary '")
+
+def test_all_rtld_symbols():
+    import sys
+    FFI_DEFAULT_ABI        # these symbols must be defined
+    FFI_CDECL
+    RTLD_LAZY
+    RTLD_NOW
+    RTLD_GLOBAL
+    RTLD_LOCAL
+    if sys.platform.startswith("linux"):
+        RTLD_NODELETE
+        RTLD_NOLOAD
+        RTLD_DEEPBIND
 
 def test_nonstandard_integer_types():
     d = nonstandard_integer_types()
diff --git a/pypy/module/_cffi_backend/test/test_c.py b/pypy/module/_cffi_backend/test/test_c.py
--- a/pypy/module/_cffi_backend/test/test_c.py
+++ b/pypy/module/_cffi_backend/test/test_c.py
@@ -22,7 +22,6 @@
 from pypy.tool.udir import udir
 from pypy.conftest import gettestobjspace, option
 from pypy.interpreter import gateway
-from pypy.module._cffi_backend.test import _backend_test_c
 from pypy.module._cffi_backend import Module
 from pypy.translator.platform import host
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
@@ -87,20 +86,24 @@
 
 all_names = ', '.join(Module.interpleveldefs.keys())
 
+backend_test_c = py.path.local(__file__).join('..', '_backend_test_c.py')
+
 lst = []
-for name, value in _backend_test_c.__dict__.items():
-    if name.startswith('test_'):
-        lst.append(value)
-lst.sort(key=lambda func: func.func_code.co_firstlineno)
+with backend_test_c.open('r') as f:
+    for line in f:
+        if line.startswith('def test_'):
+            line = line[4:]
+            line = line[:line.index('():')]
+            lst.append(line)
 
 tmpdir = udir.join('test_c').ensure(dir=1)
 
 tmpname = tmpdir.join('_test_c.py')
 with tmpname.open('w') as f:
     for func in lst:
-        print >> f, 'def %s(self):' % (func.__name__,)
+        print >> f, 'def %s(self):' % (func,)
         print >> f, '    import _all_test_c'
-        print >> f, '    _all_test_c.%s()' % (func.__name__,)
+        print >> f, '    _all_test_c.%s()' % (func,)
 
 tmpname2 = tmpdir.join('_all_test_c.py')
 with tmpname2.open('w') as f:
@@ -110,7 +113,7 @@
     print >> f, '    class test:'
     print >> f, '        raises = staticmethod(raises)'
     print >> f, '        skip = staticmethod(skip)'
-    print >> f, py.path.local(__file__).join('..', '_backend_test_c.py').read()
+    print >> f, backend_test_c.read()
 
 
 mod = tmpname.pyimport()
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -405,7 +405,6 @@
          "ascii_encode",
          "latin_1_encode",
          "utf_7_encode",
-         "utf_8_encode",
          "utf_16_encode",
          "utf_16_be_encode",
          "utf_16_le_encode",
@@ -422,7 +421,6 @@
          "ascii_decode",
          "latin_1_decode",
          "utf_7_decode",
-         "utf_8_decode",
          "utf_16_decode",
          "utf_16_be_decode",
          "utf_16_le_decode",
@@ -437,6 +435,30 @@
     make_encoder_wrapper('mbcs_encode')
     make_decoder_wrapper('mbcs_decode')
 
+# utf-8 functions are not regular, because we have to pass
+# "allow_surrogates=True"
+@unwrap_spec(uni=unicode, errors='str_or_None')
+def utf_8_encode(space, uni, errors="strict"):
+    if errors is None:
+        errors = 'strict'
+    state = space.fromcache(CodecState)
+    result = runicode.unicode_encode_utf_8(
+        uni, len(uni), errors, state.encode_error_handler,
+        allow_surrogates=True)
+    return space.newtuple([space.wrap(result), space.wrap(len(uni))])
+
+@unwrap_spec(string='bufferstr', errors='str_or_None')
+def utf_8_decode(space, string, errors="strict", w_final=False):
+    if errors is None:
+        errors = 'strict'
+    final = space.is_true(w_final)
+    state = space.fromcache(CodecState)
+    result, consumed = runicode.str_decode_utf_8(
+        string, len(string), errors,
+        final, state.decode_error_handler,
+        allow_surrogates=True)
+    return space.newtuple([space.wrap(result), space.wrap(consumed)])
+
 @unwrap_spec(data=str, errors='str_or_None', byteorder=int)
 def utf_16_ex_decode(space, data, errors='strict', byteorder=0, w_final=False):
     if errors is None:
diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py
--- a/pypy/module/_csv/interp_reader.py
+++ b/pypy/module/_csv/interp_reader.py
@@ -67,8 +67,15 @@
                 w_line = space.next(self.w_iter)
             except OperationError, e:
                 if e.match(space, space.w_StopIteration):
-                    if field_builder is not None:
-                        raise self.error("newline inside string")
+                    if (field_builder is not None and
+                            state != START_RECORD and state != EAT_CRNL and
+                            (len(field_builder.build()) > 0 or
+                             state == IN_QUOTED_FIELD)):
+                        if dialect.strict:
+                            raise self.error("newline inside string")
+                        else:
+                            self.save_field(field_builder)
+                            break
                 raise
             self.line_num += 1
             line = space.str_w(w_line)
diff --git a/pypy/module/_csv/test/test_reader.py b/pypy/module/_csv/test/test_reader.py
--- a/pypy/module/_csv/test/test_reader.py
+++ b/pypy/module/_csv/test/test_reader.py
@@ -99,3 +99,11 @@
 
     def test_dubious_quote(self):
         self._read_test(['12,12,1",'], [['12', '12', '1"', '']])
+
+    def test_read_eof(self):
+        self._read_test(['a,"'], [['a', '']])
+        self._read_test(['"a'], [['a']])
+        self._read_test(['^'], [['\n']], escapechar='^')
+        self._read_test(['a,"'], 'Error', strict=True)
+        self._read_test(['"a'], 'Error', strict=True)
+        self._read_test(['^'], 'Error', escapechar='^', strict=True)
diff --git a/pypy/module/_ffi/interp_funcptr.py b/pypy/module/_ffi/interp_funcptr.py
--- a/pypy/module/_ffi/interp_funcptr.py
+++ b/pypy/module/_ffi/interp_funcptr.py
@@ -287,7 +287,11 @@
                                                                w_restype)
     addr = rffi.cast(rffi.VOIDP, addr)
     func = libffi.Func(name, argtypes, restype, addr, flags)
-    return W_FuncPtr(func, argtypes_w, w_restype)
+    try:
+        return W_FuncPtr(func, argtypes_w, w_restype)
+    except OSError:
+        raise OperationError(space.w_SystemError,
+                         space.wrap("internal error building the Func object"))
 
 
 W_FuncPtr.typedef = TypeDef(
diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py
--- a/pypy/module/_socket/interp_socket.py
+++ b/pypy/module/_socket/interp_socket.py
@@ -29,7 +29,9 @@
         info is a pair (hostaddr, port).
         """
         try:
-            sock, addr = self.accept(W_RSocket)
+            fd, addr = self.accept()
+            sock = rsocket.make_socket(
+                fd, self.family, self.type, self.proto, W_RSocket)
             return space.newtuple([space.wrap(sock),
                                    addr.as_object(sock.fd, space)])
         except SocketError, e:
diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -874,7 +874,7 @@
 # Iteration
 
 
-class W_DictMultiIterKeysObject(W_Object):
+class W_BaseDictMultiIterObject(W_Object):
     from pypy.objspace.std.dicttype import dictiter_typedef as typedef
 
     _immutable_fields_ = ["iteratorimplementation"]
@@ -885,32 +885,17 @@
         w_self.space = space
         w_self.iteratorimplementation = iteratorimplementation
 
+class W_DictMultiIterKeysObject(W_BaseDictMultiIterObject):
+    pass
+
+class W_DictMultiIterValuesObject(W_BaseDictMultiIterObject):
+    pass
+
+class W_DictMultiIterItemsObject(W_BaseDictMultiIterObject):
+    pass
+
 registerimplementation(W_DictMultiIterKeysObject)
-
-class W_DictMultiIterValuesObject(W_Object):
-    from pypy.objspace.std.dicttype import dictiter_typedef as typedef
-
-    _immutable_fields_ = ["iteratorimplementation"]
-
-    ignore_for_isinstance_cache = True
-
-    def __init__(w_self, space, iteratorimplementation):
-        w_self.space = space
-        w_self.iteratorimplementation = iteratorimplementation
-
 registerimplementation(W_DictMultiIterValuesObject)
-
-class W_DictMultiIterItemsObject(W_Object):
-    from pypy.objspace.std.dicttype import dictiter_typedef as typedef
-
-    _immutable_fields_ = ["iteratorimplementation"]
-
-    ignore_for_isinstance_cache = True
-
-    def __init__(w_self, space, iteratorimplementation):
-        w_self.space = space
-        w_self.iteratorimplementation = iteratorimplementation
-
 registerimplementation(W_DictMultiIterItemsObject)
 
 def iter__DictMultiIterKeysObject(space, w_dictiter):
diff --git a/pypy/objspace/std/ropeunicodeobject.py b/pypy/objspace/std/ropeunicodeobject.py
--- a/pypy/objspace/std/ropeunicodeobject.py
+++ b/pypy/objspace/std/ropeunicodeobject.py
@@ -71,7 +71,7 @@
             if result is not None:
                 return W_RopeObject(result)
         elif encoding == "utf-8":
-            result = rope.unicode_encode_utf8(node)
+            result = rope.unicode_encode_utf8(node, allow_surrogates=True)
             if result is not None:
                 return W_RopeObject(result)
     return encode_object(space, w_unistr, encoding, errors)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -13,7 +13,7 @@
 from pypy.rlib.objectmodel import compute_hash, specialize
 from pypy.rlib.objectmodel import compute_unique_id
 from pypy.rlib.rstring import UnicodeBuilder
-from pypy.rlib.runicode import unicode_encode_unicode_escape
+from pypy.rlib.runicode import make_unicode_escape_function
 from pypy.module.unicodedata import unicodedb
 from pypy.tool.sourcetools import func_with_new_name
 from pypy.rlib import jit
@@ -918,10 +918,13 @@
                     space.wrap("character mapping must return integer, None or unicode"))
     return W_UnicodeObject(u''.join(result))
 
+_repr_function, _ = make_unicode_escape_function(
+    pass_printable=False, unicode_output=False, quotes=True, prefix='u')
+
 def repr__Unicode(space, w_unicode):
     chars = w_unicode._value
     size = len(chars)
-    s = unicode_encode_unicode_escape(chars, size, "strict", quotes=True)
+    s = _repr_function(chars, size, "strict")
     return space.wrap(s)
 
 def mod__Unicode_ANY(space, w_format, w_values):
diff --git a/pypy/objspace/std/unicodetype.py b/pypy/objspace/std/unicodetype.py
--- a/pypy/objspace/std/unicodetype.py
+++ b/pypy/objspace/std/unicodetype.py
@@ -236,13 +236,14 @@
             if encoding == 'ascii':
                 u = space.unicode_w(w_object)
                 eh = encode_error_handler(space)
-                return space.wrap(unicode_encode_ascii(u, len(u), None,
-                                                       errorhandler=eh))
+                return space.wrap(unicode_encode_ascii(
+                        u, len(u), None, errorhandler=eh))
             if encoding == 'utf-8':
                 u = space.unicode_w(w_object)
                 eh = encode_error_handler(space)
-                return space.wrap(unicode_encode_utf_8(u, len(u), None,
-                                                       errorhandler=eh))
+                return space.wrap(unicode_encode_utf_8(
+                        u, len(u), None, errorhandler=eh,
+                        allow_surrogates=True))
         from pypy.module._codecs.interp_codecs import lookup_codec
         w_encoder = space.getitem(lookup_codec(space, encoding), space.wrap(0))
     if errors is None:
@@ -265,15 +266,14 @@
             # XXX error handling
             s = space.bufferstr_w(w_obj)
             eh = decode_error_handler(space)
-            return space.wrap(str_decode_ascii(s, len(s), None,
-                                               final=True,
-                                               errorhandler=eh)[0])
+            return space.wrap(str_decode_ascii(
+                    s, len(s), None, final=True, errorhandler=eh)[0])
         if encoding == 'utf-8':
             s = space.bufferstr_w(w_obj)
             eh = decode_error_handler(space)
-            return space.wrap(str_decode_utf_8(s, len(s), None,
-                                               final=True,
-                                               errorhandler=eh)[0])
+            return space.wrap(str_decode_utf_8(
+                    s, len(s), None, final=True, errorhandler=eh,
+                    allow_surrogates=True)[0])
     w_codecs = space.getbuiltinmodule("_codecs")
     w_decode = space.getattr(w_codecs, space.wrap("decode"))
     if errors is None:
diff --git a/pypy/rlib/rdynload.py b/pypy/rlib/rdynload.py
--- a/pypy/rlib/rdynload.py
+++ b/pypy/rlib/rdynload.py
@@ -44,6 +44,10 @@
     RTLD_LOCAL = rffi_platform.DefinedConstantInteger('RTLD_LOCAL')
     RTLD_GLOBAL = rffi_platform.DefinedConstantInteger('RTLD_GLOBAL')
     RTLD_NOW = rffi_platform.DefinedConstantInteger('RTLD_NOW')
+    RTLD_LAZY = rffi_platform.DefinedConstantInteger('RTLD_LAZY')
+    RTLD_NODELETE = rffi_platform.DefinedConstantInteger('RTLD_NODELETE')
+    RTLD_NOLOAD = rffi_platform.DefinedConstantInteger('RTLD_NOLOAD')
+    RTLD_DEEPBIND = rffi_platform.DefinedConstantInteger('RTLD_DEEPBIND')
 
 class cConfig:
     pass
@@ -72,6 +76,7 @@
     RTLD_LOCAL = cConfig.RTLD_LOCAL
     RTLD_GLOBAL = cConfig.RTLD_GLOBAL
     RTLD_NOW = cConfig.RTLD_NOW
+    RTLD_LAZY = cConfig.RTLD_LAZY
 
     def dlerror():
         # XXX this would never work on top of ll2ctypes, because
@@ -90,7 +95,8 @@
                 mode = RTLD_LOCAL
             else:
                 mode = 0
-        mode |= RTLD_NOW
+        if (mode & (RTLD_LAZY | RTLD_NOW)) == 0:
+            mode |= RTLD_NOW
         res = c_dlopen(name, rffi.cast(rffi.INT, mode))
         if not res:
             err = dlerror()
diff --git a/pypy/rlib/rope.py b/pypy/rlib/rope.py
--- a/pypy/rlib/rope.py
+++ b/pypy/rlib/rope.py
@@ -1485,7 +1485,7 @@
     if rope.is_bytestring():
         return rope
 
-def unicode_encode_utf8(rope):
+def unicode_encode_utf8(rope, allow_surrogates=False):
     from pypy.rlib.runicode import unicode_encode_utf_8
     if rope.is_ascii():
         return rope
@@ -1494,7 +1494,8 @@
                                 unicode_encode_utf8(rope.right))
     elif isinstance(rope, LiteralUnicodeNode):
         return LiteralStringNode(
-            unicode_encode_utf_8(rope.u, len(rope.u), "strict"))
+            unicode_encode_utf_8(rope.u, len(rope.u), "strict",
+                                 allow_surrogates=allow_surrogates))
     elif isinstance(rope, LiteralStringNode):
         return LiteralStringNode(_str_encode_utf_8(rope.s))
 
diff --git a/pypy/rlib/rsocket.py b/pypy/rlib/rsocket.py
--- a/pypy/rlib/rsocket.py
+++ b/pypy/rlib/rsocket.py
@@ -609,9 +609,11 @@
     """
     _mixin_ = True        # for interp_socket.py
     fd = _c.INVALID_SOCKET
-    def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0):
+    def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0,
+                 fd=_c.INVALID_SOCKET):
         """Create a new socket."""
-        fd = _c.socket(family, type, proto)
+        if _c.invalid_socket(fd):
+            fd = _c.socket(family, type, proto)
         if _c.invalid_socket(fd):
             raise self.error_handler()
         # PLAT RISCOS
@@ -717,11 +719,9 @@
         addrlen_p[0] = rffi.cast(_c.socklen_t, maxlen)
         return addr, addr.addr_p, addrlen_p
 
-    def accept(self, SocketClass=None):
+    def accept(self):
         """Wait for an incoming connection.
-        Return (new socket object, client address)."""
-        if SocketClass is None:
-            SocketClass = RSocket
+        Return (new socket fd, client address)."""
         if self._select(False) == 1:
             raise SocketTimeout
         address, addr_p, addrlen_p = self._addrbuf()
@@ -734,9 +734,7 @@
         if _c.invalid_socket(newfd):
             raise self.error_handler()
         address.addrlen = rffi.cast(lltype.Signed, addrlen)
-        sock = make_socket(newfd, self.family, self.type, self.proto,
-                           SocketClass)
-        return (sock, address)
+        return (newfd, address)
 
     def bind(self, address):
         """Bind the socket to a local address."""
@@ -755,6 +753,11 @@
             if res != 0:
                 raise self.error_handler()
 
+    def detach(self):
+        fd = self.fd
+        self.fd = _c.INVALID_SOCKET
+        return fd
+
     if _c.WIN32:
         def _connect(self, address):
             """Connect the socket to a remote address."""
diff --git a/pypy/rlib/runicode.py b/pypy/rlib/runicode.py
--- a/pypy/rlib/runicode.py
+++ b/pypy/rlib/runicode.py
@@ -77,12 +77,14 @@
 ]
 
 def str_decode_utf_8(s, size, errors, final=False,
-                     errorhandler=None):
+                     errorhandler=None, allow_surrogates=False):
     if errorhandler is None:
         errorhandler = raise_unicode_exception_decode
-    return str_decode_utf_8_impl(s, size, errors, final, errorhandler)
+    return str_decode_utf_8_impl(s, size, errors, final, errorhandler,
+                                 allow_surrogates=allow_surrogates)
 
-def str_decode_utf_8_impl(s, size, errors, final, errorhandler):
+def str_decode_utf_8_impl(s, size, errors, final, errorhandler,
+                          allow_surrogates):
     if size == 0:
         return u'', 0
 
@@ -184,8 +186,7 @@
             if (ordch2>>6 != 0x2 or    # 0b10
                 (ordch1 == 0xe0 and ordch2 < 0xa0)
                 # surrogates shouldn't be valid UTF-8!
-                # Uncomment the line below to make them invalid.
-                # or (ordch1 == 0xed and ordch2 > 0x9f)
+                or (not allow_surrogates and ordch1 == 0xed and ordch2 > 0x9f)
                 ):
                 r, pos = errorhandler(errors, 'utf-8',
                                       'invalid continuation byte',
@@ -254,13 +255,21 @@
     result.append((chr((0x80 | ((ch >> 6) & 0x3f)))))
     result.append((chr((0x80 | (ch & 0x3f)))))
 
-def unicode_encode_utf_8(s, size, errors, errorhandler=None):
+def unicode_encode_utf_8(s, size, errors, errorhandler=None,
+                         allow_surrogates=False):
+    if errorhandler is None:
+        errorhandler = raise_unicode_exception_encode
+    return unicode_encode_utf_8_impl(s, size, errors, errorhandler,
+                                     allow_surrogates=allow_surrogates)
+
+def unicode_encode_utf_8_impl(s, size, errors, errorhandler,
+                              allow_surrogates=False):
     assert(size >= 0)
     result = StringBuilder(size)
-    i = 0
-    while i < size:
-        ch = ord(s[i])
-        i += 1
+    pos = 0
+    while pos < size:
+        ch = ord(s[pos])
+        pos += 1
         if ch < 0x80:
             # Encode ASCII
             result.append(chr(ch))
@@ -272,20 +281,32 @@
             # Encode UCS2 Unicode ordinals
             if ch < 0x10000:
                 # Special case: check for high surrogate
-                if 0xD800 <= ch <= 0xDBFF and i != size:
-                    ch2 = ord(s[i])
-                    # Check for low surrogate and combine the two to
-                    # form a UCS4 value
-                    if 0xDC00 <= ch2 <= 0xDFFF:
-                        ch3 = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000
-                        i += 1
-                        _encodeUCS4(result, ch3)
+                if 0xD800 <= ch <= 0xDFFF:
+                    if pos != size:
+                        ch2 = ord(s[pos])
+                        # Check for low surrogate and combine the two to
+                        # form a UCS4 value
+                        if ch <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF:
+                            ch3 = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000
+                            pos += 1
+                            _encodeUCS4(result, ch3)
+                            continue
+                    if not allow_surrogates:
+                        r, pos = errorhandler(errors, 'utf-8',
+                                              'surrogates not allowed',
+                                              s, pos-1, pos)
+                        for ch in r:
+                            if ord(ch) < 0x80:
+                                result.append(chr(ord(ch)))
+                            else:
+                                errorhandler('strict', 'utf-8',
+                                             'surrogates not allowed',
+                                             s, pos-1, pos)
                         continue
-                # Fall through: handles isolated high surrogates
+                    # else: Fall through and handle isolated high surrogates
                 result.append((chr((0xe0 | (ch >> 12)))))
                 result.append((chr((0x80 | ((ch >> 6) & 0x3f)))))
                 result.append((chr((0x80 | (ch & 0x3f)))))
-                continue
             else:
                 _encodeUCS4(result, ch)
     return result.build()
@@ -1202,74 +1223,120 @@
 
     return builder.build(), pos
 
-def unicode_encode_unicode_escape(s, size, errors, errorhandler=None, quotes=False):
-    # errorhandler is not used: this function cannot cause Unicode errors
-    result = StringBuilder(size)
+def make_unicode_escape_function(pass_printable=False, unicode_output=False,
+                                 quotes=False, prefix=None):
+    # Python3 has two similar escape functions: one implements
+    # encode('unicode_escape') and outputs bytes; the other implements
+    # unicode.__repr__ and outputs unicode.  They cannot share RPython code,
+    # so we generate them with the template below.
+    # Python2 does not really need this, but it reduces diffs between branches.
 
-    if quotes:
-        if s.find(u'\'') != -1 and s.find(u'\"') == -1:
-            quote = ord('\"')
-            result.append('u"')
+    if unicode_output:
+        STRING_BUILDER = UnicodeBuilder
+        STR = unicode
+        CHR = UNICHR
+    else:
+        STRING_BUILDER = StringBuilder
+        STR = str
+        CHR = chr
+
+    def unicode_escape(s, size, errors, errorhandler=None):
+        # errorhandler is not used: this function cannot cause Unicode errors
+        result = STRING_BUILDER(size)
+
+        if quotes:
+            if prefix:
+                result.append(STR(prefix))
+            if s.find(u'\'') != -1 and s.find(u'\"') == -1:
+                quote = ord('\"')
+                result.append(STR('"'))
+            else:
+                quote = ord('\'')
+                result.append(STR('\''))
         else:
-            quote = ord('\'')
-            result.append('u\'')
-    else:
-        quote = 0
+            quote = 0
 
-        if size == 0:
-            return ''
+            if size == 0:
+                return STR('')
 
-    pos = 0
-    while pos < size:
-        ch = s[pos]
-        oc = ord(ch)
+        pos = 0
+        while pos < size:
+            ch = s[pos]
+            oc = ord(ch)
 
-        # Escape quotes
-        if quotes and (oc == quote or ch == '\\'):
-            result.append('\\')
-            result.append(chr(oc))
-            pos += 1
-            continue
-
-        # The following logic is enabled only if MAXUNICODE == 0xffff, or
-        # for testing on top of a host CPython where sys.maxunicode == 0xffff
-        if ((MAXUNICODE < 65536 or
-                (not we_are_translated() and sys.maxunicode < 65536))
-            and 0xD800 <= oc < 0xDC00 and pos + 1 < size):
-            # Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes
-            pos += 1
-            oc2 = ord(s[pos])
-
-            if 0xDC00 <= oc2 <= 0xDFFF:
-                ucs = (((oc & 0x03FF) << 10) | (oc2 & 0x03FF)) + 0x00010000
-                raw_unicode_escape_helper(result, ucs)
+            # Escape quotes
+            if quotes and (oc == quote or ch == '\\'):
+                result.append(STR('\\'))
+                result.append(CHR(oc))
                 pos += 1
                 continue
-            # Fall through: isolated surrogates are copied as-is
-            pos -= 1
 
-        # Map special whitespace to '\t', \n', '\r'
-        if ch == '\t':
-            result.append('\\t')
-        elif ch == '\n':
-            result.append('\\n')
-        elif ch == '\r':
-            result.append('\\r')
-        elif ch == '\\':
-            result.append('\\\\')
+            # The following logic is enabled only if MAXUNICODE == 0xffff, or
+            # for testing on top of a host Python where sys.maxunicode == 0xffff
+            if ((MAXUNICODE < 65536 or
+                    (not we_are_translated() and sys.maxunicode < 65536))
+                and 0xD800 <= oc < 0xDC00 and pos + 1 < size):
+                # Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes
+                pos += 1
+                oc2 = ord(s[pos])
 
-        # Map non-printable or non-ascii to '\xhh' or '\uhhhh'
-        elif oc < 32 or oc >= 0x7F:
-            raw_unicode_escape_helper(result, oc)
+                if 0xDC00 <= oc2 <= 0xDFFF:
+                    ucs = (((oc & 0x03FF) << 10) | (oc2 & 0x03FF)) + 0x00010000
+                    char_escape_helper(result, ucs)
+                    pos += 1
+                    continue
+                # Fall through: isolated surrogates are copied as-is
+                pos -= 1
 
-        # Copy everything else as-is
+            # Map special whitespace to '\t', '\n', '\r'
+            if ch == '\t':
+                result.append(STR('\\t'))
+            elif ch == '\n':
+                result.append(STR('\\n'))
+            elif ch == '\r':
+                result.append(STR('\\r'))
+            elif ch == '\\':
+                result.append(STR('\\\\'))
+
+            # Map non-printable or non-ascii to '\xhh' or '\uhhhh'
+            elif pass_printable and not unicodedb.isprintable(oc):
+                char_escape_helper(result, oc)
+            elif not pass_printable and (oc < 32 or oc >= 0x7F):
+                char_escape_helper(result, oc)
+
+            # Copy everything else as-is
+            else:
+                result.append(CHR(oc))
+            pos += 1
+
+        if quotes:
+            result.append(CHR(quote))
+        return result.build()
+
+    def char_escape_helper(result, char):
+        num = hex(char)
+        if STR is unicode:
+            num = num.decode('ascii')
+        if char >= 0x10000:
+            result.append(STR("\\U"))
+            zeros = 8
+        elif char >= 0x100:
+            result.append(STR("\\u"))
+            zeros = 4
         else:
-            result.append(chr(oc))
-        pos += 1
+            result.append(STR("\\x"))
+            zeros = 2
+        lnum = len(num)
+        nb = zeros + 2 - lnum # num starts with '0x'
+        if nb > 0:
+            result.append_multiple_char(STR('0'), nb)
+        result.append_slice(num, 2, lnum)
 
-    if quotes:
-        result.append(chr(quote))
-    return result.build()
+    return unicode_escape, char_escape_helper
+
+# This function is also used by _codecs/interp_codecs.py
+(unicode_encode_unicode_escape, raw_unicode_escape_helper
+ ) = make_unicode_escape_function()
 
 # ____________________________________________________________
 # Raw unicode escape
@@ -1326,23 +1393,6 @@
 
     return result.build(), pos
 
-def raw_unicode_escape_helper(result, char):
-    num = hex(char)
-    if char >= 0x10000:
-        result.append("\\U")
-        zeros = 8
-    elif char >= 0x100:
-        result.append("\\u")
-        zeros = 4
-    else:
-        result.append("\\x")
-        zeros = 2
-    lnum = len(num)
-    nb = zeros + 2 - lnum # num starts with '0x'
-    if nb > 0:
-        result.append_multiple_char('0', nb)
-    result.append_slice(num, 2, lnum)
-
 def unicode_encode_raw_unicode_escape(s, size, errors, errorhandler=None):
     # errorhandler is not used: this function cannot cause Unicode errors
     if size == 0:
diff --git a/pypy/rlib/test/test_rpoll.py b/pypy/rlib/test/test_rpoll.py
--- a/pypy/rlib/test/test_rpoll.py
+++ b/pypy/rlib/test/test_rpoll.py
@@ -25,7 +25,8 @@
     assert events[0][0] == serv.fd
     assert events[0][1] & POLLIN
 
-    servconn, cliaddr = serv.accept()
+    servconn_fd, cliaddr = serv.accept()
+    servconn = RSocket(AF_INET, fd=servconn_fd)
 
     events = poll({serv.fd: POLLIN,
                    cli.fd: POLLOUT}, timeout=500)
diff --git a/pypy/rlib/test/test_rsocket.py b/pypy/rlib/test/test_rsocket.py
--- a/pypy/rlib/test/test_rsocket.py
+++ b/pypy/rlib/test/test_rsocket.py
@@ -167,7 +167,8 @@
     lock.acquire()
     thread.start_new_thread(connecting, ())
     print 'waiting for connection'
-    s1, addr2 = sock.accept()
+    fd1, addr2 = sock.accept()
+    s1 = RSocket(fd=fd1)
     print 'connection accepted'
     lock.acquire()
     print 'connecting side knows that the connection was accepted too'
@@ -255,7 +256,8 @@
     if errcodesok:
         assert err.value.errno in (errno.EINPROGRESS, errno.EWOULDBLOCK)
 
-    s1, addr2 = sock.accept()
+    fd1, addr2 = sock.accept()
+    s1 = RSocket(fd=fd1)
     s1.setblocking(False)
     assert addr.eq(s2.getpeername())
     assert addr2.get_port() == s2.getsockname().get_port()
@@ -414,7 +416,8 @@
 
     clientsock = RSocket(AF_UNIX)
     clientsock.connect(a)
-    s, addr = serversock.accept()
+    fd, addr = serversock.accept()
+    s = RSocket(AF_UNIX, fd=fd)
 
     s.send('X')
     data = clientsock.recv(100)
diff --git a/pypy/rlib/test/test_runicode.py b/pypy/rlib/test/test_runicode.py
--- a/pypy/rlib/test/test_runicode.py
+++ b/pypy/rlib/test/test_runicode.py
@@ -118,6 +118,9 @@
         for i in range(10000):
             for encoding in ("utf-7 utf-8 utf-16 utf-16-be utf-16-le "
                              "utf-32 utf-32-be utf-32-le").split():
+                if encoding == 'utf-8' and 0xd800 <= i <= 0xdfff:
+                    # Don't try to encode lone surrogates
+                    continue
                 self.checkdecode(unichr(i), encoding)
 
     def test_random(self):
@@ -242,9 +245,8 @@
             self.checkdecode(s, "utf-8")
 
     def test_utf8_surrogate(self):
-        # A surrogate should not be valid utf-8, but python 2.x accepts them.
-        # This test will raise an error with python 3.x
-        self.checkdecode(u"\ud800", "utf-8")
+        # surrogates used to be allowed by python 2.x
+        raises(UnicodeDecodeError, self.checkdecode, u"\ud800", "utf-8")
 
     def test_invalid_start_byte(self):
         """
@@ -691,12 +693,16 @@
             self.checkencode(s, "utf-8")
 
     def test_utf8_surrogates(self):
-        # check replacing of two surrogates by single char while encoding
         # make sure that the string itself is not marshalled
         u = u"\ud800"
         for i in range(4):
             u += u"\udc00"
-        self.checkencode(u, "utf-8")
+        if runicode.MAXUNICODE < 65536:
+            # Check replacing of two surrogates by single char while encoding
+            self.checkencode(u, "utf-8")
+        else:
+            # This is not done in wide unicode builds
+            raises(UnicodeEncodeError, self.checkencode, u, "utf-8")
 
     def test_ascii_error(self):
         self.checkencodeerror(u"abc\xFF\xFF\xFFcde", "ascii", 3, 6)
diff --git a/pypy/rpython/rstr.py b/pypy/rpython/rstr.py
--- a/pypy/rpython/rstr.py
+++ b/pypy/rpython/rstr.py
@@ -28,8 +28,10 @@
         from pypy.rpython.annlowlevel import hlstr
         value = hlstr(llvalue)
         assert value is not None
-        univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict',
-                                             False, self.ll_raise_unicode_exception_decode)
+        univalue, _ = self.rstr_decode_utf_8(
+            value, len(value), 'strict', final=False,
+            errorhandler=self.ll_raise_unicode_exception_decode,
+            allow_surrogates=False)
         return self.ll.llunicode(univalue)
 
     def ll_raise_unicode_exception_decode(self, errors, encoding, msg, s,
@@ -50,9 +52,9 @@
         self.runicode_encode_utf_8 = None
 
     def ensure_ll_encode_utf8(self):
-        from pypy.rlib.runicode import unicode_encode_utf_8
-        self.runicode_encode_utf_8 = func_with_new_name(unicode_encode_utf_8,
-                                                        'runicode_encode_utf_8')
+        from pypy.rlib.runicode import unicode_encode_utf_8_impl
+        self.runicode_encode_utf_8 = func_with_new_name(
+            unicode_encode_utf_8_impl, 'runicode_encode_utf_8')
 
     def rtype_method_upper(self, hop):
         raise TypeError("Cannot do toupper on unicode string")
@@ -65,9 +67,16 @@
         from pypy.rpython.annlowlevel import hlunicode
         s = hlunicode(ll_s)
         assert s is not None
-        bytes = self.runicode_encode_utf_8(s, len(s), 'strict')
+        bytes = self.runicode_encode_utf_8(
+            s, len(s), 'strict',
+            errorhandler=self.ll_raise_unicode_exception_decode,
+            allow_surrogates=False)
         return self.ll.llstr(bytes)
 
+    def ll_raise_unicode_exception_encode(self, errors, encoding, msg, u,
+                                          startingpos, endingpos):
+        raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
+    
 class __extend__(annmodel.SomeString):
     def rtyper_makerepr(self, rtyper):
         return rtyper.type_system.rstr.string_repr
diff --git a/pypy/translator/c/genc.py b/pypy/translator/c/genc.py
--- a/pypy/translator/c/genc.py
+++ b/pypy/translator/c/genc.py
@@ -22,16 +22,15 @@
 def get_recent_cpython_executable():
 
     if sys.platform == 'win32':
-        python = sys.executable.replace('\\', '/') + ' '
+        python = sys.executable.replace('\\', '/')
     else:
-        python = sys.executable + ' '
-
+        python = sys.executable
     # Is there a command 'python' that runs python 2.5-2.7?
     # If there is, then we can use it instead of sys.executable
     returncode, stdout, stderr = runsubprocess.run_subprocess(
         "python", "-V")
     if _CPYTHON_RE.match(stdout) or _CPYTHON_RE.match(stderr):
-        python = 'python '
+        python = 'python'
     return python
 
 
@@ -559,6 +558,7 @@
         for rule in rules:
             mk.rule(*rule)
 
+        #XXX: this conditional part is not tested at all
         if self.config.translation.gcrootfinder == 'asmgcc':
             trackgcfiles = [cfile[:cfile.rfind('.')] for cfile in mk.cfiles]
             if self.translator.platform.name == 'msvc':
@@ -581,7 +581,7 @@
             else:
                 mk.definition('PYPY_MAIN_FUNCTION', "main")
 
-            python = get_recent_cpython_executable()
+            mk.definition('PYTHON', get_recent_cpython_executable())
 
             if self.translator.platform.name == 'msvc':
                 lblofiles = []
@@ -603,22 +603,22 @@
                         'cmd /c $(MASM) /nologo /Cx /Cp /Zm /coff /Fo$@ /c $< $(INCLUDEDIRS)')
                 mk.rule('.c.gcmap', '',
                         ['$(CC) /nologo $(ASM_CFLAGS) /c /FAs /Fa$*.s $< $(INCLUDEDIRS)',
-                         'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -t $*.s > $@']
+                         'cmd /c $(PYTHON) $(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc -t $*.s > $@']
                         )
                 mk.rule('gcmaptable.c', '$(GCMAPFILES)',
-                        'cmd /c ' + python + '$(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc $(GCMAPFILES) > $@')
+                        'cmd /c $(PYTHON) $(PYPYDIR)/translator/c/gcc/trackgcroot.py -fmsvc $(GCMAPFILES) > $@')
 
             else:
                 mk.definition('OBJECTS', '$(ASMLBLFILES) gcmaptable.s')
                 mk.rule('%.s', '%.c', '$(CC) $(CFLAGS) $(CFLAGSEXTRA) -frandom-seed=$< -o $@ -S $< $(INCLUDEDIRS)')
                 mk.rule('%.lbl.s %.gcmap', '%.s',
-                        [python +
-                             '$(PYPYDIR)/translator/c/gcc/trackgcroot.py '
+                        [
+                             '$(PYTHON) $(PYPYDIR)/translator/c/gcc/trackgcroot.py '
                              '-t $< > $*.gctmp',
                          'mv $*.gctmp $*.gcmap'])
                 mk.rule('gcmaptable.s', '$(GCMAPFILES)',
-                        [python +
-                             '$(PYPYDIR)/translator/c/gcc/trackgcroot.py '
+                        [
+                             '$(PYTHON) $(PYPYDIR)/translator/c/gcc/trackgcroot.py '
                              '$(GCMAPFILES) > $@.tmp',
                          'mv $@.tmp $@'])
                 mk.rule('.PRECIOUS', '%.s', "# don't remove .s files if Ctrl-C'ed")


More information about the pypy-commit mailing list