[pypy-commit] pypy regalloc-playground: merge default

Fri Oct 27 09:25:20 EDT 2017

Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: regalloc-playground
Changeset: r92859:4d898e5c1b6c
Date: 2017-10-27 15:24 +0200
http://bitbucket.org/pypy/pypy/changeset/4d898e5c1b6c/

Log:	merge default

diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py
--- a/lib_pypy/_ctypes/array.py
+++ b/lib_pypy/_ctypes/array.py
@@ -8,60 +8,63 @@
 class ArrayMeta(_CDataMeta):
     def __new__(self, name, cls, typedict):
         res = type.__new__(self, name, cls, typedict)
-        if '_type_' in typedict:
-            ffiarray = _rawffi.Array(typedict['_type_']._ffishape_)
-            res._ffiarray = ffiarray
-            subletter = getattr(typedict['_type_'], '_type_', None)
-            if subletter == 'c':
-                def getvalue(self):
-                    return _rawffi.charp2string(self._buffer.buffer,
-                                                self._length_)
-                def setvalue(self, val):
-                    # we don't want to have buffers here
-                    if len(val) > self._length_:
-                        raise ValueError("%r too long" % (val,))
-                    if isinstance(val, str):
-                        _rawffi.rawstring2charp(self._buffer.buffer, val)
-                    else:
-                        for i in range(len(val)):
-                            self[i] = val[i]
-                    if len(val) < self._length_:
-                        self._buffer[len(val)] = '\x00'
-                res.value = property(getvalue, setvalue)
 
-                def getraw(self):
-                    return _rawffi.charp2rawstring(self._buffer.buffer,
-                                                   self._length_)
+        if cls == (_CData,): # this is the Array class defined below
+            res._ffiarray = None
+            return res
+        if not hasattr(res, '_length_') or not isinstance(res._length_, int):
+            raise AttributeError(
+                "class must define a '_length_' attribute, "
+                "which must be a positive integer")
+        ffiarray = res._ffiarray = _rawffi.Array(res._type_._ffishape_)
+        subletter = getattr(res._type_, '_type_', None)
+        if subletter == 'c':
+            def getvalue(self):
+                return _rawffi.charp2string(self._buffer.buffer,
+                                            self._length_)
+            def setvalue(self, val):
+                # we don't want to have buffers here
+                if len(val) > self._length_:
+                    raise ValueError("%r too long" % (val,))
+                if isinstance(val, str):
+                    _rawffi.rawstring2charp(self._buffer.buffer, val)
+                else:
+                    for i in range(len(val)):
+                        self[i] = val[i]
+                if len(val) < self._length_:
+                    self._buffer[len(val)] = b'\x00'
+            res.value = property(getvalue, setvalue)
 
-                def setraw(self, buffer):
-                    if len(buffer) > self._length_:
-                        raise ValueError("%r too long" % (buffer,))
-                    _rawffi.rawstring2charp(self._buffer.buffer, buffer)
-                res.raw = property(getraw, setraw)
-            elif subletter == 'u':
-                def getvalue(self):
-                    return _rawffi.wcharp2unicode(self._buffer.buffer,
-                                                  self._length_)
+            def getraw(self):
+                return _rawffi.charp2rawstring(self._buffer.buffer,
+                                               self._length_)
 
-                def setvalue(self, val):
-                    # we don't want to have buffers here
-                    if len(val) > self._length_:
-                        raise ValueError("%r too long" % (val,))
-                    if isinstance(val, unicode):
-                        target = self._buffer
-                    else:
-                        target = self
-                    for i in range(len(val)):
-                        target[i] = val[i]
-                    if len(val) < self._length_:
-                        target[len(val)] = u'\x00'
-                res.value = property(getvalue, setvalue)
-                
-            if '_length_' in typedict:
-                res._ffishape_ = (ffiarray, typedict['_length_'])
-                res._fficompositesize_ = res._sizeofinstances()
-        else:
-            res._ffiarray = None
+            def setraw(self, buffer):
+                if len(buffer) > self._length_:
+                    raise ValueError("%r too long" % (buffer,))
+                _rawffi.rawstring2charp(self._buffer.buffer, buffer)
+            res.raw = property(getraw, setraw)
+        elif subletter == 'u':
+            def getvalue(self):
+                return _rawffi.wcharp2unicode(self._buffer.buffer,
+                                              self._length_)
+
+            def setvalue(self, val):
+                # we don't want to have buffers here
+                if len(val) > self._length_:
+                    raise ValueError("%r too long" % (val,))
+                if isinstance(val, unicode):
+                    target = self._buffer
+                else:
+                    target = self
+                for i in range(len(val)):
+                    target[i] = val[i]
+                if len(val) < self._length_:
+                    target[len(val)] = u'\x00'
+            res.value = property(getvalue, setvalue)
+
+        res._ffishape_ = (ffiarray, res._length_)
+        res._fficompositesize_ = res._sizeofinstances()
         return res
 
     from_address = cdata_from_address
@@ -156,7 +159,7 @@
     l = [self[i] for i in range(start, stop, step)]
     letter = getattr(self._type_, '_type_', None)
     if letter == 'c':
-        return "".join(l)
+        return b"".join(l)
     if letter == 'u':
         return u"".join(l)
     return l
diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py
--- a/lib_pypy/_ctypes/basics.py
+++ b/lib_pypy/_ctypes/basics.py
@@ -176,6 +176,10 @@
     def _get_buffer_value(self):
         return self._buffer[0]
 
+    def _copy_to(self, addr):
+        target = type(self).from_address(addr)._buffer
+        target[0] = self._get_buffer_value()
+
     def _to_ffi_param(self):
         if self.__class__._is_pointer_like():
             return self._get_buffer_value()
diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py
--- a/lib_pypy/_ctypes/pointer.py
+++ b/lib_pypy/_ctypes/pointer.py
@@ -114,7 +114,9 @@
         cobj = self._type_.from_param(value)
         if ensure_objects(cobj) is not None:
             store_reference(self, index, cobj._objects)
-        self._subarray(index)[0] = cobj._get_buffer_value()
+        address = self._buffer[0]
+        address += index * sizeof(self._type_)
+        cobj._copy_to(address)
 
     def __nonzero__(self):
         return self._buffer[0] != 0
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -291,6 +291,11 @@
     def _get_buffer_value(self):
         return self._buffer.buffer
 
+    def _copy_to(self, addr):
+        from ctypes import memmove
+        origin = self._get_buffer_value()
+        memmove(addr, origin, self._fficompositesize_)
+
     def _to_ffi_param(self):
         return self._buffer
 
diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py
--- a/lib_pypy/_sqlite3.py
+++ b/lib_pypy/_sqlite3.py
@@ -1027,21 +1027,25 @@
         if '\0' in sql:
             raise ValueError("the query contains a null character")
 
-        first_word = sql.lstrip().split(" ")[0].upper()
-        if first_word == "":
+        
+        if sql:
+            first_word = sql.lstrip().split()[0].upper()
+            if first_word == '':
+                self._type = _STMT_TYPE_INVALID
+            if first_word == "SELECT":
+                self._type = _STMT_TYPE_SELECT
+            elif first_word == "INSERT":
+                self._type = _STMT_TYPE_INSERT
+            elif first_word == "UPDATE":
+                self._type = _STMT_TYPE_UPDATE
+            elif first_word == "DELETE":
+                self._type = _STMT_TYPE_DELETE
+            elif first_word == "REPLACE":
+                self._type = _STMT_TYPE_REPLACE
+            else:
+                self._type = _STMT_TYPE_OTHER
+        else:
             self._type = _STMT_TYPE_INVALID
-        elif first_word == "SELECT":
-            self._type = _STMT_TYPE_SELECT
-        elif first_word == "INSERT":
-            self._type = _STMT_TYPE_INSERT
-        elif first_word == "UPDATE":
-            self._type = _STMT_TYPE_UPDATE
-        elif first_word == "DELETE":
-            self._type = _STMT_TYPE_DELETE
-        elif first_word == "REPLACE":
-            self._type = _STMT_TYPE_REPLACE
-        else:
-            self._type = _STMT_TYPE_OTHER
 
         if isinstance(sql, unicode):
             sql = sql.encode('utf-8')
diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst
--- a/pypy/doc/faq.rst
+++ b/pypy/doc/faq.rst
@@ -182,6 +182,57 @@
 technical difficulties.
 
 
+What about numpy, numpypy, micronumpy?
+--------------------------------------
+
+Way back in 2011, the PyPy team `started to reimplement`_ numpy in PyPy.  It
+has two pieces:
+
+  * the builtin module :source:`pypy/module/micronumpy`: this is written in
+    RPython and roughly covers the content of the ``numpy.core.multiarray``
+    module. Confusingly enough, this is available in PyPy under the name
+    ``_numpypy``.  It is included by default in all the official releases of
+    PyPy (but it might be dropped in the future).
+
+  * a fork_ of the official numpy repository maintained by us and informally
+    called ``numpypy``: even more confusing, the name of the repo on bitbucket
+    is ``numpy``.  The main difference with the upstream numpy, is that it is
+    based on the micronumpy module written in RPython, instead of of
+    ``numpy.core.multiarray`` which is written in C.
+
+Moreover, it is also possible to install the upstream version of ``numpy``:
+its core is written in C and it runs on PyPy under the cpyext compatibility
+layer. This is what you get if you do ``pypy -m pip install numpy``.
+
+
+Should I install numpy or numpypy?
+-----------------------------------
+
+TL;DR version: you should use numpy. You can install it by doing ``pypy -m pip
+install numpy``.  You might also be interested in using the experimental `PyPy
+binary wheels`_ to save compilation time.
+
+The upstream ``numpy`` is written in C, and runs under the cpyext
+compatibility layer.  Nowadays, cpyext is mature enough that you can simply
+use the upstream ``numpy``, since it passes 99.9% of the test suite. At the
+moment of writing (October 2017) the main drawback of ``numpy`` is that cpyext
+is infamously slow, and thus it has worse performance compared to
+``numpypy``. However, we are actively working on improving it, as we expect to
+reach the same speed, eventually.
+
+On the other hand, ``numpypy`` is more JIT-friendly and very fast to call,
+since it is written in RPython: but it is a reimplementation, and it's hard to
+be completely compatible: over the years the project slowly matured and
+eventually it was able to call out to the LAPACK and BLAS libraries to speed
+matrix calculations, and reached around an 80% parity with the upstream
+numpy. However, 80% is far from 100%.  Since cpyext/numpy compatibility is
+progressing fast, we have discontinued support for ``numpypy``.
+
+.. _`started to reimplement`: https://morepypy.blogspot.co.il/2011/05/numpy-in-pypy-status-and-roadmap.html
+.. _fork: https://bitbucket.org/pypy/numpy
+.. _`PyPy binary wheels`: https://github.com/antocuni/pypy-wheels
+
+
 Is PyPy more clever than CPython about Tail Calls?
 --------------------------------------------------
 
diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py
--- a/pypy/module/_continuation/test/test_stacklet.py
+++ b/pypy/module/_continuation/test/test_stacklet.py
@@ -290,66 +290,87 @@
     def test_random_switching(self):
         from _continuation import continulet
         #
+        seen = []
+        #
         def t1(c1):
-            return c1.switch()
+            seen.append(3)
+            res = c1.switch()
+            seen.append(6)
+            return res
+        #
         def s1(c1, n):
+            seen.append(2)
             assert n == 123
             c2 = t1(c1)
-            return c1.switch('a') + 1
+            seen.append(7)
+            res = c1.switch('a') + 1
+            seen.append(10)
+            return res
         #
         def s2(c2, c1):
+            seen.append(5)
             res = c1.switch(c2)
+            seen.append(8)
             assert res == 'a'
-            return c2.switch('b') + 2
+            res = c2.switch('b') + 2
+            seen.append(12)
+            return res
         #
         def f():
+            seen.append(1)
             c1 = continulet(s1, 123)
             c2 = continulet(s2, c1)
             c1.switch()
+            seen.append(4)
             res = c2.switch()
+            seen.append(9)
             assert res == 'b'
             res = c1.switch(1000)
+            seen.append(11)
             assert res == 1001
-            return c2.switch(2000)
+            res = c2.switch(2000)
+            seen.append(13)
+            return res
         #
         res = f()
         assert res == 2002
+        assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
 
     def test_f_back(self):
         import sys
         from _continuation import continulet
         #
-        def g(c):
+        def bar(c):
             c.switch(sys._getframe(0))
             c.switch(sys._getframe(0).f_back)
             c.switch(sys._getframe(1))
             c.switch(sys._getframe(1).f_back)
-            assert sys._getframe(2) is f3.f_back
+            assert sys._getframe(2) is f3_foo.f_back
             c.switch(sys._getframe(2))
-        def f(c):
-            g(c)
+        def foo(c):
+            bar(c)
         #
-        c = continulet(f)
-        f1 = c.switch()
-        assert f1.f_code.co_name == 'g'
-        f2 = c.switch()
-        assert f2.f_code.co_name == 'f'
-        f3 = c.switch()
-        assert f3 is f2
-        assert f1.f_back is f3
+        c = continulet(foo)
+        f1_bar = c.switch()
+        assert f1_bar.f_code.co_name == 'bar'
+        f2_foo = c.switch()
+        assert f2_foo.f_code.co_name == 'foo'
+        f3_foo = c.switch()
+        assert f3_foo is f2_foo
+        assert f1_bar.f_back is f3_foo
         def main():
-            f4 = c.switch()
-            assert f4.f_code.co_name == 'main', repr(f4.f_code.co_name)
-            assert f3.f_back is f1    # not running, so a loop
+            f4_main = c.switch()
+            assert f4_main.f_code.co_name == 'main'
+            assert f3_foo.f_back is f1_bar    # not running, so a loop
         def main2():
-            f5 = c.switch()
-            assert f5.f_code.co_name == 'main2', repr(f5.f_code.co_name)
-            assert f3.f_back is f1    # not running, so a loop
+            f5_main2 = c.switch()
+            assert f5_main2.f_code.co_name == 'main2'
+            assert f3_foo.f_back is f1_bar    # not running, so a loop
         main()
         main2()
         res = c.switch()
         assert res is None
-        assert f3.f_back is None
+        assert f3_foo.f_back is None
 
     def test_traceback_is_complete(self):
         import sys
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -577,6 +577,7 @@
     'PyComplex_AsCComplex', 'PyComplex_FromCComplex',
 
     'PyObject_AsReadBuffer', 'PyObject_AsWriteBuffer', 'PyObject_CheckReadBuffer',
+    'PyBuffer_GetPointer', 'PyBuffer_ToContiguous', 'PyBuffer_FromContiguous',
 
     'PyOS_getsig', 'PyOS_setsig',
     'PyThread_get_thread_ident', 'PyThread_allocate_lock', 'PyThread_free_lock',
diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h
--- a/pypy/module/cpyext/include/object.h
+++ b/pypy/module/cpyext/include/object.h
@@ -317,6 +317,31 @@
 PyAPI_FUNC(int) PyObject_AsReadBuffer(PyObject *, const void **, Py_ssize_t *);
 PyAPI_FUNC(int) PyObject_AsWriteBuffer(PyObject *, void **, Py_ssize_t *);
 PyAPI_FUNC(int) PyObject_CheckReadBuffer(PyObject *);
+PyAPI_FUNC(void *) PyBuffer_GetPointer(Py_buffer *view, Py_ssize_t *indices);
+/* Get the memory area pointed to by the indices for the buffer given.
+   Note that view->ndim is the assumed size of indices
+*/
+
+PyAPI_FUNC(int) PyBuffer_ToContiguous(void *buf, Py_buffer *view,
+                                   Py_ssize_t len, char fort);
+PyAPI_FUNC(int) PyBuffer_FromContiguous(Py_buffer *view, void *buf,
+                                     Py_ssize_t len, char fort);
+/* Copy len bytes of data from the contiguous chunk of memory
+   pointed to by buf into the buffer exported by obj.  Return
+   0 on success and return -1 and raise a PyBuffer_Error on
+   error (i.e. the object does not have a buffer interface or
+   it is not working).
+
+   If fort is 'F' and the object is multi-dimensional,
+   then the data will be copied into the array in
+   Fortran-style (first dimension varies the fastest).  If
+   fort is 'C', then the data will be copied into the array
+   in C-style (last dimension varies the fastest).  If fort
+   is 'A', then it does not matter and the copy will be made
+   in whatever way is more efficient.
+
+*/
+
 
 #define PyObject_MALLOC         PyObject_Malloc
 #define PyObject_REALLOC        PyObject_Realloc
diff --git a/pypy/module/cpyext/src/abstract.c b/pypy/module/cpyext/src/abstract.c
--- a/pypy/module/cpyext/src/abstract.c
+++ b/pypy/module/cpyext/src/abstract.c
@@ -101,6 +101,163 @@
     return 0;
 }
 
+void*
+PyBuffer_GetPointer(Py_buffer *view, Py_ssize_t *indices)
+{
+    char* pointer;
+    int i;
+    pointer = (char *)view->buf;
+    for (i = 0; i < view->ndim; i++) {
+        pointer += view->strides[i]*indices[i];
+        if ((view->suboffsets != NULL) && (view->suboffsets[i] >= 0)) {
+            pointer = *((char**)pointer) + view->suboffsets[i];
+        }
+    }
+    return (void*)pointer;
+}
+
+void
+_Py_add_one_to_index_F(int nd, Py_ssize_t *index, const Py_ssize_t *shape)
+{
+    int k;
+
+    for (k=0; k<nd; k++) {
+        if (index[k] < shape[k]-1) {
+            index[k]++;
+            break;
+        }
+        else {
+            index[k] = 0;
+        }
+    }
+}
+
+void
+_Py_add_one_to_index_C(int nd, Py_ssize_t *index, const Py_ssize_t *shape)
+{
+    int k;
+
+    for (k=nd-1; k>=0; k--) {
+        if (index[k] < shape[k]-1) {
+            index[k]++;
+            break;
+        }
+        else {
+            index[k] = 0;
+        }
+    }
+}
+
+  /* view is not checked for consistency in either of these.  It is
+     assumed that the size of the buffer is view->len in
+     view->len / view->itemsize elements.
+  */
+
+int
+PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort)
+{
+    int k;
+    void (*addone)(int, Py_ssize_t *, const Py_ssize_t *);
+    Py_ssize_t *indices, elements;
+    char *dest, *ptr;
+
+    if (len > view->len) {
+        len = view->len;
+    }
+
+    if (PyBuffer_IsContiguous(view, fort)) {
+        /* simplest copy is all that is needed */
+        memcpy(buf, view->buf, len);
+        return 0;
+    }
+
+    /* Otherwise a more elaborate scheme is needed */
+
+    /* view->ndim <= 64 */
+    indices = (Py_ssize_t *)PyMem_Malloc(sizeof(Py_ssize_t)*(view->ndim));
+    if (indices == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    for (k=0; k<view->ndim;k++) {
+        indices[k] = 0;
+    }
+
+    if (fort == 'F') {
+        addone = _Py_add_one_to_index_F;
+    }
+    else {
+        addone = _Py_add_one_to_index_C;
+    }
+    dest = buf;
+    /* XXX : This is not going to be the fastest code in the world
+             several optimizations are possible.
+     */
+    elements = len / view->itemsize;
+    while (elements--) {
+        ptr = PyBuffer_GetPointer(view, indices);
+        memcpy(dest, ptr, view->itemsize);
+        dest += view->itemsize;
+        addone(view->ndim, indices, view->shape);
+    }
+    PyMem_Free(indices);
+    return 0;
+}
+
+int
+PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort)
+{
+    int k;
+    void (*addone)(int, Py_ssize_t *, const Py_ssize_t *);
+    Py_ssize_t *indices, elements;
+    char *src, *ptr;
+
+    if (len > view->len) {
+        len = view->len;
+    }
+
+    if (PyBuffer_IsContiguous(view, fort)) {
+        /* simplest copy is all that is needed */
+        memcpy(view->buf, buf, len);
+        return 0;
+    }
+
+    /* Otherwise a more elaborate scheme is needed */
+
+    /* view->ndim <= 64 */
+    indices = (Py_ssize_t *)PyMem_Malloc(sizeof(Py_ssize_t)*(view->ndim));
+    if (indices == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    for (k=0; k<view->ndim;k++) {
+        indices[k] = 0;
+    }
+
+    if (fort == 'F') {
+        addone = _Py_add_one_to_index_F;
+    }
+    else {
+        addone = _Py_add_one_to_index_C;
+    }
+    src = buf;
+    /* XXX : This is not going to be the fastest code in the world
+             several optimizations are possible.
+     */
+    elements = len / view->itemsize;
+    while (elements--) {
+        ptr = PyBuffer_GetPointer(view, indices);
+        memcpy(ptr, src, view->itemsize);
+        src += view->itemsize;
+        addone(view->ndim, indices, view->shape);
+    }
+
+    PyMem_Free(indices);
+    return 0;
+}
+
+
+
 /* Buffer C-API for Python 3.0 */
 
 int
diff --git a/pypy/module/cpyext/test/test_memoryobject.py b/pypy/module/cpyext/test/test_memoryobject.py
--- a/pypy/module/cpyext/test/test_memoryobject.py
+++ b/pypy/module/cpyext/test/test_memoryobject.py
@@ -115,7 +115,36 @@
                 view = PyMemoryView_GET_BUFFER(memoryview);
                 Py_DECREF(memoryview);
                 return PyLong_FromLong(view->len / view->itemsize);
-            """)])
+            """),
+            ("test_contiguous", "METH_O",
+             """
+                Py_buffer* view;
+                PyObject * memoryview;
+                void * buf = NULL;
+                int ret;
+                Py_ssize_t len;
+                memoryview = PyMemoryView_FromObject(args);
+                if (memoryview == NULL)
+                    return NULL;
+                view = PyMemoryView_GET_BUFFER(memoryview);
+                Py_DECREF(memoryview);
+                len = view->len;
+                if (len == 0)
+                    return NULL;
+                buf = malloc(len);
+                ret = PyBuffer_ToContiguous(buf, view, view->len, 'A');
+                if (ret != 0)
+                {
+                    free(buf);
+                    return NULL;
+                }
+                ret = PyBuffer_FromContiguous(view, buf, view->len, 'A');
+                free(buf);
+                if (ret != 0)
+                    return NULL;
+                 Py_RETURN_NONE;
+             """),
+            ])
         module = self.import_module(name='buffer_test')
         arr = module.PyMyArray(10)
         ten = foo.get_len(arr)
@@ -124,6 +153,7 @@
         assert ten == 10
         ten = foo.test_buffer(arr)
         assert ten == 10
+        foo.test_contiguous(arr)
 
     def test_releasebuffer(self):
         module = self.import_extension('foo', [
diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py
--- a/pypy/module/pyexpat/interp_pyexpat.py
+++ b/pypy/module/pyexpat/interp_pyexpat.py
@@ -808,7 +808,7 @@
                     w_encoding)
 
     if space.is_none(w_namespace_separator):
-        namespace_separator = 0
+        namespace_separator = -1
     elif space.isinstance_w(w_namespace_separator, space.w_text):
         separator = space.text_w(w_namespace_separator)
         if len(separator) == 0:
@@ -831,7 +831,7 @@
     elif space.is_w(w_intern, space.w_None):
         w_intern = None
 
-    if namespace_separator:
+    if namespace_separator >= 0:
         xmlparser = XML_ParserCreateNS(
             encoding,
             rffi.cast(rffi.CHAR, namespace_separator))
diff --git a/pypy/module/pyexpat/test/test_parser.py b/pypy/module/pyexpat/test/test_parser.py
--- a/pypy/module/pyexpat/test/test_parser.py
+++ b/pypy/module/pyexpat/test/test_parser.py
@@ -59,7 +59,7 @@
                 p.CharacterDataHandler = lambda s: data.append(s)
                 encoding = encoding_arg is None and 'utf-8' or encoding_arg
 
-                res = p.Parse(u"<xml>\u00f6</xml>".encode(encoding), isfinal=True)
+                res = p.Parse(u"<xml>\u00f6</xml>".encode(encoding), True)
                 assert res == 1
                 assert data == [u"\u00f6"]
 
@@ -188,6 +188,34 @@
             p.ParseFile(fake_reader)
             assert fake_reader.read_count == 4
 
+    def test_entities(self):
+        import pyexpat
+        parser = pyexpat.ParserCreate(None, "")
+
+        def startElement(tag, attrs):
+            assert tag == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#RDF'
+            assert attrs == {
+                'http://www.w3.org/XML/1998/namespacebase':
+                'http://www.semanticweb.org/jiba/ontologies/2017/0/test'}
+        parser.StartElementHandler = startElement
+        parser.Parse("""<?xml version="1.0"?>
+
+        <!DOCTYPE rdf:RDF [
+        <!ENTITY owl "http://www.w3.org/2002/07/owl#" >
+        <!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" >
+        <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#" >
+        <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" >
+        ]>
+
+        <rdf:RDF xmlns="http://www.semanticweb.org/jiba/ontologies/2017/0/test#"
+          xml:base="http://www.semanticweb.org/jiba/ontologies/2017/0/test"
+          xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+          xmlns:owl="http://www.w3.org/2002/07/owl#"
+          xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
+          xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+        </rdf:RDF>
+        """, True)
+
 class AppTestPyexpat2:
     spaceconfig = dict(usemodules=['pyexpat', 'itertools', '_socket',
                                    'time', 'struct', 'binascii'])
diff --git a/pypy/module/test_lib_pypy/test_sqlite3.py b/pypy/module/test_lib_pypy/test_sqlite3.py
--- a/pypy/module/test_lib_pypy/test_sqlite3.py
+++ b/pypy/module/test_lib_pypy/test_sqlite3.py
@@ -228,6 +228,14 @@
         cur.execute("create table test(a)")
         cur.executemany("insert into test values (?)", [[1], [2], [3]])
         assert cur.lastrowid is None
+        # issue 2682
+        cur.execute('''insert
+                    into test 
+                    values (?)
+                    ''', (1, ))
+        assert cur.lastrowid is not None
+        cur.execute('''insert\t into test values (?) ''', (1, ))
+        assert cur.lastrowid is not None
 
     def test_authorizer_bad_value(self, con):
         def authorizer_cb(action, arg1, arg2, dbname, source):