[Python-3000-checkins] r54799 - in python/branches/p3yk: Include/bytesobject.h Lib/test/test_bytes.py Objects/bytesobject.c Objects/object.c Objects/stringobject.c

guido.van.rossum python-3000-checkins at python.org
Fri Apr 13 03:39:41 CEST 2007


Author: guido.van.rossum
Date: Fri Apr 13 03:39:34 2007
New Revision: 54799

Modified:
   python/branches/p3yk/Include/bytesobject.h
   python/branches/p3yk/Lib/test/test_bytes.py
   python/branches/p3yk/Objects/bytesobject.c
   python/branches/p3yk/Objects/object.c
   python/branches/p3yk/Objects/stringobject.c
Log:
Rough and dirty job -- allow concatenation of bytes and arbitrary
buffer-supporting objects (Unicode always excluded), and also of
str and bytes.
(For some reason u"" + b"" doesn't fail; I'll investigate later.)


Modified: python/branches/p3yk/Include/bytesobject.h
==============================================================================
--- python/branches/p3yk/Include/bytesobject.h	(original)
+++ python/branches/p3yk/Include/bytesobject.h	Fri Apr 13 03:39:34 2007
@@ -34,6 +34,7 @@
 
 /* Direct API functions */
 PyAPI_FUNC(PyObject *) PyBytes_FromObject(PyObject *);
+PyAPI_FUNC(PyObject *) PyBytes_Concat(PyObject *, PyObject *);
 PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t);
 PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *);
 PyAPI_FUNC(char *) PyBytes_AsString(PyObject *);

Modified: python/branches/p3yk/Lib/test/test_bytes.py
==============================================================================
--- python/branches/p3yk/Lib/test/test_bytes.py	(original)
+++ python/branches/p3yk/Lib/test/test_bytes.py	Fri Apr 13 03:39:34 2007
@@ -225,7 +225,7 @@
                 # Skip step 0 (invalid)
                 for step in indices[1:]:
                     self.assertEqual(b[start:stop:step], bytes(L[start:stop:step]))
-        
+
     def test_regexps(self):
         def by(s):
             return bytes(map(ord, s))
@@ -298,7 +298,7 @@
 
         b[3:5] = [3, 4, 5, 6]
         self.assertEqual(b, bytes(range(10)))
-        
+
         b[3:0] = [42, 42, 42]
         self.assertEqual(b, bytes([0, 1, 2, 42, 42, 42, 3, 4, 5, 6, 7, 8, 9]))
 
@@ -317,7 +317,7 @@
                     L[start:stop:step] = data
                     b[start:stop:step] = data
                     self.assertEquals(b, bytes(L))
-                    
+
                     del L[start:stop:step]
                     del b[start:stop:step]
                     self.assertEquals(b, bytes(L))
@@ -371,8 +371,10 @@
         b1 = bytes("abc")
         b2 = bytes("def")
         self.assertEqual(b1 + b2, bytes("abcdef"))
-        self.assertRaises(TypeError, lambda: b1 + "def")
-        self.assertRaises(TypeError, lambda: "abc" + b2)
+        self.assertEqual(b1 + "def", bytes("abcdef"))
+        self.assertEqual("def" + b1, bytes("defabc"))
+        self.assertRaises(TypeError, lambda: b1 + u"def")
+        ##self.assertRaises(TypeError, lambda: u"abc" + b2)  # XXX FIXME
 
     def test_repeat(self):
         b = bytes("abc")
@@ -393,6 +395,14 @@
         self.assertEqual(b, bytes("abcdef"))
         self.assertEqual(b, b1)
         self.failUnless(b is b1)
+        b += "xyz"
+        self.assertEqual(b, b"abcdefxyz")
+        try:
+            b += u""
+        except TypeError:
+            pass
+        else:
+            self.fail("bytes += unicode didn't raise TypeError")
 
     def test_irepeat(self):
         b = bytes("abc")
@@ -490,7 +500,7 @@
         a.extend(a)
         self.assertEqual(a, orig + orig)
         self.assertEqual(a[5:], orig)
-    
+
     def test_remove(self):
         b = b'hello'
         b.remove(ord('l'))
@@ -643,14 +653,36 @@
                     q = pm.loads(ps)
                     self.assertEqual(b, q)
 
+    def test_strip(self):
+        b = b'mississippi'
+        self.assertEqual(b.strip(b'i'), b'mississipp')
+        self.assertEqual(b.strip(b'm'), b'ississippi')
+        self.assertEqual(b.strip(b'pi'), b'mississ')
+        self.assertEqual(b.strip(b'im'), b'ssissipp')
+        self.assertEqual(b.strip(b'pim'), b'ssiss')
+
+    def test_lstrip(self):
+        b = b'mississippi'
+        self.assertEqual(b.lstrip(b'i'), b'mississippi')
+        self.assertEqual(b.lstrip(b'm'), b'ississippi')
+        self.assertEqual(b.lstrip(b'pi'), b'mississippi')
+        self.assertEqual(b.lstrip(b'im'), b'ssissippi')
+        self.assertEqual(b.lstrip(b'pim'), b'ssissippi')
+
+    def test_rstrip(self):
+        b = b'mississippi'
+        self.assertEqual(b.rstrip(b'i'), b'mississipp')
+        self.assertEqual(b.rstrip(b'm'), b'mississippi')
+        self.assertEqual(b.rstrip(b'pi'), b'mississ')
+        self.assertEqual(b.rstrip(b'im'), b'mississipp')
+        self.assertEqual(b.rstrip(b'pim'), b'mississ')
+
     # Optimizations:
     # __iter__? (optimization)
     # __reversed__? (optimization)
 
-    # XXX Some string methods?  (Those that don't use character properties)
-    # lstrip, rstrip, strip?? (currently un-pepped)
-    # join
-    
+    # XXX More string methods?  (Those that don't use character properties)
+
     # There are tests in string_tests.py that are more
     # comprehensive for things like split, partition, etc.
     # Unfortunately they are all bundled with tests that
@@ -675,7 +707,7 @@
             getattr(bytes, methodname),
             object,
             *args
-        )    
+        )
 
     # Currently the bytes containment testing uses a single integer
     # value. This may not be the final design, but until then the

Modified: python/branches/p3yk/Objects/bytesobject.c
==============================================================================
--- python/branches/p3yk/Objects/bytesobject.c	(original)
+++ python/branches/p3yk/Objects/bytesobject.c	Fri Apr 13 03:39:34 2007
@@ -31,7 +31,10 @@
 
 /* end nullbytes support */
 
-static int _getbytevalue(PyObject* arg, int *value)
+/* Helpers */
+
+static int
+_getbytevalue(PyObject* arg, int *value)
 {
     PyObject *intarg = PyNumber_Int(arg);
     if (! intarg)
@@ -45,6 +48,24 @@
     return 1;
 }
 
+Py_ssize_t
+_getbuffer(PyObject *obj, void **ptr)
+{
+    PyBufferProcs *buffer = obj->ob_type->tp_as_buffer;
+
+    if (buffer == NULL ||
+        PyUnicode_Check(obj) ||
+        buffer->bf_getreadbuffer == NULL ||
+        buffer->bf_getsegcount == NULL ||
+        buffer->bf_getsegcount(obj, NULL) != 1)
+    {
+        *ptr = NULL;
+        return -1;
+    }
+
+    return buffer->bf_getreadbuffer(obj, 0, ptr);
+}
+
 /* Direct API functions */
 
 PyObject *
@@ -140,56 +161,63 @@
     return 0;
 }
 
-/* Functions stuffed into the type object */
-
-static Py_ssize_t
-bytes_length(PyBytesObject *self)
-{
-    return self->ob_size;
-}
-
-static PyObject *
-bytes_concat(PyBytesObject *self, PyObject *other)
+PyObject *
+PyBytes_Concat(PyObject *a, PyObject *b)
 {
+    Py_ssize_t asize, bsize, size;
+    void *aptr, *bptr;
     PyBytesObject *result;
-    Py_ssize_t mysize;
-    Py_ssize_t size;
 
-    if (!PyBytes_Check(other)) {
-        PyErr_Format(PyExc_TypeError,
-                     "can't concat bytes to %.100s", other->ob_type->tp_name);
+    asize = _getbuffer(a, &aptr);
+    bsize = _getbuffer(b, &bptr);
+    if (asize < 0 || bsize < 0) {
+        PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
+                     a->ob_type->tp_name, b->ob_type->tp_name);
         return NULL;
     }
 
-    mysize = self->ob_size;
-    size = mysize + ((PyBytesObject *)other)->ob_size;
+    size = asize + bsize;
     if (size < 0)
         return PyErr_NoMemory();
+
     result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
     if (result != NULL) {
-        memcpy(result->ob_bytes, self->ob_bytes, self->ob_size);
-        memcpy(result->ob_bytes + self->ob_size,
-               ((PyBytesObject *)other)->ob_bytes,
-               ((PyBytesObject *)other)->ob_size);
+        memcpy(result->ob_bytes, aptr, asize);
+        memcpy(result->ob_bytes + asize, bptr, bsize);
     }
     return (PyObject *)result;
 }
 
+/* Functions stuffed into the type object */
+
+static Py_ssize_t
+bytes_length(PyBytesObject *self)
+{
+    return self->ob_size;
+}
+
+static PyObject *
+bytes_concat(PyBytesObject *self, PyObject *other)
+{
+    return PyBytes_Concat((PyObject *)self, other);
+}
+
 static PyObject *
 bytes_iconcat(PyBytesObject *self, PyObject *other)
 {
-    Py_ssize_t mysize;
+    void *optr;
     Py_ssize_t osize;
+    Py_ssize_t mysize;
     Py_ssize_t size;
 
-    if (!PyBytes_Check(other)) {
+    osize = _getbuffer(other, &optr);
+    if (osize < 0) {
         PyErr_Format(PyExc_TypeError,
                      "can't concat bytes to %.100s", other->ob_type->tp_name);
         return NULL;
     }
 
     mysize = self->ob_size;
-    osize = ((PyBytesObject *)other)->ob_size;
     size = mysize + osize;
     if (size < 0)
         return PyErr_NoMemory();
@@ -197,7 +225,7 @@
         self->ob_size = size;
     else if (PyBytes_Resize((PyObject *)self, size) < 0)
         return NULL;
-    memcpy(self->ob_bytes + mysize, ((PyBytesObject *)other)->ob_bytes, osize);
+    memcpy(self->ob_bytes + mysize, optr, osize);
     Py_INCREF(self);
     return (PyObject *)self;
 }
@@ -366,15 +394,10 @@
 bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
                PyObject *values)
 {
-    int avail;
-    int needed;
-    char *bytes;
+    Py_ssize_t avail, needed;
+    void *bytes;
 
-    if (values == NULL) {
-        bytes = NULL;
-        needed = 0;
-    }
-    else if (values == (PyObject *)self || !PyBytes_Check(values)) {
+    if (values == (PyObject *)self) {
         /* Make a copy an call this function recursively */
         int err;
         values = PyBytes_FromObject(values);
@@ -384,10 +407,19 @@
         Py_DECREF(values);
         return err;
     }
+    if (values == NULL) {
+        /* del b[lo:hi] */
+        bytes = NULL;
+        needed = 0;
+    }
     else {
-        assert(PyBytes_Check(values));
-        bytes = ((PyBytesObject *)values)->ob_bytes;
-        needed = ((PyBytesObject *)values)->ob_size;
+        needed = _getbuffer(values, &bytes);
+        if (needed < 0) {
+            PyErr_Format(PyExc_TypeError,
+                         "can't set bytes slice from %.100s",
+                         values->ob_type->tp_name);
+            return -1;
+        }
     }
 
     if (lo < 0)
@@ -840,42 +872,26 @@
 static PyObject *
 bytes_richcompare(PyObject *self, PyObject *other, int op)
 {
-    PyBufferProcs *self_buffer, *other_buffer;
     Py_ssize_t self_size, other_size;
     void *self_bytes, *other_bytes;
     PyObject *res;
     Py_ssize_t minsize;
     int cmp;
 
-    /* For backwards compatibility, bytes can be compared to anything that
-       supports the (binary) buffer API.  Except Unicode. */
-
-    if (PyUnicode_Check(self) || PyUnicode_Check(other)) {
-        Py_INCREF(Py_NotImplemented);
-        return Py_NotImplemented;
-    }
+    /* Bytes can be compared to anything that supports the (binary) buffer
+       API.  Except Unicode. */
 
-    self_buffer = self->ob_type->tp_as_buffer;
-    if (self_buffer == NULL ||
-        self_buffer->bf_getreadbuffer == NULL ||
-        self_buffer->bf_getsegcount == NULL ||
-        self_buffer->bf_getsegcount(self, NULL) != 1)
-    {
+    self_size = _getbuffer(self, &self_bytes);
+    if (self_size < 0) {
         Py_INCREF(Py_NotImplemented);
         return Py_NotImplemented;
     }
-    self_size = self_buffer->bf_getreadbuffer(self, 0, &self_bytes);
 
-    other_buffer = other->ob_type->tp_as_buffer;
-    if (other_buffer == NULL ||
-        other_buffer->bf_getreadbuffer == NULL ||
-        other_buffer->bf_getsegcount == NULL ||
-        other_buffer->bf_getsegcount(self, NULL) != 1)
-    {
+    other_size = _getbuffer(other, &other_bytes);
+    if (other_size < 0) {
         Py_INCREF(Py_NotImplemented);
         return Py_NotImplemented;
     }
-    other_size = other_buffer->bf_getreadbuffer(other, 0, &other_bytes);
 
     if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
         /* Shortcut: if the lengths differ, the objects differ */
@@ -2435,6 +2451,93 @@
     Py_RETURN_NONE;
 }
 
+/* XXX These two helpers could be optimized if argsize == 1 */
+
+Py_ssize_t
+lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
+              void *argptr, Py_ssize_t argsize)
+{
+    Py_ssize_t i = 0;
+    while (i < mysize && memchr(argptr, myptr[i], argsize))
+        i++;
+    return i;
+}
+
+Py_ssize_t
+rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
+              void *argptr, Py_ssize_t argsize)
+{
+    Py_ssize_t i = mysize - 1;
+    while (i >= 0 && memchr(argptr, myptr[i], argsize))
+        i--;
+    return i + 1;
+}
+
+PyDoc_STRVAR(strip__doc__,
+"B.strip(bytes) -> bytes\n\
+\n\
+Strip leading and trailing bytes contained in the argument.");
+static PyObject *
+bytes_strip(PyBytesObject *self, PyObject *arg)
+{
+    Py_ssize_t left, right, mysize, argsize;
+    void *myptr, *argptr;
+    if (arg == NULL || !PyBytes_Check(arg)) {
+        PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
+        return NULL;
+    }
+    myptr = self->ob_bytes;
+    mysize = self->ob_size;
+    argptr = ((PyBytesObject *)arg)->ob_bytes;
+    argsize = ((PyBytesObject *)arg)->ob_size;
+    left = lstrip_helper(myptr, mysize, argptr, argsize);
+    right = rstrip_helper(myptr, mysize, argptr, argsize);
+    return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
+}
+
+PyDoc_STRVAR(lstrip__doc__,
+"B.lstrip(bytes) -> bytes\n\
+\n\
+Strip leading bytes contained in the argument.");
+static PyObject *
+bytes_lstrip(PyBytesObject *self, PyObject *arg)
+{
+    Py_ssize_t left, right, mysize, argsize;
+    void *myptr, *argptr;
+    if (arg == NULL || !PyBytes_Check(arg)) {
+        PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
+        return NULL;
+    }
+    myptr = self->ob_bytes;
+    mysize = self->ob_size;
+    argptr = ((PyBytesObject *)arg)->ob_bytes;
+    argsize = ((PyBytesObject *)arg)->ob_size;
+    left = lstrip_helper(myptr, mysize, argptr, argsize);
+    right = mysize;
+    return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
+}
+
+PyDoc_STRVAR(rstrip__doc__,
+"B.rstrip(bytes) -> bytes\n\
+\n\
+Strip trailing bytes contained in the argument.");
+static PyObject *
+bytes_rstrip(PyBytesObject *self, PyObject *arg)
+{
+    Py_ssize_t left, right, mysize, argsize;
+    void *myptr, *argptr;
+    if (arg == NULL || !PyBytes_Check(arg)) {
+        PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
+        return NULL;
+    }
+    myptr = self->ob_bytes;
+    mysize = self->ob_size;
+    argptr = ((PyBytesObject *)arg)->ob_bytes;
+    argsize = ((PyBytesObject *)arg)->ob_size;
+    left = 0;
+    right = rstrip_helper(myptr, mysize, argptr, argsize);
+    return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
+}
 
 PyDoc_STRVAR(decode_doc,
 "B.decode([encoding[,errors]]) -> unicode obect.\n\
@@ -2659,6 +2762,9 @@
     {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
     {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
     {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
+    {"strip", (PyCFunction)bytes_strip, METH_O, strip__doc__},
+    {"lstrip", (PyCFunction)bytes_lstrip, METH_O, lstrip__doc__},
+    {"rstrip", (PyCFunction)bytes_rstrip, METH_O, rstrip__doc__},
     {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
     {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
     {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,

Modified: python/branches/p3yk/Objects/object.c
==============================================================================
--- python/branches/p3yk/Objects/object.c	(original)
+++ python/branches/p3yk/Objects/object.c	Fri Apr 13 03:39:34 2007
@@ -1347,7 +1347,7 @@
 
 /* Helper for PyObject_Dir without arguments: returns the local scope. */
 static PyObject *
-_dir_locals()
+_dir_locals(void)
 {
 	PyObject *names;
 	PyObject *locals = PyEval_GetLocals();
@@ -1892,4 +1892,3 @@
 #ifdef __cplusplus
 }
 #endif
-

Modified: python/branches/p3yk/Objects/stringobject.c
==============================================================================
--- python/branches/p3yk/Objects/stringobject.c	(original)
+++ python/branches/p3yk/Objects/stringobject.c	Fri Apr 13 03:39:34 2007
@@ -948,6 +948,8 @@
 		if (PyUnicode_Check(bb))
 		    return PyUnicode_Concat((PyObject *)a, bb);
 #endif
+                if (PyBytes_Check(bb))
+			return PyBytes_Concat((PyObject *)a, bb);
 		PyErr_Format(PyExc_TypeError,
 			     "cannot concatenate 'str' and '%.200s' objects",
 			     bb->ob_type->tp_name);


More information about the Python-3000-checkins mailing list