[Python-checkins] r59420 - in python/trunk: Misc/NEWS Objects/stringobject.c

skip.montanaro python-checkins at python.org
Sat Dec 8 16:33:25 CET 2007


Author: skip.montanaro
Date: Sat Dec  8 16:33:24 2007
New Revision: 59420

Modified:
   python/trunk/Misc/NEWS
   python/trunk/Objects/stringobject.c
Log:
When splitting, avoid making a copy of the string if the split doesn't find
anything (issue 1538).


Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Sat Dec  8 16:33:24 2007
@@ -12,6 +12,9 @@
 Core and builtins
 -----------------
 
+- Issue #1538: Avoid copying string in split/rsplit if the split
+  char is not found.
+
 - Issue #1553: An erroneous __length_hint__ can make list() raise a
   SystemError.
 

Modified: python/trunk/Objects/stringobject.c
==============================================================================
--- python/trunk/Objects/stringobject.c	(original)
+++ python/trunk/Objects/stringobject.c	Sat Dec  8 16:33:24 2007
@@ -1403,8 +1403,9 @@
 #define RSKIP_NONSPACE(s, i)     { while (i>=0  && !isspace(Py_CHARMASK(s[i]))) i--; }
 
 Py_LOCAL_INLINE(PyObject *)
-split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
+split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
 {
+	const char *s = PyString_AS_STRING(self);
 	Py_ssize_t i, j, count=0;
 	PyObject *str;
 	PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
@@ -1419,6 +1420,13 @@
 		if (i==len) break;
 		j = i; i++;
 		SKIP_NONSPACE(s, i, len);
+		if (j == 0 && i == len && PyString_CheckExact(self)) {
+			/* No whitespace in self, so just use it as list[0] */
+			Py_INCREF(self);
+			PyList_SET_ITEM(list, 0, (PyObject *)self);
+			count++;
+			break;
+		}
 		SPLIT_ADD(s, j, i);
 	}
 
@@ -1437,8 +1445,9 @@
 }
 
 Py_LOCAL_INLINE(PyObject *)
-split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
+split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
 {
+	const char *s = PyString_AS_STRING(self);
 	register Py_ssize_t i, j, count=0;
 	PyObject *str;
 	PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
@@ -1457,7 +1466,13 @@
 			}
 		}
 	}
-	if (i <= len) {
+	if (i == 0 && count == 0 && PyString_CheckExact(self)) {
+		/* ch not in self, so just use self as list[0] */
+		Py_INCREF(self);
+		PyList_SET_ITEM(list, 0, (PyObject *)self);
+		count++;
+	}
+	else if (i <= len) {
 		SPLIT_ADD(s, i, len);
 	}
 	FIX_PREALLOC_SIZE(list);
@@ -1492,7 +1507,7 @@
 	if (maxsplit < 0)
 		maxsplit = PY_SSIZE_T_MAX;
 	if (subobj == Py_None)
-		return split_whitespace(s, len, maxsplit);
+		return split_whitespace(self, len, maxsplit);
 	if (PyString_Check(subobj)) {
 		sub = PyString_AS_STRING(subobj);
 		n = PyString_GET_SIZE(subobj);
@@ -1509,7 +1524,7 @@
 		return NULL;
 	}
 	else if (n == 1)
-		return split_char(s, len, sub[0], maxsplit);
+		return split_char(self, len, sub[0], maxsplit);
 
 	list = PyList_New(PREALLOC_SIZE(maxsplit));
 	if (list == NULL)
@@ -1609,8 +1624,9 @@
 }
 
 Py_LOCAL_INLINE(PyObject *)
-rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
+rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
 {
+	const char *s = PyString_AS_STRING(self);
 	Py_ssize_t i, j, count=0;
 	PyObject *str;
 	PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
@@ -1625,6 +1641,13 @@
 		if (i<0) break;
 		j = i; i--;
 		RSKIP_NONSPACE(s, i);
+		if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
+			/* No whitespace in self, so just use it as list[0] */
+			Py_INCREF(self);
+			PyList_SET_ITEM(list, 0, (PyObject *)self);
+			count++;
+			break;
+		}
 		SPLIT_ADD(s, i + 1, j + 1);
 	}
 	if (i >= 0) {
@@ -1645,8 +1668,9 @@
 }
 
 Py_LOCAL_INLINE(PyObject *)
-rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
+rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
 {
+	const char *s = PyString_AS_STRING(self);
 	register Py_ssize_t i, j, count=0;
 	PyObject *str;
 	PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
@@ -1664,7 +1688,13 @@
 			}
 		}
 	}
-	if (j >= -1) {
+	if (i < 0 && count == 0 && PyString_CheckExact(self)) {
+		/* ch not in self, so just use self as list[0] */
+		Py_INCREF(self);
+		PyList_SET_ITEM(list, 0, (PyObject *)self);
+		count++;
+	}
+	else if (j >= -1) {
 		SPLIT_ADD(s, 0, j + 1);
 	}
 	FIX_PREALLOC_SIZE(list);
@@ -1691,7 +1721,7 @@
 {
 	Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
 	Py_ssize_t maxsplit = -1, count=0;
-	const char *s = PyString_AS_STRING(self), *sub;
+	const char *s, *sub;
 	PyObject *list, *str, *subobj = Py_None;
 
 	if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
@@ -1699,7 +1729,7 @@
 	if (maxsplit < 0)
 		maxsplit = PY_SSIZE_T_MAX;
 	if (subobj == Py_None)
-		return rsplit_whitespace(s, len, maxsplit);
+		return rsplit_whitespace(self, len, maxsplit);
 	if (PyString_Check(subobj)) {
 		sub = PyString_AS_STRING(subobj);
 		n = PyString_GET_SIZE(subobj);
@@ -1716,7 +1746,7 @@
 		return NULL;
 	}
 	else if (n == 1)
-		return rsplit_char(s, len, sub[0], maxsplit);
+		return rsplit_char(self, len, sub[0], maxsplit);
 
 	list = PyList_New(PREALLOC_SIZE(maxsplit));
 	if (list == NULL)
@@ -1725,6 +1755,7 @@
 	j = len;
 	i = j - n;
 
+	s = PyString_AS_STRING(self);
 	while ( (i >= 0) && (maxsplit-- > 0) ) {
 		for (; i>=0; i--) {
 			if (Py_STRING_MATCH(s, i, sub, n)) {


More information about the Python-checkins mailing list