[Python-checkins] r46088 - in python/branches/rjones-funccall: Include/unicodeobject.h Lib/gzip.py Objects/stringobject.c Objects/unicodeobject.c

richard.jones python-checkins at python.org
Tue May 23 11:06:31 CEST 2006


Author: richard.jones
Date: Tue May 23 11:06:29 2006
New Revision: 46088

Modified:
   python/branches/rjones-funccall/   (props changed)
   python/branches/rjones-funccall/Include/unicodeobject.h
   python/branches/rjones-funccall/Lib/gzip.py
   python/branches/rjones-funccall/Objects/stringobject.c
   python/branches/rjones-funccall/Objects/unicodeobject.c
Log:
merge from trunk

Modified: python/branches/rjones-funccall/Include/unicodeobject.h
==============================================================================
--- python/branches/rjones-funccall/Include/unicodeobject.h	(original)
+++ python/branches/rjones-funccall/Include/unicodeobject.h	Tue May 23 11:06:29 2006
@@ -352,12 +352,20 @@
         Py_UNICODE_ISDIGIT(ch) || \
         Py_UNICODE_ISNUMERIC(ch))
 
-#define Py_UNICODE_COPY(target, source, length)\
-    (memcpy((target), (source), (length)*sizeof(Py_UNICODE)))
+/* memcpy has a considerable setup overhead on many platforms; use a
+   loop for short strings (the "16" below is pretty arbitrary) */
+#define Py_UNICODE_COPY(target, source, length) do\
+    {Py_ssize_t i_; Py_UNICODE *t_ = (target); const Py_UNICODE *s_ = (source);\
+      if (length > 16)\
+        memcpy(t_, s_, (length)*sizeof(Py_UNICODE));\
+      else\
+        for (i_ = 0; i_ < (length); i_++) t_[i_] = s_[i_];\
+    } while (0)
 
 #define Py_UNICODE_FILL(target, value, length) do\
-    {int i; for (i = 0; i < (length); i++) (target)[i] = (value);}\
-    while (0)
+    {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
+        for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
+    } while (0)
 
 #define Py_UNICODE_MATCH(string, offset, substring)\
     ((*((string)->str + (offset)) == *((substring)->str)) &&\

Modified: python/branches/rjones-funccall/Lib/gzip.py
==============================================================================
--- python/branches/rjones-funccall/Lib/gzip.py	(original)
+++ python/branches/rjones-funccall/Lib/gzip.py	Tue May 23 11:06:29 2006
@@ -107,6 +107,8 @@
             self.extrabuf = ""
             self.extrasize = 0
             self.filename = filename
+            # Starts small, scales exponentially
+            self.min_readsize = 100
 
         elif mode[0:1] == 'w' or mode[0:1] == 'a':
             self.mode = WRITE
@@ -381,32 +383,35 @@
             self.read(count % 1024)
 
     def readline(self, size=-1):
-        if size < 0: size = sys.maxint
+        if size < 0:
+            size = sys.maxint
+            readsize = self.min_readsize
+        else:
+            readsize = size
         bufs = []
-        readsize = min(100, size)    # Read from the file in small chunks
-        while True:
-            if size == 0:
-                return "".join(bufs) # Return resulting line
-
+        while size != 0:
             c = self.read(readsize)
             i = c.find('\n')
-            if size is not None:
-                # We set i=size to break out of the loop under two
-                # conditions: 1) there's no newline, and the chunk is
-                # larger than size, or 2) there is a newline, but the
-                # resulting line would be longer than 'size'.
-                if i==-1 and len(c) > size: i=size-1
-                elif size <= i: i = size -1
+
+            # We set i=size-1 to break out of the loop under two
+            # conditions: 1) there's no newline, and the chunk is
+            # larger than size, or 2) there is a newline, but the
+            # resulting line would be longer than 'size'.
+            if (size <= i) or (i == -1 and len(c) > size):
+                i = size - 1
 
             if i >= 0 or c == '':
-                bufs.append(c[:i+1])    # Add portion of last chunk
-                self._unread(c[i+1:])   # Push back rest of chunk
-                return ''.join(bufs)    # Return resulting line
+                bufs.append(c[:i + 1])    # Add portion of last chunk
+                self._unread(c[i + 1:])   # Push back rest of chunk
+                break
 
             # Append chunk to list, decrease 'size',
             bufs.append(c)
             size = size - len(c)
             readsize = min(size, readsize * 2)
+        if readsize > self.min_readsize:
+            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
+        return ''.join(bufs) # Return resulting line
 
     def readlines(self, sizehint=0):
         # Negative numbers result in reading all the lines

Modified: python/branches/rjones-funccall/Objects/stringobject.c
==============================================================================
--- python/branches/rjones-funccall/Objects/stringobject.c	(original)
+++ python/branches/rjones-funccall/Objects/stringobject.c	Tue May 23 11:06:29 2006
@@ -2159,9 +2159,9 @@
 PyDoc_STRVAR(count__doc__,
 "S.count(sub[, start[, end]]) -> int\n\
 \n\
-Return the number of occurrences of substring sub in string\n\
-S[start:end].  Optional arguments start and end are\n\
-interpreted as in slice notation.");
+Return the number of non-overlapping occurrences of substring sub in\n\
+string S[start:end].  Optional arguments start and end are interpreted\n\
+as in slice notation.");
 
 static PyObject *
 string_count(PyStringObject *self, PyObject *args)

Modified: python/branches/rjones-funccall/Objects/unicodeobject.c
==============================================================================
--- python/branches/rjones-funccall/Objects/unicodeobject.c	(original)
+++ python/branches/rjones-funccall/Objects/unicodeobject.c	Tue May 23 11:06:29 2006
@@ -5078,8 +5078,8 @@
 PyDoc_STRVAR(count__doc__,
 "S.count(sub[, start[, end]]) -> int\n\
 \n\
-Return the number of occurrences of substring sub in Unicode string\n\
-S[start:end].  Optional arguments start and end are\n\
+Return the number of non-overlapping occurrences of substring sub in\n\
+Unicode string S[start:end].  Optional arguments start and end are\n\
 interpreted as in slice notation.");
 
 static PyObject *
@@ -5898,9 +5898,19 @@
 
     p = u->str;
 
-    while (len-- > 0) {
-        Py_UNICODE_COPY(p, str->str, str->length);
-        p += str->length;
+    if (str->length == 1 && len > 0) {
+        Py_UNICODE_FILL(p, str->str[0], len);
+    } else {
+	int done = 0; /* number of characters copied so far */
+	if (done < nchars) {
+            Py_UNICODE_COPY(p, str->str, str->length);
+            done = str->length;
+	}
+	while (done < nchars) {
+            int n = (done <= nchars-done) ? done : nchars-done;
+            Py_UNICODE_COPY(p+done, p, n);
+            done += n;
+	}
     }
 
     return (PyObject*) u;


More information about the Python-checkins mailing list