[pypy-commit] pypy default: simplify/optimize RStringIO by changing it to use StringBuilder

Thu Mar 21 08:36:13 CET 2013

Author: Brian Kearns <bdkearns at gmail.com>
Branch: 
Changeset: r62605:b4901c26d853
Date: 2013-03-21 03:24 -0400
http://bitbucket.org/pypy/pypy/changeset/b4901c26d853/

Log:	simplify/optimize RStringIO by changing it to use StringBuilder

diff --git a/rpython/rlib/rStringIO.py b/rpython/rlib/rStringIO.py
--- a/rpython/rlib/rStringIO.py
+++ b/rpython/rlib/rStringIO.py
@@ -1,6 +1,4 @@
-
-PIECES = 80
-BIGPIECES = 32
+from rpython.rlib.rstring import StringBuilder
 
 AT_END = -1
 
@@ -8,8 +6,7 @@
 class RStringIO(object):
     """RPython-level StringIO object.
     The fastest path through this code is for the case of a bunch of write()
-    followed by getvalue().  For at most PIECES write()s and one getvalue(),
-    there is one copy of the data done, as if ''.join() was used.
+    followed by getvalue().
     """
     _mixin_ = True        # for interp_stringio.py
 
@@ -18,20 +15,12 @@
         #  * the list of characters self.bigbuffer;
         #  * each of the strings in self.strings.
         #
-        # Invariants:
-        #  * self.numbigstrings <= self.numstrings;
-        #  * all strings in self.strings[self.numstrings:PIECES] are empty.
-        #
-        self.strings = [''] * PIECES
-        self.numstrings = 0
-        self.numbigstrings = 0
+        self.strings = StringBuilder()
         self.bigbuffer = []
         self.pos = AT_END
 
     def close(self):
         self.strings = None
-        self.numstrings = 0
-        self.numbigstrings = 0
         self.bigbuffer = None
 
     def is_closed(self):
@@ -40,58 +29,21 @@
     def getvalue(self):
         """If self.strings contains more than 1 string, join all the
         strings together.  Return the final single string."""
-        if len(self.bigbuffer) > 0:
+        if len(self.bigbuffer):
             self.copy_into_bigbuffer()
             return ''.join(self.bigbuffer)
-        if self.numstrings > 1:
-            result = self.strings[0] = ''.join(self.strings)
-            for i in range(1, self.numstrings):
-                self.strings[i] = ''
-            self.numstrings = 1
-            self.numbigstrings = 1
-        else:
-            result = self.strings[0]
-        return result
+        return self.strings.build()
 
     def getsize(self):
         result = len(self.bigbuffer)
-        for i in range(0, self.numstrings):
-            result += len(self.strings[i])
+        result += self.strings.getlength()
         return result
 
     def copy_into_bigbuffer(self):
         """Copy all the data into the list of characters self.bigbuffer."""
-        for i in range(0, self.numstrings):
-            self.bigbuffer += self.strings[i]
-            self.strings[i] = ''
-        self.numstrings = 0
-        self.numbigstrings = 0
-        return self.bigbuffer
-
-    def reduce(self):
-        """Reduce the number of (non-empty) strings in self.strings."""
-        # When self.pos == AT_END, the calls to write(str) accumulate
-        # the strings in self.strings until all PIECES slots are filled.
-        # Then the reduce() method joins all the strings and put the
-        # result back into self.strings[0].  The next time all the slots
-        # are filled, we only join self.strings[1:] and put the result
-        # in self.strings[1]; and so on.  The purpose of this is that
-        # the string resulting from a join is expected to be big, so the
-        # next join operation should only join the newly added strings.
-        # When we have done this BIGPIECES times, the next join collects
-        # all strings again into self.strings[0] and we start from
-        # scratch.
-        limit = self.numbigstrings
-        self.strings[limit] = ''.join(self.strings[limit:])
-        for i in range(limit + 1, self.numstrings):
-            self.strings[i] = ''
-        self.numstrings = limit + 1
-        if limit < BIGPIECES:
-            self.numbigstrings = limit + 1
-        else:
-            self.numbigstrings = 0
-        assert self.numstrings <= BIGPIECES + 1
-        return self.numstrings
+        if self.strings.getlength():
+            self.bigbuffer += self.strings.build()
+            self.strings = StringBuilder()
 
     def write(self, buffer):
         # Idea: for the common case of a sequence of write() followed
@@ -110,30 +62,25 @@
             else:
                 # slow path: collect all data into self.bigbuffer and
                 # handle the various cases
-                bigbuffer = self.copy_into_bigbuffer()
-                fitting = len(bigbuffer) - p
+                self.copy_into_bigbuffer()
+                fitting = len(self.bigbuffer) - p
                 if fitting > 0:
                     # the write starts before the end of the data
                     fitting = min(len(buffer), fitting)
                     for i in range(fitting):
-                        bigbuffer[p+i] = buffer[i]
+                        self.bigbuffer[p+i] = buffer[i]
                     if len(buffer) > fitting:
                         # the write extends beyond the end of the data
-                        bigbuffer += buffer[fitting:]
+                        self.bigbuffer += buffer[fitting:]
                         endp = AT_END
                     self.pos = endp
                     return
                 else:
                     # the write starts at or beyond the end of the data
-                    bigbuffer += '\x00' * (-fitting)
+                    self.bigbuffer += '\x00' * (-fitting)
                     self.pos = AT_END      # fall-through to the fast path
         # Fast path.
-        # See comments in reduce().
-        count = self.numstrings
-        if count == PIECES:
-            count = self.reduce()
-        self.strings[count] = buffer
-        self.numstrings = count + 1
+        self.strings.append(buffer)
 
     def seek(self, position, mode=0):
         if mode == 1:
@@ -165,8 +112,8 @@
         if p == AT_END:
             return ''
         assert p >= 0
-        bigbuffer = self.copy_into_bigbuffer()
-        mysize = len(bigbuffer)
+        self.copy_into_bigbuffer()
+        mysize = len(self.bigbuffer)
         count = mysize - p
         if n >= 0:
             count = min(n, count)
@@ -174,10 +121,10 @@
             return ''
         if p == 0 and count == mysize:
             self.pos = AT_END
-            return ''.join(bigbuffer)
+            return ''.join(self.bigbuffer)
         else:
             self.pos = p + count
-            return ''.join(bigbuffer[p:p+count])
+            return ''.join(self.bigbuffer[p:p+count])
 
     def truncate(self, size):
         # NB. 'size' is mandatory.  This has the same un-Posix-y semantics
@@ -188,10 +135,8 @@
             self.copy_into_bigbuffer()
         else:
             # we can drop all extra strings
-            for i in range(0, self.numstrings):
-                self.strings[i] = ''
-            self.numstrings = 0
-            self.numbigstrings = 0
+            if self.strings.getlength():
+                self.strings = StringBuilder()
         if size < len(self.bigbuffer):
             del self.bigbuffer[size:]
         self.pos = AT_END