[pypy-commit] pypy default: simplify/optimize RStringIO by changing it to use StringBuilder
bdkearns
noreply at buildbot.pypy.org
Thu Mar 21 08:36:13 CET 2013
Author: Brian Kearns <bdkearns at gmail.com>
Branch:
Changeset: r62605:b4901c26d853
Date: 2013-03-21 03:24 -0400
http://bitbucket.org/pypy/pypy/changeset/b4901c26d853/
Log: simplify/optimize RStringIO by changing it to use StringBuilder
diff --git a/rpython/rlib/rStringIO.py b/rpython/rlib/rStringIO.py
--- a/rpython/rlib/rStringIO.py
+++ b/rpython/rlib/rStringIO.py
@@ -1,6 +1,4 @@
-
-PIECES = 80
-BIGPIECES = 32
+from rpython.rlib.rstring import StringBuilder
AT_END = -1
@@ -8,8 +6,7 @@
class RStringIO(object):
"""RPython-level StringIO object.
The fastest path through this code is for the case of a bunch of write()
- followed by getvalue(). For at most PIECES write()s and one getvalue(),
- there is one copy of the data done, as if ''.join() was used.
+ followed by getvalue().
"""
_mixin_ = True # for interp_stringio.py
@@ -18,20 +15,12 @@
# * the list of characters self.bigbuffer;
# * each of the strings in self.strings.
#
- # Invariants:
- # * self.numbigstrings <= self.numstrings;
- # * all strings in self.strings[self.numstrings:PIECES] are empty.
- #
- self.strings = [''] * PIECES
- self.numstrings = 0
- self.numbigstrings = 0
+ self.strings = StringBuilder()
self.bigbuffer = []
self.pos = AT_END
def close(self):
self.strings = None
- self.numstrings = 0
- self.numbigstrings = 0
self.bigbuffer = None
def is_closed(self):
@@ -40,58 +29,21 @@
def getvalue(self):
"""If self.strings contains more than 1 string, join all the
strings together. Return the final single string."""
- if len(self.bigbuffer) > 0:
+ if len(self.bigbuffer):
self.copy_into_bigbuffer()
return ''.join(self.bigbuffer)
- if self.numstrings > 1:
- result = self.strings[0] = ''.join(self.strings)
- for i in range(1, self.numstrings):
- self.strings[i] = ''
- self.numstrings = 1
- self.numbigstrings = 1
- else:
- result = self.strings[0]
- return result
+ return self.strings.build()
def getsize(self):
result = len(self.bigbuffer)
- for i in range(0, self.numstrings):
- result += len(self.strings[i])
+ result += self.strings.getlength()
return result
def copy_into_bigbuffer(self):
"""Copy all the data into the list of characters self.bigbuffer."""
- for i in range(0, self.numstrings):
- self.bigbuffer += self.strings[i]
- self.strings[i] = ''
- self.numstrings = 0
- self.numbigstrings = 0
- return self.bigbuffer
-
- def reduce(self):
- """Reduce the number of (non-empty) strings in self.strings."""
- # When self.pos == AT_END, the calls to write(str) accumulate
- # the strings in self.strings until all PIECES slots are filled.
- # Then the reduce() method joins all the strings and put the
- # result back into self.strings[0]. The next time all the slots
- # are filled, we only join self.strings[1:] and put the result
- # in self.strings[1]; and so on. The purpose of this is that
- # the string resulting from a join is expected to be big, so the
- # next join operation should only join the newly added strings.
- # When we have done this BIGPIECES times, the next join collects
- # all strings again into self.strings[0] and we start from
- # scratch.
- limit = self.numbigstrings
- self.strings[limit] = ''.join(self.strings[limit:])
- for i in range(limit + 1, self.numstrings):
- self.strings[i] = ''
- self.numstrings = limit + 1
- if limit < BIGPIECES:
- self.numbigstrings = limit + 1
- else:
- self.numbigstrings = 0
- assert self.numstrings <= BIGPIECES + 1
- return self.numstrings
+ if self.strings.getlength():
+ self.bigbuffer += self.strings.build()
+ self.strings = StringBuilder()
def write(self, buffer):
# Idea: for the common case of a sequence of write() followed
@@ -110,30 +62,25 @@
else:
# slow path: collect all data into self.bigbuffer and
# handle the various cases
- bigbuffer = self.copy_into_bigbuffer()
- fitting = len(bigbuffer) - p
+ self.copy_into_bigbuffer()
+ fitting = len(self.bigbuffer) - p
if fitting > 0:
# the write starts before the end of the data
fitting = min(len(buffer), fitting)
for i in range(fitting):
- bigbuffer[p+i] = buffer[i]
+ self.bigbuffer[p+i] = buffer[i]
if len(buffer) > fitting:
# the write extends beyond the end of the data
- bigbuffer += buffer[fitting:]
+ self.bigbuffer += buffer[fitting:]
endp = AT_END
self.pos = endp
return
else:
# the write starts at or beyond the end of the data
- bigbuffer += '\x00' * (-fitting)
+ self.bigbuffer += '\x00' * (-fitting)
self.pos = AT_END # fall-through to the fast path
# Fast path.
- # See comments in reduce().
- count = self.numstrings
- if count == PIECES:
- count = self.reduce()
- self.strings[count] = buffer
- self.numstrings = count + 1
+ self.strings.append(buffer)
def seek(self, position, mode=0):
if mode == 1:
@@ -165,8 +112,8 @@
if p == AT_END:
return ''
assert p >= 0
- bigbuffer = self.copy_into_bigbuffer()
- mysize = len(bigbuffer)
+ self.copy_into_bigbuffer()
+ mysize = len(self.bigbuffer)
count = mysize - p
if n >= 0:
count = min(n, count)
@@ -174,10 +121,10 @@
return ''
if p == 0 and count == mysize:
self.pos = AT_END
- return ''.join(bigbuffer)
+ return ''.join(self.bigbuffer)
else:
self.pos = p + count
- return ''.join(bigbuffer[p:p+count])
+ return ''.join(self.bigbuffer[p:p+count])
def truncate(self, size):
# NB. 'size' is mandatory. This has the same un-Posix-y semantics
@@ -188,10 +135,8 @@
self.copy_into_bigbuffer()
else:
# we can drop all extra strings
- for i in range(0, self.numstrings):
- self.strings[i] = ''
- self.numstrings = 0
- self.numbigstrings = 0
+ if self.strings.getlength():
+ self.strings = StringBuilder()
if size < len(self.bigbuffer):
del self.bigbuffer[size:]
self.pos = AT_END
More information about the pypy-commit mailing list