[pypy-svn] r60992 - in pypy/trunk/pypy/objspace/std: . test
fijal at codespeak.net
fijal at codespeak.net
Thu Jan 15 14:10:15 CET 2009
Author: fijal
Date: Thu Jan 15 14:10:13 2009
New Revision: 60992
Modified:
pypy/trunk/pypy/objspace/std/test/test_unicodeobject.py
pypy/trunk/pypy/objspace/std/unicodeobject.py
Log:
A test and a fix. The algorithm is now *exactly* like the
string one, but the code duplication remains.
Modified: pypy/trunk/pypy/objspace/std/test/test_unicodeobject.py
==============================================================================
--- pypy/trunk/pypy/objspace/std/test/test_unicodeobject.py (original)
+++ pypy/trunk/pypy/objspace/std/test/test_unicodeobject.py Thu Jan 15 14:10:13 2009
@@ -120,6 +120,7 @@
raises(ValueError, u'abc'.split, '')
raises(ValueError, u'abc'.split, u'')
raises(ValueError, 'abc'.split, u'')
+ assert u' a b c d'.split(None, 0) == [u'a b c d']
def test_rsplit(self):
assert u"".rsplit() == []
Modified: pypy/trunk/pypy/objspace/std/unicodeobject.py
==============================================================================
--- pypy/trunk/pypy/objspace/std/unicodeobject.py (original)
+++ pypy/trunk/pypy/objspace/std/unicodeobject.py Thu Jan 15 14:10:13 2009
@@ -659,36 +659,36 @@
return space.wrap(self.count(substr, start, end))
def unicode_split__Unicode_None_ANY(space, w_self, w_none, w_maxsplit):
- self = w_self._value
maxsplit = space.int_w(w_maxsplit)
- parts = []
- if len(self) == 0:
- return space.newlist([])
- start = 0
- end = len(self)
- inword = 0
-
- while maxsplit != 0 and start < end:
- index = start
- for index in range(start, end):
- if _isspace(self[index]):
- break
- else:
- inword = 1
+ res_w = []
+ value = w_self._value
+ length = len(value)
+ i = 0
+ while True:
+ # find the beginning of the next word
+ while i < length:
+ if not value[i].isspace():
+ break # found
+ i += 1
else:
- break
- if inword == 1:
- parts.append(W_UnicodeObject(self[start:index]))
- maxsplit -= 1
- # Eat whitespace
- for start in range(index + 1, end):
- if not _isspace(self[start]):
- break
+ break # end of string, finished
+
+ # find the end of the word
+ if maxsplit == 0:
+ j = length # take all the rest of the string
else:
- return space.newlist(parts)
+ j = i + 1
+ while j < length and not value[j].isspace():
+ j += 1
+ maxsplit -= 1 # NB. if it's already < 0, it stays < 0
- parts.append(W_UnicodeObject(self[start:]))
- return space.newlist(parts)
+ # the word is value[i:j]
+ res_w.append(W_UnicodeObject(value[i:j]))
+
+ # continue to look from the character following the space after the word
+ i = j + 1
+
+ return space.newlist(res_w)
def unicode_split__Unicode_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
self = w_self._value
More information about the Pypy-commit
mailing list