[pypy-svn] r14078 - in pypy/branch/dist-2.4.1/pypy/objspace/std: . test

Sat Jul 2 13:07:36 CEST 2005

Author: ignas
Date: Sat Jul  2 13:07:35 2005
New Revision: 14078

Added:
   pypy/branch/dist-2.4.1/pypy/objspace/std/test/test_unicodeobject.py
      - copied, changed from r14047, pypy/branch/dist-2.4.1/pypy/objspace/std/test/test_unicodestring.py
Removed:
   pypy/branch/dist-2.4.1/pypy/objspace/std/test/test_unicodestring.py
Modified:
   pypy/branch/dist-2.4.1/pypy/objspace/std/unicodeobject.py
   pypy/branch/dist-2.4.1/pypy/objspace/std/unicodetype.py
Log:
Added rsplit to unicode strings, as in Python 2.4.1


Copied: pypy/branch/dist-2.4.1/pypy/objspace/std/test/test_unicodeobject.py (from r14047, pypy/branch/dist-2.4.1/pypy/objspace/std/test/test_unicodestring.py)
==============================================================================

--- pypy/branch/dist-2.4.1/pypy/objspace/std/test/test_unicodestring.py	(original)
+++ pypy/branch/dist-2.4.1/pypy/objspace/std/test/test_unicodeobject.py	Sat Jul  2 13:07:35 2005
@@ -59,11 +59,59 @@
         assert u'+123'.zfill(6) == u'+00123'
 
     def test_split(self):
-        assert (u'this is the split function'.split() ==
-                [u'this', u'is', u'the', u'split', u'function'])
-        assert (u'this!is!the!split!function'.split('!') ==
-                [u'this', u'is', u'the', u'split', u'function'])
-    
+        assert u"".split() == []
+        assert u" ".split() == []
+        assert u"a".split() == [u'a']
+        assert u"a".split(u"a", 1) == [u'', u'']
+        assert u" ".split(u" ", 1) == [u'', u'']
+        assert u"aa".split(u"a", 2) == [u'', u'', u'']
+        assert u" a ".split() == [u'a']
+        assert u"a b c".split() == [u'a',u'b',u'c']
+        assert u'this is the split function'.split() == [u'this', u'is', u'the', u'split', u'function']
+        assert u'a|b|c|d'.split(u'|') == [u'a', u'b', u'c', u'd']
+        assert 'a|b|c|d'.split(u'|') == [u'a', u'b', u'c', u'd']
+        assert u'a|b|c|d'.split('|') == [u'a', u'b', u'c', u'd']
+        assert u'a|b|c|d'.split(u'|', 2) == [u'a', u'b', u'c|d']
+        assert u'a b c d'.split(None, 1) == [u'a', u'b c d']
+        assert u'a b c d'.split(None, 2) == [u'a', u'b', u'c d']
+        assert u'a b c d'.split(None, 3) == [u'a', u'b', u'c', u'd']
+        assert u'a b c d'.split(None, 4) == [u'a', u'b', u'c', u'd']
+        assert u'a b c d'.split(None, 0) == [u'a b c d']
+        assert u'a  b  c  d'.split(None, 2) == [u'a', u'b', u'c  d']
+        assert u'a b c d '.split() == [u'a', u'b', u'c', u'd']
+        assert u'a//b//c//d'.split(u'//') == [u'a', u'b', u'c', u'd']
+        assert u'endcase test'.split(u'test') == [u'endcase ', u'']
+        raises(ValueError, u'abc'.split, '')
+        raises(ValueError, u'abc'.split, u'')
+        raises(ValueError, 'abc'.split, u'')
+
+    def test_rsplit(self):
+        assert u"".rsplit() == []
+        assert u" ".rsplit() == []
+        assert u"a".rsplit() == [u'a']
+        assert u"a".rsplit(u"a", 1) == [u'', u'']
+        assert u" ".rsplit(u" ", 1) == [u'', u'']
+        assert u"aa".rsplit(u"a", 2) == [u'', u'', u'']
+        assert u" a ".rsplit() == [u'a']
+        assert u"a b c".rsplit() == [u'a',u'b',u'c']
+        assert u'this is the rsplit function'.rsplit() == [u'this', u'is', u'the', u'rsplit', u'function']
+        assert u'a|b|c|d'.rsplit(u'|') == [u'a', u'b', u'c', u'd']
+        assert u'a|b|c|d'.rsplit('|') == [u'a', u'b', u'c', u'd']
+        assert 'a|b|c|d'.rsplit(u'|') == [u'a', u'b', u'c', u'd']
+        assert u'a|b|c|d'.rsplit(u'|', 2) == [u'a|b', u'c', u'd']
+        assert u'a b c d'.rsplit(None, 1) == [u'a b c', u'd']
+        assert u'a b c d'.rsplit(None, 2) == [u'a b', u'c', u'd']
+        assert u'a b c d'.rsplit(None, 3) == [u'a', u'b', u'c', u'd']
+        assert u'a b c d'.rsplit(None, 4) == [u'a', u'b', u'c', u'd']
+        assert u'a b c d'.rsplit(None, 0) == [u'a b c d']
+        assert u'a  b  c  d'.rsplit(None, 2) == [u'a  b', u'c', u'd']
+        assert u'a b c d '.rsplit() == [u'a', u'b', u'c', u'd']
+        assert u'a//b//c//d'.rsplit(u'//') == [u'a', u'b', u'c', u'd']
+        assert u'endcase test'.rsplit(u'test') == [u'endcase ', u'']
+        raises(ValueError, u'abc'.rsplit, u'')
+        raises(ValueError, u'abc'.rsplit, '')
+        raises(ValueError, 'abc'.rsplit, u'')
+
     def test_long_from_unicode(self):
         assert long(u'12345678901234567890') == 12345678901234567890
         assert int(u'12345678901234567890') == 12345678901234567890

Deleted: /pypy/branch/dist-2.4.1/pypy/objspace/std/test/test_unicodestring.py
==============================================================================
--- /pypy/branch/dist-2.4.1/pypy/objspace/std/test/test_unicodestring.py	Sat Jul  2 13:07:35 2005
+++ (empty file)
@@ -1,75 +0,0 @@
-# test the integration of unicode and strings (even though we don't
-# really implement unicode yet).
-
-import autopath, sys
-
-
-objspacename = 'std'
-
-class AppTestUnicodeStringStdOnly:
-    def test_compares(self):
-        assert u'a' == 'a'
-        assert 'a' == u'a'
-        assert not u'a' == 'b' # xxx u'a' != 'b' fails
-        assert not 'a'  == u'b'# xxx 'a' != u'b' fails
-
-class AppTestUnicodeString:
-    def test_addition(self):
-        def check(a, b):
-            assert a == b
-            assert type(a) == type(b)
-        check(u'a' + 'b', u'ab')
-        check('a' + u'b', u'ab')
-
-    def test_join(self):
-        def check(a, b):
-            assert a == b
-            assert type(a) == type(b)
-        check(', '.join([u'a']), u'a')
-        check(', '.join(['a', u'b']), u'a, b')
-        check(u', '.join(['a', 'b']), u'a, b')
-
-    if sys.version_info >= (2,3):
-        def test_contains_ex(self):
-            assert u'' in 'abc'
-            assert u'bc' in 'abc'
-            assert 'bc' in 'abc'
-
-    def test_contains(self):
-        assert u'a' in 'abc'
-        assert 'a' in u'abc'
-
-    def test_splitlines(self):
-        assert u''.splitlines() == []
-        assert u''.splitlines(1) == []
-        assert u'\n'.splitlines() == [u'']
-        assert u'a'.splitlines() == [u'a']
-        assert u'one\ntwo'.splitlines() == [u'one', u'two']
-        assert u'\ntwo\nthree'.splitlines() == [u'', u'two', u'three']
-        assert u'\n\n'.splitlines() == [u'', u'']
-        assert u'a\nb\nc'.splitlines(1) == [u'a\n', u'b\n', u'c']
-        assert u'\na\nb\n'.splitlines(1) == [u'\n', u'a\n', u'b\n']
-
-    def test_zfill(self):
-        assert u'123'.zfill(6) == u'000123'
-        assert u'123'.zfill(2) == u'123'
-        assert u'123'.zfill(6) == u'000123'
-        assert u'+123'.zfill(2) == u'+123'
-        assert u'+123'.zfill(4) == u'+123'
-        assert u'+123'.zfill(6) == u'+00123'
-
-    def test_split(self):
-        assert (u'this is the split function'.split() ==
-                [u'this', u'is', u'the', u'split', u'function'])
-        assert (u'this!is!the!split!function'.split('!') ==
-                [u'this', u'is', u'the', u'split', u'function'])
-    
-    def test_long_from_unicode(self):
-        assert long(u'12345678901234567890') == 12345678901234567890
-        assert int(u'12345678901234567890') == 12345678901234567890
-
-    def test_int_from_unicode(self):
-        assert int(u'12345') == 12345
-
-    def test_float_from_unicode(self):
-        assert float(u'123.456e89') == float('123.456e89')

Modified: pypy/branch/dist-2.4.1/pypy/objspace/std/unicodeobject.py
==============================================================================
--- pypy/branch/dist-2.4.1/pypy/objspace/std/unicodeobject.py	(original)
+++ pypy/branch/dist-2.4.1/pypy/objspace/std/unicodeobject.py	Sat Jul  2 13:07:35 2005
@@ -647,25 +647,30 @@
         return space.newlist([])
     start = 0
     end = len(self)
+    inword = 0
+
     while maxsplit != 0 and start < end:
         index = start
         for index in range(start, end):
             if _isspace(self[index]):
                 break
+            else:
+                inword = 1
         else:
             break
-        parts.append(W_UnicodeObject(space, self[start:index]))
-        maxsplit -= 1
+        if inword == 1:
+            parts.append(W_UnicodeObject(space, self[start:index]))
+            maxsplit -= 1
         # Eat whitespace
         for start in range(index + 1, end):
             if not _isspace(self[start]):
                 break
         else:
             return space.newlist(parts)
+
     parts.append(W_UnicodeObject(space, self[start:]))
     return space.newlist(parts)
 
-
 def unicode_split__Unicode_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
     self = w_self._value
     delim = w_delim._value
@@ -689,6 +694,64 @@
     parts.append(W_UnicodeObject(space, self[start:]))
     return space.newlist(parts)
 
+
+def unicode_rsplit__Unicode_None_ANY(space, w_self, w_none, w_maxsplit):
+    self = w_self._value
+    maxsplit = space.int_w(w_maxsplit)
+    parts = []
+    if len(self) == 0:
+        return space.newlist([])
+    start = 0
+    end = len(self)
+    inword = 0
+
+    while maxsplit != 0 and start < end:
+        index = end
+        for index in range(end-1, start-1, -1):
+            if _isspace(self[index]):
+                break
+            else:
+                inword = 1
+        else:
+            break
+        if inword == 1:
+            parts.append(W_UnicodeObject(space, self[index+1:end]))
+            maxsplit -= 1
+        # Eat whitespace
+        for end in range(index, start-1, -1):
+            if not _isspace(self[end-1]):
+                break
+        else:
+            return space.newlist(parts)
+
+    parts.append(W_UnicodeObject(space, self[:end]))
+    parts.reverse()
+    return space.newlist(parts)
+
+def unicode_rsplit__Unicode_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
+    self = w_self._value
+    delim = w_delim._value
+    maxsplit = space.int_w(w_maxsplit)
+    delim_len = len(delim)
+    if delim_len == 0:
+        raise OperationError(space.w_ValueError,
+                             space.wrap('empty separator'))
+    parts = []
+    if len(self) == 0:
+        return space.newlist([])
+    start = 0
+    end = len(self)
+    while maxsplit != 0:
+        index = _rfind(self, delim, 0, end)
+        if index < 0:
+            break
+        parts.append(W_UnicodeObject(space, self[index+delim_len:end]))
+        end = index
+        maxsplit -= 1
+    parts.append(W_UnicodeObject(space, self[:end]))
+    parts.reverse()
+    return space.newlist(parts)
+
 def _split(space, self, maxsplit):
     if len(self) == 0:
         return []
@@ -706,7 +769,7 @@
         maxsplit -= 1
     parts.append(W_UnicodeObject(space, self[index:]))
     return parts
-    
+
 def unicode_replace__Unicode_Unicode_Unicode_ANY(space, w_self, w_old,
                                                  w_new, w_maxsplit):
     if len(w_old._value):
@@ -834,5 +897,9 @@
     def str_split__String_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
         return space.call_method(space.call_function(space.w_unicode, w_self),
                                  'split', w_delim, w_maxsplit)
-        
+
+    def str_rsplit__String_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
+        return space.call_method(space.call_function(space.w_unicode, w_self),
+                                 'rsplit', w_delim, w_maxsplit)
+
     register_all(vars(), stringtype)

Modified: pypy/branch/dist-2.4.1/pypy/objspace/std/unicodetype.py
==============================================================================
--- pypy/branch/dist-2.4.1/pypy/objspace/std/unicodetype.py	(original)
+++ pypy/branch/dist-2.4.1/pypy/objspace/std/unicodetype.py	Sat Jul  2 13:07:35 2005
@@ -30,6 +30,7 @@
 unicode_rindex     = MultiMethod('rindex', 4, defaults=(0, maxint))
 unicode_rjust      = MultiMethod('rjust', 2)
 unicode_rstrip     = MultiMethod('rstrip', 2, defaults=(None,))
+unicode_rsplit     = MultiMethod('rsplit', 3, defaults=(None,-1))
 unicode_split      = MultiMethod('split', 3, defaults=(None,-1))
 unicode_splitlines = MultiMethod('splitlines', 2, defaults=(0,))
 unicode_startswith = MultiMethod('startswith', 4, defaults=(0,maxint))