[pypy-svn] r11503 - pypy/branch/non-fake-unicode/pypy/objspace/std

Wed Apr 27 10:47:34 CEST 2005

Author: ac
Date: Wed Apr 27 10:47:34 2005
New Revision: 11503

Modified:
   pypy/branch/non-fake-unicode/pypy/objspace/std/floattype.py
   pypy/branch/non-fake-unicode/pypy/objspace/std/inttype.py
   pypy/branch/non-fake-unicode/pypy/objspace/std/longtype.py
   pypy/branch/non-fake-unicode/pypy/objspace/std/objspace.py
   pypy/branch/non-fake-unicode/pypy/objspace/std/unicodeobject.py
   pypy/branch/non-fake-unicode/pypy/objspace/std/unicodetype.py
Log:
Slightly less faked unicode.

Modified: pypy/branch/non-fake-unicode/pypy/objspace/std/floattype.py
==============================================================================

--- pypy/branch/non-fake-unicode/pypy/objspace/std/floattype.py	(original)
+++ pypy/branch/non-fake-unicode/pypy/objspace/std/floattype.py	Wed Apr 27 10:47:34 2005
@@ -9,6 +9,14 @@
         except ValueError, e:
             raise OperationError(space.w_ValueError,
                                  space.wrap(str(e)))
+    elif space.is_true(space.isinstance(w_value, space.w_unicode)):
+        try:
+            # float() reports malformed input with ValueError, as the preceding branch expects
+            from unicodeobject import unicode_to_decimal_w
+            value = float(unicode_to_decimal_w(space, w_value))
+        except ValueError, e:
+            raise OperationError(space.w_ValueError,
+                                 space.wrap(str(e)))
     else:
         w_obj = space.float(w_value)
         if space.is_true(space.is_(w_floattype, space.w_float)):

Modified: pypy/branch/non-fake-unicode/pypy/objspace/std/inttype.py
==============================================================================
--- pypy/branch/non-fake-unicode/pypy/objspace/std/inttype.py	(original)
+++ pypy/branch/non-fake-unicode/pypy/objspace/std/inttype.py	Wed Apr 27 10:47:34 2005
@@ -16,6 +16,14 @@
             except ParseStringError, e:
                 raise OperationError(space.w_ValueError,
                                      space.wrap(e.msg))
+        elif space.is_true(space.isinstance(w_value, space.w_unicode)):
+            try:
+                # XXX can produce unwrapped long
+                from unicodeobject import unicode_to_decimal_w
+                value = string_to_int(unicode_to_decimal_w(space, w_value))
+            except ParseStringError, e:
+                raise OperationError(space.w_ValueError,
+                                     space.wrap(e.msg))
         else:
             # otherwise, use the __int__() method
             w_obj = space.int(w_value)

Modified: pypy/branch/non-fake-unicode/pypy/objspace/std/longtype.py
==============================================================================
--- pypy/branch/non-fake-unicode/pypy/objspace/std/longtype.py	(original)
+++ pypy/branch/non-fake-unicode/pypy/objspace/std/longtype.py	Wed Apr 27 10:47:34 2005
@@ -17,6 +17,14 @@
             except ParseStringError, e:
                 raise OperationError(space.w_ValueError,
                                      space.wrap(e.msg))
+        elif space.is_true(space.isinstance(w_value, space.w_unicode)):
+            try:
+                # XXX can produce unwrapped long
+                from unicodeobject import unicode_to_decimal_w
+                value = string_to_long(unicode_to_decimal_w(space, w_value))
+            except ParseStringError, e:
+                raise OperationError(space.w_ValueError,
+                                     space.wrap(e.msg))
         else:
             # otherwise, use the __long__() method
             w_obj = space.long(w_value)

Modified: pypy/branch/non-fake-unicode/pypy/objspace/std/objspace.py
==============================================================================
--- pypy/branch/non-fake-unicode/pypy/objspace/std/objspace.py	(original)
+++ pypy/branch/non-fake-unicode/pypy/objspace/std/objspace.py	Wed Apr 27 10:47:34 2005
@@ -186,6 +186,8 @@
             return W_IntObject(self, x)
         if isinstance(x, str):
             return W_StringObject(self, x)
+        if isinstance(x, unicode):
+            return W_UnicodeObject(self, x)
         if isinstance(x, dict):
             items_w = [(self.wrap(k), self.wrap(v)) for (k, v) in x.iteritems()]
             return W_DictObject(self, items_w)

Modified: pypy/branch/non-fake-unicode/pypy/objspace/std/unicodeobject.py
==============================================================================
--- pypy/branch/non-fake-unicode/pypy/objspace/std/unicodeobject.py	(original)
+++ pypy/branch/non-fake-unicode/pypy/objspace/std/unicodeobject.py	Wed Apr 27 10:47:34 2005
@@ -1,25 +1,45 @@
 from pypy.objspace.std.objspace import *
-from pypy.objspace.std.fake import fake_type, wrap_exception
+from pypy.objspace.std.fake import wrap_exception
 from pypy.objspace.std.stringobject import W_StringObject
+from pypy.objspace.std.noneobject import W_NoneObject
+from pypy.objspace.std.sliceobject import W_SliceObject
+from pypy.objspace.std import slicetype
 from pypy.objspace.std.strutil import string_to_int, string_to_long, ParseStringError
 
-W_UnicodeObject = fake_type(unicode)
+class W_UnicodeObject(W_Object):
+    from pypy.objspace.std.unicodetype import unicode_typedef as typedef
+
+    def __init__(w_self, space, unicodechars):
+        W_Object.__init__(w_self, space)
+        w_self._value = unicodechars
+        w_self.w_hash = None
+
+    def __repr__(w_self):
+        """ representation for debugging purposes """
+        return "%s(%r)" % (w_self.__class__.__name__, w_self._value)
+
+    def unwrap(w_self): 
+        return w_self._value # This is maybe not right
+
+registerimplementation(W_UnicodeObject)
 
 # Helper for converting int/long
 import unicodedata
 def unicode_to_decimal_w(space, w_unistr):
-    result = []
-    for uchr in space.unwrap(w_unistr):
+    unistr = space.unwrap(w_unistr)
+    result = [' '] * len(unistr)
+    for i in xrange(len(unistr)):
+        uchr = unistr[i]
         if uchr.isspace():
-            result.append(' ')
+            result[i] = ' '
             continue
         try:
-            result.append(chr(ord('0') + unicodedata.decimal(uchr)))
+            result[i] = chr(ord('0') + unicodedata.decimal(uchr))
             continue
         except ValueError:
             ch = ord(uchr)
             if 0 < ch < 256:
-                result.append(chr(ch))
+                result[i] = chr(ch)
                 continue
         raise OperationError(space.w_UnicodeEncodeError, space.wrap('invalid decimal Unicode string'))
     return ''.join(result)
@@ -27,100 +47,141 @@
 # string-to-unicode delegation
 def delegate_String2Unicode(w_str):
     space = w_str.space
-    return W_UnicodeObject(space, unicode(space.str_w(w_str)))
+    return space.call_function(space.w_unicode, w_str)
 
 def str_w__Unicode(space, w_uni):
-    return space.str_w(space.call_method(w_uni, 'encode'))
+    return space.str_w(space.str(w_uni))
 
-def eq__Unicode_ANY(space, w_uni, w_other):
+def repr__Unicode(space, w_uni):
+    return space.wrap(repr(w_uni._value))
+
+def str__Unicode(space, w_uni):
+    return space.call_method(w_uni, 'encode')
+
+def cmp__Unicode_ANY(space, w_uni, w_other):
     try:
-        return space.newbool(space.unwrap(w_uni) == space.unwrap(w_other))
+        return space.wrap(cmp(space.unwrap(w_uni), space.unwrap(w_other)))  # cmp() yields -1/0/1: wrap the int rather than coercing it to a bool
     except:
         wrap_exception(space)
-
-def ne__Unicode_ANY(space, w_uni, w_other):
+        
+def ord__Unicode(space, w_uni):
     try:
-        return space.newbool(space.unwrap(w_uni) != space.unwrap(w_other))
+        return space.wrap(ord(w_uni._value))
     except:
         wrap_exception(space)
 
+def add__Unicode_Unicode(space, w_left, w_right):
+    return space.wrap(space.unwrap(w_left) + space.unwrap(w_right))
+def add__String_Unicode(space, w_left, w_right):
+    return space.wrap(space.str_w(w_left) + space.unwrap(w_right))
+def add__Unicode_String(space, w_left, w_right):
+    return space.wrap(space.unwrap(w_left) + space.str_w(w_right))
 
-def lt__Unicode_ANY(space, w_uni, w_other):
+def contains__String_Unicode(space, w_left, w_right):
     try:
-        return space.newbool(space.unwrap(w_uni) < space.unwrap(w_other))
+        return space.wrap(space.unwrap(w_right) in space.unwrap(w_left))
     except:
         wrap_exception(space)
 
-def gt__Unicode_ANY(space, w_uni, w_other):
-    try:
-        return space.newbool(space.unwrap(w_uni) > space.unwrap(w_other))
-    except:
-        wrap_exception(space)
+def contains__Unicode_Unicode(space, w_left, w_right):
+    return space.wrap(space.unwrap(w_right) in space.unwrap(w_left))
 
-def le__Unicode_ANY(space, w_uni, w_other):
-    try:
-        return space.newbool(space.unwrap(w_uni) <= space.unwrap(w_other))
-    except:
-        wrap_exception(space)
+def unicode_join__Unicode_ANY(space, w_self, w_list):
+    list = space.unpackiterable(w_list)
+    self = w_self._value
+    for i in range(len(list)):
+        list[i] = space.unwrap(space.call_function(space.w_unicode, list[i]))
+    return space.wrap(self.join(list))
 
-def ge__Unicode_ANY(space, w_uni, w_other):
+def unicode_encode__Unicode_String_String(space, w_self, w_encoding, w_errors):
     try:
-        return space.newbool(space.unwrap(w_uni) >= space.unwrap(w_other))
+        return space.wrap(w_self._value.encode(space.str_w(w_encoding), space.str_w(w_errors)))
     except:
         wrap_exception(space)
-
-def ord__Unicode(space, w_uni):
+def unicode_encode__Unicode_String_None(space, w_self, w_encoding, w_none):
     try:
-        return space.wrap(ord(space.unwrap(w_uni)))
+        return space.wrap(w_self._value.encode(space.str_w(w_encoding)))
     except:
         wrap_exception(space)
 
-def float__Unicode(space, w_uni):
+def unicode_encode__Unicode_None_None(space, w_self, w_encoding, w_errors):
     try:
-        return space.wrap(float(unicode_to_decimal_w(space, w_uni)))
+        return space.wrap(w_self._value.encode())
     except:
         wrap_exception(space)
 
-def int__Unicode(space, w_uni):
-    try:
-        return space.wrap(string_to_int(unicode_to_decimal_w(space, w_uni)))
-    except ParseStringError, e:
-        raise OperationError(space.w_ValueError, space.wrap(e.msg))
-    except:
-        wrap_exception(space)
+def hash__Unicode(space, w_uni):
+    if w_uni.w_hash is None:
+        w_uni.w_hash = space.wrap(hash(w_uni._value))
+    return w_uni.w_hash
 
-def long__Unicode(space, w_uni):
-    try:
-        return space.wrap(string_to_long(unicode_to_decimal_w(space, w_uni)))
-    except ParseStringError, e:
-        raise OperationError(space.w_ValueError, space.wrap(e.msg))    
-    except:
-        wrap_exception(space)
+def len__Unicode(space, w_uni):
+    return space.wrap(len(w_uni._value))
 
-def add__Unicode_Unicode(space, w_left, w_right):
-    return space.wrap(space.unwrap(w_left) + space.unwrap(w_right))
+def getitem__Unicode_ANY(space, w_uni, w_index):
+    ival = space.int_w(w_index)
+    uni = w_uni._value
+    ulen = len(uni)
+    if ival < 0:
+        ival += ulen
+    if ival < 0 or ival >= ulen:
+        exc = space.call_function(space.w_IndexError,
+                                  space.wrap("unicode index out of range"))
+        raise OperationError(space.w_IndexError, exc)
+    return W_UnicodeObject(space, uni[ival])
 
-def contains__String_Unicode(space, w_left, w_right):
-    try:
-        return space.wrap(space.unwrap(w_right) in space.unwrap(w_left))
-    except:
-        wrap_exception(space)
+def getitem__Unicode_Slice(space, w_uni, w_slice):
+    uni = w_uni._value
+    length = len(uni)
+    start, stop, step, sl = slicetype.indices4(space, w_slice, length)
+    if step == 1:
+        return space.wrap(uni[start:stop])
+    r = [uni[start + i*step] for i in range(sl)]
+    return space.wrap(u''.join(r))
 
-def contains__Unicode_Unicode(space, w_left, w_right):
-    return space.wrap(space.unwrap(w_right) in space.unwrap(w_left))
+def mul__Unicode_ANY(space, w_uni, w_times):
+    return space.wrap(w_uni._value * space.int_w(w_times))
 
-# str.strip(unicode) needs to convert self to unicode and call unicode.strip
-def str_strip__String_Unicode(space, w_self, w_chars ):
-    self = w_self._value
-    return space.wrap( unicode(self).strip( space.unwrap(w_chars) ) )
-def str_lstrip__String_Unicode(space, w_self, w_chars ):
-    self = w_self._value
-    return space.wrap( unicode(self).lstrip( space.unwrap(w_chars) ) )
-def str_rstrip__String_Unicode(space, w_self, w_chars ):
-    self = w_self._value
-    return space.wrap( unicode(self).rstrip( space.unwrap(w_chars) ) )
-# we use the following magic to register strip_string_unicode as a String multimethod
-import stringtype
+def mul__ANY_Unicode(space, w_times, w_uni):
+    return space.wrap(w_uni._value * space.int_w(w_times))
+
+def unicode_strip__Unicode_None(space, w_self, w_chars):
+    return space.wrap(w_self._value.strip())
+def unicode_strip__Unicode_String(space, w_self, w_chars):
+    return space.wrap(w_self._value.strip(space.str_w(w_chars)))
+def unicode_strip__Unicode_Unicode(space, w_self, w_chars):
+    return space.wrap(w_self._value.strip(w_chars._value))
+
+def unicode_lstrip__Unicode_None(space, w_self, w_chars):
+    return space.wrap(w_self._value.lstrip())
+def unicode_lstrip__Unicode_String(space, w_self, w_chars):
+    return space.wrap(w_self._value.lstrip(space.str_w(w_chars)))
+def unicode_lstrip__Unicode_Unicode(space, w_self, w_chars):
+    return space.wrap(w_self._value.lstrip(w_chars._value))
 
+def unicode_rstrip__Unicode_None(space, w_self, w_chars):
+    return space.wrap(w_self._value.rstrip())
+def unicode_rstrip__Unicode_String(space, w_self, w_chars):
+    return space.wrap(w_self._value.rstrip(space.str_w(w_chars)))
+def unicode_rstrip__Unicode_Unicode(space, w_self, w_chars):
+    return space.wrap(w_self._value.rstrip(w_chars._value))
 
-register_all(vars(), stringtype)
+import unicodetype
+register_all(vars(), unicodetype)
+
+# str.strip(unicode) needs to convert self to unicode and call unicode.strip
+# we use the following magic to register strip_string_unicode as a String multimethod.
+class str_methods:
+    import stringtype
+    W_UnicodeObject = W_UnicodeObject
+    from pypy.objspace.std.stringobject import W_StringObject
+    def str_strip__String_Unicode(space, w_self, w_chars ):
+        self = w_self._value
+        return space.wrap( unicode(self).strip( space.unwrap(w_chars) ) )
+    def str_lstrip__String_Unicode(space, w_self, w_chars ):
+        self = w_self._value
+        return space.wrap( unicode(self).lstrip( space.unwrap(w_chars) ) )
+    def str_rstrip__String_Unicode(space, w_self, w_chars ):
+        self = w_self._value
+        return space.wrap( unicode(self).rstrip( space.unwrap(w_chars) ) )
+    register_all(vars(), stringtype)

Modified: pypy/branch/non-fake-unicode/pypy/objspace/std/unicodetype.py
==============================================================================
--- pypy/branch/non-fake-unicode/pypy/objspace/std/unicodetype.py	(original)
+++ pypy/branch/non-fake-unicode/pypy/objspace/std/unicodetype.py	Wed Apr 27 10:47:34 2005
@@ -1,3 +1,78 @@
-from pypy.objspace.std.fake import fake_type
+from pypy.objspace.std.stdtypedef import *
+from pypy.objspace.std.basestringtype import basestring_typedef
+from pypy.interpreter.error import OperationError
 
-unicode_typedef = fake_type(unicode).typedef
+from sys import maxint
+
+unicode_capitalize = MultiMethod('capitalize', 1)
+unicode_center     = MultiMethod('center', 2, )
+unicode_count      = MultiMethod('count', 4, defaults=(0, maxint))      
+unicode_encode     = MultiMethod('encode', 3, defaults=(None, None))
+unicode_endswith   = MultiMethod('endswith', 2) #[optional arguments not supported now]
+unicode_expandtabs = MultiMethod('expandtabs', 2, defaults=(8,))
+unicode_find       = MultiMethod('find', 4, defaults=(0, maxint))
+unicode_index      = MultiMethod('index', 4, defaults=(0, maxint))
+unicode_isalnum    = MultiMethod('isalnum', 1)
+unicode_isalpha    = MultiMethod('isalpha', 1)
+unicode_isdecimal  = MultiMethod('isdecimal', 1)
+unicode_isdigit    = MultiMethod('isdigit', 1)
+unicode_islower    = MultiMethod('islower', 1)
+unicode_isnumeric  = MultiMethod('isnumeric', 1)
+unicode_isspace    = MultiMethod('isspace', 1)
+unicode_istitle    = MultiMethod('istitle', 1)
+unicode_isupper    = MultiMethod('isupper', 1)
+unicode_join       = MultiMethod('join', 2)
+unicode_ljust      = MultiMethod('ljust', 2)
+unicode_lower      = MultiMethod('lower', 1)
+unicode_lstrip     = MultiMethod('lstrip', 2, defaults=(None,))
+unicode_replace    = MultiMethod('replace', 4, defaults=(-1,))
+unicode_rfind      = MultiMethod('rfind', 4, defaults=(0, maxint))
+unicode_rindex     = MultiMethod('rindex', 4, defaults=(0, maxint))
+unicode_rjust      = MultiMethod('rjust', 2)
+unicode_rstrip     = MultiMethod('rstrip', 2, defaults=(None,))
+unicode_split      = MultiMethod('split', 3, defaults=(None,-1))
+unicode_splitlines = MultiMethod('splitlines', 2, defaults=(0,))
+unicode_startswith = MultiMethod('startswith', 3, defaults=(0,))
+unicode_strip      = MultiMethod('strip',  2, defaults=(None,))
+unicode_swapcase   = MultiMethod('swapcase', 1)
+unicode_title      = MultiMethod('title', 1)
+unicode_translate  = MultiMethod('translate', 3, defaults=('',))
+unicode_upper      = MultiMethod('upper', 1)
+unicode_zfill      = MultiMethod('zfill', 2)
+
+# ____________________________________________________________
+def descr__new__(space, w_unicodetype, w_obj=None, w_encoding=None, w_errors=None):
+    from pypy.objspace.std.unicodeobject import W_UnicodeObject
+    w_obj_type = space.type(w_obj)
+    
+    if space.is_w(w_obj_type, space.w_unicode):
+        if space.is_w(w_unicodetype, space.w_unicode):
+            return w_obj
+        value = space.unwrap(w_obj)
+    elif space.is_w(w_obj, space.w_None):
+        value = u''
+    elif space.is_true(space.isinstance(w_obj, space.w_unicode)):
+        value = w_obj._value
+    elif space.is_w(w_obj_type, space.w_str):
+        try:
+            if space.is_w(w_encoding, space.w_None):
+                value = unicode(space.str_w(w_obj))
+            elif space.is_w(w_errors, space.w_None): 
+                value = unicode(space.str_w(w_obj), space.str_w(w_encoding))
+            else:
+                value = unicode(space.str_w(w_obj), space.str_w(w_encoding),
+                                space.str_w(w_errors))
+        except UnicodeDecodeError, e:
+            raise OperationError(space.w_UnicodeDecodeError, space.wrap(e.reason))
+    else:
+        raise OperationError(space.w_ValueError, space.wrap('Can not create unicode from other than strings (is %r)'%w_obj_type))
+    w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype)
+    w_newobj.__init__(space, value)
+    return w_newobj
+
+# ____________________________________________________________
+
+unicode_typedef = StdTypeDef("unicode", basestring_typedef,
+    __new__ = newmethod(descr__new__),
+    )
+unicode_typedef.registermethods(globals())