[pypy-svn] r12797 - pypy/dist/pypy/lib

Wed May 25 23:17:35 CEST 2005

Author: ale
Date: Wed May 25 23:17:35 2005
New Revision: 12797

Modified:
   pypy/dist/pypy/lib/inprogress__codecs.py
   pypy/dist/pypy/lib/unicodecodec.py
Log:
_codecs.py is almost ready to get enabled. 

pypy's tests pass, but I haven't been able to run the regression tests

Modified: pypy/dist/pypy/lib/inprogress__codecs.py
==============================================================================

--- pypy/dist/pypy/lib/inprogress__codecs.py	(original)
+++ pypy/dist/pypy/lib/inprogress__codecs.py	Wed May 25 23:17:35 2005
@@ -34,8 +34,7 @@
 Copyright (c) Corporation for National Research Initiatives.
 
 """
-from unicodecodec_ import *
-
+from unicodecodec import *
 #/* --- Registry ----------------------------------------------------------- */
 codec_search_path = []
 codec_search_cache = {}
@@ -61,12 +60,24 @@
     
     result = codec_search_cache.get(encoding,None)
     if not result:
+        if len(codec_search_path) == 0:
+            import encodings
+            if len(codec_search_path) == 0:
+                raise LookupError("no codec search functions registered: can't find encoding")
+        if not isinstance(encoding,str):
+            raise TypeError("Encoding must be a string")
         for search in codec_search_path:
             result=search(encoding)
             if result :
-                codec_search_cache[encoding] = result 
-                break
+                if not( type(result) == tuple and len(result) == 4):
+                    raise TypeError("codec search functions must return 4-tuples")
+                else:
+                    codec_search_cache[encoding] = result 
+                    return result
+        if not result:
+            raise LookupError( "unknown encoding: %s", encoding)
     return result
+    
 
 lookup = codec_lookup
 
@@ -80,11 +91,15 @@
     'xmlcharrefreplace' as well as any other name registered with
     codecs.register_error that can handle ValueErrors.
     """
-    
-    encoder = lookup(encoding)[0]
-    if encoder :
-        res = encoder(v,errors)
-    return res[0]
+    if isinstance(encoding,str):
+        encoder = lookup(encoding)[0]
+        if encoder and isinstance(errors,str):
+            res = encoder(v,errors)
+            return res[0]
+        else:
+            raise TypeError("Errors must be a string")
+    else:
+        raise TypeError("Encoding must be a string")
 
 def decode(obj,encoding='defaultencoding',errors='strict'):
     """decode(obj, [encoding[,errors]]) -> object
@@ -96,12 +111,15 @@
     as well as any other name registerd with codecs.register_error that is
     able to handle ValueErrors.
     """
-    decoder = lookup(encoding)[1]
-    if decoder:
-        res = decoder(obj,errors)
+    if isinstance(encoding,str):
+        decoder = lookup(encoding)[1]
+        if decoder and isinstance(errors,str):
+            res = decoder(v,errors)
+            return res[0]
+        else:
+            raise TypeError("Errors must be a string")
     else:
-        raise LookupError("No such encoding")
-    return res[0]
+        raise TypeError("Encoding must be a string")
 
 def latin_1_encode( obj,errors='strict'):
     """None
@@ -132,7 +150,7 @@
     """None
     """
     res = PyUnicode_DecodeUTF8Stateful(data, len(data), errors, final)
-    res = ''.join(res)
+    res = u''.join(res)
     return res,len(res)
 
 def raw_unicode_escape_decode( data,errors='strict'):
@@ -145,7 +163,7 @@
 def utf_7_decode( data,errors='strict'):
     """None
     """
-    res = PyUnicode_DecodeUTF7(data,errors='strict')
+    res = PyUnicode_DecodeUTF7(data,len(data),errors='strict')
     res = ''.join(res)
     return res,len(res)
 
@@ -160,7 +178,7 @@
     """None
     """
     res = PyUnicode_DecodeLatin1(data,len(data),errors)
-    res = ''.join(res)
+    res = u''.join(res)
     return res, len(res)
 
 def utf_16_decode( data,errors='strict',final=None):
@@ -182,7 +200,7 @@
     """None
     """
     res = PyUnicode_DecodeASCII(data,len(data),errors)
-    res = ''.join(res)
+    res = u''.join(res)
     return res, len(res)
 
 def charmap_encode(obj,errors='strict',mapping='latin-1'):

Modified: pypy/dist/pypy/lib/unicodecodec.py
==============================================================================
--- pypy/dist/pypy/lib/unicodecodec.py	(original)
+++ pypy/dist/pypy/lib/unicodecodec.py	Wed May 25 23:17:35 2005
@@ -190,8 +190,8 @@
             i+=1
 
     if (inShift) :
-        outpos = p-PyUnicode_AS_UNICODE(unicode);
-        endinpos = size;
+        #XXX This aint right
+        endinpos = size
         raise UnicodeDecodeError, "unterminated shift sequence"
         
     return p
@@ -232,8 +232,8 @@
             else:
                 bitsleft += 16
                 charsleft += ch #((ord(charsleft) << 16) | ord(ch))
-                out, charsleft, bitsleft =  ENCODE(out, charsleft, bitsleft)
-
+                p, bitsleft =  ENCODE(charsleft, bitsleft)
+                out += p
 ##                /* If the next character is special then we dont' need to terminate
 ##                   the shift sequence. If the next character is not a BASE64 character
 ##                   or '-' then the shift sequence will be terminated implicitly and we
@@ -401,22 +401,22 @@
 
 #    /* ASCII is equivalent to the first 128 ordinals in Unicode. */
     if (size == 1 and ord(s) < 128) :
-        return PyUnicode_FromUnicode(unicode(s), 1)
+        return [unichr(ord(s))]
     if (size == 0):
-        return unicode('')
+        return [u''] #unicode('')
     p = []
     pos = 0
     while pos < len(s):
         c = s[pos]
         if ord(c) < 128:
-            p += c
+            p += unichr(ord(c))
             pos += 1
         else:
             
             res = unicode_call_errorhandler(
                     errors, "ascii", "ordinal not in range(128)",
                     s,  pos, pos+1)
-            p += res[0]
+            p += unicode(res[0])
             pos = res[1]
     return p
 
@@ -565,7 +565,7 @@
         
     p = []
     bom = sys.byteorder
-    if (byteorder == 0):
+    if (byteorder == 'native'):
         
         bom = sys.byteorder
         p += STORECHAR(0xFEFF,bom)
@@ -573,12 +573,12 @@
     if (size == 0):
         return ""
 
-    if (byteorder == -1):
+    if (byteorder == 'little' ):
         bom = 'little'
-    elif (byteorder == 1):
+    elif (byteorder == 'big'):
         bom = 'big'
 
-    
+
     for c in s:
         ch = ord(c)
         ch2 = 0
@@ -845,7 +845,7 @@
 
 def PyUnicode_EncodeUTF8(s,size,errors):
 
-    assert(s != None)
+    #assert(s != None)
     assert(size >= 0)
     p = []
     i = 0
@@ -892,12 +892,12 @@
 
 def PyUnicode_DecodeLatin1(s, size, errors):
     #/* Latin-1 is equivalent to the first 256 ordinals in Unicode. */
-    if (size == 1):
-        return [PyUnicode_FromUnicode(s, 1)]
+##    if (size == 1):
+##        return [PyUnicode_FromUnicode(s, 1)]
     pos = 0
     p = []
     while (pos < size):
-        p += s[pos]
+        p += unichr(ord(s[pos]))
         pos += 1
     return p
 
@@ -911,16 +911,13 @@
         encoding = "ascii"
     
     if (size == 0):
-        return ''
+        return ['']
     res = []
     pos=0
     while pos < len(p):
     #for ch in p:
         ch = p[pos]
-        try:
-            ord(ch)
-        except TypeError:
-            print "Typeerror",ch,type(ch)
+        
         if ord(ch) < limit:
             res += chr(ord(ch))
             pos += 1
@@ -933,6 +930,7 @@
             x = unicode_call_errorhandler(errors,encoding,reason,p,collstart,collend,False)
             res += str(x[0])
             pos = x[1]
+    
     return res
 
 def PyUnicode_EncodeLatin1(p,size,errors):
@@ -983,7 +981,7 @@
 
     if (size == 0):
         return u''
-
+    
     p = []
     pos = 0
     while (pos < size): 
@@ -1044,7 +1042,7 @@
     
           #  /* \UXXXXXXXX */
             elif ch == 'U':
-                digits = 8;
+                digits = 8
                 message = "truncated \\UXXXXXXXX escape";
                 x = hexescape(s,pos+1,digits,message,errors)
                 p += x[0]
@@ -1052,6 +1050,7 @@
 ##        /* \N{name} */
             elif ch == 'N':
                 message = "malformed \\N character escape"
+                pos += 1
                 try:
                     import unicodedata
                 except ImportError:
@@ -1068,8 +1067,9 @@
                         look += 1
                         try:
                             chr = unicodedata.lookup(s[pos:look])
+                            #x = hexescape(chr,pos+1,8,message,errors)
                         except KeyError:
-                            x=unicode_call_errorhandler(errors,"unicodeescape",message,s,pos-1,size)
+                            x=unicode_call_errorhandler(errors,"unicodeescape",message,s,pos-1,look)
                         else:
                             x = hexescape(s,pos+1,look-pos,message,errors)
                         p += x[0]
@@ -1115,15 +1115,13 @@
 
     
     rep = mapping[c]
-    if not rep:
-        raise UnicodeError
     if isinstance(rep,(int,long)):
         if rep<256:
             return chr(rep)
         else:
             raise TypeError
-    elif isinstance(rep,unicode):
-        raise TypeError
+##    elif isinstance(rep,unicode):
+##        raise TypeError
     else:
         return rep