[pypy-svn] r12797 - pypy/dist/pypy/lib
ale at codespeak.net
ale at codespeak.net
Wed May 25 23:17:35 CEST 2005
Author: ale
Date: Wed May 25 23:17:35 2005
New Revision: 12797
Modified:
pypy/dist/pypy/lib/inprogress__codecs.py
pypy/dist/pypy/lib/unicodecodec.py
Log:
_codecs.py is almost ready to get enabled.
pypy's tests pass, but I haven't been able to run the regression tests
Modified: pypy/dist/pypy/lib/inprogress__codecs.py
==============================================================================
--- pypy/dist/pypy/lib/inprogress__codecs.py (original)
+++ pypy/dist/pypy/lib/inprogress__codecs.py Wed May 25 23:17:35 2005
@@ -34,8 +34,7 @@
Copyright (c) Corporation for National Research Initiatives.
"""
-from unicodecodec_ import *
-
+from unicodecodec import *
#/* --- Registry ----------------------------------------------------------- */
codec_search_path = []
codec_search_cache = {}
@@ -61,12 +60,24 @@
result = codec_search_cache.get(encoding,None)
if not result:
+ if len(codec_search_path) == 0:
+ import encodings
+ if len(codec_search_path) == 0:
+ raise LookupError("no codec search functions registered: can't find encoding")
+ if not isinstance(encoding,str):
+ raise TypeError("Encoding must be a string")
for search in codec_search_path:
result=search(encoding)
if result :
- codec_search_cache[encoding] = result
- break
+ if not( type(result) == tuple and len(result) == 4):
+ raise TypeError("codec search functions must return 4-tuples")
+ else:
+ codec_search_cache[encoding] = result
+ return result
+ if not result:
+ raise LookupError( "unknown encoding: %s", encoding)
return result
+
lookup = codec_lookup
@@ -80,11 +91,15 @@
'xmlcharrefreplace' as well as any other name registered with
codecs.register_error that can handle ValueErrors.
"""
-
- encoder = lookup(encoding)[0]
- if encoder :
- res = encoder(v,errors)
- return res[0]
+ if isinstance(encoding,str):
+ encoder = lookup(encoding)[0]
+ if encoder and isinstance(errors,str):
+ res = encoder(v,errors)
+ return res[0]
+ else:
+ raise TypeError("Errors must be a string")
+ else:
+ raise TypeError("Encoding must be a string")
def decode(obj,encoding='defaultencoding',errors='strict'):
"""decode(obj, [encoding[,errors]]) -> object
@@ -96,12 +111,15 @@
as well as any other name registerd with codecs.register_error that is
able to handle ValueErrors.
"""
- decoder = lookup(encoding)[1]
- if decoder:
- res = decoder(obj,errors)
+ if isinstance(encoding,str):
+ decoder = lookup(encoding)[1]
+ if decoder and isinstance(errors,str):
+ res = decoder(v,errors)
+ return res[0]
+ else:
+ raise TypeError("Errors must be a string")
else:
- raise LookupError("No such encoding")
- return res[0]
+ raise TypeError("Encoding must be a string")
def latin_1_encode( obj,errors='strict'):
"""None
@@ -132,7 +150,7 @@
"""None
"""
res = PyUnicode_DecodeUTF8Stateful(data, len(data), errors, final)
- res = ''.join(res)
+ res = u''.join(res)
return res,len(res)
def raw_unicode_escape_decode( data,errors='strict'):
@@ -145,7 +163,7 @@
def utf_7_decode( data,errors='strict'):
"""None
"""
- res = PyUnicode_DecodeUTF7(data,errors='strict')
+ res = PyUnicode_DecodeUTF7(data,len(data),errors='strict')
res = ''.join(res)
return res,len(res)
@@ -160,7 +178,7 @@
"""None
"""
res = PyUnicode_DecodeLatin1(data,len(data),errors)
- res = ''.join(res)
+ res = u''.join(res)
return res, len(res)
def utf_16_decode( data,errors='strict',final=None):
@@ -182,7 +200,7 @@
"""None
"""
res = PyUnicode_DecodeASCII(data,len(data),errors)
- res = ''.join(res)
+ res = u''.join(res)
return res, len(res)
def charmap_encode(obj,errors='strict',mapping='latin-1'):
Modified: pypy/dist/pypy/lib/unicodecodec.py
==============================================================================
--- pypy/dist/pypy/lib/unicodecodec.py (original)
+++ pypy/dist/pypy/lib/unicodecodec.py Wed May 25 23:17:35 2005
@@ -190,8 +190,8 @@
i+=1
if (inShift) :
- outpos = p-PyUnicode_AS_UNICODE(unicode);
- endinpos = size;
+ #XXX This aint right
+ endinpos = size
raise UnicodeDecodeError, "unterminated shift sequence"
return p
@@ -232,8 +232,8 @@
else:
bitsleft += 16
charsleft += ch #((ord(charsleft) << 16) | ord(ch))
- out, charsleft, bitsleft = ENCODE(out, charsleft, bitsleft)
-
+ p, bitsleft = ENCODE(charsleft, bitsleft)
+ out += p
## /* If the next character is special then we dont' need to terminate
## the shift sequence. If the next character is not a BASE64 character
## or '-' then the shift sequence will be terminated implicitly and we
@@ -401,22 +401,22 @@
# /* ASCII is equivalent to the first 128 ordinals in Unicode. */
if (size == 1 and ord(s) < 128) :
- return PyUnicode_FromUnicode(unicode(s), 1)
+ return [unichr(ord(s))]
if (size == 0):
- return unicode('')
+ return [u''] #unicode('')
p = []
pos = 0
while pos < len(s):
c = s[pos]
if ord(c) < 128:
- p += c
+ p += unichr(ord(c))
pos += 1
else:
res = unicode_call_errorhandler(
errors, "ascii", "ordinal not in range(128)",
s, pos, pos+1)
- p += res[0]
+ p += unicode(res[0])
pos = res[1]
return p
@@ -565,7 +565,7 @@
p = []
bom = sys.byteorder
- if (byteorder == 0):
+ if (byteorder == 'native'):
bom = sys.byteorder
p += STORECHAR(0xFEFF,bom)
@@ -573,12 +573,12 @@
if (size == 0):
return ""
- if (byteorder == -1):
+ if (byteorder == 'little' ):
bom = 'little'
- elif (byteorder == 1):
+ elif (byteorder == 'big'):
bom = 'big'
-
+
for c in s:
ch = ord(c)
ch2 = 0
@@ -845,7 +845,7 @@
def PyUnicode_EncodeUTF8(s,size,errors):
- assert(s != None)
+ #assert(s != None)
assert(size >= 0)
p = []
i = 0
@@ -892,12 +892,12 @@
def PyUnicode_DecodeLatin1(s, size, errors):
#/* Latin-1 is equivalent to the first 256 ordinals in Unicode. */
- if (size == 1):
- return [PyUnicode_FromUnicode(s, 1)]
+## if (size == 1):
+## return [PyUnicode_FromUnicode(s, 1)]
pos = 0
p = []
while (pos < size):
- p += s[pos]
+ p += unichr(ord(s[pos]))
pos += 1
return p
@@ -911,16 +911,13 @@
encoding = "ascii"
if (size == 0):
- return ''
+ return ['']
res = []
pos=0
while pos < len(p):
#for ch in p:
ch = p[pos]
- try:
- ord(ch)
- except TypeError:
- print "Typeerror",ch,type(ch)
+
if ord(ch) < limit:
res += chr(ord(ch))
pos += 1
@@ -933,6 +930,7 @@
x = unicode_call_errorhandler(errors,encoding,reason,p,collstart,collend,False)
res += str(x[0])
pos = x[1]
+
return res
def PyUnicode_EncodeLatin1(p,size,errors):
@@ -983,7 +981,7 @@
if (size == 0):
return u''
-
+
p = []
pos = 0
while (pos < size):
@@ -1044,7 +1042,7 @@
# /* \UXXXXXXXX */
elif ch == 'U':
- digits = 8;
+ digits = 8
message = "truncated \\UXXXXXXXX escape";
x = hexescape(s,pos+1,digits,message,errors)
p += x[0]
@@ -1052,6 +1050,7 @@
## /* \N{name} */
elif ch == 'N':
message = "malformed \\N character escape"
+ pos += 1
try:
import unicodedata
except ImportError:
@@ -1068,8 +1067,9 @@
look += 1
try:
chr = unicodedata.lookup(s[pos:look])
+ #x = hexescape(chr,pos+1,8,message,errors)
except KeyError:
- x=unicode_call_errorhandler(errors,"unicodeescape",message,s,pos-1,size)
+ x=unicode_call_errorhandler(errors,"unicodeescape",message,s,pos-1,look)
else:
x = hexescape(s,pos+1,look-pos,message,errors)
p += x[0]
@@ -1115,15 +1115,13 @@
rep = mapping[c]
- if not rep:
- raise UnicodeError
if isinstance(rep,(int,long)):
if rep<256:
return chr(rep)
else:
raise TypeError
- elif isinstance(rep,unicode):
- raise TypeError
+## elif isinstance(rep,unicode):
+## raise TypeError
else:
return rep
More information about the Pypy-commit
mailing list