[pypy-svn] r17150 - pypy/dist/pypy/module/_codecs
ale at codespeak.net
ale at codespeak.net
Thu Sep 1 13:01:21 CEST 2005
Author: ale
Date: Thu Sep 1 13:01:20 2005
New Revision: 17150
Modified:
pypy/dist/pypy/module/_codecs/app_codecs.py
Log:
Cleanup of app_codecs. Sorry for the large diff.
This change tries to follow the coding conventions.
Modified: pypy/dist/pypy/module/_codecs/app_codecs.py
==============================================================================
--- pypy/dist/pypy/module/_codecs/app_codecs.py (original)
+++ pypy/dist/pypy/module/_codecs/app_codecs.py Thu Sep 1 13:01:20 2005
@@ -124,22 +124,22 @@
"""
if encoding == None:
encoding = sys.getdefaultencoding()
- if isinstance(encoding,str):
+ if isinstance(encoding, str):
decoder = lookup(encoding)[1]
- if decoder and isinstance(errors,str):
- res = decoder(obj,errors)
- if not isinstance(res,tuple) or len(res) != 2:
- raise TypeError("encoder must return a tuple (object,integer)")
+ if decoder and isinstance(errors, str):
+ res = decoder(obj, errors)
+ if not isinstance(res, tuple) or len(res) != 2:
+ raise TypeError("encoder must return a tuple (object, integer)")
return res[0]
else:
raise TypeError("Errors must be a string")
else:
raise TypeError("Encoding must be a string")
-def latin_1_encode( obj,errors='strict'):
+def latin_1_encode( obj, errors='strict'):
"""None
"""
- res = PyUnicode_EncodeLatin1(obj,len(obj),errors)
+ res = PyUnicode_EncodeLatin1(obj, len(obj), errors)
res = ''.join(res)
return res, len(res)
# XXX MBCS codec might involve ctypes ?
@@ -148,87 +148,87 @@
"""
pass
-def readbuffer_encode( obj,errors='strict'):
+def readbuffer_encode( obj, errors='strict'):
"""None
"""
res = str(obj)
- return res,len(res)
+ return res, len(res)
-def escape_encode( obj,errors='strict'):
+def escape_encode( obj, errors='strict'):
"""None
"""
s = repr(obj)
v = s[1:-1]
- return v,len(v)
+ return v, len(v)
-def utf_8_decode( data,errors='strict',final=False):
+def utf_8_decode( data, errors='strict', final=False):
"""None
"""
consumed = len(data)
if final:
consumed = 0
- res,consumed = PyUnicode_DecodeUTF8Stateful(data, len(data), errors, final)
+ res, consumed = PyUnicode_DecodeUTF8Stateful(data, len(data), errors, final)
res = u''.join(res)
return res, consumed
-def raw_unicode_escape_decode( data,errors='strict'):
+def raw_unicode_escape_decode( data, errors='strict'):
"""None
"""
res = PyUnicode_DecodeRawUnicodeEscape(data, len(data), errors)
res = u''.join(res)
- return res,len(res)
+ return res, len(res)
-def utf_7_decode( data,errors='strict'):
+def utf_7_decode( data, errors='strict'):
"""None
"""
- res = PyUnicode_DecodeUTF7(data,len(data),errors)
+ res = PyUnicode_DecodeUTF7(data, len(data), errors)
res = u''.join(res)
- return res,len(res)
+ return res, len(res)
-def unicode_escape_encode( obj,errors='strict'):
+def unicode_escape_encode( obj, errors='strict'):
"""None
"""
- res = unicodeescape_string(obj,len(obj),0)
+ res = unicodeescape_string(obj, len(obj), 0)
res = ''.join(res)
return res, len(res)
-def latin_1_decode( data,errors='strict'):
+def latin_1_decode( data, errors='strict'):
"""None
"""
- res = PyUnicode_DecodeLatin1(data,len(data),errors)
+ res = PyUnicode_DecodeLatin1(data, len(data), errors)
res = u''.join(res)
return res, len(res)
-def utf_16_decode( data,errors='strict',final=False):
+def utf_16_decode( data, errors='strict', final=False):
"""None
"""
consumed = len(data)
if final:
consumed = 0
- res,consumed,byteorder = PyUnicode_DecodeUTF16Stateful(data,len(data),errors,'native',final)
+ res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(data, len(data), errors, 'native', final)
res = ''.join(res)
return res, consumed
-def unicode_escape_decode( data,errors='strict'):
+def unicode_escape_decode( data, errors='strict'):
"""None
"""
- res = PyUnicode_DecodeUnicodeEscape(data,len(data),errors)
+ res = PyUnicode_DecodeUnicodeEscape(data, len(data), errors)
res = u''.join(res)
return res, len(res)
-def ascii_decode( data,errors='strict'):
+def ascii_decode( data, errors='strict'):
"""None
"""
- res = PyUnicode_DecodeASCII(data,len(data),errors)
+ res = PyUnicode_DecodeASCII(data, len(data), errors)
res = u''.join(res)
return res, len(res)
-def charmap_encode(obj,errors='strict',mapping='latin-1'):
+def charmap_encode(obj, errors='strict', mapping='latin-1'):
"""None
"""
- res = PyUnicode_EncodeCharmap(obj,len(obj),mapping,errors)
+ res = PyUnicode_EncodeCharmap(obj, len(obj), mapping, errors)
res = ''.join(res)
return res, len(res)
@@ -237,7 +237,7 @@
else:
unicode_bytes = 4
-def unicode_internal_encode( obj,errors='strict'):
+def unicode_internal_encode( obj, errors='strict'):
"""None
"""
if type(obj) == unicode:
@@ -255,16 +255,16 @@
return res, len(res)
else:
res = "You can do better than this" # XXX make this right
- return res,len(res)
+ return res, len(res)
-def unicode_internal_decode( unistr,errors='strict'):
+def unicode_internal_decode( unistr, errors='strict'):
"""None
"""
if type(unistr) == unicode:
- return unistr,len(unistr)
+ return unistr, len(unistr)
else:
- p=[]
- i=0
+ p = []
+ i = 0
if sys.byteorder == "big":
start = unicode_bytes - 1
stop = -1
@@ -296,18 +296,18 @@
consumed = len(data)
if final:
consumed = 0
- res,consumed,byteorder = PyUnicode_DecodeUTF16Stateful(data, len(data), errors, bm, consumed)
+ res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(data, len(data), errors, bm, consumed)
res = ''.join(res)
return res, consumed, byteorder
# XXX needs error messages when the input is invalid
-def escape_decode(data,errors='strict'):
+def escape_decode(data, errors='strict'):
"""None
"""
l = len(data)
i = 0
res = []
- while i<l:
+ while i < l:
if data[i] == '\\':
i += 1
@@ -338,26 +338,26 @@
# emulate a strange wrap-around behavior of CPython:
# \400 is the same as \000 because 0400 == 256
octal = data[i:i+3]
- res += chr(int(octal,8) & 0xFF)
+ res += chr(int(octal, 8) & 0xFF)
i += 2
elif data[i] == 'x':
hexa = data[i+1:i+3]
- res += chr(int(hexa,16))
+ res += chr(int(hexa, 16))
i += 2
else:
res += data[i]
i += 1
res = ''.join(res)
- return res,len(res)
+ return res, len(res)
-def charbuffer_encode( obj,errors='strict'):
+def charbuffer_encode( obj, errors='strict'):
"""None
"""
res = str(obj)
res = ''.join(res)
return res, len(res)
-def charmap_decode( data,errors='strict',mapping=None):
+def charmap_decode( data, errors='strict', mapping=None):
"""None
"""
res = PyUnicode_DecodeCharmap(data, len(data), mapping, errors)
@@ -365,14 +365,14 @@
return res, len(res)
-def utf_7_encode( obj,errors='strict'):
+def utf_7_encode( obj, errors='strict'):
"""None
"""
- res = PyUnicode_EncodeUTF7(obj,len(obj),0,0,errors)
+ res = PyUnicode_EncodeUTF7(obj, len(obj), 0, 0, errors)
res = ''.join(res)
return res, len(res)
-def mbcs_encode( obj,errors='strict'):
+def mbcs_encode( obj, errors='strict'):
"""None
"""
pass
@@ -383,117 +383,119 @@
## len(obj))
-def ascii_encode( obj,errors='strict'):
+def ascii_encode( obj, errors='strict'):
"""None
"""
- res = PyUnicode_EncodeASCII(obj,len(obj),errors)
+ res = PyUnicode_EncodeASCII(obj, len(obj), errors)
res = ''.join(res)
return res, len(res)
-def utf_16_encode( obj,errors='strict'):
+def utf_16_encode( obj, errors='strict'):
"""None
"""
- res = PyUnicode_EncodeUTF16(obj,len(obj),errors,'native')
+ res = PyUnicode_EncodeUTF16(obj, len(obj), errors, 'native')
res = ''.join(res)
return res, len(res)
-def raw_unicode_escape_encode( obj,errors='strict'):
+def raw_unicode_escape_encode( obj, errors='strict'):
"""None
"""
- res = PyUnicode_EncodeRawUnicodeEscape(obj,len(obj))
+ res = PyUnicode_EncodeRawUnicodeEscape(obj, len(obj))
res = ''.join(res)
return res, len(res)
-def utf_8_encode( obj,errors='strict'):
+def utf_8_encode( obj, errors='strict'):
"""None
"""
- res = PyUnicode_EncodeUTF8(obj,len(obj),errors)
+ res = PyUnicode_EncodeUTF8(obj, len(obj), errors)
res = ''.join(res)
return res, len(res)
-def utf_16_le_encode( obj,errors='strict'):
+def utf_16_le_encode( obj, errors='strict'):
"""None
"""
- res = PyUnicode_EncodeUTF16(obj,len(obj),errors,'little')
+ res = PyUnicode_EncodeUTF16(obj, len(obj), errors, 'little')
res = ''.join(res)
return res, len(res)
-def utf_16_be_encode( obj,errors='strict'):
+def utf_16_be_encode( obj, errors='strict'):
"""None
"""
- res = PyUnicode_EncodeUTF16(obj,len(obj),errors,'big')
+ res = PyUnicode_EncodeUTF16(obj, len(obj), errors, 'big')
res = ''.join(res)
return res, len(res)
-def utf_16_le_decode( data,errors='strict',byteorder=0, final = 0):
+def utf_16_le_decode( data, errors='strict', byteorder=0, final = 0):
"""None
"""
consumed = len(data)
if final:
- consumed = 0
- res,consumed,byteorder = PyUnicode_DecodeUTF16Stateful(data,len(data),errors,'little',consumed)
+ consumed = 0
+ res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(data, len(data), errors, 'little', consumed)
res = u''.join(res)
return res, consumed
-def utf_16_be_decode( data,errors='strict',byteorder=0, final = 0):
+def utf_16_be_decode( data, errors='strict', byteorder=0, final = 0):
"""None
"""
consumed = len(data)
if final:
- consumed = 0
- res,consumed,byteorder = PyUnicode_DecodeUTF16Stateful(data,len(data),errors,'big',consumed)
+ consumed = 0
+ res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(data, len(data), errors, 'big', consumed)
res = u''.join(res)
return res, consumed
def strict_errors(exc):
- if isinstance(exc,Exception):
+ if isinstance(exc, Exception):
raise exc
else:
raise TypeError("codec must pass exception instance")
def ignore_errors(exc):
- if isinstance(exc,(UnicodeEncodeError,UnicodeDecodeError,UnicodeTranslateError)):
- return u'',exc.end
- else:
+ if isinstance(exc, UnicodeEncodeError):
+ return u'', exc.end
+ elif isinstance(exc, (UnicodeDecodeError, UnicodeTranslateError)):
+ return u'', exc.end
+ else:
raise TypeError("don't know how to handle %.400s in error callback"%exc)
Py_UNICODE_REPLACEMENT_CHARACTER = u"\ufffd"
def replace_errors(exc):
- if isinstance(exc,UnicodeEncodeError):
- return u'?'*(exc.end-exc.start),exc.end
- elif isinstance(exc,(UnicodeTranslateError,UnicodeDecodeError)):
- return Py_UNICODE_REPLACEMENT_CHARACTER*(exc.end-exc.start),exc.end
+ if isinstance(exc, UnicodeEncodeError):
+ return u'?'*(exc.end-exc.start), exc.end
+ elif isinstance(exc, (UnicodeTranslateError, UnicodeDecodeError)):
+ return Py_UNICODE_REPLACEMENT_CHARACTER*(exc.end-exc.start), exc.end
else:
raise TypeError("don't know how to handle %.400s in error callback"%exc)
def xmlcharrefreplace_errors(exc):
- if isinstance(exc,UnicodeEncodeError):
+ if isinstance(exc, UnicodeEncodeError):
res = []
for ch in exc.object[exc.start:exc.end]:
res += '&#'
res += str(ord(ch))
res += ';'
- return u''.join(res),exc.end
+ return ''.join(res), exc.end
else:
raise TypeError("don't know how to handle %.400s in error callback"%type(exc))
def backslashreplace_errors(exc):
- if isinstance(exc,UnicodeEncodeError):
- p=[]
+ if isinstance(exc, UnicodeEncodeError):
+ p = []
for c in exc.object[exc.start:exc.end]:
- p.append('\\')
+ p += '\\'
oc = ord(c)
if (oc >= 0x00010000):
- p.append('U')
- p.append("%.8x" % ord(c))
+ p += 'U'
+ p += "%.8x" % ord(c)
elif (oc >= 0x100):
- p.append('u')
- p.append("%.4x" % ord(c))
+ p += 'u'
+ p += "%.4x" % ord(c)
else:
- p.append('x')
- p.append("%.2x" % ord(c))
- return u''.join(p),exc.end
+ p += 'x'
+ p += "%.2x" % ord(c)
+ return ''.join(p), exc.end
else:
raise TypeError("don't know how to handle %.400s in error callback"%type(exc))
@@ -526,7 +528,7 @@
3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 1, 1,
]
-unicode_latin1=[None]*256
+unicode_latin1 = [None]*256
def lookup_error(errors):
@@ -556,11 +558,11 @@
else:
raise TypeError("handler must be callable")
-register_error("strict",strict_errors)
-register_error("ignore",ignore_errors)
-register_error("replace",replace_errors)
-register_error("xmlcharrefreplace",xmlcharrefreplace_errors)
-register_error("backslashreplace",backslashreplace_errors)
+register_error("strict", strict_errors)
+register_error("ignore", ignore_errors)
+register_error("replace", replace_errors)
+register_error("xmlcharrefreplace", xmlcharrefreplace_errors)
+register_error("backslashreplace", backslashreplace_errors)
def SPECIAL(c, encodeO, encodeWS):
c = ord(c)
@@ -584,16 +586,11 @@
return ord(c) + 4
def ENCODE( ch, bits) :
- charvalue = 0
out = []
-## for c in ch:
-## charvalue <<= 16
-## charvalue += ord(c)
while (bits >= 6):
out += B64(ch >> (bits-6))
bits -= 6
- return out,bits
-
+ return out, bits
def PyUnicode_DecodeUTF7(s, size, errors):
@@ -631,7 +628,7 @@
## it in a 16-bit character
surrogate = 1
msg = "code pairs are not supported"
- out,x = unicode_call_errorhandler(errors,'utf-7',msg,s,i-1,i)
+ out, x = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i)
p += out
bitsleft = 0
break
@@ -643,7 +640,7 @@
## bitsleft < 6 then we could just classify it as padding
## but that is not the case here */
msg = "partial character in shift sequence"
- out,x = unicode_call_errorhandler(errors,'utf-7',msg,s,i-1,i)
+ out, x = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i)
## /* According to RFC2152 the remaining bits should be zero. We
## choose to signal an error/insert a replacement character
@@ -657,32 +654,32 @@
p += '-'
inShift = 1
- elif SPECIAL(ch,0,0) :
- raise UnicodeDecodeError,"unexpected special character"
+ elif SPECIAL(ch, 0, 0) :
+ raise UnicodeDecodeError, "unexpected special character"
else:
p += ch
else:
charsleft = (charsleft << 6) | UB64(ch)
bitsleft += 6
- i+=1
+ i += 1
## /* p, charsleft, bitsleft, surrogate = */ DECODE(p, charsleft, bitsleft, surrogate);
elif ( ch == '+' ):
startinpos = i
- i+=1
+ i += 1
if (i<size and s[i] == '-'):
- i+=1
+ i += 1
p += '+'
else:
inShift = 1
bitsleft = 0
- elif (SPECIAL(ch,0,0)):
- i+=1
- raise UnicodeDecodeError,"unexpected special character"
+ elif (SPECIAL(ch, 0, 0)):
+ i += 1
+ raise UnicodeDecodeError, "unexpected special character"
else:
p += ch
- i+=1
+ i += 1
if (inShift) :
#XXX This aint right
@@ -748,7 +745,7 @@
else:
out += '-'
inShift = False
- i+=1
+ i += 1
if (bitsleft):
out += B64(charsleft << (6-bitsleft) )
@@ -756,30 +753,14 @@
return out
-unicode_empty=u''
-
-##def PyUnicode_Decode(s,size,encoding,errors):
-##
-## if (encoding == None):
-## encoding = PyUnicode_GetDefaultEncoding()
-##
-#### /* Shortcuts for common default encodings */
-## decoder = encodings.get(encoding,None)
-## if decoder:
-## return decoder(s,encoding,errors)
-#### /* Decode via the codec registry */
-## buf = buffer(s)
-## result = PyCodec_Decode(buf, encoding, errors)
-## if (not isinstance(result,unicode)):
-## raise UnicodeDecodeError, "decoder did not return an unicode object (type=%.400s)"%type(result)
-## return result
+unicode_empty = u''
def unicodeescape_string(s, size, quotes):
p = []
if (quotes) :
p += 'u'
- if (s.find('\'')!=-1 and s.find('"')==-1):
+ if (s.find('\'') != -1 and s.find('"') == -1):
p += '"'
else:
p += '\''
@@ -798,7 +779,7 @@
elif (ord(ch) >= 0x10000):
p += '\\'
p += 'U'
- p += '%08x'%ord(ch)
+ p += '%08x' % ord(ch)
pos += 1
continue
#endif
@@ -811,7 +792,7 @@
ucs = (((ord(ch) & 0x03FF) << 10) | (ord(ch2) & 0x03FF)) + 0x00010000
p += '\\'
p += 'U'
- p += '%08x'%ucs
+ p += '%08x' % ucs
pos += 1
continue
@@ -822,7 +803,7 @@
if (ord(ch) >= 256):
p += '\\'
p += 'u'
- p += '%04x'%ord(ch)
+ p += '%04x' % ord(ch)
#/* Map special whitespace to '\t', \n', '\r' */
elif (ch == '\t'):
@@ -841,7 +822,7 @@
elif (ch < ' ' or ch >= 0x7F) :
p += '\\'
p += 'x'
- p += '%02x'%ord(ch)
+ p += '%02x' % ord(ch)
#/* Copy everything else as-is */
else:
p += chr(ord(ch))
@@ -873,7 +854,7 @@
pos = res[1]
return p
-def PyUnicode_EncodeASCII(p,size,errors):
+def PyUnicode_EncodeASCII(p, size, errors):
return unicode_encode_ucs1(p, size, errors, 128)
@@ -885,7 +866,7 @@
len(unicode),
None)
-def PyUnicode_DecodeUTF16Stateful(s,size,errors,byteorder='native',final=True):
+def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder='native', final=True):
bo = 0 #/* assume native ordering by default */
consumed = 0
@@ -912,26 +893,26 @@
bom = (ord(s[ihi]) << 8) | ord(s[ilo])
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
if sys.byteorder == 'little':
- if (bom == 0xFEFF):
- q += 2
- bo = -1
- elif bom == 0xFFFE:
- q += 2
- bo = 1
+ if (bom == 0xFEFF):
+ q += 2
+ bo = -1
+ elif bom == 0xFFFE:
+ q += 2
+ bo = 1
else:
- if bom == 0xFEFF:
- q += 2
- bo = 1
- elif bom == 0xFFFE:
- q += 2
- bo = -1
+ if bom == 0xFEFF:
+ q += 2
+ bo = 1
+ elif bom == 0xFFFE:
+ q += 2
+ bo = -1
elif byteorder == 'little':
bo = -1
else:
bo = 1
if (size == 0):
- return [u''],0,bo
+ return [u''], 0, bo
if (bo == -1):
#/* force LE */
@@ -952,7 +933,7 @@
errmsg = "truncated data"
startinpos = q
endinpos = len(s)
- unicode_call_errorhandler(errors,'utf-16',errmsg,s,startinpos,endinpos,True)
+ unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True)
# /* The remaining input chars are ignored if the callback
## chooses to skip the input */
@@ -960,15 +941,15 @@
q += 2
if (ch < 0xD800 or ch > 0xDFFF):
- p += unichr(ch)
- continue
+ p += unichr(ch)
+ continue
#/* UTF-16 code pair: */
if (q >= len(s)):
errmsg = "unexpected end of data"
startinpos = q-2
endinpos = len(s)
- unicode_call_errorhandler(errors,'utf-16',errmsg,s,startinpos,endinpos,True)
+ unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True)
if (0xD800 <= ch and ch <= 0xDBFF):
ch2 = (ord(s[q+ihi]) << 8) | ord(s[q+ilo])
@@ -987,12 +968,12 @@
errmsg = "illegal UTF-16 surrogate"
startinpos = q-4
endinpos = startinpos+2
- unicode_call_errorhandler(errors,'utf-16',errmsg,s,startinpos,endinpos,True)
+ unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True)
errmsg = "illegal encoding"
startinpos = q-2
endinpos = startinpos+2
- unicode_call_errorhandler(errors,'utf-16',errmsg,s,startinpos,endinpos,True)
+ unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True)
return p, q, bo
@@ -1017,7 +998,7 @@
if (byteorder == 'native'):
bom = sys.byteorder
- p += STORECHAR(0xFEFF,bom)
+ p += STORECHAR(0xFEFF, bom)
if (size == 0):
return ""
@@ -1035,9 +1016,9 @@
ch2 = 0xDC00 | ((ch-0x10000) & 0x3FF)
ch = 0xD800 | ((ch-0x10000) >> 10)
- p += STORECHAR(ch,bom)
+ p += STORECHAR(ch, bom)
if (ch2):
- p +=STORECHAR(ch2,bom)
+ p += STORECHAR(ch2, bom)
return p
@@ -1047,24 +1028,9 @@
def PyUnicode_EncodeMBCS(p, size, errors):
pass
-#### /* If there are no characters, bail now! */
-## if (size==0)
-## return ""
-## from ctypes import *
-## WideCharToMultiByte = windll.kernel32.WideCharToMultiByte
-#### /* First get the size of the result */
-## mbcssize = WideCharToMultiByte(CP_ACP, 0, p, size, s, 0, None, None);
-## if (mbcssize==0)
-## raise UnicodeEncodeError, "Windows cannot decode the string %s" %p
-### More error handling required (check windows errors and such)
-##
-### /* Do the conversion */
-#### s = ' '*mbcssize
-#### if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)):
-#### raise UnicodeEncodeError, "Windows cannot decode the string %s" %p
-## return s
+
def unicode_call_errorhandler(errors, encoding,
- reason, input, startinpos, endinpos,decode=True):
+ reason, input, startinpos, endinpos, decode=True):
errorHandler = lookup_error(errors)
if decode:
@@ -1072,18 +1038,17 @@
else:
exceptionObject = UnicodeEncodeError(encoding, input, startinpos, endinpos, reason)
res = errorHandler(exceptionObject)
- if isinstance(res,tuple) and isinstance(res[0],unicode) and isinstance(res[1],int):
+ if isinstance(res, tuple) and isinstance(res[0], unicode) and isinstance(res[1], int):
newpos = res[1]
- if (newpos<0):
- newpos = len(input)+newpos
- if newpos<0 or newpos>len(input):
+ if (newpos < 0):
+ newpos = len(input) + newpos
+ if newpos < 0 or newpos > len(input):
raise IndexError( "position %d from error handler out of bounds" % newpos)
- return res[0],newpos
+ return res[0], newpos
else:
- raise TypeError("encoding error handler must return (unicode, int) tuple")
+ raise TypeError("encoding error handler must return (unicode, int) tuple, not %s" % repr(res))
def PyUnicode_DecodeUTF8(s, size, errors):
-
return PyUnicode_DecodeUTF8Stateful(s, size, errors, False)
## /* Map UTF-8 encoded prefix byte to sequence length. zero means
@@ -1107,7 +1072,7 @@
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
]
-def PyUnicode_DecodeUTF8Stateful(s,size,errors,final):
+def PyUnicode_DecodeUTF8Stateful(s, size, errors, final):
consumed = 0
if (size == 0):
@@ -1163,7 +1128,7 @@
pos = res[1]
else:
c = ((ord(s[pos]) & 0x1f) << 6) + (ord(s[pos+1]) & 0x3f)
- if c<0x80:
+ if c < 0x80:
errmsg = "illegal encoding"
endinpos = startinpos+2
res = unicode_call_errorhandler(
@@ -1240,7 +1205,7 @@
pos = res[1]
else:
#ifdef Py_UNICODE_WIDE
- if c<sys.maxunicode:
+ if c < sys.maxunicode:
p += unichr(c)
pos += n
else:
@@ -1270,15 +1235,15 @@
consumed = pos
return p, pos # consumed
-def PyUnicode_EncodeUTF8(s,size,errors):
+def PyUnicode_EncodeUTF8(s, size, errors):
#assert(s != None)
assert(size >= 0)
p = []
i = 0
- while i<size:
+ while i < size:
ch = s[i]
- i+=1
+ i += 1
if (ord(ch) < 0x80):
## /* Encode ASCII */
p += chr(ord(ch))
@@ -1290,13 +1255,13 @@
## /* Encode UCS2 Unicode ordinals */
if (ord(ch) < 0x10000):
## /* Special case: check for high surrogate */
- if (0xD800 <=ord(ch) and ord(ch) <= 0xDBFF and i != size) :
+ if (0xD800 <= ord(ch) and ord(ch) <= 0xDBFF and i != size) :
ch2 = s[i]
## /* Check for low surrogate and combine the two to
## form a UCS4 value */
if (0xDC00 <= ord(ch2) and ord(ch2) <= 0xDFFF) :
ch3 = ((ord(ch) - 0xD800) << 10 | (ord(ch2) - 0xDC00)) + 0x10000
- i+=1
+ i += 1
p.extend(encodeUCS4(ch3))
continue
## /* Fall through: handles isolated high surrogates */
@@ -1310,7 +1275,7 @@
def encodeUCS4(ch):
## /* Encode UCS4 Unicode ordinals */
- p=[]
+ p = []
p += (chr((0xf0 | (ch >> 18))))
p += (chr((0x80 | ((ch >> 12) & 0x3f))))
p += (chr((0x80 | ((ch >> 6) & 0x3f))))
@@ -1330,7 +1295,7 @@
pos += 1
return p
-def unicode_encode_ucs1(p,size,errors,limit):
+def unicode_encode_ucs1(p, size, errors, limit):
if limit == 256:
reason = "ordinal not in range(256)"
@@ -1342,7 +1307,7 @@
if (size == 0):
return ['']
res = []
- pos=0
+ pos = 0
while pos < len(p):
#for ch in p:
ch = p[pos]
@@ -1356,41 +1321,41 @@
collend = pos+1
while collend < len(p) and ord(p[collend]) >= limit:
collend += 1
- x = unicode_call_errorhandler(errors,encoding,reason,p,collstart,collend,False)
+ x = unicode_call_errorhandler(errors, encoding, reason, p, collstart, collend, False)
res += str(x[0])
pos = x[1]
return res
-def PyUnicode_EncodeLatin1(p,size,errors):
- res=unicode_encode_ucs1(p, size, errors, 256)
+def PyUnicode_EncodeLatin1(p, size, errors):
+ res = unicode_encode_ucs1(p, size, errors, 256)
return res
-hexdigits = [hex(i)[-1] for i in range(16)]+[hex(i)[-1].upper() for i in range(10,16)]
+hexdigits = [hex(i)[-1] for i in range(16)]+[hex(i)[-1].upper() for i in range(10, 16)]
-def hexescape(s,pos,digits,message,errors):
+def hexescape(s, pos, digits, message, errors):
chr = 0
p = []
if (pos+digits>len(s)):
message = "end of string in escape sequence"
- x = unicode_call_errorhandler(errors,"unicodeescape",message,s,pos-2,len(s))
+ x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-2, len(s))
p += x[0]
pos = x[1]
else:
try:
- chr = int(s[pos:pos+digits],16)
+ chr = int(s[pos:pos+digits], 16)
except ValueError:
endinpos = pos
while s[endinpos] in hexdigits:
- endinpos +=1
- x = unicode_call_errorhandler(errors,"unicodeescape",message,s,pos-2,
+ endinpos += 1
+ x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-2,
endinpos+1)
p += x[0]
pos = x[1]
#/* when we get here, chr is a 32-bit unicode character */
else:
if chr <= sys.maxunicode:
- p += [unichr(chr)]
+ p += unichr(chr)
pos += digits
elif (chr <= 0x10ffff):
@@ -1400,12 +1365,12 @@
pos += digits
else:
message = "illegal Unicode character"
- x = unicode_call_errorhandler(errors,"unicodeescape",message,s,pos-2,
+ x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-2,
pos+1)
p += x[0]
pos = x[1]
res = p
- return res,pos
+ return res, pos
def PyUnicode_DecodeUnicodeEscape(s, size, errors):
@@ -1422,45 +1387,41 @@
continue
## /* \ - Escapes */
else:
- pos +=1
- if pos>=len(s):
+ pos += 1
+ if pos >= len(s):
errmessage = "\\ at end of string"
- unicode_call_errorhandler(errors,"unicodeescape",errmessage,s,pos-1,size)
+ unicode_call_errorhandler(errors, "unicodeescape", errmessage, s, pos-1, size)
ch = s[pos]
pos += 1
## /* \x escapes */
- #if ch == '\n': break;
- if ch == '\\': p += u'\\'
+ if ch == '\\' : p += u'\\'
elif ch == '\'': p += u'\''
elif ch == '\"': p += u'\"'
- elif ch == 'b': p += u'\b'
- elif ch == 'f': p += u'\014' #/* FF */
- elif ch == 't': p += u'\t'
- elif ch == 'n': p += u'\n'
- elif ch == 'r':
- p += u'\r'
-
+ elif ch == 'b' : p += u'\b'
+ elif ch == 'f' : p += u'\014' #/* FF */
+ elif ch == 't' : p += u'\t'
+ elif ch == 'n' : p += u'\n'
+ elif ch == 'r' : p += u'\r'
elif ch == 'v': p += u'\013' #break; /* VT */
elif ch == 'a': p += u'\007' # break; /* BEL, not classic C */
-
- ## /* \OOO (octal) escapes */
- elif ch in [ '0','1', '2', '3','4', '5', '6','7']:
- x = ord(ch) - ord('0')
- ch = s[pos]
- if ('0' <= ch and ch <= '7'):
- x = (x<<3) + ord(ch) - ord('0')
- ch = s[pos+1]
- if ('0' <= ch and ch <= '7'):
- x = (x<<3) + ord(ch) - ord('0')
- pos += 2
-
+ elif ch in [ '0', '1', '2', '3', '4', '5', '6', '7']:
+ x = int(s[pos, pos+3], 8)
+ # x = ord(ch) - ord('0')
+ # ch = s[pos]
+ # if ('0' <= ch and ch <= '7'):
+ # x = (x<<3) + ord(ch) - ord('0')
+ # ch = s[pos+1]
+ # if ('0' <= ch and ch <= '7'):
+ # x = (x<<3) + ord(ch) - ord('0')
+ # pos += 2
+ pos += 3
p += unichr(x)
## /* hex escapes */
## /* \xXX */
elif ch == 'x':
digits = 2
message = "truncated \\xXX escape"
- x = hexescape(s,pos,digits,message,errors)
+ x = hexescape(s, pos, digits, message, errors)
p += x[0]
pos = x[1]
@@ -1468,7 +1429,7 @@
elif ch == 'u':
digits = 4
message = "truncated \\uXXXX escape"
- x = hexescape(s,pos,digits,message,errors)
+ x = hexescape(s, pos, digits, message, errors)
p += x[0]
pos = x[1]
@@ -1476,7 +1437,7 @@
elif ch == 'U':
digits = 8
message = "truncated \\UXXXXXXXX escape"
- x = hexescape(s,pos,digits,message,errors)
+ x = hexescape(s, pos, digits, message, errors)
p += x[0]
pos = x[1]
## /* \N{name} */
@@ -1488,7 +1449,7 @@
import unicodedata
except ImportError:
message = "\\N escapes not supported (can't load unicodedata module)"
- unicode_call_errorhandler(errors,"unicodeescape",message,s,pos-1,size)
+ unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, size)
if look < size and s[look] == '{':
#/* look for the closing brace */
while (look < size and s[look] != '}'):
@@ -1500,21 +1461,21 @@
try:
chr = unicodedata.lookup("%s" % st)
except KeyError, e:
- x=unicode_call_errorhandler(errors,"unicodeescape",message,s,pos-1,look+1)
+ x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, look+1)
else:
- x = chr,look + 1
+ x = chr, look + 1
p += x[0]
pos = x[1]
else:
- x=unicode_call_errorhandler(errors,"unicodeescape",message,s,pos-1,look+1)
+ x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, look+1)
else:
- x=unicode_call_errorhandler(errors,"unicodeescape",message,s,pos-1,look+1)
+ x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, look+1)
else:
if (pos > size):
message = "\\ at end of string"
handler = lookup_error(errors)
- x = handler(UnicodeDecodeError("unicodeescape",s,pos,
- size,message))
+ x = handler(UnicodeDecodeError("unicodeescape", s, pos,
+ size, message))
p += x[0]
pos = x[1]
else:
@@ -1522,7 +1483,7 @@
p += s[pos]
return p
-def PyUnicode_EncodeRawUnicodeEscape(s,size):
+def PyUnicode_EncodeRawUnicodeEscape(s, size):
if (size == 0):
return ''
@@ -1533,12 +1494,12 @@
if (ord(ch) >= 0x10000):
p += '\\'
p += 'U'
- p += '%08x'%(ord(ch))
+ p += '%08x' % (ord(ch))
elif (ord(ch) >= 256) :
# /* Map 16-bit characters to '\uxxxx' */
p += '\\'
p += 'u'
- p += '%04x'%(ord(ch))
+ p += '%04x' % (ord(ch))
# /* Copy everything else as-is */
else:
p += chr(ord(ch))
@@ -1546,22 +1507,22 @@
#p += '\0'
return p
-def charmapencode_output(c,mapping):
+def charmapencode_output(c, mapping):
rep = mapping[c]
- if isinstance(rep,int) or isinstance(rep, long):
- if rep<256:
+ if isinstance(rep, int) or isinstance(rep, long):
+ if rep < 256:
return chr(rep)
else:
raise TypeError("character mapping must be in range(256)")
- elif isinstance(rep,str):
+ elif isinstance(rep, str):
return rep
elif rep == None:
raise KeyError("character maps to <undefined>")
else:
raise TypeError("character mapping must return integer, None or str")
-def PyUnicode_EncodeCharmap(p,size,mapping='latin-1',errors='strict'):
+def PyUnicode_EncodeCharmap(p, size, mapping='latin-1', errors='strict'):
## /* the following variable is used for caching string comparisons
## * -1=not initialized, 0=unknown, 1=strict, 2=replace,
@@ -1577,27 +1538,17 @@
while (inpos<size):
#/* try to encode it */
try:
- x = charmapencode_output(ord(p[inpos]),mapping)
+ x = charmapencode_output(ord(p[inpos]), mapping)
res += [x]
except KeyError:
- x = unicode_call_errorhandler(errors,"charmap",
- "character maps to <undefined>",p,inpos,inpos+1,False)
+ x = unicode_call_errorhandler(errors, "charmap",
+ "character maps to <undefined>", p, inpos, inpos+1, False)
try:
- res += [charmapencode_output(ord(y),mapping) for y in x[0]]
+ res += [charmapencode_output(ord(y), mapping) for y in x[0]]
except KeyError:
- raise UnicodeEncodeError("charmap",p,inpos,inpos+1,
+ raise UnicodeEncodeError("charmap", p, inpos, inpos+1,
"character maps to <undefined>")
-## except TypeError,err:
-## x = unicode_call_errorhandler(errors,"charmap",
-## err,p,inpos,inpos+1,False)
-## try:
-## res += [charmapencode_output(ord(y),mapping) for y in x[0]]
-## except KeyError:
-## raise UnicodeEncodeError("charmap",p,inpos,inpos+1,
-## "character maps to <undefined>")
-##
- #/* done with this character => adjust input position */
- inpos+=1
+ inpos += 1
return res
def PyUnicode_DecodeCharmap(s, size, mapping, errors):
@@ -1616,30 +1567,25 @@
ch = s[inpos]
try:
x = mapping[ord(ch)]
- if isinstance(x,int):
- if x<65536:
+ if isinstance(x, int):
+ if x < 65536:
p += unichr(x)
else:
raise TypeError("character mapping must be in range(65536)")
- elif isinstance(x,unicode):
+ elif isinstance(x, unicode):
p += x
elif not x:
raise KeyError
else:
raise TypeError
except KeyError:
- x = unicode_call_errorhandler(errors,"charmap",
- "character maps to <undefined>",s,inpos,inpos+1)
+ x = unicode_call_errorhandler(errors, "charmap",
+ "character maps to <undefined>", s, inpos, inpos+1)
p += x[0]
-## except TypeError:
-## x = unicode_call_errorhandler(errors,"charmap",
-## "character mapping must return integer, None or unicode",
-## s,inpos,inpos+1)
-## p += x[0]
- inpos +=1
+ inpos += 1
return p
-def PyUnicode_DecodeRawUnicodeEscape(s, size,errors):
+def PyUnicode_DecodeRawUnicodeEscape(s, size, errors):
if (size == 0):
return u''
@@ -1653,7 +1599,6 @@
pos += 1
continue
startinpos = pos
- #pos += 1
## /* \u-escapes are only interpreted iff the number of leading
## backslashes is odd */
bs = pos
@@ -1678,11 +1623,9 @@
pos += 1
#/* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */
-
- i = 0
x = 0
try:
- x = int(s[pos:pos+count],16)
+ x = int(s[pos:pos+count], 16)
except ValueError:
res = unicode_call_errorhandler(
errors, "rawunicodeescape", "truncated \\uXXXX",
@@ -1696,9 +1639,8 @@
res = unicode_call_errorhandler(
errors, "rawunicodeescape", "\\Uxxxxxxxx out of range",
s, size, pos, pos+1)
- pos = i = res[1]
+ pos = res[1]
p += res[0]
- i += 1
else:
p += unichr(x)
pos += count
@@ -1707,7 +1649,7 @@
res = unicode_call_errorhandler(
errors, "rawunicodeescape", "\\Uxxxxxxxx out of range",
s, size, pos, pos+1)
- pos = i = res[1]
+ pos = res[1]
p += res[0]
#endif
More information about the Pypy-commit
mailing list