[Python-checkins] r43320 - in python/trunk: Lib/encodings/big5.py Lib/encodings/big5hkscs.py Lib/encodings/cp932.py Lib/encodings/cp949.py Lib/encodings/cp950.py Lib/encodings/euc_jis_2004.py Lib/encodings/euc_jisx0213.py Lib/encodings/euc_jp.py
Neal Norwitz
nnorwitz at gmail.com
Sun Mar 26 04:58:14 CEST 2006
Shouldn't all the PyInt_Check() calls also check for a long?  If a long
fits in a Py_ssize_t, it should be accepted too.
How about a NEWS entry, mentioning changes, particularly any
new/changed APIs including attributes?
n
--
On 3/25/06, hyeshik.chang <python-checkins at python.org> wrote:
> Author: hyeshik.chang
> Date: Sun Mar 26 04:34:59 2006
> New Revision: 43320
>
> Added:
> python/trunk/Tools/unicode/gencjkcodecs.py
> Modified:
> python/trunk/Lib/encodings/big5.py
> python/trunk/Lib/encodings/big5hkscs.py
> python/trunk/Lib/encodings/cp932.py
> python/trunk/Lib/encodings/cp949.py
> python/trunk/Lib/encodings/cp950.py
> python/trunk/Lib/encodings/euc_jis_2004.py
> python/trunk/Lib/encodings/euc_jisx0213.py
> python/trunk/Lib/encodings/euc_jp.py
> python/trunk/Lib/encodings/euc_kr.py
> python/trunk/Lib/encodings/gb18030.py
> python/trunk/Lib/encodings/gb2312.py
> python/trunk/Lib/encodings/gbk.py
> python/trunk/Lib/encodings/hz.py
> python/trunk/Lib/encodings/iso2022_jp.py
> python/trunk/Lib/encodings/iso2022_jp_1.py
> python/trunk/Lib/encodings/iso2022_jp_2.py
> python/trunk/Lib/encodings/iso2022_jp_2004.py
> python/trunk/Lib/encodings/iso2022_jp_3.py
> python/trunk/Lib/encodings/iso2022_jp_ext.py
> python/trunk/Lib/encodings/iso2022_kr.py
> python/trunk/Lib/encodings/johab.py
> python/trunk/Lib/encodings/shift_jis.py
> python/trunk/Lib/encodings/shift_jis_2004.py
> python/trunk/Lib/encodings/shift_jisx0213.py
> python/trunk/Lib/test/test_multibytecodec.py
> python/trunk/Lib/test/test_multibytecodec_support.py
> python/trunk/Modules/cjkcodecs/_codecs_cn.c
> python/trunk/Modules/cjkcodecs/multibytecodec.c
> python/trunk/Modules/cjkcodecs/multibytecodec.h
> python/trunk/Tools/unicode/Makefile
> Log:
> Patch #1443155: Add the incremental codecs support for CJK codecs.
> (reviewed by Walter Dörwald)
>
>
> Modified: python/trunk/Lib/encodings/big5.py
> ==============================================================================
> --- python/trunk/Lib/encodings/big5.py (original)
> +++ python/trunk/Lib/encodings/big5.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # big5.py: Python Unicode Codec for BIG5
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: big5.py,v 1.8 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_tw, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_tw.getcodec('big5')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='big5',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/big5hkscs.py
> ==============================================================================
> --- python/trunk/Lib/encodings/big5hkscs.py (original)
> +++ python/trunk/Lib/encodings/big5hkscs.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # big5hkscs.py: Python Unicode Codec for BIG5HKSCS
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: big5hkscs.py,v 1.1 2004/06/29 05:14:27 perky Exp $
> #
>
> import _codecs_hk, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_hk.getcodec('big5hkscs')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='big5hkscs',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/cp932.py
> ==============================================================================
> --- python/trunk/Lib/encodings/cp932.py (original)
> +++ python/trunk/Lib/encodings/cp932.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # cp932.py: Python Unicode Codec for CP932
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: cp932.py,v 1.8 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_jp, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_jp.getcodec('cp932')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='cp932',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/cp949.py
> ==============================================================================
> --- python/trunk/Lib/encodings/cp949.py (original)
> +++ python/trunk/Lib/encodings/cp949.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # cp949.py: Python Unicode Codec for CP949
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: cp949.py,v 1.8 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_kr, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_kr.getcodec('cp949')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='cp949',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/cp950.py
> ==============================================================================
> --- python/trunk/Lib/encodings/cp950.py (original)
> +++ python/trunk/Lib/encodings/cp950.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # cp950.py: Python Unicode Codec for CP950
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: cp950.py,v 1.8 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_tw, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_tw.getcodec('cp950')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='cp950',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/euc_jis_2004.py
> ==============================================================================
> --- python/trunk/Lib/encodings/euc_jis_2004.py (original)
> +++ python/trunk/Lib/encodings/euc_jis_2004.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # euc_jis_2004.py: Python Unicode Codec for EUC_JIS_2004
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: euc_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
> #
>
> import _codecs_jp, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_jp.getcodec('euc_jis_2004')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='euc_jis_2004',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/euc_jisx0213.py
> ==============================================================================
> --- python/trunk/Lib/encodings/euc_jisx0213.py (original)
> +++ python/trunk/Lib/encodings/euc_jisx0213.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # euc_jisx0213.py: Python Unicode Codec for EUC_JISX0213
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: euc_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_jp, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_jp.getcodec('euc_jisx0213')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='euc_jisx0213',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/euc_jp.py
> ==============================================================================
> --- python/trunk/Lib/encodings/euc_jp.py (original)
> +++ python/trunk/Lib/encodings/euc_jp.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # euc_jp.py: Python Unicode Codec for EUC_JP
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: euc_jp.py,v 1.8 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_jp, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_jp.getcodec('euc_jp')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='euc_jp',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/euc_kr.py
> ==============================================================================
> --- python/trunk/Lib/encodings/euc_kr.py (original)
> +++ python/trunk/Lib/encodings/euc_kr.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # euc_kr.py: Python Unicode Codec for EUC_KR
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: euc_kr.py,v 1.8 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_kr, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_kr.getcodec('euc_kr')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='euc_kr',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/gb18030.py
> ==============================================================================
> --- python/trunk/Lib/encodings/gb18030.py (original)
> +++ python/trunk/Lib/encodings/gb18030.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # gb18030.py: Python Unicode Codec for GB18030
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: gb18030.py,v 1.8 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_cn, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_cn.getcodec('gb18030')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='gb18030',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/gb2312.py
> ==============================================================================
> --- python/trunk/Lib/encodings/gb2312.py (original)
> +++ python/trunk/Lib/encodings/gb2312.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # gb2312.py: Python Unicode Codec for GB2312
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: gb2312.py,v 1.8 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_cn, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_cn.getcodec('gb2312')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='gb2312',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/gbk.py
> ==============================================================================
> --- python/trunk/Lib/encodings/gbk.py (original)
> +++ python/trunk/Lib/encodings/gbk.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # gbk.py: Python Unicode Codec for GBK
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: gbk.py,v 1.8 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_cn, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_cn.getcodec('gbk')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='gbk',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/hz.py
> ==============================================================================
> --- python/trunk/Lib/encodings/hz.py (original)
> +++ python/trunk/Lib/encodings/hz.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # hz.py: Python Unicode Codec for HZ
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: hz.py,v 1.8 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_cn, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_cn.getcodec('hz')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='hz',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/iso2022_jp.py
> ==============================================================================
> --- python/trunk/Lib/encodings/iso2022_jp.py (original)
> +++ python/trunk/Lib/encodings/iso2022_jp.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # iso2022_jp.py: Python Unicode Codec for ISO2022_JP
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: iso2022_jp.py,v 1.2 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_iso2022, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_iso2022.getcodec('iso2022_jp')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='iso2022_jp',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/iso2022_jp_1.py
> ==============================================================================
> --- python/trunk/Lib/encodings/iso2022_jp_1.py (original)
> +++ python/trunk/Lib/encodings/iso2022_jp_1.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # iso2022_jp_1.py: Python Unicode Codec for ISO2022_JP_1
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: iso2022_jp_1.py,v 1.2 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_iso2022, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_iso2022.getcodec('iso2022_jp_1')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='iso2022_jp_1',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/iso2022_jp_2.py
> ==============================================================================
> --- python/trunk/Lib/encodings/iso2022_jp_2.py (original)
> +++ python/trunk/Lib/encodings/iso2022_jp_2.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # iso2022_jp_2.py: Python Unicode Codec for ISO2022_JP_2
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: iso2022_jp_2.py,v 1.2 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_iso2022, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_iso2022.getcodec('iso2022_jp_2')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='iso2022_jp_2',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/iso2022_jp_2004.py
> ==============================================================================
> --- python/trunk/Lib/encodings/iso2022_jp_2004.py (original)
> +++ python/trunk/Lib/encodings/iso2022_jp_2004.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # iso2022_jp_2004.py: Python Unicode Codec for ISO2022_JP_2004
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: iso2022_jp_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
> #
>
> import _codecs_iso2022, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_iso2022.getcodec('iso2022_jp_2004')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='iso2022_jp_2004',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/iso2022_jp_3.py
> ==============================================================================
> --- python/trunk/Lib/encodings/iso2022_jp_3.py (original)
> +++ python/trunk/Lib/encodings/iso2022_jp_3.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # iso2022_jp_3.py: Python Unicode Codec for ISO2022_JP_3
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: iso2022_jp_3.py,v 1.2 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_iso2022, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_iso2022.getcodec('iso2022_jp_3')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='iso2022_jp_3',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/iso2022_jp_ext.py
> ==============================================================================
> --- python/trunk/Lib/encodings/iso2022_jp_ext.py (original)
> +++ python/trunk/Lib/encodings/iso2022_jp_ext.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # iso2022_jp_ext.py: Python Unicode Codec for ISO2022_JP_EXT
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: iso2022_jp_ext.py,v 1.2 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_iso2022, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_iso2022.getcodec('iso2022_jp_ext')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='iso2022_jp_ext',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/iso2022_kr.py
> ==============================================================================
> --- python/trunk/Lib/encodings/iso2022_kr.py (original)
> +++ python/trunk/Lib/encodings/iso2022_kr.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # iso2022_kr.py: Python Unicode Codec for ISO2022_KR
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: iso2022_kr.py,v 1.2 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_iso2022, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_iso2022.getcodec('iso2022_kr')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='iso2022_kr',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/johab.py
> ==============================================================================
> --- python/trunk/Lib/encodings/johab.py (original)
> +++ python/trunk/Lib/encodings/johab.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # johab.py: Python Unicode Codec for JOHAB
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: johab.py,v 1.8 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_kr, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_kr.getcodec('johab')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='johab',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/shift_jis.py
> ==============================================================================
> --- python/trunk/Lib/encodings/shift_jis.py (original)
> +++ python/trunk/Lib/encodings/shift_jis.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # shift_jis.py: Python Unicode Codec for SHIFT_JIS
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: shift_jis.py,v 1.8 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_jp, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_jp.getcodec('shift_jis')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='shift_jis',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/shift_jis_2004.py
> ==============================================================================
> --- python/trunk/Lib/encodings/shift_jis_2004.py (original)
> +++ python/trunk/Lib/encodings/shift_jis_2004.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # shift_jis_2004.py: Python Unicode Codec for SHIFT_JIS_2004
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: shift_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
> #
>
> import _codecs_jp, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_jp.getcodec('shift_jis_2004')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='shift_jis_2004',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/encodings/shift_jisx0213.py
> ==============================================================================
> --- python/trunk/Lib/encodings/shift_jisx0213.py (original)
> +++ python/trunk/Lib/encodings/shift_jisx0213.py Sun Mar 26 04:34:59 2006
> @@ -2,10 +2,10 @@
> # shift_jisx0213.py: Python Unicode Codec for SHIFT_JISX0213
> #
> # Written by Hye-Shik Chang <perky at FreeBSD.org>
> -# $CJKCodecs: shift_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $
> #
>
> import _codecs_jp, codecs
> +import _multibytecodec as mbc
>
> codec = _codecs_jp.getcodec('shift_jisx0213')
>
> @@ -13,22 +13,24 @@
> encode = codec.encode
> decode = codec.decode
>
> -class StreamReader(Codec, codecs.StreamReader):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamReader.__init__(self, stream, errors)
> - __codec = codec.StreamReader(stream, errors)
> - self.read = __codec.read
> - self.readline = __codec.readline
> - self.readlines = __codec.readlines
> - self.reset = __codec.reset
> -
> -class StreamWriter(Codec, codecs.StreamWriter):
> - def __init__(self, stream, errors='strict'):
> - codecs.StreamWriter.__init__(self, stream, errors)
> - __codec = codec.StreamWriter(stream, errors)
> - self.write = __codec.write
> - self.writelines = __codec.writelines
> - self.reset = __codec.reset
> +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
> + codecs.IncrementalEncoder):
> + codec = codec
> +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
> + codecs.IncrementalDecoder):
> + codec = codec
> +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
> + codec = codec
> +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
> + codec = codec
>
> def getregentry():
> - return (codec.encode, codec.decode, StreamReader, StreamWriter)
> + return codecs.CodecInfo(
> + name='shift_jisx0213',
> + encode=Codec().encode,
> + decode=Codec().decode,
> + incrementalencoder=IncrementalEncoder,
> + incrementaldecoder=IncrementalDecoder,
> + streamreader=StreamReader,
> + streamwriter=StreamWriter,
> + )
>
> Modified: python/trunk/Lib/test/test_multibytecodec.py
> ==============================================================================
> --- python/trunk/Lib/test/test_multibytecodec.py (original)
> +++ python/trunk/Lib/test/test_multibytecodec.py Sun Mar 26 04:34:59 2006
> @@ -9,11 +9,106 @@
> from test import test_multibytecodec_support
> import unittest, StringIO, codecs
>
> +class Test_MultibyteCodec(unittest.TestCase):
> +
> + def test_nullcoding(self):
> + self.assertEqual(''.decode('gb18030'), u'')
> + self.assertEqual(unicode('', 'gb18030'), u'')
> + self.assertEqual(u''.encode('gb18030'), '')
> +
> + def test_str_decode(self):
> + self.assertEqual('abcd'.encode('gb18030'), 'abcd')
> +
> +
> +class Test_IncrementalEncoder(unittest.TestCase):
> +
> + def test_stateless(self):
> + # cp949 encoder isn't stateful at all.
> + encoder = codecs.getincrementalencoder('cp949')()
> + self.assertEqual(encoder.encode(u'\ud30c\uc774\uc36c \ub9c8\uc744'),
> + '\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb')
> + self.assertEqual(encoder.reset(), None)
> + self.assertEqual(encoder.encode(u'\u2606\u223c\u2606', True),
> + '\xa1\xd9\xa1\xad\xa1\xd9')
> + self.assertEqual(encoder.reset(), None)
> + self.assertEqual(encoder.encode(u'', True), '')
> + self.assertEqual(encoder.encode(u'', False), '')
> + self.assertEqual(encoder.reset(), None)
> +
> + def test_stateful(self):
> + # jisx0213 encoder is stateful for a few codepoints. eg)
> + # U+00E6 => A9DC
> + # U+00E6 U+0300 => ABC4
> + # U+0300 => ABDC
> +
> + encoder = codecs.getincrementalencoder('jisx0213')()
> + self.assertEqual(encoder.encode(u'\u00e6\u0300'), '\xab\xc4')
> + self.assertEqual(encoder.encode(u'\u00e6'), '')
> + self.assertEqual(encoder.encode(u'\u0300'), '\xab\xc4')
> + self.assertEqual(encoder.encode(u'\u00e6', True), '\xa9\xdc')
> +
> + self.assertEqual(encoder.reset(), None)
> + self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc')
> +
> + self.assertEqual(encoder.encode(u'\u00e6'), '')
> + self.assertEqual(encoder.encode('', True), '\xa9\xdc')
> + self.assertEqual(encoder.encode('', True), '')
> +
> + def test_stateful_keep_buffer(self):
> + encoder = codecs.getincrementalencoder('jisx0213')()
> + self.assertEqual(encoder.encode(u'\u00e6'), '')
> + self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
> + self.assertEqual(encoder.encode(u'\u0300\u00e6'), '\xab\xc4')
> + self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
> + self.assertEqual(encoder.reset(), None)
> + self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc')
> + self.assertEqual(encoder.encode(u'\u00e6'), '')
> + self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
> + self.assertEqual(encoder.encode(u'', True), '\xa9\xdc')
> +
> +
> +class Test_IncrementalDecoder(unittest.TestCase):
> +
> + def test_dbcs(self):
> + # cp949 decoder is simple with only 1 or 2 bytes sequences.
> + decoder = codecs.getincrementaldecoder('cp949')()
> + self.assertEqual(decoder.decode('\xc6\xc4\xc0\xcc\xbd'),
> + u'\ud30c\uc774')
> + self.assertEqual(decoder.decode('\xe3 \xb8\xb6\xc0\xbb'),
> + u'\uc36c \ub9c8\uc744')
> + self.assertEqual(decoder.decode(''), u'')
> +
> + def test_dbcs_keep_buffer(self):
> + decoder = codecs.getincrementaldecoder('cp949')()
> + self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c')
> + self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
> + self.assertEqual(decoder.decode('\xcc'), u'\uc774')
> +
> + self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c')
> + self.assertRaises(UnicodeDecodeError, decoder.decode, '\xcc\xbd', True)
> + self.assertEqual(decoder.decode('\xcc'), u'\uc774')
> +
> + def test_iso2022(self):
> + decoder = codecs.getincrementaldecoder('iso2022-jp')()
> + ESC = '\x1b'
> + self.assertEqual(decoder.decode(ESC + '('), u'')
> + self.assertEqual(decoder.decode('B', True), u'')
> + self.assertEqual(decoder.decode(ESC + '$'), u'')
> + self.assertEqual(decoder.decode('B@$'), u'\u4e16')
> + self.assertEqual(decoder.decode('@$@'), u'\u4e16')
> + self.assertEqual(decoder.decode('$', True), u'\u4e16')
> + self.assertEqual(decoder.reset(), None)
> + self.assertEqual(decoder.decode('@$'), u'@$')
> + self.assertEqual(decoder.decode(ESC + '$'), u'')
> + self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
> + self.assertEqual(decoder.decode('B@$'), u'\u4e16')
> +
> +
> class Test_StreamWriter(unittest.TestCase):
> if len(u'\U00012345') == 2: # UCS2
> def test_gb18030(self):
> s= StringIO.StringIO()
> - c = codecs.lookup('gb18030')[3](s)
> + c = codecs.getwriter('gb18030')(s)
> c.write(u'123')
> self.assertEqual(s.getvalue(), '123')
> c.write(u'\U00012345')
> @@ -30,15 +125,16 @@
> self.assertEqual(s.getvalue(),
> '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
>
> - # standard utf-8 codecs has broken StreamReader
> - if test_multibytecodec_support.__cjkcodecs__:
> - def test_utf_8(self):
> - s= StringIO.StringIO()
> - c = codecs.lookup('utf-8')[3](s)
> - c.write(u'123')
> - self.assertEqual(s.getvalue(), '123')
> - c.write(u'\U00012345')
> - self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
> + def test_utf_8(self):
> + s= StringIO.StringIO()
> + c = codecs.getwriter('utf-8')(s)
> + c.write(u'123')
> + self.assertEqual(s.getvalue(), '123')
> + c.write(u'\U00012345')
> + self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
> +
> + # Python utf-8 codec can't buffer surrogate pairs yet.
> + if 0:
> c.write(u'\U00012345'[0])
> self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
> c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac')
> @@ -61,14 +157,6 @@
> else: # UCS4
> pass
>
> - def test_nullcoding(self):
> - self.assertEqual(''.decode('gb18030'), u'')
> - self.assertEqual(unicode('', 'gb18030'), u'')
> - self.assertEqual(u''.encode('gb18030'), '')
> -
> - def test_str_decode(self):
> - self.assertEqual('abcd'.encode('gb18030'), 'abcd')
> -
> def test_streamwriter_strwrite(self):
> s = StringIO.StringIO()
> wr = codecs.getwriter('gb18030')(s)
> @@ -83,6 +171,9 @@
>
> def test_main():
> suite = unittest.TestSuite()
> + suite.addTest(unittest.makeSuite(Test_MultibyteCodec))
> + suite.addTest(unittest.makeSuite(Test_IncrementalEncoder))
> + suite.addTest(unittest.makeSuite(Test_IncrementalDecoder))
> suite.addTest(unittest.makeSuite(Test_StreamWriter))
> suite.addTest(unittest.makeSuite(Test_ISO2022))
> test_support.run_suite(suite)
>
> Modified: python/trunk/Lib/test/test_multibytecodec_support.py
> ==============================================================================
> --- python/trunk/Lib/test/test_multibytecodec_support.py (original)
> +++ python/trunk/Lib/test/test_multibytecodec_support.py Sun Mar 26 04:34:59 2006
> @@ -3,15 +3,12 @@
> # test_multibytecodec_support.py
> # Common Unittest Routines for CJK codecs
> #
> -# $CJKCodecs: test_multibytecodec_support.py,v 1.6 2004/06/19 06:09:55 perky Exp $
>
> import sys, codecs, os.path
> import unittest
> from test import test_support
> from StringIO import StringIO
>
> -__cjkcodecs__ = 0 # define this as 0 for python
> -
> class TestBase:
> encoding = '' # codec name
> codec = None # codec tuple (with 4 elements)
> @@ -21,11 +18,17 @@
> roundtriptest = 1 # set if roundtrip is possible with unicode
> has_iso10646 = 0 # set if this encoding contains whole iso10646 map
> xmlcharnametest = None # string to test xmlcharrefreplace
> + unmappedunicode = u'\udeee' # a unicode codepoint that is not mapped.
>
> def setUp(self):
> if self.codec is None:
> self.codec = codecs.lookup(self.encoding)
> - self.encode, self.decode, self.reader, self.writer = self.codec
> + self.encode = self.codec.encode
> + self.decode = self.codec.decode
> + self.reader = self.codec.streamreader
> + self.writer = self.codec.streamwriter
> + self.incrementalencoder = self.codec.incrementalencoder
> + self.incrementaldecoder = self.codec.incrementaldecoder
>
> def test_chunkcoding(self):
> for native, utf8 in zip(*[StringIO(f).readlines()
> @@ -47,51 +50,142 @@
> else:
> self.assertRaises(UnicodeError, func, source, scheme)
>
> - if sys.hexversion >= 0x02030000:
> - def test_xmlcharrefreplace(self):
> - if self.has_iso10646:
> - return
> -
> - s = u"\u0b13\u0b23\u0b60 nd eggs"
> - self.assertEqual(
> - self.encode(s, "xmlcharrefreplace")[0],
> - "&#2835;&#2851;&#2912; nd eggs"
> - )
> -
> - def test_customreplace(self):
> - if self.has_iso10646:
> - return
> -
> - import htmlentitydefs
> -
> - names = {}
> - for (key, value) in htmlentitydefs.entitydefs.items():
> - if len(value)==1:
> - names[value.decode('latin-1')] = self.decode(key)[0]
> + def test_xmlcharrefreplace(self):
> + if self.has_iso10646:
> + return
> +
> + s = u"\u0b13\u0b23\u0b60 nd eggs"
> + self.assertEqual(
> + self.encode(s, "xmlcharrefreplace")[0],
> + "&#2835;&#2851;&#2912; nd eggs"
> + )
> +
> + def test_customreplace(self):
> + if self.has_iso10646:
> + return
> +
> + from htmlentitydefs import codepoint2name
> +
> + def xmlcharnamereplace(exc):
> + if not isinstance(exc, UnicodeEncodeError):
> + raise TypeError("don't know how to handle %r" % exc)
> + l = []
> + for c in exc.object[exc.start:exc.end]:
> + if ord(c) in codepoint2name:
> + l.append(u"&%s;" % codepoint2name[ord(c)])
> else:
> - names[unichr(int(value[2:-1]))] = self.decode(key)[0]
> -
> - def xmlcharnamereplace(exc):
> - if not isinstance(exc, UnicodeEncodeError):
> - raise TypeError("don't know how to handle %r" % exc)
> - l = []
> - for c in exc.object[exc.start:exc.end]:
> - try:
> - l.append(u"&%s;" % names[c])
> - except KeyError:
> - l.append(u"&#%d;" % ord(c))
> - return (u"".join(l), exc.end)
> + l.append(u"&#%d;" % ord(c))
> + return (u"".join(l), exc.end)
>
> - codecs.register_error(
> - "test.xmlcharnamereplace", xmlcharnamereplace)
> + codecs.register_error("test.xmlcharnamereplace", xmlcharnamereplace)
>
> - if self.xmlcharnametest:
> - sin, sout = self.xmlcharnametest
> + if self.xmlcharnametest:
> + sin, sout = self.xmlcharnametest
> + else:
> + sin = u"\xab\u211c\xbb = \u2329\u1234\u232a"
> + sout = "&laquo;&real;&raquo; = &lang;&#4660;&rang;"
> + self.assertEqual(self.encode(sin,
> + "test.xmlcharnamereplace")[0], sout)
> +
> + def test_callback_wrong_objects(self):
> + def myreplace(exc):
> + return (ret, exc.end)
> + codecs.register_error("test.cjktest", myreplace)
> +
> + for ret in ([1, 2, 3], [], None, object(), 'string', ''):
> + self.assertRaises(TypeError, self.encode, self.unmappedunicode,
> + 'test.cjktest')
> +
> + def test_callback_None_index(self):
> + def myreplace(exc):
> + return (u'x', None)
> + codecs.register_error("test.cjktest", myreplace)
> + self.assertRaises(TypeError, self.encode, self.unmappedunicode,
> + 'test.cjktest')
> +
> + def test_callback_backward_index(self):
> + def myreplace(exc):
> + if myreplace.limit > 0:
> + myreplace.limit -= 1
> + return (u'REPLACED', 0)
> else:
> - sin = u"\xab\u211c\xbb = \u2329\u1234\u232a"
> - sout = "&laquo;&real;&raquo; = &lang;&#4660;&rang;"
> - self.assertEqual(self.encode(sin,
> - "test.xmlcharnamereplace")[0], sout)
> + return (u'TERMINAL', exc.end)
> + myreplace.limit = 3
> + codecs.register_error("test.cjktest", myreplace)
> + self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh',
> + 'test.cjktest'),
> + ('abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))
> +
> + def test_callback_forward_index(self):
> + def myreplace(exc):
> + return (u'REPLACED', exc.end + 2)
> + codecs.register_error("test.cjktest", myreplace)
> + self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh',
> + 'test.cjktest'), ('abcdREPLACEDgh', 9))
> +
> + def test_callback_index_outofbound(self):
> + def myreplace(exc):
> + return (u'TERM', 100)
> + codecs.register_error("test.cjktest", myreplace)
> + self.assertRaises(IndexError, self.encode, self.unmappedunicode,
> + 'test.cjktest')
> +
> + def test_incrementalencoder(self):
> + UTF8Reader = codecs.getreader('utf-8')
> + for sizehint in [None] + range(1, 33) + \
> + [64, 128, 256, 512, 1024]:
> + istream = UTF8Reader(StringIO(self.tstring[1]))
> + ostream = StringIO()
> + encoder = self.incrementalencoder()
> + while 1:
> + if sizehint is not None:
> + data = istream.read(sizehint)
> + else:
> + data = istream.read()
> +
> + if not data:
> + break
> + e = encoder.encode(data)
> + ostream.write(e)
> +
> + self.assertEqual(ostream.getvalue(), self.tstring[0])
> +
> + def test_incrementaldecoder(self):
> + UTF8Writer = codecs.getwriter('utf-8')
> + for sizehint in [None, -1] + range(1, 33) + \
> + [64, 128, 256, 512, 1024]:
> + istream = StringIO(self.tstring[0])
> + ostream = UTF8Writer(StringIO())
> + decoder = self.incrementaldecoder()
> + while 1:
> + data = istream.read(sizehint)
> + if not data:
> + break
> + else:
> + u = decoder.decode(data)
> + ostream.write(u)
> +
> + self.assertEqual(ostream.getvalue(), self.tstring[1])
> +
> + def test_incrementalencoder_error_callback(self):
> + inv = self.unmappedunicode
> +
> + e = self.incrementalencoder()
> + self.assertRaises(UnicodeEncodeError, e.encode, inv, True)
> +
> + e.errors = 'ignore'
> + self.assertEqual(e.encode(inv, True), '')
> +
> + e.reset()
> + def tempreplace(exc):
> + return (u'called', exc.end)
> + codecs.register_error('test.incremental_error_callback', tempreplace)
> + e.errors = 'test.incremental_error_callback'
> + self.assertEqual(e.encode(inv, True), 'called')
> +
> + # again
> + e.errors = 'ignore'
> + self.assertEqual(e.encode(inv, True), '')
>
> def test_streamreader(self):
> UTF8Writer = codecs.getwriter('utf-8')
> @@ -113,11 +207,7 @@
> self.assertEqual(ostream.getvalue(), self.tstring[1])
>
> def test_streamwriter(self):
> - if __cjkcodecs__:
> - readfuncs = ('read', 'readline', 'readlines')
> - else:
> - # standard utf8 codec has broken readline and readlines.
> - readfuncs = ('read',)
> + readfuncs = ('read', 'readline', 'readlines')
> UTF8Reader = codecs.getreader('utf-8')
> for name in readfuncs:
> for sizehint in [None] + range(1, 33) + \
> @@ -211,10 +301,5 @@
> self.assertEqual(unicode(csetch, self.encoding), unich)
>
> def load_teststring(encoding):
> - if __cjkcodecs__:
> - etxt = open(os.path.join('sampletexts', encoding) + '.txt').read()
> - utxt = open(os.path.join('sampletexts', encoding) + '.utf8').read()
> - return (etxt, utxt)
> - else:
> - from test import cjkencodings_test
> - return cjkencodings_test.teststring[encoding]
> + from test import cjkencodings_test
> + return cjkencodings_test.teststring[encoding]
>
> Modified: python/trunk/Modules/cjkcodecs/_codecs_cn.c
> ==============================================================================
> --- python/trunk/Modules/cjkcodecs/_codecs_cn.c (original)
> +++ python/trunk/Modules/cjkcodecs/_codecs_cn.c Sun Mar 26 04:34:59 2006
> @@ -217,11 +217,8 @@
> break;
> }
>
> - if (utrrange->first == 0) {
> - PyErr_SetString(PyExc_RuntimeError,
> - "unicode mapping invalid");
> + if (utrrange->first == 0)
> return 1;
> - }
> continue;
> }
>
>
> Modified: python/trunk/Modules/cjkcodecs/multibytecodec.c
> ==============================================================================
> --- python/trunk/Modules/cjkcodecs/multibytecodec.c (original)
> +++ python/trunk/Modules/cjkcodecs/multibytecodec.c Sun Mar 26 04:34:59 2006
> @@ -6,6 +6,7 @@
>
> #define PY_SSIZE_T_CLEAN
> #include "Python.h"
> +#include "structmember.h"
> #include "multibytecodec.h"
>
> typedef struct {
> @@ -38,22 +39,14 @@
> are 'ignore' and 'replace' as well as any other name registerd with\n\
> codecs.register_error that is able to handle UnicodeDecodeErrors.");
>
> -PyDoc_STRVAR(MultibyteCodec_StreamReader__doc__,
> -"I.StreamReader(stream[, errors]) -> StreamReader instance");
> -
> -PyDoc_STRVAR(MultibyteCodec_StreamWriter__doc__,
> -"I.StreamWriter(stream[, errors]) -> StreamWriter instance");
> -
> static char *codeckwarglist[] = {"input", "errors", NULL};
> +static char *incnewkwarglist[] = {"errors", NULL};
> +static char *incrementalkwarglist[] = {"input", "final", NULL};
> static char *streamkwarglist[] = {"stream", "errors", NULL};
>
> static PyObject *multibytecodec_encode(MultibyteCodec *,
> MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t,
> PyObject *, int);
> -static PyObject *mbstreamreader_create(MultibyteCodec *,
> - PyObject *, const char *);
> -static PyObject *mbstreamwriter_create(MultibyteCodec *,
> - PyObject *, const char *);
>
> #define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */
>
> @@ -83,7 +76,7 @@
> }
>
> static PyObject *
> -get_errorcallback(const char *errors)
> +internal_error_callback(const char *errors)
> {
> if (errors == NULL || strcmp(errors, "strict") == 0)
> return ERROR_STRICT;
> @@ -91,17 +84,88 @@
> return ERROR_IGNORE;
> else if (strcmp(errors, "replace") == 0)
> return ERROR_REPLACE;
> + else
> + return PyString_FromString(errors);
> +}
> +
> +static PyObject *
> +call_error_callback(PyObject *errors, PyObject *exc)
> +{
> + PyObject *args, *cb, *r;
> +
> + assert(PyString_Check(errors));
> + cb = PyCodec_LookupError(PyString_AS_STRING(errors));
> + if (cb == NULL)
> + return NULL;
> +
> + args = PyTuple_New(1);
> + if (args == NULL) {
> + Py_DECREF(cb);
> + return NULL;
> + }
> +
> + PyTuple_SET_ITEM(args, 0, exc);
> + Py_INCREF(exc);
> +
> + r = PyObject_CallObject(cb, args);
> + Py_DECREF(args);
> + Py_DECREF(cb);
> + return r;
> +}
> +
> +static PyObject *
> +codecctx_errors_get(MultibyteStatefulCodecContext *self)
> +{
> + const char *errors;
> +
> + if (self->errors == ERROR_STRICT)
> + errors = "strict";
> + else if (self->errors == ERROR_IGNORE)
> + errors = "ignore";
> + else if (self->errors == ERROR_REPLACE)
> + errors = "replace";
> else {
> - return PyCodec_LookupError(errors);
> + Py_INCREF(self->errors);
> + return self->errors;
> + }
> +
> + return PyString_FromString(errors);
> +}
> +
> +static int
> +codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
> + void *closure)
> +{
> + PyObject *cb;
> +
> + if (!PyString_Check(value)) {
> + PyErr_SetString(PyExc_TypeError, "errors must be a string");
> + return -1;
> }
> +
> + cb = internal_error_callback(PyString_AS_STRING(value));
> + if (cb == NULL)
> + return -1;
> +
> + ERROR_DECREF(self->errors);
> + self->errors = cb;
> + return 0;
> }
>
> +/* This getset handlers list is used by all the stateful codec objects */
> +static PyGetSetDef codecctx_getsets[] = {
> + {"errors", (getter)codecctx_errors_get,
> + (setter)codecctx_errors_set,
> + PyDoc_STR("how to treat errors")},
> + {NULL,}
> +};
> +
> static int
> expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
> {
> Py_ssize_t orgpos, orgsize;
>
> - orgpos = (Py_ssize_t)((char*)buf->outbuf -
> + orgpos = (Py_ssize_t)((char *)buf->outbuf -
> PyString_AS_STRING(buf->outobj));
> orgsize = PyString_GET_SIZE(buf->outobj);
> if (_PyString_Resize(&buf->outobj, orgsize + (
> @@ -125,8 +189,7 @@
> {
> Py_ssize_t orgpos, orgsize;
>
> - orgpos = (Py_ssize_t)(buf->outbuf -
> - PyUnicode_AS_UNICODE(buf->outobj));
> + orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
> orgsize = PyUnicode_GET_SIZE(buf->outobj);
> if (PyUnicode_Resize(&buf->outobj, orgsize + (
> esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
> @@ -144,16 +207,21 @@
> goto errorexit; \
> }
>
> +
> +/**
> + * MultibyteCodec object
> + */
> +
> static int
> multibytecodec_encerror(MultibyteCodec *codec,
> MultibyteCodec_State *state,
> MultibyteEncodeBuffer *buf,
> PyObject *errors, Py_ssize_t e)
> {
> - PyObject *retobj = NULL, *retstr = NULL, *argsobj, *tobj;
> + PyObject *retobj = NULL, *retstr = NULL, *tobj;
> Py_ssize_t retstrsize, newpos;
> - const char *reason;
> Py_ssize_t esize, start, end;
> + const char *reason;
>
> if (e > 0) {
> reason = "illegal multibyte sequence";
> @@ -166,7 +234,7 @@
> return 0; /* retry it */
> case MBERR_TOOFEW:
> reason = "incomplete multibyte sequence";
> - esize = (size_t)(buf->inbuf_end - buf->inbuf);
> + esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
> break;
> case MBERR_INTERNAL:
> PyErr_SetString(PyExc_RuntimeError,
> @@ -230,21 +298,14 @@
> goto errorexit;
> }
>
> - argsobj = PyTuple_New(1);
> - if (argsobj == NULL)
> - goto errorexit;
> -
> - PyTuple_SET_ITEM(argsobj, 0, buf->excobj);
> - Py_INCREF(buf->excobj);
> - retobj = PyObject_CallObject(errors, argsobj);
> - Py_DECREF(argsobj);
> + retobj = call_error_callback(errors, buf->excobj);
> if (retobj == NULL)
> goto errorexit;
>
> if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
> !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
> !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
> - PyErr_SetString(PyExc_ValueError,
> + PyErr_SetString(PyExc_TypeError,
> "encoding error handler must return "
> "(unicode, int) tuple");
> goto errorexit;
> @@ -293,7 +354,7 @@
> MultibyteDecodeBuffer *buf,
> PyObject *errors, Py_ssize_t e)
> {
> - PyObject *argsobj, *retobj = NULL, *retuni = NULL;
> + PyObject *retobj = NULL, *retuni = NULL;
> Py_ssize_t retunisize, newpos;
> const char *reason;
> Py_ssize_t esize, start, end;
> @@ -309,7 +370,7 @@
> return 0; /* retry it */
> case MBERR_TOOFEW:
> reason = "incomplete multibyte sequence";
> - esize = (size_t)(buf->inbuf_end - buf->inbuf);
> + esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
> break;
> case MBERR_INTERNAL:
> PyErr_SetString(PyExc_RuntimeError,
> @@ -354,21 +415,14 @@
> goto errorexit;
> }
>
> - argsobj = PyTuple_New(1);
> - if (argsobj == NULL)
> - goto errorexit;
> -
> - PyTuple_SET_ITEM(argsobj, 0, buf->excobj);
> - Py_INCREF(buf->excobj);
> - retobj = PyObject_CallObject(errors, argsobj);
> - Py_DECREF(argsobj);
> + retobj = call_error_callback(errors, buf->excobj);
> if (retobj == NULL)
> goto errorexit;
>
> if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
> !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
> !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
> - PyErr_SetString(PyExc_ValueError,
> + PyErr_SetString(PyExc_TypeError,
> "decoding error handler must return "
> "(unicode, int) tuple");
> goto errorexit;
> @@ -453,7 +507,7 @@
> goto errorexit;
> }
>
> - finalsize = (Py_ssize_t)((char*)buf.outbuf -
> + finalsize = (Py_ssize_t)((char *)buf.outbuf -
> PyString_AS_STRING(buf.outobj));
>
> if (finalsize != PyString_GET_SIZE(buf.outobj))
> @@ -500,7 +554,7 @@
> data = PyUnicode_AS_UNICODE(arg);
> datalen = PyUnicode_GET_SIZE(arg);
>
> - errorcb = get_errorcallback(errors);
> + errorcb = internal_error_callback(errors);
> if (errorcb == NULL) {
> Py_XDECREF(ucvt);
> return NULL;
> @@ -515,16 +569,12 @@
> if (r == NULL)
> goto errorexit;
>
> - if (errorcb > ERROR_MAX) {
> - Py_DECREF(errorcb);
> - }
> + ERROR_DECREF(errorcb);
> Py_XDECREF(ucvt);
> return make_tuple(r, datalen);
>
> errorexit:
> - if (errorcb > ERROR_MAX) {
> - Py_DECREF(errorcb);
> - }
> + ERROR_DECREF(errorcb);
> Py_XDECREF(ucvt);
> return NULL;
> }
> @@ -543,18 +593,16 @@
> codeckwarglist, &data, &datalen, &errors))
> return NULL;
>
> - errorcb = get_errorcallback(errors);
> + errorcb = internal_error_callback(errors);
> if (errorcb == NULL)
> return NULL;
>
> if (datalen == 0) {
> - if (errorcb > ERROR_MAX) {
> - Py_DECREF(errorcb);
> - }
> + ERROR_DECREF(errorcb);
> return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0);
> }
>
> - buf.outobj = buf.excobj = NULL;
> + buf.excobj = NULL;
> buf.inbuf = buf.inbuf_top = (unsigned char *)data;
> buf.inbuf_end = buf.inbuf_top + datalen;
> buf.outobj = PyUnicode_FromUnicode(NULL, datalen);
> @@ -590,49 +638,17 @@
> goto errorexit;
>
> Py_XDECREF(buf.excobj);
> - if (errorcb > ERROR_MAX) {
> - Py_DECREF(errorcb);
> - }
> + ERROR_DECREF(errorcb);
> return make_tuple(buf.outobj, datalen);
>
> errorexit:
> - if (errorcb > ERROR_MAX) {
> - Py_DECREF(errorcb);
> - }
> + ERROR_DECREF(errorcb);
> Py_XDECREF(buf.excobj);
> Py_XDECREF(buf.outobj);
>
> return NULL;
> }
>
> -static PyObject *
> -MultibyteCodec_StreamReader(MultibyteCodecObject *self,
> - PyObject *args, PyObject *kwargs)
> -{
> - PyObject *stream;
> - char *errors = NULL;
> -
> - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamReader",
> - streamkwarglist, &stream, &errors))
> - return NULL;
> -
> - return mbstreamreader_create(self->codec, stream, errors);
> -}
> -
> -static PyObject *
> -MultibyteCodec_StreamWriter(MultibyteCodecObject *self,
> - PyObject *args, PyObject *kwargs)
> -{
> - PyObject *stream;
> - char *errors = NULL;
> -
> - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamWriter",
> - streamkwarglist, &stream, &errors))
> - return NULL;
> -
> - return mbstreamwriter_create(self->codec, stream, errors);
> -}
> -
> static struct PyMethodDef multibytecodec_methods[] = {
> {"encode", (PyCFunction)MultibyteCodec_Encode,
> METH_VARARGS | METH_KEYWORDS,
> @@ -640,12 +656,6 @@
> {"decode", (PyCFunction)MultibyteCodec_Decode,
> METH_VARARGS | METH_KEYWORDS,
> MultibyteCodec_Decode__doc__},
> - {"StreamReader",(PyCFunction)MultibyteCodec_StreamReader,
> - METH_VARARGS | METH_KEYWORDS,
> - MultibyteCodec_StreamReader__doc__},
> - {"StreamWriter",(PyCFunction)MultibyteCodec_StreamWriter,
> - METH_VARARGS | METH_KEYWORDS,
> - MultibyteCodec_StreamWriter__doc__},
> {NULL, NULL},
> };
>
> @@ -655,8 +665,6 @@
> PyObject_Del(self);
> }
>
> -
> -
> static PyTypeObject MultibyteCodec_Type = {
> PyObject_HEAD_INIT(NULL)
> 0, /* ob_size */
> @@ -690,244 +698,740 @@
> multibytecodec_methods, /* tp_methods */
> };
>
> -static PyObject *
> -mbstreamreader_iread(MultibyteStreamReaderObject *self,
> - const char *method, Py_ssize_t sizehint)
> -{
> - MultibyteDecodeBuffer buf;
> - PyObject *cres;
> - Py_ssize_t rsize, r, finalsize = 0;
>
> - if (sizehint == 0)
> - return PyUnicode_FromUnicode(NULL, 0);
> +/**
> + * Utility functions for stateful codec mechanism
> + */
>
> - buf.outobj = buf.excobj = NULL;
> - cres = NULL;
> +#define STATEFUL_DCTX(o) ((MultibyteStatefulDecoderContext *)(o))
> +#define STATEFUL_ECTX(o) ((MultibyteStatefulEncoderContext *)(o))
>
> - for (;;) {
> - if (sizehint < 0)
> - cres = PyObject_CallMethod(self->stream,
> - (char *)method, NULL);
> - else
> - cres = PyObject_CallMethod(self->stream,
> - (char *)method, "i", sizehint);
> - if (cres == NULL)
> - goto errorexit;
> +static PyObject *
> +encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
> + PyObject *unistr, int final)
> +{
> + PyObject *ucvt, *r = NULL;
> + Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;
> + Py_ssize_t datalen, origpending;
>
> - if (!PyString_Check(cres)) {
> + if (PyUnicode_Check(unistr))
> + ucvt = NULL;
> + else {
> + unistr = ucvt = PyObject_Unicode(unistr);
> + if (unistr == NULL)
> + return NULL;
> + else if (!PyUnicode_Check(unistr)) {
> PyErr_SetString(PyExc_TypeError,
> - "stream function returned a "
> - "non-string object");
> - goto errorexit;
> - }
> -
> - if (self->pendingsize > 0) {
> - PyObject *ctr;
> - char *ctrdata;
> -
> - rsize = PyString_GET_SIZE(cres) + self->pendingsize;
> - ctr = PyString_FromStringAndSize(NULL, rsize);
> - if (ctr == NULL)
> - goto errorexit;
> - ctrdata = PyString_AS_STRING(ctr);
> - memcpy(ctrdata, self->pending, self->pendingsize);
> - memcpy(ctrdata + self->pendingsize,
> - PyString_AS_STRING(cres),
> - PyString_GET_SIZE(cres));
> - Py_DECREF(cres);
> - cres = ctr;
> - self->pendingsize = 0;
> - }
> -
> - rsize = PyString_GET_SIZE(cres);
> - buf.inbuf = buf.inbuf_top =
> - (unsigned char *)PyString_AS_STRING(cres);
> - buf.inbuf_end = buf.inbuf_top + rsize;
> - if (buf.outobj == NULL) {
> - buf.outobj = PyUnicode_FromUnicode(NULL, rsize);
> - if (buf.outobj == NULL)
> - goto errorexit;
> - buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
> - buf.outbuf_end = buf.outbuf +
> - PyUnicode_GET_SIZE(buf.outobj);
> - }
> -
> - r = 0;
> - if (rsize > 0)
> - while (buf.inbuf < buf.inbuf_end) {
> - Py_ssize_t inleft, outleft;
> -
> - inleft = (Py_ssize_t)(buf.inbuf_end -
> - buf.inbuf);
> - outleft = (Py_ssize_t)(buf.outbuf_end -
> - buf.outbuf);
> -
> - r = self->codec->decode(&self->state,
> - self->codec->config,
> - &buf.inbuf, inleft,
> - &buf.outbuf, outleft);
> - if (r == 0 || r == MBERR_TOOFEW)
> - break;
> - else if (multibytecodec_decerror(self->codec,
> - &self->state, &buf,
> - self->errors, r))
> - goto errorexit;
> - }
> -
> - if (rsize == 0 || sizehint < 0) { /* end of file */
> - if (buf.inbuf < buf.inbuf_end &&
> - multibytecodec_decerror(self->codec, &self->state,
> - &buf, self->errors, MBERR_TOOFEW))
> - goto errorexit;
> + "couldn't convert the object to unicode.");
> + Py_DECREF(ucvt);
> + return NULL;
> }
> + }
>
> - if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
> - Py_ssize_t npendings;
> -
> - /* we can't assume that pendingsize is still 0 here.
> - * because this function can be called recursively
> - * from error callback */
> - npendings = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
> - if (npendings + self->pendingsize > MAXDECPENDING) {
> - PyErr_SetString(PyExc_RuntimeError,
> - "pending buffer overflow");
> - goto errorexit;
> - }
> - memcpy(self->pending + self->pendingsize, buf.inbuf,
> - npendings);
> - self->pendingsize += npendings;
> - }
> + datalen = PyUnicode_GET_SIZE(unistr);
> + origpending = ctx->pendingsize;
>
> - finalsize = (Py_ssize_t)(buf.outbuf -
> - PyUnicode_AS_UNICODE(buf.outobj));
> - Py_DECREF(cres);
> - cres = NULL;
> + if (ctx->pendingsize > 0) {
> + inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize);
> + if (inbuf_tmp == NULL)
> + goto errorexit;
> + memcpy(inbuf_tmp, ctx->pending,
> + Py_UNICODE_SIZE * ctx->pendingsize);
> + memcpy(inbuf_tmp + ctx->pendingsize,
> + PyUnicode_AS_UNICODE(unistr),
> + Py_UNICODE_SIZE * datalen);
> + datalen += ctx->pendingsize;
> + ctx->pendingsize = 0;
> + inbuf = inbuf_tmp;
> + }
> + else
> + inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr);
>
> - if (sizehint < 0 || finalsize != 0 || rsize == 0)
> - break;
> + inbuf_end = inbuf + datalen;
>
> - sizehint = 1; /* read 1 more byte and retry */
> + r = multibytecodec_encode(ctx->codec, &ctx->state,
> + (const Py_UNICODE **)&inbuf,
> + datalen, ctx->errors, final ? MBENC_FLUSH : 0);
> + if (r == NULL) {
> + /* recover the original pending buffer */
> + memcpy(ctx->pending, inbuf_tmp, Py_UNICODE_SIZE * origpending);
> + ctx->pendingsize = origpending;
> + goto errorexit;
> }
>
> - if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
> - if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
> + if (inbuf < inbuf_end) {
> + ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf);
> + if (ctx->pendingsize > MAXENCPENDING) {
> + /* normal codecs can't reach here */
> + ctx->pendingsize = 0;
> + PyErr_SetString(PyExc_UnicodeError,
> + "pending buffer overflow");
> goto errorexit;
> + }
> + memcpy(ctx->pending, inbuf,
> + ctx->pendingsize * Py_UNICODE_SIZE);
> + }
>
> - Py_XDECREF(cres);
> - Py_XDECREF(buf.excobj);
> - return buf.outobj;
> + if (inbuf_tmp != NULL)
> + PyMem_Del(inbuf_tmp);
> + Py_XDECREF(ucvt);
> + return r;
>
> errorexit:
> - Py_XDECREF(cres);
> - Py_XDECREF(buf.excobj);
> - Py_XDECREF(buf.outobj);
> + if (inbuf_tmp != NULL)
> + PyMem_Del(inbuf_tmp);
> + Py_XDECREF(r);
> + Py_XDECREF(ucvt);
> return NULL;
> }
>
> -static PyObject *
> -mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)
> +static int
> +decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
> + MultibyteDecodeBuffer *buf)
> {
> - PyObject *sizeobj = NULL;
> - Py_ssize_t size;
> + Py_ssize_t npendings;
>
> - if (!PyArg_ParseTuple(args, "|O:read", &sizeobj))
> - return NULL;
> + npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
> + if (npendings + ctx->pendingsize > MAXDECPENDING) {
> + PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
> + return -1;
> + }
> + memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
> + ctx->pendingsize += npendings;
> + return 0;
> +}
>
> - if (sizeobj == Py_None || sizeobj == NULL)
> - size = -1;
> - else if (PyInt_Check(sizeobj))
> - size = PyInt_AsSsize_t(sizeobj);
> - else {
> - PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
> - return NULL;
> +static int
> +decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
> + Py_ssize_t size)
> +{
> + buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
> + buf->inbuf_end = buf->inbuf_top + size;
> + if (buf->outobj == NULL) { /* only if outobj is not allocated yet */
> + buf->outobj = PyUnicode_FromUnicode(NULL, size);
> + if (buf->outobj == NULL)
> + return -1;
> + buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj);
> + buf->outbuf_end = buf->outbuf +
> + PyUnicode_GET_SIZE(buf->outobj);
> }
>
> - return mbstreamreader_iread(self, "read", size);
> + return 0;
> }
>
> -static PyObject *
> -mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)
> +static int
> +decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
> + MultibyteDecodeBuffer *buf)
> {
> - PyObject *sizeobj = NULL;
> - Py_ssize_t size;
> + while (buf->inbuf < buf->inbuf_end) {
> + Py_ssize_t inleft, outleft;
> + int r;
>
> - if (!PyArg_ParseTuple(args, "|O:readline", &sizeobj))
> - return NULL;
> + inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
> + outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
>
> - if (sizeobj == Py_None || sizeobj == NULL)
> - size = -1;
> - else if (PyInt_Check(sizeobj))
> - size = PyInt_AsSsize_t(sizeobj);
> - else {
> - PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
> - return NULL;
> + r = ctx->codec->decode(&ctx->state, ctx->codec->config,
> + &buf->inbuf, inleft, &buf->outbuf, outleft);
> + if (r == 0 || r == MBERR_TOOFEW)
> + break;
> + else if (multibytecodec_decerror(ctx->codec, &ctx->state,
> + buf, ctx->errors, r))
> + return -1;
> }
> -
> - return mbstreamreader_iread(self, "readline", size);
> + return 0;
> }
>
> -static PyObject *
> -mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
> -{
> - PyObject *sizehintobj = NULL, *r, *sr;
> - Py_ssize_t sizehint;
>
> - if (!PyArg_ParseTuple(args, "|O:readlines", &sizehintobj))
> - return NULL;
> +/**
> + * MultibyteIncrementalEncoder object
> + */
>
> - if (sizehintobj == Py_None || sizehintobj == NULL)
> - sizehint = -1;
> - else if (PyInt_Check(sizehintobj))
> - sizehint = PyInt_AsSsize_t(sizehintobj);
> - else {
> - PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
> - return NULL;
> - }
> +static PyObject *
> +mbiencoder_encode(MultibyteIncrementalEncoderObject *self,
> + PyObject *args, PyObject *kwargs)
> +{
> + PyObject *data;
> + int final = 0;
>
> - r = mbstreamreader_iread(self, "read", sizehint);
> - if (r == NULL)
> + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode",
> + incrementalkwarglist, &data, &final))
> return NULL;
>
> - sr = PyUnicode_Splitlines(r, 1);
> - Py_DECREF(r);
> - return sr;
> + return encoder_encode_stateful(STATEFUL_ECTX(self), data, final);
> }
>
> static PyObject *
> -mbstreamreader_reset(MultibyteStreamReaderObject *self)
> +mbiencoder_reset(MultibyteIncrementalEncoderObject *self)
> {
> if (self->codec->decreset != NULL &&
> self->codec->decreset(&self->state, self->codec->config) != 0)
> return NULL;
> self->pendingsize = 0;
>
> - Py_INCREF(Py_None);
> - return Py_None;
> + Py_RETURN_NONE;
> }
>
> -static struct PyMethodDef mbstreamreader_methods[] = {
> - {"read", (PyCFunction)mbstreamreader_read,
> - METH_VARARGS, NULL},
> - {"readline", (PyCFunction)mbstreamreader_readline,
> - METH_VARARGS, NULL},
> - {"readlines", (PyCFunction)mbstreamreader_readlines,
> - METH_VARARGS, NULL},
> - {"reset", (PyCFunction)mbstreamreader_reset,
> +static struct PyMethodDef mbiencoder_methods[] = {
> + {"encode", (PyCFunction)mbiencoder_encode,
> + METH_VARARGS | METH_KEYWORDS, NULL},
> + {"reset", (PyCFunction)mbiencoder_reset,
> METH_NOARGS, NULL},
> {NULL, NULL},
> };
>
> +static PyObject *
> +mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
> +{
> + MultibyteIncrementalEncoderObject *self;
> + PyObject *codec;
> + char *errors = NULL;
> +
> + codec = PyObject_GetAttrString((PyObject *)type, "codec");
> + if (codec == NULL)
> + return NULL;
> + if (!MultibyteCodec_Check(codec)) {
> + PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
> + return NULL;
> + }
> +
> + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
> + incnewkwarglist, &errors))
> + return NULL;
> +
> + self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
> + if (self == NULL)
> + return NULL;
> +
> + self->codec = ((MultibyteCodecObject *)codec)->codec;
> + self->pendingsize = 0;
> + self->errors = internal_error_callback(errors);
> + if (self->errors == NULL)
> + goto errorexit;
> + if (self->codec->encinit != NULL &&
> + self->codec->encinit(&self->state, self->codec->config) != 0)
> + goto errorexit;
> +
> + return (PyObject *)self;
> +
> +errorexit:
> + Py_XDECREF(self);
> + return NULL;
> +}
> +
> +static int
> +mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
> + visitproc visit, void *arg)
> +{
> + if (ERROR_ISCUSTOM(self->errors))
> + Py_VISIT(self->errors);
> + return 0;
> +}
> +
> static void
> -mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
> +mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
> +{
> + PyObject_GC_UnTrack(self);
> + ERROR_DECREF(self->errors);
> + self->ob_type->tp_free(self);
> +}
> +
> +static PyTypeObject MultibyteIncrementalEncoder_Type = {
> + PyObject_HEAD_INIT(NULL)
> + 0, /* ob_size */
> + "MultibyteIncrementalEncoder", /* tp_name */
> + sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */
> + 0, /* tp_itemsize */
> + /* methods */
> + (destructor)mbiencoder_dealloc, /* tp_dealloc */
> + 0, /* tp_print */
> + 0, /* tp_getattr */
> + 0, /* tp_setattr */
> + 0, /* tp_compare */
> + 0, /* tp_repr */
> + 0, /* tp_as_number */
> + 0, /* tp_as_sequence */
> + 0, /* tp_as_mapping */
> + 0, /* tp_hash */
> + 0, /* tp_call */
> + 0, /* tp_str */
> + PyObject_GenericGetAttr, /* tp_getattro */
> + 0, /* tp_setattro */
> + 0, /* tp_as_buffer */
> + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
> + | Py_TPFLAGS_BASETYPE, /* tp_flags */
> + 0, /* tp_doc */
> + (traverseproc)mbiencoder_traverse, /* tp_traverse */
> + 0, /* tp_clear */
> + 0, /* tp_richcompare */
> + 0, /* tp_weaklistoffset */
> + 0, /* tp_iter */
> + 0, /* tp_iterext */
> + mbiencoder_methods, /* tp_methods */
> + 0, /* tp_members */
> + codecctx_getsets, /* tp_getset */
> + 0, /* tp_base */
> + 0, /* tp_dict */
> + 0, /* tp_descr_get */
> + 0, /* tp_descr_set */
> + 0, /* tp_dictoffset */
> + 0, /* tp_init */
> + 0, /* tp_alloc */
> + mbiencoder_new, /* tp_new */
> +};
> +
> +
> +/**
> + * MultibyteIncrementalDecoder object
> + */
> +
> +static PyObject *
> +mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
> + PyObject *args, PyObject *kwargs)
> +{
> + MultibyteDecodeBuffer buf;
> + char *data, *wdata;
> + Py_ssize_t wsize, finalsize = 0, size, origpending;
> + int final = 0;
> +
> + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "t#|i:decode",
> + incrementalkwarglist, &data, &size, &final))
> + return NULL;
> +
> + buf.outobj = buf.excobj = NULL;
> + origpending = self->pendingsize;
> +
> + if (self->pendingsize == 0) {
> + wsize = size;
> + wdata = data;
> + }
> + else {
> + wsize = size + self->pendingsize;
> + wdata = PyMem_Malloc(wsize);
> + if (wdata == NULL)
> + goto errorexit;
> + memcpy(wdata, self->pending, self->pendingsize);
> + memcpy(wdata + self->pendingsize, data, size);
> + self->pendingsize = 0;
> + }
> +
> + if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
> + goto errorexit;
> +
> + if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
> + goto errorexit;
> +
> + if (final && buf.inbuf < buf.inbuf_end) {
> + if (multibytecodec_decerror(self->codec, &self->state,
> + &buf, self->errors, MBERR_TOOFEW)) {
> + /* recover the original pending buffer */
> + memcpy(self->pending, wdata, origpending);
> + self->pendingsize = origpending;
> + goto errorexit;
> + }
> + }
> +
> + if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
> + if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
> + goto errorexit;
> + }
> +
> + finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));
> + if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
> + if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
> + goto errorexit;
> +
> + if (wdata != data)
> + PyMem_Del(wdata);
> + Py_XDECREF(buf.excobj);
> + return buf.outobj;
> +
> +errorexit:
> + if (wdata != NULL && wdata != data)
> + PyMem_Del(wdata);
> + Py_XDECREF(buf.excobj);
> + Py_XDECREF(buf.outobj);
> + return NULL;
> +}
> +
> +static PyObject *
> +mbidecoder_reset(MultibyteIncrementalDecoderObject *self)
> +{
> + if (self->codec->decreset != NULL &&
> + self->codec->decreset(&self->state, self->codec->config) != 0)
> + return NULL;
> + self->pendingsize = 0;
> +
> + Py_RETURN_NONE;
> +}
> +
> +static struct PyMethodDef mbidecoder_methods[] = {
> + {"decode", (PyCFunction)mbidecoder_decode,
> + METH_VARARGS | METH_KEYWORDS, NULL},
> + {"reset", (PyCFunction)mbidecoder_reset,
> + METH_NOARGS, NULL},
> + {NULL, NULL},
> +};
> +
> +static PyObject *
> +mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
> +{
> + MultibyteIncrementalDecoderObject *self;
> + PyObject *codec;
> + char *errors = NULL;
> +
> + codec = PyObject_GetAttrString((PyObject *)type, "codec");
> + if (codec == NULL)
> + return NULL;
> + if (!MultibyteCodec_Check(codec)) {
> + PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
> + return NULL;
> + }
> +
> + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
> + incnewkwarglist, &errors))
> + return NULL;
> +
> + self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
> + if (self == NULL)
> + return NULL;
> +
> + self->codec = ((MultibyteCodecObject *)codec)->codec;
> + self->pendingsize = 0;
> + self->errors = internal_error_callback(errors);
> + if (self->errors == NULL)
> + goto errorexit;
> + if (self->codec->decinit != NULL &&
> + self->codec->decinit(&self->state, self->codec->config) != 0)
> + goto errorexit;
> +
> + return (PyObject *)self;
> +
> +errorexit:
> + Py_XDECREF(self);
> + return NULL;
> +}
> +
> +static int
> +mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
> + visitproc visit, void *arg)
> +{
> + if (ERROR_ISCUSTOM(self->errors))
> + Py_VISIT(self->errors);
> + return 0;
> +}
> +
> +static void
> +mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
> +{
> + PyObject_GC_UnTrack(self);
> + ERROR_DECREF(self->errors);
> + self->ob_type->tp_free(self);
> +}
> +
> +static PyTypeObject MultibyteIncrementalDecoder_Type = {
> + PyObject_HEAD_INIT(NULL)
> + 0, /* ob_size */
> + "MultibyteIncrementalDecoder", /* tp_name */
> + sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */
> + 0, /* tp_itemsize */
> + /* methods */
> + (destructor)mbidecoder_dealloc, /* tp_dealloc */
> + 0, /* tp_print */
> + 0, /* tp_getattr */
> + 0, /* tp_setattr */
> + 0, /* tp_compare */
> + 0, /* tp_repr */
> + 0, /* tp_as_number */
> + 0, /* tp_as_sequence */
> + 0, /* tp_as_mapping */
> + 0, /* tp_hash */
> + 0, /* tp_call */
> + 0, /* tp_str */
> + PyObject_GenericGetAttr, /* tp_getattro */
> + 0, /* tp_setattro */
> + 0, /* tp_as_buffer */
> + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
> + | Py_TPFLAGS_BASETYPE, /* tp_flags */
> + 0, /* tp_doc */
> + (traverseproc)mbidecoder_traverse, /* tp_traverse */
> + 0, /* tp_clear */
> + 0, /* tp_richcompare */
> + 0, /* tp_weaklistoffset */
> + 0, /* tp_iter */
> + 0, /* tp_iterext */
> + mbidecoder_methods, /* tp_methods */
> + 0, /* tp_members */
> + codecctx_getsets, /* tp_getset */
> + 0, /* tp_base */
> + 0, /* tp_dict */
> + 0, /* tp_descr_get */
> + 0, /* tp_descr_set */
> + 0, /* tp_dictoffset */
> + 0, /* tp_init */
> + 0, /* tp_alloc */
> + mbidecoder_new, /* tp_new */
> +};
> +
> +
> +/**
> + * MultibyteStreamReader object
> + */
> +
> +static PyObject *
> +mbstreamreader_iread(MultibyteStreamReaderObject *self,
> + const char *method, Py_ssize_t sizehint)
> +{
> + MultibyteDecodeBuffer buf;
> + PyObject *cres;
> + Py_ssize_t rsize, finalsize = 0;
> +
> + if (sizehint == 0)
> + return PyUnicode_FromUnicode(NULL, 0);
> +
> + buf.outobj = buf.excobj = NULL;
> + cres = NULL;
> +
> + for (;;) {
> + if (sizehint < 0)
> + cres = PyObject_CallMethod(self->stream,
> + (char *)method, NULL);
> + else
> + cres = PyObject_CallMethod(self->stream,
> + (char *)method, "i", sizehint);
> + if (cres == NULL)
> + goto errorexit;
> +
> + if (!PyString_Check(cres)) {
> + PyErr_SetString(PyExc_TypeError,
> + "stream function returned a "
> + "non-string object");
> + goto errorexit;
> + }
> +
> + if (self->pendingsize > 0) {
> + PyObject *ctr;
> + char *ctrdata;
> +
> + rsize = PyString_GET_SIZE(cres) + self->pendingsize;
> + ctr = PyString_FromStringAndSize(NULL, rsize);
> + if (ctr == NULL)
> + goto errorexit;
> + ctrdata = PyString_AS_STRING(ctr);
> + memcpy(ctrdata, self->pending, self->pendingsize);
> + memcpy(ctrdata + self->pendingsize,
> + PyString_AS_STRING(cres),
> + PyString_GET_SIZE(cres));
> + Py_DECREF(cres);
> + cres = ctr;
> + self->pendingsize = 0;
> + }
> +
> + rsize = PyString_GET_SIZE(cres);
> + if (decoder_prepare_buffer(&buf, PyString_AS_STRING(cres),
> + rsize) != 0)
> + goto errorexit;
> +
> + if (rsize > 0 && decoder_feed_buffer(
> + (MultibyteStatefulDecoderContext *)self, &buf))
> + goto errorexit;
> +
> + if (rsize == 0 || sizehint < 0) { /* end of file */
> + if (buf.inbuf < buf.inbuf_end &&
> + multibytecodec_decerror(self->codec, &self->state,
> + &buf, self->errors, MBERR_TOOFEW))
> + goto errorexit;
> + }
> +
> + if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
> + if (decoder_append_pending(STATEFUL_DCTX(self),
> + &buf) != 0)
> + goto errorexit;
> + }
> +
> + finalsize = (Py_ssize_t)(buf.outbuf -
> + PyUnicode_AS_UNICODE(buf.outobj));
> + Py_DECREF(cres);
> + cres = NULL;
> +
> + if (sizehint < 0 || finalsize != 0 || rsize == 0)
> + break;
> +
> + sizehint = 1; /* read 1 more byte and retry */
> + }
> +
> + if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
> + if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
> + goto errorexit;
> +
> + Py_XDECREF(cres);
> + Py_XDECREF(buf.excobj);
> + return buf.outobj;
> +
> +errorexit:
> + Py_XDECREF(cres);
> + Py_XDECREF(buf.excobj);
> + Py_XDECREF(buf.outobj);
> + return NULL;
> +}
> +
> +static PyObject *
> +mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)
> +{
> + PyObject *sizeobj = NULL;
> + Py_ssize_t size;
> +
> + if (!PyArg_ParseTuple(args, "|O:read", &sizeobj))
> + return NULL;
> +
> + if (sizeobj == Py_None || sizeobj == NULL)
> + size = -1;
> + else if (PyInt_Check(sizeobj))
> + size = PyInt_AsSsize_t(sizeobj);
> + else {
> + PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
> + return NULL;
> + }
> +
> + return mbstreamreader_iread(self, "read", size);
> +}
> +
> +static PyObject *
> +mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)
> +{
> + PyObject *sizeobj = NULL;
> + Py_ssize_t size;
> +
> + if (!PyArg_ParseTuple(args, "|O:readline", &sizeobj))
> + return NULL;
> +
> + if (sizeobj == Py_None || sizeobj == NULL)
> + size = -1;
> + else if (PyInt_Check(sizeobj))
> + size = PyInt_AsSsize_t(sizeobj);
> + else {
> + PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
> + return NULL;
> + }
> +
> + retur...
>
> [Message clipped]
More information about the Python-checkins
mailing list