[Python-checkins] r43320 - in python/trunk: Lib/encodings/big5.py Lib/encodings/big5hkscs.py Lib/encodings/cp932.py Lib/encodings/cp949.py Lib/encodings/cp950.py Lib/encodings/euc_jis_2004.py Lib/encodings/euc_jisx0213.py Lib/encodings/euc_jp.py Lib/encodings/euc_kr.py Lib/encodings/gb18030.py Lib/encodings/gb2312.py Lib/encodings/gbk.py Lib/encodings/hz.py Lib/encodings/iso2022_jp.py Lib/encodings/iso2022_jp_1.py Lib/encodings/iso2022_jp_2.py Lib/encodings/iso2022_jp_2004.py Lib/encodings/iso2022_jp_3.py Lib/encodings/iso2022_jp_ext.py Lib/encodings/iso2022_kr.py Lib/encodings/johab.py Lib/encodings/shift_jis.py Lib/encodings/shift_jis_2004.py Lib/encodings/shift_jisx0213.py Lib/test/test_multibytecodec.py Lib/test/test_multibytecodec_support.py Modules/cjkcodecs/_codecs_cn.c Modules/cjkcodecs/multibytecodec.c Modules/cjkcodecs/multibytecodec.h Tools/unicode/Makefile Tools/unicode/gencjkcodecs.py

hyeshik.chang python-checkins at python.org
Sun Mar 26 04:35:03 CEST 2006


Author: hyeshik.chang
Date: Sun Mar 26 04:34:59 2006
New Revision: 43320

Added:
   python/trunk/Tools/unicode/gencjkcodecs.py
Modified:
   python/trunk/Lib/encodings/big5.py
   python/trunk/Lib/encodings/big5hkscs.py
   python/trunk/Lib/encodings/cp932.py
   python/trunk/Lib/encodings/cp949.py
   python/trunk/Lib/encodings/cp950.py
   python/trunk/Lib/encodings/euc_jis_2004.py
   python/trunk/Lib/encodings/euc_jisx0213.py
   python/trunk/Lib/encodings/euc_jp.py
   python/trunk/Lib/encodings/euc_kr.py
   python/trunk/Lib/encodings/gb18030.py
   python/trunk/Lib/encodings/gb2312.py
   python/trunk/Lib/encodings/gbk.py
   python/trunk/Lib/encodings/hz.py
   python/trunk/Lib/encodings/iso2022_jp.py
   python/trunk/Lib/encodings/iso2022_jp_1.py
   python/trunk/Lib/encodings/iso2022_jp_2.py
   python/trunk/Lib/encodings/iso2022_jp_2004.py
   python/trunk/Lib/encodings/iso2022_jp_3.py
   python/trunk/Lib/encodings/iso2022_jp_ext.py
   python/trunk/Lib/encodings/iso2022_kr.py
   python/trunk/Lib/encodings/johab.py
   python/trunk/Lib/encodings/shift_jis.py
   python/trunk/Lib/encodings/shift_jis_2004.py
   python/trunk/Lib/encodings/shift_jisx0213.py
   python/trunk/Lib/test/test_multibytecodec.py
   python/trunk/Lib/test/test_multibytecodec_support.py
   python/trunk/Modules/cjkcodecs/_codecs_cn.c
   python/trunk/Modules/cjkcodecs/multibytecodec.c
   python/trunk/Modules/cjkcodecs/multibytecodec.h
   python/trunk/Tools/unicode/Makefile
Log:
Patch #1443155: Add incremental codec support for the CJK codecs.
(reviewed by Walter Dörwald)


Modified: python/trunk/Lib/encodings/big5.py
==============================================================================
--- python/trunk/Lib/encodings/big5.py	(original)
+++ python/trunk/Lib/encodings/big5.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # big5.py: Python Unicode Codec for BIG5
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: big5.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_tw, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_tw.getcodec('big5')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='big5',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/big5hkscs.py
==============================================================================
--- python/trunk/Lib/encodings/big5hkscs.py	(original)
+++ python/trunk/Lib/encodings/big5hkscs.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # big5hkscs.py: Python Unicode Codec for BIG5HKSCS
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: big5hkscs.py,v 1.1 2004/06/29 05:14:27 perky Exp $
 #
 
 import _codecs_hk, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_hk.getcodec('big5hkscs')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='big5hkscs',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/cp932.py
==============================================================================
--- python/trunk/Lib/encodings/cp932.py	(original)
+++ python/trunk/Lib/encodings/cp932.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # cp932.py: Python Unicode Codec for CP932
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: cp932.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_jp, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_jp.getcodec('cp932')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='cp932',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/cp949.py
==============================================================================
--- python/trunk/Lib/encodings/cp949.py	(original)
+++ python/trunk/Lib/encodings/cp949.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # cp949.py: Python Unicode Codec for CP949
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: cp949.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_kr, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_kr.getcodec('cp949')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='cp949',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/cp950.py
==============================================================================
--- python/trunk/Lib/encodings/cp950.py	(original)
+++ python/trunk/Lib/encodings/cp950.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # cp950.py: Python Unicode Codec for CP950
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: cp950.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_tw, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_tw.getcodec('cp950')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='cp950',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/euc_jis_2004.py
==============================================================================
--- python/trunk/Lib/encodings/euc_jis_2004.py	(original)
+++ python/trunk/Lib/encodings/euc_jis_2004.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # euc_jis_2004.py: Python Unicode Codec for EUC_JIS_2004
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: euc_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
 #
 
 import _codecs_jp, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_jp.getcodec('euc_jis_2004')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='euc_jis_2004',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/euc_jisx0213.py
==============================================================================
--- python/trunk/Lib/encodings/euc_jisx0213.py	(original)
+++ python/trunk/Lib/encodings/euc_jisx0213.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # euc_jisx0213.py: Python Unicode Codec for EUC_JISX0213
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: euc_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_jp, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_jp.getcodec('euc_jisx0213')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='euc_jisx0213',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/euc_jp.py
==============================================================================
--- python/trunk/Lib/encodings/euc_jp.py	(original)
+++ python/trunk/Lib/encodings/euc_jp.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # euc_jp.py: Python Unicode Codec for EUC_JP
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: euc_jp.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_jp, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_jp.getcodec('euc_jp')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='euc_jp',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/euc_kr.py
==============================================================================
--- python/trunk/Lib/encodings/euc_kr.py	(original)
+++ python/trunk/Lib/encodings/euc_kr.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # euc_kr.py: Python Unicode Codec for EUC_KR
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: euc_kr.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_kr, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_kr.getcodec('euc_kr')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='euc_kr',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/gb18030.py
==============================================================================
--- python/trunk/Lib/encodings/gb18030.py	(original)
+++ python/trunk/Lib/encodings/gb18030.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # gb18030.py: Python Unicode Codec for GB18030
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: gb18030.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_cn, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_cn.getcodec('gb18030')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='gb18030',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/gb2312.py
==============================================================================
--- python/trunk/Lib/encodings/gb2312.py	(original)
+++ python/trunk/Lib/encodings/gb2312.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # gb2312.py: Python Unicode Codec for GB2312
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: gb2312.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_cn, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_cn.getcodec('gb2312')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='gb2312',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/gbk.py
==============================================================================
--- python/trunk/Lib/encodings/gbk.py	(original)
+++ python/trunk/Lib/encodings/gbk.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # gbk.py: Python Unicode Codec for GBK
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: gbk.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_cn, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_cn.getcodec('gbk')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='gbk',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/hz.py
==============================================================================
--- python/trunk/Lib/encodings/hz.py	(original)
+++ python/trunk/Lib/encodings/hz.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # hz.py: Python Unicode Codec for HZ
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: hz.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_cn, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_cn.getcodec('hz')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='hz',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/iso2022_jp.py
==============================================================================
--- python/trunk/Lib/encodings/iso2022_jp.py	(original)
+++ python/trunk/Lib/encodings/iso2022_jp.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # iso2022_jp.py: Python Unicode Codec for ISO2022_JP
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: iso2022_jp.py,v 1.2 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_iso2022, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_iso2022.getcodec('iso2022_jp')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='iso2022_jp',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/iso2022_jp_1.py
==============================================================================
--- python/trunk/Lib/encodings/iso2022_jp_1.py	(original)
+++ python/trunk/Lib/encodings/iso2022_jp_1.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # iso2022_jp_1.py: Python Unicode Codec for ISO2022_JP_1
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: iso2022_jp_1.py,v 1.2 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_iso2022, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_iso2022.getcodec('iso2022_jp_1')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='iso2022_jp_1',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/iso2022_jp_2.py
==============================================================================
--- python/trunk/Lib/encodings/iso2022_jp_2.py	(original)
+++ python/trunk/Lib/encodings/iso2022_jp_2.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # iso2022_jp_2.py: Python Unicode Codec for ISO2022_JP_2
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: iso2022_jp_2.py,v 1.2 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_iso2022, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_iso2022.getcodec('iso2022_jp_2')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='iso2022_jp_2',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/iso2022_jp_2004.py
==============================================================================
--- python/trunk/Lib/encodings/iso2022_jp_2004.py	(original)
+++ python/trunk/Lib/encodings/iso2022_jp_2004.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # iso2022_jp_2004.py: Python Unicode Codec for ISO2022_JP_2004
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: iso2022_jp_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
 #
 
 import _codecs_iso2022, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_iso2022.getcodec('iso2022_jp_2004')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='iso2022_jp_2004',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/iso2022_jp_3.py
==============================================================================
--- python/trunk/Lib/encodings/iso2022_jp_3.py	(original)
+++ python/trunk/Lib/encodings/iso2022_jp_3.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # iso2022_jp_3.py: Python Unicode Codec for ISO2022_JP_3
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: iso2022_jp_3.py,v 1.2 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_iso2022, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_iso2022.getcodec('iso2022_jp_3')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='iso2022_jp_3',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/iso2022_jp_ext.py
==============================================================================
--- python/trunk/Lib/encodings/iso2022_jp_ext.py	(original)
+++ python/trunk/Lib/encodings/iso2022_jp_ext.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # iso2022_jp_ext.py: Python Unicode Codec for ISO2022_JP_EXT
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: iso2022_jp_ext.py,v 1.2 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_iso2022, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_iso2022.getcodec('iso2022_jp_ext')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='iso2022_jp_ext',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/iso2022_kr.py
==============================================================================
--- python/trunk/Lib/encodings/iso2022_kr.py	(original)
+++ python/trunk/Lib/encodings/iso2022_kr.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # iso2022_kr.py: Python Unicode Codec for ISO2022_KR
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: iso2022_kr.py,v 1.2 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_iso2022, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_iso2022.getcodec('iso2022_kr')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='iso2022_kr',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/johab.py
==============================================================================
--- python/trunk/Lib/encodings/johab.py	(original)
+++ python/trunk/Lib/encodings/johab.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # johab.py: Python Unicode Codec for JOHAB
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: johab.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_kr, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_kr.getcodec('johab')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='johab',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/shift_jis.py
==============================================================================
--- python/trunk/Lib/encodings/shift_jis.py	(original)
+++ python/trunk/Lib/encodings/shift_jis.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # shift_jis.py: Python Unicode Codec for SHIFT_JIS
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: shift_jis.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_jp, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_jp.getcodec('shift_jis')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='shift_jis',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/shift_jis_2004.py
==============================================================================
--- python/trunk/Lib/encodings/shift_jis_2004.py	(original)
+++ python/trunk/Lib/encodings/shift_jis_2004.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # shift_jis_2004.py: Python Unicode Codec for SHIFT_JIS_2004
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: shift_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
 #
 
 import _codecs_jp, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_jp.getcodec('shift_jis_2004')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='shift_jis_2004',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/encodings/shift_jisx0213.py
==============================================================================
--- python/trunk/Lib/encodings/shift_jisx0213.py	(original)
+++ python/trunk/Lib/encodings/shift_jisx0213.py	Sun Mar 26 04:34:59 2006
@@ -2,10 +2,10 @@
 # shift_jisx0213.py: Python Unicode Codec for SHIFT_JISX0213
 #
 # Written by Hye-Shik Chang <perky at FreeBSD.org>
-# $CJKCodecs: shift_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_jp, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_jp.getcodec('shift_jisx0213')
 
@@ -13,22 +13,24 @@
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='shift_jisx0213',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )

Modified: python/trunk/Lib/test/test_multibytecodec.py
==============================================================================
--- python/trunk/Lib/test/test_multibytecodec.py	(original)
+++ python/trunk/Lib/test/test_multibytecodec.py	Sun Mar 26 04:34:59 2006
@@ -9,11 +9,106 @@
 from test import test_multibytecodec_support
 import unittest, StringIO, codecs
 
+class Test_MultibyteCodec(unittest.TestCase):
+
+    def test_nullcoding(self):
+        self.assertEqual(''.decode('gb18030'), u'')
+        self.assertEqual(unicode('', 'gb18030'), u'')
+        self.assertEqual(u''.encode('gb18030'), '')
+
+    def test_str_decode(self):
+        self.assertEqual('abcd'.encode('gb18030'), 'abcd')
+
+
+class Test_IncrementalEncoder(unittest.TestCase):
+
+    def test_stateless(self):
+        # cp949 encoder isn't stateful at all.
+        encoder = codecs.getincrementalencoder('cp949')()
+        self.assertEqual(encoder.encode(u'\ud30c\uc774\uc36c \ub9c8\uc744'),
+                         '\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb')
+        self.assertEqual(encoder.reset(), None)
+        self.assertEqual(encoder.encode(u'\u2606\u223c\u2606', True),
+                         '\xa1\xd9\xa1\xad\xa1\xd9')
+        self.assertEqual(encoder.reset(), None)
+        self.assertEqual(encoder.encode(u'', True), '')
+        self.assertEqual(encoder.encode(u'', False), '')
+        self.assertEqual(encoder.reset(), None)
+
+    def test_stateful(self):
+        # jisx0213 encoder is stateful for a few codepoints, e.g.:
+        #   U+00E6 => A9DC
+        #   U+00E6 U+0300 => ABC4
+        #   U+0300 => ABDC
+
+        encoder = codecs.getincrementalencoder('jisx0213')()
+        self.assertEqual(encoder.encode(u'\u00e6\u0300'), '\xab\xc4')
+        self.assertEqual(encoder.encode(u'\u00e6'), '')
+        self.assertEqual(encoder.encode(u'\u0300'), '\xab\xc4')
+        self.assertEqual(encoder.encode(u'\u00e6', True), '\xa9\xdc')
+
+        self.assertEqual(encoder.reset(), None)
+        self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc')
+
+        self.assertEqual(encoder.encode(u'\u00e6'), '')
+        self.assertEqual(encoder.encode('', True), '\xa9\xdc')
+        self.assertEqual(encoder.encode('', True), '')
+
+    def test_stateful_keep_buffer(self):
+        encoder = codecs.getincrementalencoder('jisx0213')()
+        self.assertEqual(encoder.encode(u'\u00e6'), '')
+        self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
+        self.assertEqual(encoder.encode(u'\u0300\u00e6'), '\xab\xc4')
+        self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
+        self.assertEqual(encoder.reset(), None)
+        self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc')
+        self.assertEqual(encoder.encode(u'\u00e6'), '')
+        self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
+        self.assertEqual(encoder.encode(u'', True), '\xa9\xdc')
+
+
+class Test_IncrementalDecoder(unittest.TestCase):
+
+    def test_dbcs(self):
+        # cp949 decoder is simple, with only 1- or 2-byte sequences.
+        decoder = codecs.getincrementaldecoder('cp949')()
+        self.assertEqual(decoder.decode('\xc6\xc4\xc0\xcc\xbd'),
+                         u'\ud30c\uc774')
+        self.assertEqual(decoder.decode('\xe3 \xb8\xb6\xc0\xbb'),
+                         u'\uc36c \ub9c8\uc744')
+        self.assertEqual(decoder.decode(''), u'')
+
+    def test_dbcs_keep_buffer(self):
+        decoder = codecs.getincrementaldecoder('cp949')()
+        self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c')
+        self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
+        self.assertEqual(decoder.decode('\xcc'), u'\uc774')
+
+        self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c')
+        self.assertRaises(UnicodeDecodeError, decoder.decode, '\xcc\xbd', True)
+        self.assertEqual(decoder.decode('\xcc'), u'\uc774')
+
+    def test_iso2022(self):
+        decoder = codecs.getincrementaldecoder('iso2022-jp')()
+        ESC = '\x1b'
+        self.assertEqual(decoder.decode(ESC + '('), u'')
+        self.assertEqual(decoder.decode('B', True), u'')
+        self.assertEqual(decoder.decode(ESC + '$'), u'')
+        self.assertEqual(decoder.decode('B@$'), u'\u4e16')
+        self.assertEqual(decoder.decode('@$@'), u'\u4e16')
+        self.assertEqual(decoder.decode('$', True), u'\u4e16')
+        self.assertEqual(decoder.reset(), None)
+        self.assertEqual(decoder.decode('@$'), u'@$')
+        self.assertEqual(decoder.decode(ESC + '$'), u'')
+        self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
+        self.assertEqual(decoder.decode('B@$'), u'\u4e16')
+
+
 class Test_StreamWriter(unittest.TestCase):
     if len(u'\U00012345') == 2: # UCS2
         def test_gb18030(self):
             s= StringIO.StringIO()
-            c = codecs.lookup('gb18030')[3](s)
+            c = codecs.getwriter('gb18030')(s)
             c.write(u'123')
             self.assertEqual(s.getvalue(), '123')
             c.write(u'\U00012345')
@@ -30,15 +125,16 @@
             self.assertEqual(s.getvalue(),
                     '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
 
-        # standard utf-8 codecs has broken StreamReader
-        if test_multibytecodec_support.__cjkcodecs__:
-            def test_utf_8(self):
-                s= StringIO.StringIO()
-                c = codecs.lookup('utf-8')[3](s)
-                c.write(u'123')
-                self.assertEqual(s.getvalue(), '123')
-                c.write(u'\U00012345')
-                self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
+        def test_utf_8(self):
+            s= StringIO.StringIO()
+            c = codecs.getwriter('utf-8')(s)
+            c.write(u'123')
+            self.assertEqual(s.getvalue(), '123')
+            c.write(u'\U00012345')
+            self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
+
+            # Python utf-8 codec can't buffer surrogate pairs yet.
+            if 0:
                 c.write(u'\U00012345'[0])
                 self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
                 c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac')
@@ -61,14 +157,6 @@
     else: # UCS4
         pass
 
-    def test_nullcoding(self):
-        self.assertEqual(''.decode('gb18030'), u'')
-        self.assertEqual(unicode('', 'gb18030'), u'')
-        self.assertEqual(u''.encode('gb18030'), '')
-
-    def test_str_decode(self):
-        self.assertEqual('abcd'.encode('gb18030'), 'abcd')
-
     def test_streamwriter_strwrite(self):
         s = StringIO.StringIO()
         wr = codecs.getwriter('gb18030')(s)
@@ -83,6 +171,9 @@
 
 def test_main():
     suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(Test_MultibyteCodec))
+    suite.addTest(unittest.makeSuite(Test_IncrementalEncoder))
+    suite.addTest(unittest.makeSuite(Test_IncrementalDecoder))
     suite.addTest(unittest.makeSuite(Test_StreamWriter))
     suite.addTest(unittest.makeSuite(Test_ISO2022))
     test_support.run_suite(suite)

Modified: python/trunk/Lib/test/test_multibytecodec_support.py
==============================================================================
--- python/trunk/Lib/test/test_multibytecodec_support.py	(original)
+++ python/trunk/Lib/test/test_multibytecodec_support.py	Sun Mar 26 04:34:59 2006
@@ -3,15 +3,12 @@
 # test_multibytecodec_support.py
 #   Common Unittest Routines for CJK codecs
 #
-# $CJKCodecs: test_multibytecodec_support.py,v 1.6 2004/06/19 06:09:55 perky Exp $
 
 import sys, codecs, os.path
 import unittest
 from test import test_support
 from StringIO import StringIO
 
-__cjkcodecs__ = 0 # define this as 0 for python
-
 class TestBase:
     encoding        = ''   # codec name
     codec           = None # codec tuple (with 4 elements)
@@ -21,11 +18,17 @@
     roundtriptest   = 1    # set if roundtrip is possible with unicode
     has_iso10646    = 0    # set if this encoding contains whole iso10646 map
     xmlcharnametest = None # string to test xmlcharrefreplace
+    unmappedunicode = u'\udeee' # a unicode codepoint that is not mapped.
 
     def setUp(self):
         if self.codec is None:
             self.codec = codecs.lookup(self.encoding)
-        self.encode, self.decode, self.reader, self.writer = self.codec
+        self.encode = self.codec.encode
+        self.decode = self.codec.decode
+        self.reader = self.codec.streamreader
+        self.writer = self.codec.streamwriter
+        self.incrementalencoder = self.codec.incrementalencoder
+        self.incrementaldecoder = self.codec.incrementaldecoder
 
     def test_chunkcoding(self):
         for native, utf8 in zip(*[StringIO(f).readlines()
@@ -47,51 +50,142 @@
             else:
                 self.assertRaises(UnicodeError, func, source, scheme)
 
-    if sys.hexversion >= 0x02030000:
-        def test_xmlcharrefreplace(self):
-            if self.has_iso10646:
-                return
-
-            s = u"\u0b13\u0b23\u0b60 nd eggs"
-            self.assertEqual(
-                self.encode(s, "xmlcharrefreplace")[0],
-                "&#2835;&#2851;&#2912; nd eggs"
-            )
-
-        def test_customreplace(self):
-            if self.has_iso10646:
-                return
-
-            import htmlentitydefs
-
-            names = {}
-            for (key, value) in htmlentitydefs.entitydefs.items():
-                if len(value)==1:
-                    names[value.decode('latin-1')] = self.decode(key)[0]
+    def test_xmlcharrefreplace(self):
+        if self.has_iso10646:
+            return
+
+        s = u"\u0b13\u0b23\u0b60 nd eggs"
+        self.assertEqual(
+            self.encode(s, "xmlcharrefreplace")[0],
+            "&#2835;&#2851;&#2912; nd eggs"
+        )
+
+    def test_customreplace(self):
+        if self.has_iso10646:
+            return
+
+        from htmlentitydefs import codepoint2name
+
+        def xmlcharnamereplace(exc):
+            if not isinstance(exc, UnicodeEncodeError):
+                raise TypeError("don't know how to handle %r" % exc)
+            l = []
+            for c in exc.object[exc.start:exc.end]:
+                if ord(c) in codepoint2name:
+                    l.append(u"&%s;" % codepoint2name[ord(c)])
                 else:
-                    names[unichr(int(value[2:-1]))] = self.decode(key)[0]
-
-            def xmlcharnamereplace(exc):
-                if not isinstance(exc, UnicodeEncodeError):
-                    raise TypeError("don't know how to handle %r" % exc)
-                l = []
-                for c in exc.object[exc.start:exc.end]:
-                    try:
-                        l.append(u"&%s;" % names[c])
-                    except KeyError:
-                        l.append(u"&#%d;" % ord(c))
-                return (u"".join(l), exc.end)
+                    l.append(u"&#%d;" % ord(c))
+            return (u"".join(l), exc.end)
 
-            codecs.register_error(
-                "test.xmlcharnamereplace", xmlcharnamereplace)
+        codecs.register_error("test.xmlcharnamereplace", xmlcharnamereplace)
 
-            if self.xmlcharnametest:
-                sin, sout = self.xmlcharnametest
+        if self.xmlcharnametest:
+            sin, sout = self.xmlcharnametest
+        else:
+            sin = u"\xab\u211c\xbb = \u2329\u1234\u232a"
+            sout = "&laquo;&real;&raquo; = &lang;&#4660;&rang;"
+        self.assertEqual(self.encode(sin,
+                                    "test.xmlcharnamereplace")[0], sout)
+
+    def test_callback_wrong_objects(self):
+        def myreplace(exc):
+            return (ret, exc.end)
+        codecs.register_error("test.cjktest", myreplace)
+
+        for ret in ([1, 2, 3], [], None, object(), 'string', ''):
+            self.assertRaises(TypeError, self.encode, self.unmappedunicode,
+                              'test.cjktest')
+
+    def test_callback_None_index(self):
+        def myreplace(exc):
+            return (u'x', None)
+        codecs.register_error("test.cjktest", myreplace)
+        self.assertRaises(TypeError, self.encode, self.unmappedunicode,
+                          'test.cjktest')
+
+    def test_callback_backward_index(self):
+        def myreplace(exc):
+            if myreplace.limit > 0:
+                myreplace.limit -= 1
+                return (u'REPLACED', 0)
             else:
-                sin = u"\xab\u211c\xbb = \u2329\u1234\u232a"
-                sout = "&laquo;&real;&raquo; = &lang;&#4660;&rang;"
-            self.assertEqual(self.encode(sin,
-                                        "test.xmlcharnamereplace")[0], sout)
+                return (u'TERMINAL', exc.end)
+        myreplace.limit = 3
+        codecs.register_error("test.cjktest", myreplace)
+        self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh',
+                                     'test.cjktest'),
+                ('abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))
+
+    def test_callback_forward_index(self):
+        def myreplace(exc):
+            return (u'REPLACED', exc.end + 2)
+        codecs.register_error("test.cjktest", myreplace)
+        self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh',
+                                     'test.cjktest'), ('abcdREPLACEDgh', 9))
+
+    def test_callback_index_outofbound(self):
+        def myreplace(exc):
+            return (u'TERM', 100)
+        codecs.register_error("test.cjktest", myreplace)
+        self.assertRaises(IndexError, self.encode, self.unmappedunicode,
+                          'test.cjktest')
+
+    def test_incrementalencoder(self):
+        UTF8Reader = codecs.getreader('utf-8')
+        for sizehint in [None] + range(1, 33) + \
+                        [64, 128, 256, 512, 1024]:
+            istream = UTF8Reader(StringIO(self.tstring[1]))
+            ostream = StringIO()
+            encoder = self.incrementalencoder()
+            while 1:
+                if sizehint is not None:
+                    data = istream.read(sizehint)
+                else:
+                    data = istream.read()
+
+                if not data:
+                    break
+                e = encoder.encode(data)
+                ostream.write(e)
+
+            self.assertEqual(ostream.getvalue(), self.tstring[0])
+
+    def test_incrementaldecoder(self):
+        UTF8Writer = codecs.getwriter('utf-8')
+        for sizehint in [None, -1] + range(1, 33) + \
+                        [64, 128, 256, 512, 1024]:
+            istream = StringIO(self.tstring[0])
+            ostream = UTF8Writer(StringIO())
+            decoder = self.incrementaldecoder()
+            while 1:
+                data = istream.read(sizehint)
+                if not data:
+                    break
+                else:
+                    u = decoder.decode(data)
+                    ostream.write(u)
+
+            self.assertEqual(ostream.getvalue(), self.tstring[1])
+
+    def test_incrementalencoder_error_callback(self):
+        inv = self.unmappedunicode
+
+        e = self.incrementalencoder()
+        self.assertRaises(UnicodeEncodeError, e.encode, inv, True)
+
+        e.errors = 'ignore'
+        self.assertEqual(e.encode(inv, True), '')
+
+        e.reset()
+        def tempreplace(exc):
+            return (u'called', exc.end)
+        codecs.register_error('test.incremental_error_callback', tempreplace)
+        e.errors = 'test.incremental_error_callback'
+        self.assertEqual(e.encode(inv, True), 'called')
+
+        # again
+        e.errors = 'ignore'
+        self.assertEqual(e.encode(inv, True), '')
 
     def test_streamreader(self):
         UTF8Writer = codecs.getwriter('utf-8')
@@ -113,11 +207,7 @@
                 self.assertEqual(ostream.getvalue(), self.tstring[1])
 
     def test_streamwriter(self):
-        if __cjkcodecs__:
-            readfuncs = ('read', 'readline', 'readlines')
-        else:
-            # standard utf8 codec has broken readline and readlines.
-            readfuncs = ('read',)
+        readfuncs = ('read', 'readline', 'readlines')
         UTF8Reader = codecs.getreader('utf-8')
         for name in readfuncs:
             for sizehint in [None] + range(1, 33) + \
@@ -211,10 +301,5 @@
             self.assertEqual(unicode(csetch, self.encoding), unich)
 
 def load_teststring(encoding):
-    if __cjkcodecs__:
-        etxt = open(os.path.join('sampletexts', encoding) + '.txt').read()
-        utxt = open(os.path.join('sampletexts', encoding) + '.utf8').read()
-        return (etxt, utxt)
-    else:
-        from test import cjkencodings_test
-        return cjkencodings_test.teststring[encoding]
+    from test import cjkencodings_test
+    return cjkencodings_test.teststring[encoding]

Modified: python/trunk/Modules/cjkcodecs/_codecs_cn.c
==============================================================================
--- python/trunk/Modules/cjkcodecs/_codecs_cn.c	(original)
+++ python/trunk/Modules/cjkcodecs/_codecs_cn.c	Sun Mar 26 04:34:59 2006
@@ -217,11 +217,8 @@
 					break;
 				}
 
-			if (utrrange->first == 0) {
-				PyErr_SetString(PyExc_RuntimeError,
-						"unicode mapping invalid");
+			if (utrrange->first == 0)
 				return 1;
-			}
 			continue;
 		}
 

Modified: python/trunk/Modules/cjkcodecs/multibytecodec.c
==============================================================================
--- python/trunk/Modules/cjkcodecs/multibytecodec.c	(original)
+++ python/trunk/Modules/cjkcodecs/multibytecodec.c	Sun Mar 26 04:34:59 2006
@@ -6,6 +6,7 @@
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
+#include "structmember.h"
 #include "multibytecodec.h"
 
 typedef struct {
@@ -38,22 +39,14 @@
 are 'ignore' and 'replace' as well as any other name registerd with\n\
 codecs.register_error that is able to handle UnicodeDecodeErrors.");
 
-PyDoc_STRVAR(MultibyteCodec_StreamReader__doc__,
-"I.StreamReader(stream[, errors]) -> StreamReader instance");
-
-PyDoc_STRVAR(MultibyteCodec_StreamWriter__doc__,
-"I.StreamWriter(stream[, errors]) -> StreamWriter instance");
-
 static char *codeckwarglist[] = {"input", "errors", NULL};
+static char *incnewkwarglist[] = {"errors", NULL};
+static char *incrementalkwarglist[] = {"input", "final", NULL};
 static char *streamkwarglist[] = {"stream", "errors", NULL};
 
 static PyObject *multibytecodec_encode(MultibyteCodec *,
 		MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t,
 		PyObject *, int);
-static PyObject *mbstreamreader_create(MultibyteCodec *,
-		PyObject *, const char *);
-static PyObject *mbstreamwriter_create(MultibyteCodec *,
-		PyObject *, const char *);
 
 #define MBENC_RESET	MBENC_MAX<<1 /* reset after an encoding session */
 
@@ -83,7 +76,7 @@
 }
 
 static PyObject *
-get_errorcallback(const char *errors)
+internal_error_callback(const char *errors)
 {
 	if (errors == NULL || strcmp(errors, "strict") == 0)
 		return ERROR_STRICT;
@@ -91,17 +84,88 @@
 		return ERROR_IGNORE;
 	else if (strcmp(errors, "replace") == 0)
 		return ERROR_REPLACE;
+	else
+		return PyString_FromString(errors);
+}
+
+static PyObject *
+call_error_callback(PyObject *errors, PyObject *exc)
+{
+	PyObject *args, *cb, *r;
+
+	assert(PyString_Check(errors));
+	cb = PyCodec_LookupError(PyString_AS_STRING(errors));
+	if (cb == NULL)
+		return NULL;
+
+	args = PyTuple_New(1);
+	if (args == NULL) {
+		Py_DECREF(cb);
+		return NULL;
+	}
+
+	PyTuple_SET_ITEM(args, 0, exc);
+	Py_INCREF(exc);
+
+	r = PyObject_CallObject(cb, args);
+	Py_DECREF(args);
+	Py_DECREF(cb);
+	return r;
+}
+
+static PyObject *
+codecctx_errors_get(MultibyteStatefulCodecContext *self)
+{
+	const char *errors;
+
+	if (self->errors == ERROR_STRICT)
+		errors = "strict";
+	else if (self->errors == ERROR_IGNORE)
+		errors = "ignore";
+	else if (self->errors == ERROR_REPLACE)
+		errors = "replace";
 	else {
-		return PyCodec_LookupError(errors);
+		Py_INCREF(self->errors);
+		return self->errors;
+	}
+
+	return PyString_FromString(errors);
+}
+
+static int
+codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
+		    void *closure)
+{
+	PyObject *cb;
+
+	if (!PyString_Check(value)) {
+		PyErr_SetString(PyExc_TypeError, "errors must be a string");
+		return -1;
 	}
+
+	cb = internal_error_callback(PyString_AS_STRING(value));
+	if (cb == NULL)
+		return -1;
+
+	ERROR_DECREF(self->errors);
+	self->errors = cb;
+	return 0;
 }
 
+/* This getset handlers list is used by all the stateful codec objects */
+static PyGetSetDef codecctx_getsets[] = {
+	{"errors",	(getter)codecctx_errors_get,
+			(setter)codecctx_errors_set,
+			PyDoc_STR("how to treat errors")},
+	{NULL,}
+};
+
 static int
 expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
 {
 	Py_ssize_t orgpos, orgsize;
 
-	orgpos = (Py_ssize_t)((char*)buf->outbuf -
+	orgpos = (Py_ssize_t)((char *)buf->outbuf -
 				PyString_AS_STRING(buf->outobj));
 	orgsize = PyString_GET_SIZE(buf->outobj);
 	if (_PyString_Resize(&buf->outobj, orgsize + (
@@ -125,8 +189,7 @@
 {
 	Py_ssize_t orgpos, orgsize;
 
-	orgpos = (Py_ssize_t)(buf->outbuf -
-				PyUnicode_AS_UNICODE(buf->outobj));
+	orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
 	orgsize = PyUnicode_GET_SIZE(buf->outobj);
 	if (PyUnicode_Resize(&buf->outobj, orgsize + (
 	    esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
@@ -144,16 +207,21 @@
 			goto errorexit;					\
 }
 
+
+/**
+ * MultibyteCodec object
+ */
+
 static int
 multibytecodec_encerror(MultibyteCodec *codec,
 			MultibyteCodec_State *state,
 			MultibyteEncodeBuffer *buf,
 			PyObject *errors, Py_ssize_t e)
 {
-	PyObject *retobj = NULL, *retstr = NULL, *argsobj, *tobj;
+	PyObject *retobj = NULL, *retstr = NULL, *tobj;
 	Py_ssize_t retstrsize, newpos;
-	const char *reason;
 	Py_ssize_t esize, start, end;
+	const char *reason;
 
 	if (e > 0) {
 		reason = "illegal multibyte sequence";
@@ -166,7 +234,7 @@
 			return 0; /* retry it */
 		case MBERR_TOOFEW:
 			reason = "incomplete multibyte sequence";
-			esize = (size_t)(buf->inbuf_end - buf->inbuf);
+			esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
 			break;
 		case MBERR_INTERNAL:
 			PyErr_SetString(PyExc_RuntimeError,
@@ -230,21 +298,14 @@
 		goto errorexit;
 	}
 
-	argsobj = PyTuple_New(1);
-	if (argsobj == NULL)
-		goto errorexit;
-
-	PyTuple_SET_ITEM(argsobj, 0, buf->excobj);
-	Py_INCREF(buf->excobj);
-	retobj = PyObject_CallObject(errors, argsobj);
-	Py_DECREF(argsobj);
+	retobj = call_error_callback(errors, buf->excobj);
 	if (retobj == NULL)
 		goto errorexit;
 
 	if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
 	    !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
 	    !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
-		PyErr_SetString(PyExc_ValueError,
+		PyErr_SetString(PyExc_TypeError,
 				"encoding error handler must return "
 				"(unicode, int) tuple");
 		goto errorexit;
@@ -293,7 +354,7 @@
 			MultibyteDecodeBuffer *buf,
 			PyObject *errors, Py_ssize_t e)
 {
-	PyObject *argsobj, *retobj = NULL, *retuni = NULL;
+	PyObject *retobj = NULL, *retuni = NULL;
 	Py_ssize_t retunisize, newpos;
 	const char *reason;
 	Py_ssize_t esize, start, end;
@@ -309,7 +370,7 @@
 			return 0; /* retry it */
 		case MBERR_TOOFEW:
 			reason = "incomplete multibyte sequence";
-			esize = (size_t)(buf->inbuf_end - buf->inbuf);
+			esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
 			break;
 		case MBERR_INTERNAL:
 			PyErr_SetString(PyExc_RuntimeError,
@@ -354,21 +415,14 @@
 		goto errorexit;
 	}
 
-	argsobj = PyTuple_New(1);
-	if (argsobj == NULL)
-		goto errorexit;
-
-	PyTuple_SET_ITEM(argsobj, 0, buf->excobj);
-	Py_INCREF(buf->excobj);
-	retobj = PyObject_CallObject(errors, argsobj);
-	Py_DECREF(argsobj);
+	retobj = call_error_callback(errors, buf->excobj);
 	if (retobj == NULL)
 		goto errorexit;
 
 	if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
 	    !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
 	    !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
-		PyErr_SetString(PyExc_ValueError,
+		PyErr_SetString(PyExc_TypeError,
 				"decoding error handler must return "
 				"(unicode, int) tuple");
 		goto errorexit;
@@ -453,7 +507,7 @@
 				goto errorexit;
 		}
 
-	finalsize = (Py_ssize_t)((char*)buf.outbuf -
+	finalsize = (Py_ssize_t)((char *)buf.outbuf -
 				 PyString_AS_STRING(buf.outobj));
 
 	if (finalsize != PyString_GET_SIZE(buf.outobj))
@@ -500,7 +554,7 @@
 	data = PyUnicode_AS_UNICODE(arg);
 	datalen = PyUnicode_GET_SIZE(arg);
 
-	errorcb = get_errorcallback(errors);
+	errorcb = internal_error_callback(errors);
 	if (errorcb == NULL) {
 		Py_XDECREF(ucvt);
 		return NULL;
@@ -515,16 +569,12 @@
 	if (r == NULL)
 		goto errorexit;
 
-	if (errorcb > ERROR_MAX) {
-		Py_DECREF(errorcb);
-	}
+	ERROR_DECREF(errorcb);
 	Py_XDECREF(ucvt);
 	return make_tuple(r, datalen);
 
 errorexit:
-	if (errorcb > ERROR_MAX) {
-		Py_DECREF(errorcb);
-	}
+	ERROR_DECREF(errorcb);
 	Py_XDECREF(ucvt);
 	return NULL;
 }
@@ -543,18 +593,16 @@
 				codeckwarglist, &data, &datalen, &errors))
 		return NULL;
 
-	errorcb = get_errorcallback(errors);
+	errorcb = internal_error_callback(errors);
 	if (errorcb == NULL)
 		return NULL;
 
 	if (datalen == 0) {
-		if (errorcb > ERROR_MAX) {
-			Py_DECREF(errorcb);
-		}
+		ERROR_DECREF(errorcb);
 		return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0);
 	}
 
-	buf.outobj = buf.excobj = NULL;
+	buf.excobj = NULL;
 	buf.inbuf = buf.inbuf_top = (unsigned char *)data;
 	buf.inbuf_end = buf.inbuf_top + datalen;
 	buf.outobj = PyUnicode_FromUnicode(NULL, datalen);
@@ -590,49 +638,17 @@
 			goto errorexit;
 
 	Py_XDECREF(buf.excobj);
-	if (errorcb > ERROR_MAX) {
-		Py_DECREF(errorcb);
-	}
+	ERROR_DECREF(errorcb);
 	return make_tuple(buf.outobj, datalen);
 
 errorexit:
-	if (errorcb > ERROR_MAX) {
-		Py_DECREF(errorcb);
-	}
+	ERROR_DECREF(errorcb);
 	Py_XDECREF(buf.excobj);
 	Py_XDECREF(buf.outobj);
 
 	return NULL;
 }
 
-static PyObject *
-MultibyteCodec_StreamReader(MultibyteCodecObject *self,
-			    PyObject *args, PyObject *kwargs)
-{
-	PyObject *stream;
-	char *errors = NULL;
-
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamReader",
-				streamkwarglist, &stream, &errors))
-		return NULL;
-
-	return mbstreamreader_create(self->codec, stream, errors);
-}
-
-static PyObject *
-MultibyteCodec_StreamWriter(MultibyteCodecObject *self,
-			    PyObject *args, PyObject *kwargs)
-{
-	PyObject *stream;
-	char *errors = NULL;
-
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamWriter",
-				streamkwarglist, &stream, &errors))
-		return NULL;
-
-	return mbstreamwriter_create(self->codec, stream, errors);
-}
-
 static struct PyMethodDef multibytecodec_methods[] = {
 	{"encode",	(PyCFunction)MultibyteCodec_Encode,
 			METH_VARARGS | METH_KEYWORDS,
@@ -640,12 +656,6 @@
 	{"decode",	(PyCFunction)MultibyteCodec_Decode,
 			METH_VARARGS | METH_KEYWORDS,
 			MultibyteCodec_Decode__doc__},
-	{"StreamReader",(PyCFunction)MultibyteCodec_StreamReader,
-			METH_VARARGS | METH_KEYWORDS,
-			MultibyteCodec_StreamReader__doc__},
-	{"StreamWriter",(PyCFunction)MultibyteCodec_StreamWriter,
-			METH_VARARGS | METH_KEYWORDS,
-			MultibyteCodec_StreamWriter__doc__},
 	{NULL,		NULL},
 };
 
@@ -655,8 +665,6 @@
 	PyObject_Del(self);
 }
 
-
-
 static PyTypeObject MultibyteCodec_Type = {
 	PyObject_HEAD_INIT(NULL)
 	0,				/* ob_size */
@@ -690,244 +698,740 @@
 	multibytecodec_methods,		/* tp_methods */
 };
 
-static PyObject *
-mbstreamreader_iread(MultibyteStreamReaderObject *self,
-		     const char *method, Py_ssize_t sizehint)
-{
-	MultibyteDecodeBuffer buf;
-	PyObject *cres;
-	Py_ssize_t rsize, r, finalsize = 0;
 
-	if (sizehint == 0)
-		return PyUnicode_FromUnicode(NULL, 0);
+/**
+ * Utility functions for stateful codec mechanism
+ */
 
-	buf.outobj = buf.excobj = NULL;
-	cres = NULL;
+#define STATEFUL_DCTX(o)	((MultibyteStatefulDecoderContext *)(o))
+#define STATEFUL_ECTX(o)	((MultibyteStatefulEncoderContext *)(o))
 
-	for (;;) {
-		if (sizehint < 0)
-			cres = PyObject_CallMethod(self->stream,
-					(char *)method, NULL);
-		else
-			cres = PyObject_CallMethod(self->stream,
-					(char *)method, "i", sizehint);
-		if (cres == NULL)
-			goto errorexit;
+static PyObject *
+encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
+			PyObject *unistr, int final)
+{
+	PyObject *ucvt, *r = NULL;
+	Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;
+	Py_ssize_t datalen, origpending;
 
-		if (!PyString_Check(cres)) {
+	if (PyUnicode_Check(unistr))
+		ucvt = NULL;
+	else {
+		unistr = ucvt = PyObject_Unicode(unistr);
+		if (unistr == NULL)
+			return NULL;
+		else if (!PyUnicode_Check(unistr)) {
 			PyErr_SetString(PyExc_TypeError,
-					"stream function returned a "
-					"non-string object");
-			goto errorexit;
-		}
-
-		if (self->pendingsize > 0) {
-			PyObject *ctr;
-			char *ctrdata;
-
-			rsize = PyString_GET_SIZE(cres) + self->pendingsize;
-			ctr = PyString_FromStringAndSize(NULL, rsize);
-			if (ctr == NULL)
-				goto errorexit;
-			ctrdata = PyString_AS_STRING(ctr);
-			memcpy(ctrdata, self->pending, self->pendingsize);
-			memcpy(ctrdata + self->pendingsize,
-				PyString_AS_STRING(cres),
-				PyString_GET_SIZE(cres));
-			Py_DECREF(cres);
-			cres = ctr;
-			self->pendingsize = 0;
-		}
-
-		rsize = PyString_GET_SIZE(cres);
-		buf.inbuf = buf.inbuf_top =
-			(unsigned char *)PyString_AS_STRING(cres);
-		buf.inbuf_end = buf.inbuf_top + rsize;
-		if (buf.outobj == NULL) {
-			buf.outobj = PyUnicode_FromUnicode(NULL, rsize);
-			if (buf.outobj == NULL)
-				goto errorexit;
-			buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
-			buf.outbuf_end = buf.outbuf +
-					PyUnicode_GET_SIZE(buf.outobj);
-		}
-
-		r = 0;
-		if (rsize > 0)
-			while (buf.inbuf < buf.inbuf_end) {
-				Py_ssize_t inleft, outleft;
-
-				inleft = (Py_ssize_t)(buf.inbuf_end -
-						      buf.inbuf);
-				outleft = (Py_ssize_t)(buf.outbuf_end -
-						       buf.outbuf);
-
-				r = self->codec->decode(&self->state,
-							self->codec->config,
-							&buf.inbuf, inleft,
-							&buf.outbuf, outleft);
-				if (r == 0 || r == MBERR_TOOFEW)
-					break;
-				else if (multibytecodec_decerror(self->codec,
-						&self->state, &buf,
-						self->errors, r))
-					goto errorexit;
-			}
-
-		if (rsize == 0 || sizehint < 0) { /* end of file */
-			if (buf.inbuf < buf.inbuf_end &&
-			    multibytecodec_decerror(self->codec, &self->state,
-					&buf, self->errors, MBERR_TOOFEW))
-				goto errorexit;
+				"couldn't convert the object to unicode.");
+			Py_DECREF(ucvt);
+			return NULL;
 		}
+	}
 
-		if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
-			Py_ssize_t npendings;
-
-			/* we can't assume that pendingsize is still 0 here.
-			 * because this function can be called recursively
-			 * from error callback */
-			npendings = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
-			if (npendings + self->pendingsize > MAXDECPENDING) {
-				PyErr_SetString(PyExc_RuntimeError,
-						"pending buffer overflow");
-				goto errorexit;
-			}
-			memcpy(self->pending + self->pendingsize, buf.inbuf,
-				npendings);
-			self->pendingsize += npendings;
-		}
+	datalen = PyUnicode_GET_SIZE(unistr);
+	origpending = ctx->pendingsize;
 
-		finalsize = (Py_ssize_t)(buf.outbuf -
-				PyUnicode_AS_UNICODE(buf.outobj));
-		Py_DECREF(cres);
-		cres = NULL;
+	if (ctx->pendingsize > 0) {
+		inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize);
+		if (inbuf_tmp == NULL)
+			goto errorexit;
+		memcpy(inbuf_tmp, ctx->pending,
+			Py_UNICODE_SIZE * ctx->pendingsize);
+		memcpy(inbuf_tmp + ctx->pendingsize,
+			PyUnicode_AS_UNICODE(unistr),
+			Py_UNICODE_SIZE * datalen);
+		datalen += ctx->pendingsize;
+		ctx->pendingsize = 0;
+		inbuf = inbuf_tmp;
+	}
+	else
+		inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr);
 
-		if (sizehint < 0 || finalsize != 0 || rsize == 0)
-			break;
+	inbuf_end = inbuf + datalen;
 
-		sizehint = 1; /* read 1 more byte and retry */
+	r = multibytecodec_encode(ctx->codec, &ctx->state,
+			(const Py_UNICODE **)&inbuf,
+			datalen, ctx->errors, final ? MBENC_FLUSH : 0);
+	if (r == NULL) {
+		/* recover the original pending buffer */
+		memcpy(ctx->pending, inbuf_tmp, Py_UNICODE_SIZE * origpending);
+		ctx->pendingsize = origpending;
+		goto errorexit;
 	}
 
-	if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
-		if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
+	if (inbuf < inbuf_end) {
+		ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf);
+		if (ctx->pendingsize > MAXENCPENDING) {
+			/* normal codecs can't reach here */
+			ctx->pendingsize = 0;
+			PyErr_SetString(PyExc_UnicodeError,
+					"pending buffer overflow");
 			goto errorexit;
+		}
+		memcpy(ctx->pending, inbuf,
+			ctx->pendingsize * Py_UNICODE_SIZE);
+	}
 
-	Py_XDECREF(cres);
-	Py_XDECREF(buf.excobj);
-	return buf.outobj;
+	if (inbuf_tmp != NULL)
+		PyMem_Del(inbuf_tmp);
+	Py_XDECREF(ucvt);
+	return r;
 
 errorexit:
-	Py_XDECREF(cres);
-	Py_XDECREF(buf.excobj);
-	Py_XDECREF(buf.outobj);
+	if (inbuf_tmp != NULL)
+		PyMem_Del(inbuf_tmp);
+	Py_XDECREF(r);
+	Py_XDECREF(ucvt);
 	return NULL;
 }
 
-static PyObject *
-mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)
+static int
+decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
+		       MultibyteDecodeBuffer *buf)
 {
-	PyObject *sizeobj = NULL;
-	Py_ssize_t size;
+	Py_ssize_t npendings;
 
-	if (!PyArg_ParseTuple(args, "|O:read", &sizeobj))
-		return NULL;
+	npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
+	if (npendings + ctx->pendingsize > MAXDECPENDING) {
+		PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
+		return -1;
+	}
+	memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
+	ctx->pendingsize += npendings;
+	return 0;
+}
 
-	if (sizeobj == Py_None || sizeobj == NULL)
-		size = -1;
-	else if (PyInt_Check(sizeobj))
-		size = PyInt_AsSsize_t(sizeobj);
-	else {
-		PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
-		return NULL;
+static int
+decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
+		       Py_ssize_t size)
+{
+	buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
+	buf->inbuf_end = buf->inbuf_top + size;
+	if (buf->outobj == NULL) { /* only if outobj is not allocated yet */
+		buf->outobj = PyUnicode_FromUnicode(NULL, size);
+		if (buf->outobj == NULL)
+			return -1;
+		buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj);
+		buf->outbuf_end = buf->outbuf +
+				  PyUnicode_GET_SIZE(buf->outobj);
 	}
 
-	return mbstreamreader_iread(self, "read", size);
+	return 0;
 }
 
-static PyObject *
-mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)
+static int
+decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
+		    MultibyteDecodeBuffer *buf)
 {
-	PyObject *sizeobj = NULL;
-	Py_ssize_t size;
+	while (buf->inbuf < buf->inbuf_end) {
+		Py_ssize_t inleft, outleft;
+		int r;
 
-	if (!PyArg_ParseTuple(args, "|O:readline", &sizeobj))
-		return NULL;
+		inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
+		outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
 
-	if (sizeobj == Py_None || sizeobj == NULL)
-		size = -1;
-	else if (PyInt_Check(sizeobj))
-		size = PyInt_AsSsize_t(sizeobj);
-	else {
-		PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
-		return NULL;
+		r = ctx->codec->decode(&ctx->state, ctx->codec->config,
+			&buf->inbuf, inleft, &buf->outbuf, outleft);
+		if (r == 0 || r == MBERR_TOOFEW)
+			break;
+		else if (multibytecodec_decerror(ctx->codec, &ctx->state,
+						 buf, ctx->errors, r))
+			return -1;
 	}
-
-	return mbstreamreader_iread(self, "readline", size);
+	return 0;
 }
 
-static PyObject *
-mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
-{
-	PyObject *sizehintobj = NULL, *r, *sr;
-	Py_ssize_t sizehint;
 
-	if (!PyArg_ParseTuple(args, "|O:readlines", &sizehintobj))
-		return NULL;
+/**
+ * MultibyteIncrementalEncoder object
+ */
 
-	if (sizehintobj == Py_None || sizehintobj == NULL)
-		sizehint = -1;
-	else if (PyInt_Check(sizehintobj))
-		sizehint = PyInt_AsSsize_t(sizehintobj);
-	else {
-		PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
-		return NULL;
-	}
+static PyObject *
+mbiencoder_encode(MultibyteIncrementalEncoderObject *self,
+		  PyObject *args, PyObject *kwargs)
+{
+	PyObject *data;
+	int final = 0;
 
-	r = mbstreamreader_iread(self, "read", sizehint);
-	if (r == NULL)
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode",
+			incrementalkwarglist, &data, &final))
 		return NULL;
 
-	sr = PyUnicode_Splitlines(r, 1);
-	Py_DECREF(r);
-	return sr;
+	return encoder_encode_stateful(STATEFUL_ECTX(self), data, final);
 }
 
 static PyObject *
-mbstreamreader_reset(MultibyteStreamReaderObject *self)
+mbiencoder_reset(MultibyteIncrementalEncoderObject *self)
 {
 	if (self->codec->decreset != NULL &&
 	    self->codec->decreset(&self->state, self->codec->config) != 0)
 		return NULL;
 	self->pendingsize = 0;
 
-	Py_INCREF(Py_None);
-	return Py_None;
+	Py_RETURN_NONE;
 }
 
-static struct PyMethodDef mbstreamreader_methods[] = {
-	{"read",	(PyCFunction)mbstreamreader_read,
-			METH_VARARGS, NULL},
-	{"readline",	(PyCFunction)mbstreamreader_readline,
-			METH_VARARGS, NULL},
-	{"readlines",	(PyCFunction)mbstreamreader_readlines,
-			METH_VARARGS, NULL},
-	{"reset",	(PyCFunction)mbstreamreader_reset,
+static struct PyMethodDef mbiencoder_methods[] = {
+	{"encode",	(PyCFunction)mbiencoder_encode,
+			METH_VARARGS | METH_KEYWORDS, NULL},
+	{"reset",	(PyCFunction)mbiencoder_reset,
 			METH_NOARGS, NULL},
 	{NULL,		NULL},
 };
 
+static PyObject *
+mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+	MultibyteIncrementalEncoderObject *self;
+	PyObject *codec;
+	char *errors = NULL;
+
+	codec = PyObject_GetAttrString((PyObject *)type, "codec");
+	if (codec == NULL)
+		return NULL;
+	if (!MultibyteCodec_Check(codec)) {
+		PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
+		return NULL;
+	}
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
+					 incnewkwarglist, &errors))
+		return NULL;
+
+	self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
+	if (self == NULL)
+		return NULL;
+
+	self->codec = ((MultibyteCodecObject *)codec)->codec;
+	self->pendingsize = 0;
+	self->errors = internal_error_callback(errors);
+	if (self->errors == NULL)
+		goto errorexit;
+	if (self->codec->encinit != NULL &&
+	    self->codec->encinit(&self->state, self->codec->config) != 0)
+		goto errorexit;
+
+	return (PyObject *)self;
+
+errorexit:
+	Py_XDECREF(self);
+	return NULL;
+}
+
+static int
+mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
+		    visitproc visit, void *arg)
+{
+	if (ERROR_ISCUSTOM(self->errors))
+		Py_VISIT(self->errors);
+	return 0;
+}
+
 static void
-mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
+mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
+{
+	PyObject_GC_UnTrack(self);
+	ERROR_DECREF(self->errors);
+	self->ob_type->tp_free(self);
+}
+
+static PyTypeObject MultibyteIncrementalEncoder_Type = {
+	PyObject_HEAD_INIT(NULL)
+	0,				/* ob_size */
+	"MultibyteIncrementalEncoder",	/* tp_name */
+	sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */
+	0,				/* tp_itemsize */
+	/*  methods  */
+	(destructor)mbiencoder_dealloc, /* tp_dealloc */
+	0,				/* tp_print */
+	0,				/* tp_getattr */
+	0,				/* tp_setattr */
+	0,				/* tp_compare */
+	0,				/* tp_repr */
+	0,				/* tp_as_number */
+	0,				/* tp_as_sequence */
+	0,				/* tp_as_mapping */
+	0,				/* tp_hash */
+	0,				/* tp_call */
+	0,				/* tp_str */
+	PyObject_GenericGetAttr,	/* tp_getattro */
+	0,				/* tp_setattro */
+	0,				/* tp_as_buffer */
+	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
+		| Py_TPFLAGS_BASETYPE,	/* tp_flags */
+	0,				/* tp_doc */
+	(traverseproc)mbiencoder_traverse,	/* tp_traverse */
+	0,				/* tp_clear */
+	0,				/* tp_richcompare */
+	0,				/* tp_weaklistoffset */
+	0,				/* tp_iter */
+	0,				/* tp_iterext */
+	mbiencoder_methods,		/* tp_methods */
+	0,				/* tp_members */
+	codecctx_getsets,		/* tp_getset */
+	0,				/* tp_base */
+	0,				/* tp_dict */
+	0,				/* tp_descr_get */
+	0,				/* tp_descr_set */
+	0,				/* tp_dictoffset */
+	0,				/* tp_init */
+	0,				/* tp_alloc */
+	mbiencoder_new,			/* tp_new */
+};
+
+
+/**
+ * MultibyteIncrementalDecoder object
+ */
+
+/*
+ * MultibyteIncrementalDecoder.decode(data [, final])
+ *
+ * Decodes `data`, prefixed with the bytes left pending by the previous
+ * call, and returns the decoded unicode object.  Input bytes that remain
+ * unconsumed are reported through the error handler as MBERR_TOOFEW when
+ * `final` is true; whatever is still unconsumed afterwards is handed to
+ * decoder_append_pending() for the next call.
+ *
+ * Returns a new reference, or NULL with an exception set.
+ */
+static PyObject *
+mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
+		  PyObject *args, PyObject *kwargs)
+{
+	MultibyteDecodeBuffer buf;
+	char *data, *wdata;
+	Py_ssize_t wsize, finalsize = 0, size, origpending;
+	int final = 0;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "t#|i:decode",
+			incrementalkwarglist, &data, &size, &final))
+		return NULL;
+
+	buf.outobj = buf.excobj = NULL;
+	origpending = self->pendingsize;
+
+	if (self->pendingsize == 0) {
+		/* nothing pending: decode straight from the caller's buffer */
+		wsize = size;
+		wdata = data;
+	}
+	else {
+		/* build a scratch buffer holding pending bytes + new data */
+		wsize = size + self->pendingsize;
+		wdata = PyMem_Malloc(wsize);
+		if (wdata == NULL) {
+			/* PyMem_Malloc() does not set an exception itself */
+			PyErr_NoMemory();
+			goto errorexit;
+		}
+		memcpy(wdata, self->pending, self->pendingsize);
+		memcpy(wdata + self->pendingsize, data, size);
+		self->pendingsize = 0;
+	}
+
+	if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
+		goto errorexit;
+
+	if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
+		goto errorexit;
+
+	if (final && buf.inbuf < buf.inbuf_end) {
+		if (multibytecodec_decerror(self->codec, &self->state,
+				&buf, self->errors, MBERR_TOOFEW)) {
+			/* recover the original pending buffer */
+			memcpy(self->pending, wdata, origpending);
+			self->pendingsize = origpending;
+			goto errorexit;
+		}
+	}
+
+	if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
+		if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
+			goto errorexit;
+	}
+
+	/* shrink the output object to the number of characters written */
+	finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));
+	if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
+		if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
+			goto errorexit;
+
+	/* only the scratch buffer is ours to free, never the caller's data */
+	if (wdata != data)
+		PyMem_Del(wdata);
+	Py_XDECREF(buf.excobj);
+	return buf.outobj;
+
+errorexit:
+	if (wdata != NULL && wdata != data)
+		PyMem_Del(wdata);
+	Py_XDECREF(buf.excobj);
+	Py_XDECREF(buf.outobj);
+	return NULL;
+}
+
+/*
+ * MultibyteIncrementalDecoder.reset(): runs the codec's optional decreset
+ * hook on the decoder state and discards any pending input bytes.
+ * Returns None, or NULL when the hook reports failure.
+ */
+static PyObject *
+mbidecoder_reset(MultibyteIncrementalDecoderObject *self)
+{
+	MultibyteCodec *codec = self->codec;
+
+	if (codec->decreset != NULL) {
+		if (codec->decreset(&self->state, codec->config) != 0)
+			return NULL;
+	}
+
+	self->pendingsize = 0;
+	Py_RETURN_NONE;
+}
+
+/* Method table of MultibyteIncrementalDecoder: decode() accepts keyword
+ * arguments, reset() takes no arguments. */
+static struct PyMethodDef mbidecoder_methods[] = {
+	{"decode",	(PyCFunction)mbidecoder_decode,
+			METH_VARARGS | METH_KEYWORDS, NULL},
+	{"reset",	(PyCFunction)mbidecoder_reset,
+			METH_NOARGS, NULL},
+	{NULL,		NULL},
+};
+
+/*
+ * tp_new for MultibyteIncrementalDecoder: IncrementalDecoder([errors]).
+ *
+ * The codec to use is taken from the `codec` attribute of the (sub)type
+ * being instantiated and must be a MultibyteCodec object.  Returns the new
+ * decoder, or NULL with an exception set.
+ */
+static PyObject *
+mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+	MultibyteIncrementalDecoderObject *self = NULL;
+	PyObject *codec;
+	char *errors = NULL;
+
+	/* new reference: must be released on every path out of here */
+	codec = PyObject_GetAttrString((PyObject *)type, "codec");
+	if (codec == NULL)
+		return NULL;
+	if (!MultibyteCodec_Check(codec)) {
+		PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
+		goto errorexit;
+	}
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
+					 incnewkwarglist, &errors))
+		goto errorexit;
+
+	self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
+	if (self == NULL)
+		goto errorexit;
+
+	/* NOTE(review): only the MultibyteCodec pointer is kept, not the
+	 * wrapper object -- presumably the struct outlives the wrapper;
+	 * confirm against the codec modules. */
+	self->codec = ((MultibyteCodecObject *)codec)->codec;
+	self->pendingsize = 0;
+	self->errors = internal_error_callback(errors);
+	if (self->errors == NULL)
+		goto errorexit;
+	if (self->codec->decinit != NULL &&
+	    self->codec->decinit(&self->state, self->codec->config) != 0)
+		goto errorexit;
+
+	Py_DECREF(codec);
+	return (PyObject *)self;
+
+errorexit:
+	/* NOTE(review): the deallocator runs ERROR_DECREF on self->errors,
+	 * which is still NULL if internal_error_callback() failed -- confirm
+	 * that ERROR_DECREF tolerates NULL. */
+	Py_XDECREF(self);
+	Py_DECREF(codec);
+	return NULL;
+}
+
+/*
+ * tp_traverse for MultibyteIncrementalDecoder.  Only a custom error
+ * handler is a real object; the predefined ERROR_* markers are small
+ * integer constants cast to pointers and must not be visited.
+ */
+static int
+mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
+		    visitproc visit, void *arg)
+{
+	if (ERROR_ISCUSTOM(self->errors))
+		Py_VISIT(self->errors);
+	return 0;
+}
+
+/*
+ * tp_dealloc for MultibyteIncrementalDecoder: stops GC tracking of the
+ * object, drops the error-handler reference (ERROR_DECREF skips the
+ * predefined ERROR_* markers) and hands the memory back to tp_free.
+ */
+static void
+mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
+{
+	PyObject_GC_UnTrack(self);
+	ERROR_DECREF(self->errors);
+	self->ob_type->tp_free(self);
+}
+
+/*
+ * Type object for MultibyteIncrementalDecoder.  The type takes part in
+ * cyclic GC (Py_TPFLAGS_HAVE_GC with mbidecoder_traverse) and may be
+ * subclassed (Py_TPFLAGS_BASETYPE); mbidecoder_new reads the `codec`
+ * attribute from the type being instantiated.
+ */
+static PyTypeObject MultibyteIncrementalDecoder_Type = {
+	PyObject_HEAD_INIT(NULL)
+	0,				/* ob_size */
+	"MultibyteIncrementalDecoder",	/* tp_name */
+	sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */
+	0,				/* tp_itemsize */
+	/*  methods  */
+	(destructor)mbidecoder_dealloc, /* tp_dealloc */
+	0,				/* tp_print */
+	0,				/* tp_getattr */
+	0,				/* tp_setattr */
+	0,				/* tp_compare */
+	0,				/* tp_repr */
+	0,				/* tp_as_number */
+	0,				/* tp_as_sequence */
+	0,				/* tp_as_mapping */
+	0,				/* tp_hash */
+	0,				/* tp_call */
+	0,				/* tp_str */
+	PyObject_GenericGetAttr,	/* tp_getattro */
+	0,				/* tp_setattro */
+	0,				/* tp_as_buffer */
+	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
+		| Py_TPFLAGS_BASETYPE,	/* tp_flags */
+	0,				/* tp_doc */
+	(traverseproc)mbidecoder_traverse,	/* tp_traverse */
+	0,				/* tp_clear */
+	0,				/* tp_richcompare */
+	0,				/* tp_weaklistoffset */
+	0,				/* tp_iter */
+	0,				/* tp_iternext */
+	mbidecoder_methods,		/* tp_methods */
+	0,				/* tp_members */
+	codecctx_getsets,		/* tp_getset */
+	0,				/* tp_base */
+	0,				/* tp_dict */
+	0,				/* tp_descr_get */
+	0,				/* tp_descr_set */
+	0,				/* tp_dictoffset */
+	0,				/* tp_init */
+	0,				/* tp_alloc */
+	mbidecoder_new,			/* tp_new */
+};
+
+
+/**
+ * MultibyteStreamReader object
+ */
+
+/*
+ * Common implementation behind read(), readline() and readlines().
+ *
+ * Calls self->stream.<method>() -- with `sizehint` as its argument when
+ * sizehint is non-negative, without arguments otherwise -- and decodes the
+ * returned byte string together with any bytes left pending by an earlier
+ * call.  When a sized read returned data but produced no characters, one
+ * more byte at a time is requested until at least one character is decoded
+ * or the stream returns an empty string.
+ *
+ * Returns a new unicode object, or NULL on failure.
+ */
+static PyObject *
+mbstreamreader_iread(MultibyteStreamReaderObject *self,
+		     const char *method, Py_ssize_t sizehint)
+{
+	MultibyteDecodeBuffer buf;
+	PyObject *cres;
+	Py_ssize_t rsize, finalsize = 0;
+
+	if (sizehint == 0)
+		return PyUnicode_FromUnicode(NULL, 0);
+
+	buf.outobj = buf.excobj = NULL;
+	cres = NULL;
+
+	for (;;) {
+		if (sizehint < 0)
+			cres = PyObject_CallMethod(self->stream,
+					(char *)method, NULL);
+		else
+			/* "n" is the Py_ssize_t format unit; "i" would read
+			 * the vararg as a C int */
+			cres = PyObject_CallMethod(self->stream,
+					(char *)method, "n", sizehint);
+		if (cres == NULL)
+			goto errorexit;
+
+		if (!PyString_Check(cres)) {
+			PyErr_SetString(PyExc_TypeError,
+					"stream function returned a "
+					"non-string object");
+			goto errorexit;
+		}
+
+		if (self->pendingsize > 0) {
+			/* prepend the pending bytes to the freshly read data */
+			PyObject *ctr;
+			char *ctrdata;
+
+			rsize = PyString_GET_SIZE(cres) + self->pendingsize;
+			ctr = PyString_FromStringAndSize(NULL, rsize);
+			if (ctr == NULL)
+				goto errorexit;
+			ctrdata = PyString_AS_STRING(ctr);
+			memcpy(ctrdata, self->pending, self->pendingsize);
+			memcpy(ctrdata + self->pendingsize,
+				PyString_AS_STRING(cres),
+				PyString_GET_SIZE(cres));
+			Py_DECREF(cres);
+			cres = ctr;
+			self->pendingsize = 0;
+		}
+
+		rsize = PyString_GET_SIZE(cres);
+		if (decoder_prepare_buffer(&buf, PyString_AS_STRING(cres),
+					   rsize) != 0)
+			goto errorexit;
+
+		if (rsize > 0 && decoder_feed_buffer(
+				(MultibyteStatefulDecoderContext *)self, &buf))
+			goto errorexit;
+
+		if (rsize == 0 || sizehint < 0) { /* end of file */
+			if (buf.inbuf < buf.inbuf_end &&
+			    multibytecodec_decerror(self->codec, &self->state,
+					&buf, self->errors, MBERR_TOOFEW))
+				goto errorexit;
+		}
+
+		if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
+			if (decoder_append_pending(STATEFUL_DCTX(self),
+						   &buf) != 0)
+				goto errorexit;
+		}
+
+		finalsize = (Py_ssize_t)(buf.outbuf -
+				PyUnicode_AS_UNICODE(buf.outobj));
+		Py_DECREF(cres);
+		cres = NULL;
+
+		/* stop on an unsized read, on any output, or at end of data */
+		if (sizehint < 0 || finalsize != 0 || rsize == 0)
+			break;
+
+		sizehint = 1; /* read 1 more byte and retry */
+	}
+
+	if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
+		if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
+			goto errorexit;
+
+	Py_XDECREF(cres);
+	Py_XDECREF(buf.excobj);
+	return buf.outobj;
+
+errorexit:
+	Py_XDECREF(cres);
+	Py_XDECREF(buf.excobj);
+	Py_XDECREF(buf.outobj);
+	return NULL;
+}
+
+/*
+ * MultibyteStreamReader.read([size]): decodes data obtained from the
+ * stream's read() method.  A missing or None size is passed on as -1;
+ * any other non-int argument raises TypeError.
+ */
+static PyObject *
+mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)
+{
+	PyObject *limit = NULL;
+	Py_ssize_t nbytes = -1;
+
+	if (!PyArg_ParseTuple(args, "|O:read", &limit))
+		return NULL;
+
+	if (limit != NULL && limit != Py_None) {
+		if (!PyInt_Check(limit)) {
+			PyErr_SetString(PyExc_TypeError,
+					"arg 1 must be an integer");
+			return NULL;
+		}
+		nbytes = PyInt_AsSsize_t(limit);
+	}
+
+	return mbstreamreader_iread(self, "read", nbytes);
+}
+
+/*
+ * MultibyteStreamReader.readline([size]): decodes data obtained from the
+ * stream's readline() method.  A missing or None size is passed on as -1;
+ * any other non-int argument raises TypeError.
+ */
+static PyObject *
+mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)
+{
+	PyObject *limit = NULL;
+	Py_ssize_t nbytes = -1;
+
+	if (!PyArg_ParseTuple(args, "|O:readline", &limit))
+		return NULL;
+
+	if (limit != NULL && limit != Py_None) {
+		if (!PyInt_Check(limit)) {
+			PyErr_SetString(PyExc_TypeError,
+					"arg 1 must be an integer");
+			return NULL;
+		}
+		nbytes = PyInt_AsSsize_t(limit);
+	}
+
+	return mbstreamreader_iread(self, "readline", nbytes);
+}
+
+static PyObject *
+mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
 {
-	if (self->errors > ERROR_MAX) {
-		Py_DECREF(self->errors);
+	PyObject *sizehintobj = NULL, *r, *sr;
+	Py_ssize_t sizehint;
+
+	if (!PyArg_ParseTuple(args, "|O:readlines", &sizehintobj))
+		return NULL;
+
+	if (sizehintobj == Py_None || sizehintobj == NULL)
+		sizehint = -1;
+	else if (PyInt_Check(sizehintobj))
+		sizehint = PyInt_AsSsize_t(sizehintobj);
+	else {
+		PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
+		return NULL;
 	}
-	Py_DECREF(self->stream);
-	PyObject_Del(self);
+
+	r = mbstreamreader_iread(self, "read", sizehint);
+	if (r == NULL)
+		return NULL;
+
+	sr = PyUnicode_Splitlines(r, 1);
+	Py_DECREF(r);
+	return sr;
+}
+
+/*
+ * MultibyteStreamReader.reset(): runs the codec's optional decreset hook
+ * on the decoder state and discards any pending input bytes.
+ * Returns None, or NULL when the hook reports failure.
+ */
+static PyObject *
+mbstreamreader_reset(MultibyteStreamReaderObject *self)
+{
+	MultibyteCodec *codec = self->codec;
+
+	if (codec->decreset != NULL) {
+		if (codec->decreset(&self->state, codec->config) != 0)
+			return NULL;
+	}
+
+	self->pendingsize = 0;
+	Py_RETURN_NONE;
+}
+
+/* Method table of MultibyteStreamReader.  The three read variants take an
+ * optional positional size argument; reset() takes no arguments. */
+static struct PyMethodDef mbstreamreader_methods[] = {
+	{"read",	(PyCFunction)mbstreamreader_read,
+			METH_VARARGS, NULL},
+	{"readline",	(PyCFunction)mbstreamreader_readline,
+			METH_VARARGS, NULL},
+	{"readlines",	(PyCFunction)mbstreamreader_readlines,
+			METH_VARARGS, NULL},
+	{"reset",	(PyCFunction)mbstreamreader_reset,
+			METH_NOARGS, NULL},
+	{NULL,		NULL},
+};
+
+/* Member table of MultibyteStreamReader: exposes the wrapped stream
+ * object as the read-only attribute `stream`. */
+static PyMemberDef mbstreamreader_members[] = {
+	{"stream",	T_OBJECT,
+			offsetof(MultibyteStreamReaderObject, stream),
+			READONLY, NULL},
+	{NULL,}
+};
+
+/*
+ * tp_new for MultibyteStreamReader: StreamReader(stream [, errors]).
+ *
+ * The codec to use is taken from the `codec` attribute of the (sub)type
+ * being instantiated and must be a MultibyteCodec object.  The reader
+ * keeps its own reference to `stream`.  Returns the new reader, or NULL
+ * with an exception set.
+ */
+static PyObject *
+mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+	MultibyteStreamReaderObject *self = NULL;
+	PyObject *codec, *stream;
+	char *errors = NULL;
+
+	/* new reference: must be released on every path out of here */
+	codec = PyObject_GetAttrString((PyObject *)type, "codec");
+	if (codec == NULL)
+		return NULL;
+	if (!MultibyteCodec_Check(codec)) {
+		PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
+		goto errorexit;
+	}
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
+				streamkwarglist, &stream, &errors))
+		goto errorexit;
+
+	self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
+	if (self == NULL)
+		goto errorexit;
+
+	/* NOTE(review): only the MultibyteCodec pointer is kept, not the
+	 * wrapper object -- presumably the struct outlives the wrapper;
+	 * confirm against the codec modules. */
+	self->codec = ((MultibyteCodecObject *)codec)->codec;
+	self->stream = stream;
+	Py_INCREF(stream);
+	self->pendingsize = 0;
+	self->errors = internal_error_callback(errors);
+	if (self->errors == NULL)
+		goto errorexit;
+	if (self->codec->decinit != NULL &&
+	    self->codec->decinit(&self->state, self->codec->config) != 0)
+		goto errorexit;
+
+	Py_DECREF(codec);
+	return (PyObject *)self;
+
+errorexit:
+	/* NOTE(review): the deallocator runs ERROR_DECREF on self->errors,
+	 * which is still NULL if internal_error_callback() failed -- confirm
+	 * that ERROR_DECREF tolerates NULL. */
+	Py_XDECREF(self);
+	Py_DECREF(codec);
+	return NULL;
 }
 
+/*
+ * tp_traverse for MultibyteStreamReader: visits the wrapped stream and,
+ * when it is a real object rather than one of the predefined ERROR_*
+ * markers, the error handler.
+ */
+static int
+mbstreamreader_traverse(MultibyteStreamReaderObject *self,
+			visitproc visit, void *arg)
+{
+	if (ERROR_ISCUSTOM(self->errors))
+		Py_VISIT(self->errors);
+	Py_VISIT(self->stream);
+	return 0;
+}
 
+/*
+ * tp_dealloc for MultibyteStreamReader: stops GC tracking of the object,
+ * drops the error-handler reference (ERROR_DECREF skips the predefined
+ * ERROR_* markers) and the stream reference, then hands the memory back
+ * to tp_free.
+ */
+static void
+mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
+{
+	PyObject_GC_UnTrack(self);
+	ERROR_DECREF(self->errors);
+	Py_DECREF(self->stream);
+	self->ob_type->tp_free(self);
+}
 
 static PyTypeObject MultibyteStreamReader_Type = {
 	PyObject_HEAD_INIT(NULL)
@@ -951,97 +1455,49 @@
 	PyObject_GenericGetAttr,	/* tp_getattro */
 	0,				/* tp_setattro */
 	0,				/* tp_as_buffer */
-	Py_TPFLAGS_DEFAULT,		/* tp_flags */
+	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
+		| Py_TPFLAGS_BASETYPE,	/* tp_flags */
 	0,				/* tp_doc */
-	0,				/* tp_traverse */
+	(traverseproc)mbstreamreader_traverse,	/* tp_traverse */
 	0,				/* tp_clear */
 	0,				/* tp_richcompare */
 	0,				/* tp_weaklistoffset */
 	0,				/* tp_iter */
 	0,				/* tp_iterext */
 	mbstreamreader_methods,		/* tp_methods */
+	mbstreamreader_members,		/* tp_members */
+	codecctx_getsets,		/* tp_getset */
+	0,				/* tp_base */
+	0,				/* tp_dict */
+	0,				/* tp_descr_get */
+	0,				/* tp_descr_set */
+	0,				/* tp_dictoffset */
+	0,				/* tp_init */
+	0,				/* tp_alloc */
+	mbstreamreader_new,		/* tp_new */
 };
 
+
+/**
+ * MultibyteStreamWriter object
+ */
+
 static int
 mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
 		      PyObject *unistr)
 {
-	PyObject *wr, *ucvt, *r = NULL;
-	Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;
-	Py_ssize_t datalen;
-
-	if (PyUnicode_Check(unistr))
-		ucvt = NULL;
-	else {
-		unistr = ucvt = PyObject_Unicode(unistr);
-		if (unistr == NULL)
-			return -1;
-		else if (!PyUnicode_Check(unistr)) {
-			PyErr_SetString(PyExc_TypeError,
-				"couldn't convert the object to unicode.");
-			Py_DECREF(ucvt);
-			return -1;
-		}
-	}
-
-	datalen = PyUnicode_GET_SIZE(unistr);
-	if (datalen == 0) {
-		Py_XDECREF(ucvt);
-		return 0;
-	}
-
-	if (self->pendingsize > 0) {
-		inbuf_tmp = PyMem_New(Py_UNICODE, datalen + self->pendingsize);
-		if (inbuf_tmp == NULL)
-			goto errorexit;
-		memcpy(inbuf_tmp, self->pending,
-			Py_UNICODE_SIZE * self->pendingsize);
-		memcpy(inbuf_tmp + self->pendingsize,
-			PyUnicode_AS_UNICODE(unistr),
-			Py_UNICODE_SIZE * datalen);
-		datalen += self->pendingsize;
-		self->pendingsize = 0;
-		inbuf = inbuf_tmp;
-	}
-	else
-		inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr);
-
-	inbuf_end = inbuf + datalen;
-
-	r = multibytecodec_encode(self->codec, &self->state,
-			(const Py_UNICODE **)&inbuf, datalen, self->errors, 0);
-	if (r == NULL)
-		goto errorexit;
+	PyObject *str, *wr;
 
-	if (inbuf < inbuf_end) {
-		self->pendingsize = (Py_ssize_t)(inbuf_end - inbuf);
-		if (self->pendingsize > MAXENCPENDING) {
-			self->pendingsize = 0;
-			PyErr_SetString(PyExc_RuntimeError,
-					"pending buffer overflow");
-			goto errorexit;
-		}
-		memcpy(self->pending, inbuf,
-			self->pendingsize * Py_UNICODE_SIZE);
-	}
+	str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
+	if (str == NULL)
+		return -1;
 
-	wr = PyObject_CallMethod(self->stream, "write", "O", r);
+	wr = PyObject_CallMethod(self->stream, "write", "O", str);
+	Py_DECREF(str);
 	if (wr == NULL)
-		goto errorexit;
+		return -1;
 
-	if (inbuf_tmp != NULL)
-		PyMem_Del(inbuf_tmp);
-	Py_DECREF(r);
-	Py_DECREF(wr);
-	Py_XDECREF(ucvt);
 	return 0;
-
-errorexit:
-	if (inbuf_tmp != NULL)
-		PyMem_Del(inbuf_tmp);
-	Py_XDECREF(r);
-	Py_XDECREF(ucvt);
-	return -1;
 }
 
 static PyObject *
@@ -1054,10 +1510,8 @@
 
 	if (mbstreamwriter_iwrite(self, strobj))
 		return NULL;
-	else {
-		Py_INCREF(Py_None);
-		return Py_None;
-	}
+	else
+		Py_RETURN_NONE;
 }
 
 static PyObject *
@@ -1087,8 +1541,7 @@
 			return NULL;
 	}
 
-	Py_INCREF(Py_None);
-	return Py_None;
+	Py_RETURN_NONE;
 }
 
 static PyObject *
@@ -1119,18 +1572,67 @@
 	}
 	Py_DECREF(pwrt);
 
-	Py_INCREF(Py_None);
-	return Py_None;
+	Py_RETURN_NONE;
+}
+
+/*
+ * tp_new for MultibyteStreamWriter: StreamWriter(stream [, errors]).
+ *
+ * The codec to use is taken from the `codec` attribute of the (sub)type
+ * being instantiated and must be a MultibyteCodec object.  The writer
+ * keeps its own reference to `stream`.  Returns the new writer, or NULL
+ * with an exception set.
+ */
+static PyObject *
+mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+	MultibyteStreamWriterObject *self = NULL;
+	PyObject *codec, *stream;
+	char *errors = NULL;
+
+	/* new reference: must be released on every path out of here */
+	codec = PyObject_GetAttrString((PyObject *)type, "codec");
+	if (codec == NULL)
+		return NULL;
+	if (!MultibyteCodec_Check(codec)) {
+		PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
+		goto errorexit;
+	}
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
+				streamkwarglist, &stream, &errors))
+		goto errorexit;
+
+	self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
+	if (self == NULL)
+		goto errorexit;
+
+	/* NOTE(review): only the MultibyteCodec pointer is kept, not the
+	 * wrapper object -- presumably the struct outlives the wrapper;
+	 * confirm against the codec modules. */
+	self->codec = ((MultibyteCodecObject *)codec)->codec;
+	self->stream = stream;
+	Py_INCREF(stream);
+	self->pendingsize = 0;
+	self->errors = internal_error_callback(errors);
+	if (self->errors == NULL)
+		goto errorexit;
+	if (self->codec->encinit != NULL &&
+	    self->codec->encinit(&self->state, self->codec->config) != 0)
+		goto errorexit;
+
+	Py_DECREF(codec);
+	return (PyObject *)self;
+
+errorexit:
+	/* NOTE(review): the deallocator runs ERROR_DECREF on self->errors,
+	 * which is still NULL if internal_error_callback() failed -- confirm
+	 * that ERROR_DECREF tolerates NULL. */
+	Py_XDECREF(self);
+	Py_DECREF(codec);
+	return NULL;
+}
+
+static int
+mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
+			visitproc visit, void *arg)
+{
+	if (ERROR_ISCUSTOM(self->errors))
+		Py_VISIT(self->errors);
+	Py_VISIT(self->stream);
+	return 0;
 }
 
 static void
 mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
 {
-	if (self->errors > ERROR_MAX) {
-		Py_DECREF(self->errors);
-	}
+	PyObject_GC_UnTrack(self);
+	ERROR_DECREF(self->errors);
 	Py_DECREF(self->stream);
-	PyObject_Del(self);
+	self->ob_type->tp_free(self);
 }
 
 static struct PyMethodDef mbstreamwriter_methods[] = {
@@ -1143,7 +1645,12 @@
 	{NULL,		NULL},
 };
 
-
+static PyMemberDef mbstreamwriter_members[] = {
+	{"stream",	T_OBJECT,
+			offsetof(MultibyteStreamWriterObject, stream),
+			READONLY, NULL},
+	{NULL,}
+};
 
 static PyTypeObject MultibyteStreamWriter_Type = {
 	PyObject_HEAD_INIT(NULL)
@@ -1167,17 +1674,33 @@
 	PyObject_GenericGetAttr,	/* tp_getattro */
 	0,				/* tp_setattro */
 	0,				/* tp_as_buffer */
-	Py_TPFLAGS_DEFAULT,		/* tp_flags */
+	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
+		| Py_TPFLAGS_BASETYPE,	/* tp_flags */
 	0,				/* tp_doc */
-	0,				/* tp_traverse */
+	(traverseproc)mbstreamwriter_traverse,	/* tp_traverse */
 	0,				/* tp_clear */
 	0,				/* tp_richcompare */
 	0,				/* tp_weaklistoffset */
 	0,				/* tp_iter */
 	0,				/* tp_iterext */
 	mbstreamwriter_methods,		/* tp_methods */
+	mbstreamwriter_members,		/* tp_members */
+	codecctx_getsets,		/* tp_getset */
+	0,				/* tp_base */
+	0,				/* tp_dict */
+	0,				/* tp_descr_get */
+	0,				/* tp_descr_set */
+	0,				/* tp_dictoffset */
+	0,				/* tp_init */
+	0,				/* tp_alloc */
+	mbstreamwriter_new,		/* tp_new */
 };
 
+
+/**
+ * Exposed factory function
+ */
+
 static PyObject *
 __create_codec(PyObject *ignore, PyObject *arg)
 {
@@ -1201,80 +1724,38 @@
 	return (PyObject *)self;
 }
 
-static PyObject *
-mbstreamreader_create(MultibyteCodec *codec,
-		      PyObject *stream, const char *errors)
-{
-	MultibyteStreamReaderObject *self;
-
-	self = PyObject_New(MultibyteStreamReaderObject,
-			&MultibyteStreamReader_Type);
-	if (self == NULL)
-		return NULL;
-
-	self->codec = codec;
-	self->stream = stream;
-	Py_INCREF(stream);
-	self->pendingsize = 0;
-	self->errors = get_errorcallback(errors);
-	if (self->errors == NULL)
-		goto errorexit;
-	if (self->codec->decinit != NULL &&
-	    self->codec->decinit(&self->state, self->codec->config) != 0)
-		goto errorexit;
-
-	return (PyObject *)self;
-
-errorexit:
-	Py_XDECREF(self);
-	return NULL;
-}
-
-static PyObject *
-mbstreamwriter_create(MultibyteCodec *codec,
-		      PyObject *stream, const char *errors)
-{
-	MultibyteStreamWriterObject *self;
-
-	self = PyObject_New(MultibyteStreamWriterObject,
-			&MultibyteStreamWriter_Type);
-	if (self == NULL)
-		return NULL;
-
-	self->codec = codec;
-	self->stream = stream;
-	Py_INCREF(stream);
-	self->pendingsize = 0;
-	self->errors = get_errorcallback(errors);
-	if (self->errors == NULL)
-		goto errorexit;
-	if (self->codec->encinit != NULL &&
-	    self->codec->encinit(&self->state, self->codec->config) != 0)
-		goto errorexit;
-
-	return (PyObject *)self;
-
-errorexit:
-	Py_XDECREF(self);
-	return NULL;
-}
-
 static struct PyMethodDef __methods[] = {
 	{"__create_codec", (PyCFunction)__create_codec, METH_O},
 	{NULL, NULL},
 };
 
-void
+PyMODINIT_FUNC
 init_multibytecodec(void)
 {
+	int i;
+	PyObject *m;
+	PyTypeObject *typelist[] = {
+		&MultibyteIncrementalEncoder_Type,
+		&MultibyteIncrementalDecoder_Type,
+		&MultibyteStreamReader_Type,
+		&MultibyteStreamWriter_Type,
+		NULL
+	};
+
 	if (PyType_Ready(&MultibyteCodec_Type) < 0)
 		return;
-	if (PyType_Ready(&MultibyteStreamReader_Type) < 0)
-		return;
-	if (PyType_Ready(&MultibyteStreamWriter_Type) < 0)
+
+	m = Py_InitModule("_multibytecodec", __methods);
+	if (m == NULL)
 		return;
 
-	Py_InitModule("_multibytecodec", __methods);
+	for (i = 0; typelist[i] != NULL; i++) {
+		if (PyType_Ready(typelist[i]) < 0)
+			return;
+		Py_INCREF(typelist[i]);
+		PyModule_AddObject(m, typelist[i]->tp_name,
+				   (PyObject *)typelist[i]);
+	}
 
 	if (PyErr_Occurred())
 		Py_FatalError("can't initialize the _multibytecodec module");

Modified: python/trunk/Modules/cjkcodecs/multibytecodec.h
==============================================================================
--- python/trunk/Modules/cjkcodecs/multibytecodec.h	(original)
+++ python/trunk/Modules/cjkcodecs/multibytecodec.h	Sun Mar 26 04:34:59 2006
@@ -67,24 +67,51 @@
 	MultibyteCodec *codec;
 } MultibyteCodecObject;
 
-#define MAXDECPENDING	8
+#define MultibyteCodec_Check(op) ((op)->ob_type == &MultibyteCodec_Type)
+
+#define _MultibyteStatefulCodec_HEAD		\
+	PyObject_HEAD				\
+	MultibyteCodec *codec;			\
+	MultibyteCodec_State state;		\
+	PyObject *errors;
 typedef struct {
-	PyObject_HEAD
-	MultibyteCodec *codec;
-	MultibyteCodec_State state;
-	unsigned char pending[MAXDECPENDING];
-	Py_ssize_t pendingsize;
-	PyObject *stream, *errors;
-} MultibyteStreamReaderObject;
+	_MultibyteStatefulCodec_HEAD
+} MultibyteStatefulCodecContext;
 
 #define MAXENCPENDING	2
+#define _MultibyteStatefulEncoder_HEAD		\
+	_MultibyteStatefulCodec_HEAD		\
+	Py_UNICODE pending[MAXENCPENDING];	\
+	Py_ssize_t pendingsize;
 typedef struct {
-	PyObject_HEAD
-	MultibyteCodec *codec;
-	MultibyteCodec_State state;
-	Py_UNICODE pending[MAXENCPENDING];
+	_MultibyteStatefulEncoder_HEAD
+} MultibyteStatefulEncoderContext;
+
+#define MAXDECPENDING	8
+#define _MultibyteStatefulDecoder_HEAD		\
+	_MultibyteStatefulCodec_HEAD		\
+	unsigned char pending[MAXDECPENDING];	\
 	Py_ssize_t pendingsize;
-	PyObject *stream, *errors;
+typedef struct {
+	_MultibyteStatefulDecoder_HEAD
+} MultibyteStatefulDecoderContext;
+
+typedef struct {
+	_MultibyteStatefulEncoder_HEAD
+} MultibyteIncrementalEncoderObject;
+
+typedef struct {
+	_MultibyteStatefulDecoder_HEAD
+} MultibyteIncrementalDecoderObject;
+
+typedef struct {
+	_MultibyteStatefulDecoder_HEAD
+	PyObject *stream;
+} MultibyteStreamReaderObject;
+
+typedef struct {
+	_MultibyteStatefulEncoder_HEAD
+	PyObject *stream;
 } MultibyteStreamWriterObject;
 
 /* positive values for illegal sequences */
@@ -95,7 +122,12 @@
 #define ERROR_STRICT		(PyObject *)(1)
 #define ERROR_IGNORE		(PyObject *)(2)
 #define ERROR_REPLACE		(PyObject *)(3)
-#define ERROR_MAX		ERROR_REPLACE
+#define ERROR_ISCUSTOM(p)	((p) < ERROR_STRICT || ERROR_REPLACE < (p))
+#define ERROR_DECREF(p) do {		\
+	if (ERROR_ISCUSTOM(p)) {	\
+		Py_DECREF(p);		\
+	}				\
+} while (0);
 
 #define MBENC_FLUSH		0x0001 /* encode all characters encodable */
 #define MBENC_MAX		MBENC_FLUSH

Modified: python/trunk/Tools/unicode/Makefile
==============================================================================
--- python/trunk/Tools/unicode/Makefile	(original)
+++ python/trunk/Tools/unicode/Makefile	Sun Mar 26 04:34:59 2006
@@ -15,7 +15,7 @@
 
 all:	distclean mappings codecs
 
-codecs:	misc windows iso apple ebcdic custom-mappings
+codecs:	misc windows iso apple ebcdic custom-mappings cjk
 
 ### Mappings
 
@@ -72,6 +72,9 @@
 	$(PYTHON) gencodec.py MAPPINGS/VENDORS/MICSFT/EBCDIC/ build/
 	$(RM) -f build/readme.*
 
+cjk:	build/
+	$(PYTHON) gencjkcodecs.py build/
+
 ### Cleanup
 
 clean:

Added: python/trunk/Tools/unicode/gencjkcodecs.py
==============================================================================
--- (empty file)
+++ python/trunk/Tools/unicode/gencjkcodecs.py	Sun Mar 26 04:34:59 2006
@@ -0,0 +1,65 @@
+import os, string
+
+codecs = {
+    'cn': ('gb2312', 'gbk', 'gb18030', 'hz'),
+    'tw': ('big5', 'cp950'),
+    'hk': ('big5hkscs',),
+    'jp': ('cp932', 'shift_jis', 'euc_jp', 'euc_jisx0213', 'shift_jisx0213',
+           'euc_jis_2004', 'shift_jis_2004'),
+    'kr': ('cp949', 'euc_kr', 'johab'),
+    'iso2022': ('iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
+                'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext',
+                'iso2022_kr'),
+}
+
+TEMPLATE = string.Template("""\
+#
+# $encoding.py: Python Unicode Codec for $ENCODING
+#
+# Written by Hye-Shik Chang <perky at FreeBSD.org>
+#
+
+import _codecs_$owner, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_$owner.getcodec('$encoding')
+
+class Codec(codecs.Codec):
+    encode = codec.encode
+    decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='$encoding',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
+""")
+
+def gencodecs(prefix):
+    for loc, encodings in codecs.iteritems():
+        for enc in encodings:
+            code = TEMPLATE.substitute(ENCODING=enc.upper(),
+                                       encoding=enc.lower(),
+                                       owner=loc)
+            codecpath = os.path.join(prefix, enc + '.py')
+            open(codecpath, 'w').write(code)
+
+if __name__ == '__main__':
+    import sys
+    gencodecs(sys.argv[1])


More information about the Python-checkins mailing list