[Python-checkins] python/dist/src/Modules/cjkcodecs README, NONE, 1.3.4.1 _codecs_cn.c, NONE, 1.1.6.1 _codecs_hk.c, NONE, 1.2.4.1 _codecs_iso2022.c, NONE, 1.3.4.1 _codecs_jp.c, NONE, 1.1.6.1 _codecs_kr.c, NONE, 1.1.6.1 _codecs_tw.c, NONE, 1.1.6.1 alg_jisx0201.h, NONE, 1.2.6.1 cjkcodecs.h, NONE, 1.5.4.1 emu_jisx0213_2000.h, NONE, 1.1.6.1 mappings_cn.h, NONE, 1.1.6.1 mappings_hk.h, NONE, 1.1.6.1 mappings_jisx0213_pair.h, NONE, 1.1.6.1 mappings_jp.h, NONE, 1.1.6.1 mappings_kr.h, NONE, 1.1.6.1 mappings_tw.h, NONE, 1.1.6.1 multibytecodec.c, NONE, 1.3.4.1 multibytecodec.h, NONE, 1.2.6.1

kbk at users.sourceforge.net kbk at users.sourceforge.net
Fri Jan 7 08:03:42 CET 2005


Update of /cvsroot/python/python/dist/src/Modules/cjkcodecs
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12896/Modules/cjkcodecs

Added Files:
      Tag: ast-branch
	README _codecs_cn.c _codecs_hk.c _codecs_iso2022.c 
	_codecs_jp.c _codecs_kr.c _codecs_tw.c alg_jisx0201.h 
	cjkcodecs.h emu_jisx0213_2000.h mappings_cn.h mappings_hk.h 
	mappings_jisx0213_pair.h mappings_jp.h mappings_kr.h 
	mappings_tw.h multibytecodec.c multibytecodec.h 
Log Message:
Merge MAIN into ast-branch
cvs  up -kk -j mrg_to_ast-branch_24APR03  -j mrg_to_ast-branch_05JAN05
  (date of earlier merge estimated 24Apr03 17:30 UTC, repository tagged)

Not merged:
Lib/test/test_compile.py,  Python/compile.c

Refer to Tracker Patch # 1097671 for the merge output and list of
conflicts resolved.


--- NEW FILE: README ---
Notes on cjkcodecs
-------------------
This directory contains source files for cjkcodecs extension modules.
They are based on CJKCodecs (http://cjkpython.i18n.org/#CJKCodecs)
as of Aug 20 2004 currently.



To generate or modify mapping headers
-------------------------------------
Mapping headers are imported from CJKCodecs in pre-generated form.
If you need to tweak or add something to them, please look at the
tools/ subdirectory of the CJKCodecs distribution.



Notes on implementation characteristics of each codec
-----------------------------------------------------

1) Big5 codec

  The big5 codec maps the following characters as cp950 does, rather
  than following Unicode.org's mapping, which maps them to 0xFFFD.

    BIG5        Unicode     Description

    0xA15A      0x2574      SPACING UNDERSCORE
    0xA1C3      0xFFE3      SPACING HEAVY OVERSCORE
    0xA1C5      0x02CD      SPACING HEAVY UNDERSCORE
    0xA1FE      0xFF0F      LT DIAG UP RIGHT TO LOW LEFT
    0xA240      0xFF3C      LT DIAG UP LEFT TO LOW RIGHT
    0xA2CC      0x5341      HANGZHOU NUMERAL TEN
    0xA2CE      0x5345      HANGZHOU NUMERAL THIRTY

  Because Unicode 0x5341, 0x5345, 0xFF0F and 0xFF3C are already mapped
  to other big5 codes, round-trip compatibility is not guaranteed for
  them.


2) cp932 codec

  To conform to Windows's real mapping, the cp932 codec maps the
  following codepoints in addition to the official cp932 mapping.

    CP932     Unicode     Description

    0x80      0x80        UNDEFINED
    0xA0      0xF8F0      UNDEFINED
    0xFD      0xF8F1      UNDEFINED
    0xFE      0xF8F2      UNDEFINED
    0xFF      0xF8F3      UNDEFINED


3) euc-jisx0213 codec

  The euc-jisx0213 codec maps JIS X 0213 Plane 1 code 0x2140 to
  Unicode U+FF3C instead of U+005C as in unicode.org's mapping.
  Because euc-jisx0213 already has REVERSE SOLIDUS at 0x5c and A1C0
  is shown as a full-width character, mapping to U+FF3C makes
  more sense.

  The euc-jisx0213 codec is also able to decode JIS X 0212 codes on
  codeset 2. Because JIS X 0212 and JIS X 0213 Plane 2 do not
  overlap each other, this does not break standard conformance
  (and JIS X 0213 Plane 2 is intended to be used this way.) When
  encoding, the codec will try to encode kanji characters in this
  order:

    JIS X 0213 Plane 1 -> JIS X 0213 Plane 2 -> JIS X 0212


4) euc-jp codec

  The euc-jp codec is a compatibility variant on these points:
   - U+FF3C FULLWIDTH REVERSE SOLIDUS is mapped to EUC-JP A1C0 (and vice versa)
   - U+00A5 YEN SIGN is mapped to EUC-JP 0x5c. (one way)
   - U+203E OVERLINE is mapped to EUC-JP 0x7e. (one way)


5) shift-jis codec

  The shift-jis codec maps the 0x20-0x7e area to U+20-U+7E directly
  instead of using JIS X 0201, for compatibility. The differences are:
   - U+005C REVERSE SOLIDUS is mapped to SHIFT-JIS 0x5c.
   - U+007E TILDE is mapped to SHIFT-JIS 0x7e.
   - U+FF3C FULL-WIDTH REVERSE SOLIDUS is mapped to SHIFT-JIS 815f.


--- NEW FILE: _codecs_cn.c ---
/*
 * _codecs_cn.c: Codecs collection for Mainland Chinese encodings
 *
 * Written by Hye-Shik Chang <perky at FreeBSD.org>
 * $CJKCodecs: _codecs_cn.c,v 1.8 2004/07/07 14:59:26 perky Exp $
 */

#include "cjkcodecs.h"
#include "mappings_cn.h"

/*
 * GBK_PREDECODE(dc1, dc2, assi): if the byte pair (dc1, dc2) is one of
 * three special-cased pairs, store the matching Unicode value in assi.
 * The expansion is an if / else-if chain without a final else, so call
 * sites continue the chain with "else ...".
 */
#define GBK_PREDECODE(dc1, dc2, assi) \
	if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
	else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
	else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7;
/*
 * GBK_PREENCODE(code, assi): the reverse direction for the same three
 * characters; stores the two-byte code (lead byte in the high 8 bits) in
 * assi.  Like GBK_PREDECODE it leaves the if-chain open for an "else".
 */
#define GBK_PREENCODE(code, assi) \
	if ((code) == 0x2014) (assi) = 0xa1aa; \
	else if ((code) == 0x2015) (assi) = 0xa844; \
	else if ((code) == 0x00b7) (assi) = 0xa1a4;

/*
 * GB2312 codec
 */

/*
 * GB2312 encoder.
 *
 * Code points below 0x80 are copied through as a single byte.  Anything
 * else is looked up in the gbcommon table and emitted as two bytes with
 * the high bit set on each; entries carrying the 0x8000 flag (GBK-only)
 * and unmapped characters make the function return 1.
 */
ENCODER(gb2312)
{
	while (inleft > 0) {
		Py_UNICODE c = IN1;
		DBCHAR code;

		if (c >= 0x80) {
			UCS4INVALID(c)

			REQUIRE_OUTBUF(2)
			TRYMAP_ENC(gbcommon, code, c) {
				/* MSB set: entry exists only in GBK */
				if (code & 0x8000)
					return 1;

				OUT1((code >> 8) | 0x80)
				OUT2((code & 0xFF) | 0x80)
				NEXT(1, 2)
			}
			else
				return 1;
		}
		else {
			/* plain ASCII */
			WRITE1((unsigned char)c)
			NEXT(1, 1)
		}
	}

	return 0;
}

/*
 * GB2312 decoder.
 *
 * Bytes below 0x80 are emitted unchanged.  Any other byte is treated as
 * the lead byte of a two-byte sequence; both bytes have their high bit
 * flipped before the gb2312 table lookup.  A failed lookup returns 2.
 */
DECODER(gb2312)
{
	while (inleft > 0) {
		unsigned char c = **inbuf;

		REQUIRE_OUTBUF(1)

		if (c >= 0x80) {
			REQUIRE_INBUF(2)
			TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
				NEXT(2, 1)
			}
			else
				return 2;
		}
		else {
			/* plain ASCII */
			OUT1(c)
			NEXT(1, 1)
		}
	}

	return 0;
}


/*
 * GBK codec
 */

/*
 * GBK encoder.
 *
 * ASCII (< 0x80) is written as one byte.  Other code points are checked
 * against the GBK_PREENCODE special cases and then looked up in the
 * gbcommon table; if neither matches, the function returns 1.  The lead
 * byte is always written with its high bit set; the trail byte only gets
 * the high bit when the table entry does not carry the 0x8000 flag.
 */
ENCODER(gbk)
{
	while (inleft > 0) {
		Py_UNICODE c = IN1;
		DBCHAR code;

		if (c < 0x80) {
			WRITE1((unsigned char)c)
			NEXT(1, 1)
			continue;
		}
		UCS4INVALID(c)

		REQUIRE_OUTBUF(2)

		/* GBK_PREENCODE expands to an open if / else-if chain; the
		 * two "else" lines below continue that chain. */
		GBK_PREENCODE(c, code)
		else TRYMAP_ENC(gbcommon, code, c);
		else return 1;

		OUT1((code >> 8) | 0x80)
		if (code & 0x8000)
			OUT2((code & 0xFF)) /* MSB set: GBK */
		else
			OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
		NEXT(1, 2)
	}

	return 0;
}

/*
 * GBK decoder.
 *
 * Bytes below 0x80 are emitted unchanged.  Otherwise two bytes are
 * consumed and resolved, in order, by: the GBK_PREDECODE special cases,
 * the gb2312 table (both bytes with the high bit flipped), and the gbkext
 * table (raw bytes).  If none matches, the function returns 2.
 */
DECODER(gbk)
{
	while (inleft > 0) {
		unsigned char c = IN1;

		REQUIRE_OUTBUF(1)

		if (c < 0x80) {
			OUT1(c)
			NEXT(1, 1)
			continue;
		}

		REQUIRE_INBUF(2)

		/* GBK_PREDECODE leaves an if-chain open; the "else" lines
		 * below are the remaining alternatives of that chain. */
		GBK_PREDECODE(c, IN2, **outbuf)
		else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80);
		else TRYMAP_DEC(gbkext, **outbuf, c, IN2);
		else return 2;

		NEXT(2, 1)
	}

	return 0;
}


/*
 * GB18030 codec
 */

/*
 * GB18030 encoder.
 *
 * - ASCII (< 0x80) is written as one byte.
 * - Code points >= 0x10000 (after DECODE_SURROGATE) are written as a
 *   computed four-byte sequence whose lead byte starts at 0x90.
 * - Other code points are tried against GBK_PREENCODE, gbcommon and
 *   gb18030ext and written as two bytes.
 * - Anything still unmapped is searched in gb18030_to_unibmp_ranges and
 *   written as a computed four-byte sequence whose lead byte starts at
 *   0x81.  Reaching the table's terminating entry (first == 0) raises
 *   RuntimeError and returns 1.
 */
ENCODER(gb18030)
{
	while (inleft > 0) {
		ucs4_t c = IN1;
		DBCHAR code;

		if (c < 0x80) {
			WRITE1(c)
			NEXT(1, 1)
			continue;
		}

		DECODE_SURROGATE(c)
		if (c > 0x10FFFF)
#if Py_UNICODE_SIZE == 2
			return 2; /* surrogates pair */
#else
			return 1;
#endif
		else if (c >= 0x10000) {
			ucs4_t tc = c - 0x10000;

			REQUIRE_OUTBUF(4)

			/* Peel off the four positions from last to first:
			 * radix 10, 126, 10, then the remainder. */
			OUT4((unsigned char)(tc % 10) + 0x30)
			tc /= 10;
			OUT3((unsigned char)(tc % 126) + 0x81)
			tc /= 126;
			OUT2((unsigned char)(tc % 10) + 0x30)
			tc /= 10;
			OUT1((unsigned char)(tc + 0x90))

#if Py_UNICODE_SIZE == 2
			NEXT(2, 4) /* surrogates pair */
#else
			NEXT(1, 4)
#endif
			continue;
		}

		REQUIRE_OUTBUF(2)

		/* GBK_PREENCODE opens an if-chain that the following "else"
		 * branches continue. */
		GBK_PREENCODE(c, code)
		else TRYMAP_ENC(gbcommon, code, c);
		else TRYMAP_ENC(gb18030ext, code, c);
		else {
			const struct _gb18030_to_unibmp_ranges *utrrange;

			REQUIRE_OUTBUF(4)

			/* Linear scan of the range table; an entry with
			 * first == 0 terminates the table. */
			for (utrrange = gb18030_to_unibmp_ranges;
			     utrrange->first != 0;
			     utrrange++)
				if (utrrange->first <= c &&
				    c <= utrrange->last) {
					Py_UNICODE tc;

					/* linear index of c within the
					 * four-byte code space */
					tc = c - utrrange->first +
					     utrrange->base;

					OUT4((unsigned char)(tc % 10) + 0x30)
					tc /= 10;
					OUT3((unsigned char)(tc % 126) + 0x81)
					tc /= 126;
					OUT2((unsigned char)(tc % 10) + 0x30)
					tc /= 10;
					OUT1((unsigned char)tc + 0x81)

					NEXT(1, 4)
					break;
				}

			/* no range matched: the loop ran to the terminator */
			if (utrrange->first == 0) {
				PyErr_SetString(PyExc_RuntimeError,
						"unicode mapping invalid");
				return 1;
			}
			continue;
		}

		/* two-byte form: same layout as the GBK encoder */
		OUT1((code >> 8) | 0x80)
		if (code & 0x8000)
			OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
		else
			OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */

		NEXT(1, 2)
	}

	return 0;
}

/*
 * GB18030 decoder.
 *
 * - Bytes below 0x80 are emitted unchanged.
 * - If the second byte is an ASCII digit (0x30-0x39) the input is a
 *   four-byte sequence.  Its bytes must lie in 0x81-0xFE, 0x30-0x39,
 *   0x81-0xFE, 0x30-0x39; anything else returns 4.  Sequences whose
 *   (lead - 0x81) is below 4 are mapped through gb18030_to_unibmp_ranges;
 *   those with (lead - 0x81) >= 15 map linearly onto U+10000..U+10FFFF.
 * - Otherwise two bytes are resolved via GBK_PREDECODE, gb2312, gbkext
 *   and gb18030ext in that order; failure returns 2.
 */
DECODER(gb18030)
{
	while (inleft > 0) {
		unsigned char c = IN1, c2;

		REQUIRE_OUTBUF(1)

		if (c < 0x80) {
			OUT1(c)
			NEXT(1, 1)
			continue;
		}

		REQUIRE_INBUF(2)

		c2 = IN2;
		if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
			const struct _gb18030_to_unibmp_ranges *utr;
			unsigned char c3, c4;
			ucs4_t lseq;

			REQUIRE_INBUF(4)
			c3 = IN3;
			c4 = IN4;
			/* Bytes 1 and 3 must be within 0x81-0xFE.  Without
			 * the upper bound, a third byte of 0xFF would become
			 * 126 below and silently alias the sequence with the
			 * next higher second byte. */
			if (c < 0x81 || c > 0xfe ||
			    c3 < 0x81 || c3 > 0xfe ||
			    c4 < 0x30 || c4 > 0x39)
				return 4;
			c -= 0x81;  c2 -= 0x30;
			c3 -= 0x81; c4 -= 0x30;

			if (c < 4) { /* U+0080 - U+FFFF */
				lseq = ((ucs4_t)c * 10 + c2) * 1260 +
					(ucs4_t)c3 * 10 + c4;
				if (lseq < 39420) {
					/* find the last range whose base
					 * does not exceed lseq */
					for (utr = gb18030_to_unibmp_ranges;
					     lseq >= (utr + 1)->base;
					     utr++) ;
					OUT1(utr->first - utr->base + lseq)
					NEXT(4, 1)
					continue;
				}
			}
			else if (c >= 15) { /* U+10000 - U+10FFFF */
				lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)
					* 1260 + (ucs4_t)c3 * 10 + c4;
				if (lseq <= 0x10FFFF) {
					WRITEUCS4(lseq);
					NEXT_IN(4)
					continue;
				}
			}
			return 4;
		}

		/* GBK_PREDECODE opens an if-chain continued by the "else"
		 * lines below. */
		GBK_PREDECODE(c, c2, **outbuf)
		else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, c2 ^ 0x80);
		else TRYMAP_DEC(gbkext, **outbuf, c, c2);
		else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
		else return 2;

		NEXT(2, 1)
	}

	return 0;
}


/*
 * HZ codec
 */

/* HZ encoder init: start with state->i == 0, which ENCODER(hz) treats as
 * "no ~{ shift pending" (ASCII mode). */
ENCODER_INIT(hz)
{
	state->i = 0;
	return 0;
}

/* HZ encoder reset: if a two-byte run is still open (state->i != 0),
 * close it by emitting "~}" and go back to state 0. */
ENCODER_RESET(hz)
{
	if (state->i != 0) {
		WRITE2('~', '}')
		state->i = 0;
		NEXT_OUT(2)
	}
	return 0;
}

/*
 * HZ encoder.
 *
 * state->i is 0 in ASCII mode and 1 while inside a "~{ ... ~}" two-byte
 * run.  ASCII characters close an open run with "~}" first; a literal
 * '~' is additionally doubled to "~~", because '~' introduces escape
 * sequences and DECODER(hz) only yields a tilde from "~~".  Characters
 * found in gbcommon (without the 0x8000 GBK flag) are written as two
 * 7-bit bytes, preceded by "~{" when a run has to be opened.  Unmapped
 * or GBK-only characters return 1.
 */
ENCODER(hz)
{
	while (inleft > 0) {
		Py_UNICODE c = IN1;
		DBCHAR code;

		if (c < 0x80) {
			if (c == '~') {
				/* escape the escape character itself */
				if (state->i == 0) {
					WRITE2('~', '~')
					NEXT(1, 2)
				}
				else {
					WRITE4('~', '}', '~', '~')
					NEXT(1, 4)
					state->i = 0;
				}
			}
			else if (state->i == 0) {
				WRITE1((unsigned char)c)
				NEXT(1, 1)
			}
			else {
				/* leave GB mode before the ASCII byte */
				WRITE3('~', '}', (unsigned char)c)
				NEXT(1, 3)
				state->i = 0;
			}
			continue;
		}

		UCS4INVALID(c)

		TRYMAP_ENC(gbcommon, code, c);
		else return 1;

		if (code & 0x8000) /* MSB set: GBK */
			return 1;

		if (state->i == 0) {
			/* enter GB mode and write the first pair */
			WRITE4('~', '{', code >> 8, code & 0xff)
			NEXT(1, 4)
			state->i = 1;
		}
		else {
			WRITE2(code >> 8, code & 0xff)
			NEXT(1, 2)
		}
	}

	return 0;
}

/* HZ decoder init: state->i == 0 selects ASCII mode in DECODER(hz). */
DECODER_INIT(hz)
{
	state->i = 0;
	return 0;
}

/* HZ decoder reset: drop back to ASCII mode (state->i == 0). */
DECODER_RESET(hz)
{
	state->i = 0;
	return 0;
}

DECODER(hz)
{
	while (inleft > 0) {
		unsigned char c = IN1;

		if (c == '~') {
			unsigned char c2 = IN2;

			REQUIRE_INBUF(2)
			if (c2 == '~') {
				WRITE1('~')
				NEXT(2, 1)
				continue;
			}
			else if (c2 == '{' && state->i == 0)
				state->i = 1; /* set GB */
			else if (c2 == '}' && state->i == 1)
				state->i = 0; /* set ASCII */
			else if (c2 == '\n')
				; /* line-continuation */
			else
				return 2;
			NEXT(2, 0);
			continue;
		}

		if (c & 0x80)
			return 1;

		if (state->i == 0) { /* ASCII mode */
			WRITE1(c)
			NEXT(1, 1)
		}
		else { /* GB mode */
			REQUIRE_INBUF(2)
			REQUIRE_OUTBUF(1)
			TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
				NEXT(2, 1)
			}
			else
				return 2;
		}
	}

	return 0;
}


/*
 * Module tables for the "cn" codec module: the mapping tables it lists,
 * followed by the codecs it provides.  hz is the only codec listed as
 * stateful.
 * NOTE(review): the DECONLY / ENCONLY / ENCDEC suffixes presumably tell
 * which direction(s) of each mapping exist -- confirm in cjkcodecs.h.
 */
BEGIN_MAPPINGS_LIST
  MAPPING_DECONLY(gb2312)
  MAPPING_DECONLY(gbkext)
  MAPPING_ENCONLY(gbcommon)
  MAPPING_ENCDEC(gb18030ext)
END_MAPPINGS_LIST

BEGIN_CODECS_LIST
  CODEC_STATELESS(gb2312)
  CODEC_STATELESS(gbk)
  CODEC_STATELESS(gb18030)
  CODEC_STATEFUL(hz)
END_CODECS_LIST

I_AM_A_MODULE_FOR(cn)

--- NEW FILE: _codecs_hk.c ---
/*
 * _codecs_hk.c: Codecs collection for encodings from Hong Kong
 *
 * Written by Hye-Shik Chang <perky at FreeBSD.org>
 * $CJKCodecs: _codecs_hk.c,v 1.4 2004/07/18 04:44:27 perky Exp $
 */

#define USING_IMPORTED_MAPS

#include "cjkcodecs.h"
#include "mappings_hk.h"

/*
 * BIG5HKSCS codec
 */

/* Big5 tables used as the base layer of BIG5HKSCS; filled in by
 * CODEC_INIT(big5hkscs) through IMPORT_MAP. */
static const encode_map *big5_encmap = NULL;
static const decode_map *big5_decmap = NULL;

/*
 * One-time initialisation: obtain the big5 encode/decode maps via
 * IMPORT_MAP(tw, big5, ...).  Returns -1 if the import reports failure,
 * 0 otherwise.  Once it has succeeded, later calls skip the import.
 * NOTE(review): IMPORT_MAP presumably loads the tables from the "tw"
 * codec module -- confirm in cjkcodecs.h.
 */
CODEC_INIT(big5hkscs)
{
	static int initialized = 0;

	if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
		return -1;
	initialized = 1;
	return 0;
}

/*
 * BIG5HKSCS encoder.
 *
 * ASCII is written as one byte.  Code points below 0x10000 are tried in
 * big5hkscs_bmp first and then in big5; code points in 0x20000-0x2FFFF
 * are tried in big5hkscs_nonbmp using their low 16 bits.  Everything
 * else, and any failed lookup, returns 1 (BMP) or insize (non-BMP).
 * Mapped characters are written as two bytes and consume insize input
 * units.
 */
ENCODER(big5hkscs)
{
	while (inleft > 0) {
		ucs4_t c = **inbuf;
		DBCHAR code;
		int insize;

		if (c < 0x80) {
			REQUIRE_OUTBUF(1)
			**outbuf = (unsigned char)c;
			NEXT(1, 1)
			continue;
		}

		DECODE_SURROGATE(c)
		insize = GET_INSIZE(c);

		REQUIRE_OUTBUF(2)

		if (c < 0x10000) {
			/* HKSCS additions take precedence over plain Big5 */
			TRYMAP_ENC(big5hkscs_bmp, code, c);
			else TRYMAP_ENC(big5, code, c);
			else return 1;
		}
		else if (c < 0x20000)
			return insize;
		else if (c < 0x30000) {
			/* the table is keyed by the low 16 bits only */
			TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff);
			else return insize;
		}
		else
			return insize;

		OUT1(code >> 8)
		OUT2(code & 0xFF)
		NEXT(insize, 2)
	}

	return 0;
}

/* BH2S(c1, c2): linear index of a byte pair, counting lead bytes from
 * 0x88 and trail bytes from 0x40 with 0xfe - 0x40 + 1 trail values per
 * lead byte. */
#define BH2S(c1, c2) (((c1) - 0x88) * (0xfe - 0x40 + 1) + ((c2) - 0x40))

/*
 * BIG5HKSCS decoder.
 *
 * ASCII bytes are emitted unchanged.  For two-byte input the big5 table
 * is tried first, except for lead bytes 0xc7-0xc8 and 0xc6 with a trail
 * byte >= 0xa1, which go straight to the big5hkscs table.  For a
 * big5hkscs hit, one bit per code in the big5hkscs_phint_* arrays decides
 * whether the decoded value is written as is or as (value | 0x20000)
 * through WRITEUCS4.  A hit outside the three hinted ranges returns
 * MBERR_INTERNAL; an unmapped pair returns 2.
 */
DECODER(big5hkscs)
{
	while (inleft > 0) {
		unsigned char c = IN1;
		ucs4_t decoded;

		REQUIRE_OUTBUF(1)

		if (c < 0x80) {
			OUT1(c)
			NEXT(1, 1)
			continue;
		}

		REQUIRE_INBUF(2)

		/* this region is decoded from the HKSCS table only */
		if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1))
			goto hkscsdec;

		TRYMAP_DEC(big5, **outbuf, c, IN2) {
			NEXT(2, 1)
		}
		else
hkscsdec:	TRYMAP_DEC(big5hkscs, decoded, c, IN2) {
			int s = BH2S(c, IN2);
			const unsigned char *hintbase;

			assert(0x88 <= c && c <= 0xfe);
			assert(0x40 <= IN2 && IN2 <= 0xfe);

			/* pick the hint array covering s and rebase s to the
			 * start of that array's range */
			if (BH2S(0x88, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
				hintbase = big5hkscs_phint_0;
				s -= BH2S(0x88, 0x40);
			}
			else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
				hintbase = big5hkscs_phint_11939;
				s -= BH2S(0xc6, 0xa1);
			}
			else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
				hintbase = big5hkscs_phint_21733;
				s -= BH2S(0xf9, 0xd6);
			}
			else
				return MBERR_INTERNAL;

			/* bit s of the hint array set: value lives in the
			 * 0x20000 plane */
			if (hintbase[s >> 3] & (1 << (s & 7))) {
				WRITEUCS4(decoded | 0x20000)
				NEXT_IN(2)
			}
			else {
				OUT1(decoded)
				NEXT(2, 1)
			}
		}
		else return 2;
	}

	return 0;
}


/*
 * Module tables for the "hk" codec module.  big5hkscs is listed with
 * CODEC_STATELESS_WINIT; NOTE(review): presumably "stateless, with init",
 * i.e. the variant that hooks up CODEC_INIT(big5hkscs) -- confirm in
 * cjkcodecs.h.
 */
BEGIN_MAPPINGS_LIST
  MAPPING_DECONLY(big5hkscs)
  MAPPING_ENCONLY(big5hkscs_bmp)
  MAPPING_ENCONLY(big5hkscs_nonbmp)
END_MAPPINGS_LIST

BEGIN_CODECS_LIST
  CODEC_STATELESS_WINIT(big5hkscs)
END_CODECS_LIST

I_AM_A_MODULE_FOR(hk)

--- NEW FILE: _codecs_iso2022.c ---
/*
 * _codecs_iso2022.c: Codecs collection for ISO-2022 encodings.
 *
 * Written by Hye-Shik Chang <perky at FreeBSD.org>
 * $CJKCodecs: _codecs_iso2022.c,v 1.22 2004/08/19 17:08:13 perky Exp $
 */

#define USING_IMPORTED_MAPS
#define USING_BINARY_PAIR_SEARCH
#define EXTERN_JISX0213_PAIR
#define EMULATE_JISX0213_2000_ENCODE_INVALID MAP_UNMAPPABLE
#define EMULATE_JISX0213_2000_DECODE_INVALID MAP_UNMAPPABLE

#include "cjkcodecs.h"
#include "alg_jisx0201.h"
#include "emu_jisx0213_2000.h"
#include "mappings_jisx0213_pair.h"

/* STATE
[...1082 lines suppressed...]
END_MAPPINGS_LIST

/*
 * ISO2022_CODEC(variation): one codec-list entry for an ISO-2022 variant.
 * The entry is named "iso2022_<variation>", points at
 * iso2022_<variation>_config, and uses iso2022_codec_init together with
 * the stateful iso2022 method set.  The expansion ends with a comma so
 * entries can simply be listed one after another.
 */
#define ISO2022_CODEC(variation) {		\
	"iso2022_" #variation,			\
	&iso2022_##variation##_config,		\
	iso2022_codec_init,			\
	_STATEFUL_METHODS(iso2022)		\
},

/* Codecs provided by the "iso2022" module: one entry per ISO-2022
 * variant, all built with ISO2022_CODEC. */
BEGIN_CODECS_LIST
  ISO2022_CODEC(kr)
  ISO2022_CODEC(jp)
  ISO2022_CODEC(jp_1)
  ISO2022_CODEC(jp_2)
  ISO2022_CODEC(jp_2004)
  ISO2022_CODEC(jp_3)
  ISO2022_CODEC(jp_ext)
END_CODECS_LIST

I_AM_A_MODULE_FOR(iso2022)

--- NEW FILE: _codecs_jp.c ---
/*
 * _codecs_jp.c: Codecs collection for Japanese encodings
 *
 * Written by Hye-Shik Chang <perky at FreeBSD.org>
 * $CJKCodecs: _codecs_jp.c,v 1.14 2004/07/07 17:54:47 perky Exp $
 */

#define USING_BINARY_PAIR_SEARCH
#define EMPBASE 0x20000

#include "cjkcodecs.h"
#include "mappings_jp.h"
#include "mappings_jisx0213_pair.h"
#include "alg_jisx0201.h"
#include "emu_jisx0213_2000.h"

/*
 * CP932 codec
 */

/*
 * CP932 encoder.
 *
 * Single-byte output:
 *   - U+0000..U+0080 are written unchanged (0x80 included),
 *   - U+FF61..U+FF9F (half-width katakana) become 0xA1..0xDF,
 *   - U+F8F0 becomes 0xA0 and U+F8F1..U+F8F3 become 0xFD..0xFF.
 * Double-byte output:
 *   - cp932ext entries are written as stored,
 *   - jisxcommon entries without the 0x8000 flag are converted from the
 *     JIS X 0208 row/cell form to Shift-JIS bytes,
 *   - U+E000..U+E757 (user-defined area) map to lead bytes from 0xF0,
 *     188 code points per lead byte.
 * Anything else returns 1.
 */
ENCODER(cp932)
{
	while (inleft > 0) {
		Py_UNICODE c = IN1;
		DBCHAR code;
		unsigned char c1, c2;

		if (c <= 0x80) {
			WRITE1((unsigned char)c)
			NEXT(1, 1)
			continue;
		}
		else if (c >= 0xff61 && c <= 0xff9f) {
			WRITE1(c - 0xfec0)
			NEXT(1, 1)
			continue;
		}
		else if (c >= 0xf8f0 && c <= 0xf8f3) {
			/* Windows compatibility */
			REQUIRE_OUTBUF(1)
			if (c == 0xf8f0)
				OUT1(0xa0)
			else
				/* U+F8F1..U+F8F3 -> 0xFD..0xFF.  The base
				 * used to be written 0xfef1, which only
				 * gave the right byte after truncation. */
				OUT1(c - 0xf8f1 + 0xfd)
			NEXT(1, 1)
			continue;
		}

		UCS4INVALID(c)
		REQUIRE_OUTBUF(2)

		TRYMAP_ENC(cp932ext, code, c) {
			OUT1(code >> 8)
			OUT2(code & 0xff)
		}
		else TRYMAP_ENC(jisxcommon, code, c) {
			if (code & 0x8000) /* MSB set: JIS X 0212 */
				return 1;

			/* JIS X 0208 */
			c1 = code >> 8;
			c2 = code & 0xff;
			/* odd rows share a lead byte with the preceding
			 * even row and use the upper half of the trail
			 * byte range */
			c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
			c1 = (c1 - 0x21) >> 1;
			OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
			OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
		}
		else if (c >= 0xe000 && c < 0xe758) {
			/* User-defined area */
			c1 = (Py_UNICODE)(c - 0xe000) / 188;
			c2 = (Py_UNICODE)(c - 0xe000) % 188;
			OUT1(c1 + 0xf0)
			OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
		}
		else
			return 1;

		NEXT(1, 2)
	}

	return 0;
}

/*
 * CP932 decoder.
 *
 * Single bytes: 0x00..0x80 are emitted unchanged, 0xA0 becomes U+F8F0,
 * 0xA1..0xDF become U+FF61..U+FF9F, and 0xFD..0xFF become
 * U+F8F1..U+F8F3.  Double bytes are tried against cp932ext first; lead
 * bytes 0x81..0x9F / 0xE0..0xEA are converted to a JIS X 0208 row/cell
 * pair and looked up in jisx0208; lead bytes 0xF0..0xF9 map linearly
 * into the user-defined area starting at U+E000.  Invalid or unmapped
 * input returns 2.
 */
DECODER(cp932)
{
	while (inleft > 0) {
		unsigned char c = IN1, c2;

		REQUIRE_OUTBUF(1)
		if (c <= 0x80) {
			OUT1(c)
			NEXT(1, 1)
			continue;
		}
		else if (c >= 0xa0 && c <= 0xdf) {
			if (c == 0xa0)
				OUT1(0xf8f0) /* Windows compatibility */
			else
				OUT1(0xfec0 + c) /* half-width katakana */
			NEXT(1, 1)
			continue;
		}
		else if (c >= 0xfd/* && c <= 0xff*/) {
			/* Windows compatibility */
			OUT1(0xf8f1 - 0xfd + c)
			NEXT(1, 1)
			continue;
		}

		REQUIRE_INBUF(2)
		c2 = IN2;

		TRYMAP_DEC(cp932ext, **outbuf, c, c2);
		else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
			/* valid trail bytes are 0x40-0x7e and 0x80-0xfc */
			if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
				return 2;

			/* Shift-JIS bytes -> JIS X 0208 row (c) / cell (c2) */
			c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
			c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
			c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
			c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;

			TRYMAP_DEC(jisx0208, **outbuf, c, c2);
			else return 2;
		}
		else if (c >= 0xf0 && c <= 0xf9) {
			/* user-defined area: 188 code points per lead byte */
			if ((c2 >= 0x40 && c2 <= 0x7e) ||
			    (c2 >= 0x80 && c2 <= 0xfc))
				OUT1(0xe000 + 188 * (c - 0xf0) +
				     (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
			else
				return 2;
		}
		else
			return 2;

		NEXT(2, 1)
	}

	return 0;
}


/*
 * EUC-JIS-2004 codec
 */

/*
 * EUC-JIS-2004 encoder.
 *
 * ASCII is written as one byte.  Code points up to 0xFFFF go through, in
 * order: the EMULATE_JISX0213_2000_ENCODE_BMP hook, jisx0213_bmp,
 * jisxcommon, half-width katakana (0x8e prefix) and two full-width
 * special cases.  A jisx0213_bmp result of MULTIC means the character
 * may combine with the following one: the pair table is consulted with
 * the next input unit (or with 0 when none is available and MBENC_FLUSH
 * is set) and two input units are consumed when a pair is found.  Code
 * points in the same 64K plane as EMPBASE are looked up in jisx0213_emp
 * by their low 16 bits.  Codes with 0x8000 set are written as three
 * bytes starting with 0x8f, all others as two bytes.
 */
ENCODER(euc_jis_2004)
{
	while (inleft > 0) {
		ucs4_t c = IN1;
		DBCHAR code;
		int insize;

		if (c < 0x80) {
			WRITE1(c)
			NEXT(1, 1)
			continue;
		}

		DECODE_SURROGATE(c)
		insize = GET_INSIZE(c);

		if (c <= 0xFFFF) {
			EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
			else TRYMAP_ENC(jisx0213_bmp, code, c) {
				if (code == MULTIC) {
					if (inleft < 2) {
						/* no following character: only
						 * resolve when flushing,
						 * otherwise ask for more */
						if (flags & MBENC_FLUSH) {
							code = find_pairencmap(
							    (ucs2_t)c, 0,
							  jisx0213_pair_encmap,
							    JISX0213_ENCPAIRS);
							if (code == DBCINV)
								return 1;
						}
						else
							return MBERR_TOOFEW;
					}
					else {
						/* try (c, next) as a pair,
						 * then c on its own */
						code = find_pairencmap(
							(ucs2_t)c, (*inbuf)[1],
							jisx0213_pair_encmap,
							JISX0213_ENCPAIRS);
						if (code == DBCINV) {
							code = find_pairencmap(
							    (ucs2_t)c, 0,
							  jisx0213_pair_encmap,
							    JISX0213_ENCPAIRS);
							if (code == DBCINV)
								return 1;
						} else
							insize = 2;
					}
				}
			}
			else TRYMAP_ENC(jisxcommon, code, c);
			else if (c >= 0xff61 && c <= 0xff9f) {
				/* JIS X 0201 half-width katakana */
				WRITE2(0x8e, c - 0xfec0)
				NEXT(1, 2)
				continue;
			}
			else if (c == 0xff3c)
				/* F/W REVERSE SOLIDUS (see NOTES) */
				code = 0x2140;
			else if (c == 0xff5e)
				/* F/W TILDE (see NOTES) */
				code = 0x2232;
			else
				return 1;
		}
		else if (c >> 16 == EMPBASE >> 16) {
			EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
			else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
			else return insize;
		}
		else
			return insize;

		if (code & 0x8000) {
			/* Codeset 2 */
			WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
			NEXT(insize, 3)
		} else {
			/* Codeset 1 */
			WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
			NEXT(insize, 2)
		}
	}

	return 0;
}

/*
 * EUC-JIS-2004 decoder.
 *
 * - Bytes below 0x80 are emitted unchanged.
 * - 0x8e introduces a half-width katakana byte (0xa1..0xdf).
 * - 0x8f introduces a three-byte sequence, resolved via the
 *   EMULATE_JISX0213_2000_DECODE_PLANE2 hook, jisx0213_2_bmp,
 *   jisx0213_2_emp and finally jisx0212; failure returns 3.
 * - Any other lead byte starts a two-byte sequence, resolved via the
 *   PLANE1 hook, two hard-wired full-width characters, jisx0208,
 *   jisx0213_1_bmp, jisx0213_1_emp and jisx0213_pair; failure returns 2.
 * Values from the *_emp tables are combined with EMPBASE and written
 * with WRITEUCS4; jisx0213_pair entries produce two output units.
 */
DECODER(euc_jis_2004)
{
	while (inleft > 0) {
		unsigned char c = IN1;
		ucs4_t code;

		REQUIRE_OUTBUF(1)

		if (c < 0x80) {
			OUT1(c)
			NEXT(1, 1)
			continue;
		}

		if (c == 0x8e) {
			/* JIS X 0201 half-width katakana */
			unsigned char c2;

			REQUIRE_INBUF(2)
			c2 = IN2;
			if (c2 >= 0xa1 && c2 <= 0xdf) {
				OUT1(0xfec0 + c2)
				NEXT(2, 1)
			}
			else
				return 2;
		}
		else if (c == 0x8f) {
			unsigned char c2, c3;

			REQUIRE_INBUF(3)
			/* flip the high bit to get the table indexes */
			c2 = IN2 ^ 0x80;
			c3 = IN3 ^ 0x80;

			/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
			EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3)
			else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
			else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
				WRITEUCS4(EMPBASE | code)
				NEXT_IN(3)
				continue;
			}
			else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
			else return 3;
			NEXT(3, 1)
		}
		else {
			unsigned char c2;

			REQUIRE_INBUF(2)
			c ^= 0x80;
			c2 = IN2 ^ 0x80;

			/* JIS X 0213 Plane 1 */
			EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2)
			else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
			else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
			else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
			else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
			else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
				WRITEUCS4(EMPBASE | code)
				NEXT_IN(2)
				continue;
			}
			else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
				/* one code decodes to two output units */
				WRITE2(code >> 16, code & 0xffff)
				NEXT(2, 2)
				continue;
			}
			else return 2;
			NEXT(2, 1)
		}
	}

	return 0;
}


/*
 * EUC-JP codec
 */

/*
 * EUC-JP encoder.
 *
 * ASCII is written as one byte.  Other characters are looked up in
 * jisxcommon; half-width katakana are written as 0x8e plus one byte.
 * Unless STRICT_BUILD is defined, U+FF3C is encoded as JIS 0x2140 and
 * U+00A5 / U+203E are written as the single bytes 0x5c / 0x7e.  Codes
 * with 0x8000 set are written as three bytes starting with 0x8f, the
 * rest as two bytes.  Unmapped characters return 1.
 */
ENCODER(euc_jp)
{
	while (inleft > 0) {
		Py_UNICODE c = IN1;
		DBCHAR code;

		if (c < 0x80) {
			WRITE1((unsigned char)c)
			NEXT(1, 1)
			continue;
		}

		UCS4INVALID(c)

		TRYMAP_ENC(jisxcommon, code, c);
		else if (c >= 0xff61 && c <= 0xff9f) {
			/* JIS X 0201 half-width katakana */
			WRITE2(0x8e, c - 0xfec0)
			NEXT(1, 2)
			continue;
		}
#ifndef STRICT_BUILD
		else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
			code = 0x2140;
		else if (c == 0xa5) { /* YEN SIGN */
			WRITE1(0x5c);
			NEXT(1, 1)
			continue;
		} else if (c == 0x203e) { /* OVERLINE */
			WRITE1(0x7e);
			NEXT(1, 1)
			continue;
		}
#endif
		else
			return 1;

		if (code & 0x8000) {
			/* JIS X 0212 */
			WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
			NEXT(1, 3)
		} else {
			/* JIS X 0208 */
			WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
			NEXT(1, 2)
		}
	}

	return 0;
}

/*
 * EUC-JP decoder.
 *
 * Bytes below 0x80 are emitted unchanged.  0x8e introduces one
 * half-width katakana byte, 0x8f a two-byte JIS X 0212 code, and any
 * other lead byte a two-byte JIS X 0208 code (table lookups use the
 * bytes with the high bit flipped).  Unless STRICT_BUILD is defined the
 * pair 0xa1 0xc0 decodes to U+FF3C.  Failures return the length of the
 * sequence being decoded (2 or 3).
 */
DECODER(euc_jp)
{
	while (inleft > 0) {
		unsigned char c = IN1;

		REQUIRE_OUTBUF(1)

			if (c < 0x80) {
				OUT1(c)
				NEXT(1, 1)
				continue;
			}

		if (c == 0x8e) {
			/* JIS X 0201 half-width katakana */
			unsigned char c2;

			REQUIRE_INBUF(2)
			c2 = IN2;
			if (c2 >= 0xa1 && c2 <= 0xdf) {
				OUT1(0xfec0 + c2)
				NEXT(2, 1)
			}
			else
				return 2;
		}
		else if (c == 0x8f) {
			unsigned char c2, c3;

			REQUIRE_INBUF(3)
			c2 = IN2;
			c3 = IN3;
			/* JIS X 0212 */
			TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
				NEXT(3, 1)
			}
			else
				return 3;
		}
		else {
			unsigned char c2;

			REQUIRE_INBUF(2)
			c2 = IN2;
			/* JIS X 0208 */
#ifndef STRICT_BUILD
			if (c == 0xa1 && c2 == 0xc0)
				/* FULL-WIDTH REVERSE SOLIDUS */
				**outbuf = 0xff3c;
			else
#endif
				TRYMAP_DEC(jisx0208, **outbuf,
					   c ^ 0x80, c2 ^ 0x80) ;
			else return 2;
			NEXT(2, 1)
		}
	}

	return 0;
}


/*
 * SHIFT_JIS codec
 */

/*
 * SHIFT_JIS encoder.
 *
 * The first if-chain classifies the character as a single-byte code or
 * leaves code == NOCHAR.  In a non-strict build ASCII maps to itself and
 * U+00A5 / U+203E map to 0x5c / 0x7e; a strict build uses
 * JISX0201_R_ENCODE instead.  JISX0201_K_ENCODE and UCS4INVALID continue
 * the same chain.  Single-byte codes (< 0x80 or 0xa1..0xdf) are written
 * directly.  Otherwise the character is looked up in jisxcommon (plus
 * U+FF3C -> 0x2140 in a non-strict build) and the JIS X 0208 row/cell
 * code is converted to two Shift-JIS bytes.  JIS X 0212 entries and
 * unmapped characters return 1.
 */
ENCODER(shift_jis)
{
	while (inleft > 0) {
		Py_UNICODE c = IN1;
		DBCHAR code;
		unsigned char c1, c2;

#ifdef STRICT_BUILD
		JISX0201_R_ENCODE(c, code)
#else
		if (c < 0x80) code = c;
		else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
		else if (c == 0x203e) code = 0x7e; /* OVERLINE */
#endif
		else JISX0201_K_ENCODE(c, code)
		else UCS4INVALID(c)
		else code = NOCHAR;

		if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
			REQUIRE_OUTBUF(1)

			OUT1((unsigned char)code)
			NEXT(1, 1)
			continue;
		}

		REQUIRE_OUTBUF(2)

		if (code == NOCHAR) {
			TRYMAP_ENC(jisxcommon, code, c);
#ifndef STRICT_BUILD
			else if (c == 0xff3c)
				code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
#endif
			else
				return 1;

			if (code & 0x8000) /* MSB set: JIS X 0212 */
				return 1;
		}

		/* JIS X 0208 row/cell -> Shift-JIS lead/trail bytes */
		c1 = code >> 8;
		c2 = code & 0xff;
		c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
		c1 = (c1 - 0x21) >> 1;
		OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
		OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
		NEXT(1, 2)
	}

	return 0;
}

/*
 * SHIFT_JIS decoder.
 *
 * Single bytes are handled by the first if-chain (ASCII directly in a
 * non-strict build, JISX0201_R_DECODE in a strict build, then
 * JISX0201_K_DECODE); those cases fall through to the NEXT(1, 1) at the
 * bottom of the loop.  Lead bytes 0x81..0x9f / 0xe0..0xea start a
 * two-byte sequence that is converted to a JIS X 0208 row/cell pair and
 * looked up in jisx0208 (with 0x2140 -> U+FF3C in a non-strict build).
 * Invalid or unmapped input returns 2.
 */
DECODER(shift_jis)
{
	while (inleft > 0) {
		unsigned char c = IN1;

		REQUIRE_OUTBUF(1)

#ifdef STRICT_BUILD
		JISX0201_R_DECODE(c, **outbuf)
#else
		if (c < 0x80) **outbuf = c;
#endif
		else JISX0201_K_DECODE(c, **outbuf)
		else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
			unsigned char c1, c2;

			REQUIRE_INBUF(2)
			c2 = IN2;
			/* valid trail bytes are 0x40-0x7e and 0x80-0xfc */
			if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
				return 2;

			/* Shift-JIS bytes -> JIS X 0208 row (c1) / cell (c2) */
			c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
			c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
			c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
			c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;

#ifndef STRICT_BUILD
			if (c1 == 0x21 && c2 == 0x40) {
				/* FULL-WIDTH REVERSE SOLIDUS */
				OUT1(0xff3c)
				NEXT(2, 1)
				continue;
			}
#endif
			TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
				NEXT(2, 1)
				continue;
			}
			else
				return 2;
		}
		else
			return 2;

		NEXT(1, 1) /* JIS X 0201 */
	}

	return 0;
}


/*
 * SHIFT_JIS-2004 codec
 */

/*
 * SHIFT_JIS-2004 encoder.
 *
 * JISX0201_ENCODE handles single-byte characters; results below 0x80 or
 * in 0xa1..0xdf are written as one byte.  Otherwise (code == NOCHAR) the
 * character is resolved like in the EUC-JIS-2004 encoder: BMP characters
 * via the JIS X 0213:2000 emulation hook, jisx0213_bmp (including MULTIC
 * pair handling, which may consume two input units) and jisxcommon
 * (JIS X 0212 entries rejected); characters in EMPBASE's plane via
 * jisx0213_emp.  The resulting row/cell code is then converted to two
 * Shift-JIS bytes, with a separate row remapping for plane 2 (row byte
 * has 0x80 set).
 * NOTE(review): "else DECODE_SURROGATE(c)" relies on that macro always
 * expanding to a statement -- confirm in cjkcodecs.h.
 */
ENCODER(shift_jis_2004)
{
	while (inleft > 0) {
		ucs4_t c = IN1;
		DBCHAR code = NOCHAR;
		int c1, c2;
		size_t insize;

		JISX0201_ENCODE(c, code)
		else DECODE_SURROGATE(c)

		if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
			WRITE1((unsigned char)code)
			NEXT(1, 1)
			continue;
		}

		REQUIRE_OUTBUF(2)
		insize = GET_INSIZE(c);

		if (code == NOCHAR) {
			if (c <= 0xffff) {
				EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
				else TRYMAP_ENC(jisx0213_bmp, code, c) {
					if (code == MULTIC) {
						if (inleft < 2) {
						    /* no following character:
						     * resolve only on flush */
						    if (flags & MBENC_FLUSH) {
							code = find_pairencmap
							    ((ucs2_t)c, 0,
							  jisx0213_pair_encmap,
							    JISX0213_ENCPAIRS);
							if (code == DBCINV)
							    return 1;
						    }
						    else
							    return MBERR_TOOFEW;
						}
						else {
						    /* try (c, next) as a pair,
						     * then c on its own */
						    code = find_pairencmap(
							    (ucs2_t)c, IN2,
							  jisx0213_pair_encmap,
							    JISX0213_ENCPAIRS);
						    if (code == DBCINV) {
							code = find_pairencmap(
							    (ucs2_t)c, 0,
							  jisx0213_pair_encmap,
							    JISX0213_ENCPAIRS);
							if (code == DBCINV)
							    return 1;
							}
							else
							    insize = 2;
						}
					}
				}
				else TRYMAP_ENC(jisxcommon, code, c) {
					/* abandon JIS X 0212 codes */
					if (code & 0x8000)
						return 1;
				}
				else return 1;
			}
			else if (c >> 16 == EMPBASE >> 16) {
				EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
				else TRYMAP_ENC(jisx0213_emp, code, c&0xffff);
				else return insize;
			}
			else
				return insize;
		}

		c1 = code >> 8;
		c2 = (code & 0xff) - 0x21;

		/* map the row byte to a zero-based Shift-JIS row index */
		if (c1 & 0x80) { /* Plane 2 */
			if (c1 >= 0xee) c1 -= 0x87;
			else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
			else c1 -= 0x43;
		}
		else /* Plane 1 */
			c1 -= 0x21;

		/* two rows share one lead byte; odd rows use the upper
		 * half of the trail byte range */
		if (c1 & 1) c2 += 0x5e;
		c1 >>= 1;
		OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
		OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))

		NEXT(insize, 2)
	}

	return 0;
}

/*
 * SHIFT_JIS-2004 decoder.
 *
 * JISX0201_DECODE handles single bytes; those fall through to the
 * NEXT(1, 1) at the bottom of the loop.  Lead bytes 0x81..0x9f and
 * 0xe0..0xfc start a two-byte sequence, which is converted to a
 * zero-based row (c1) and a cell (c2):
 *   - rows below 0x5e are JIS X 0213 plane 1, resolved via the PLANE1
 *     emulation hook, jisx0208, jisx0213_1_bmp, jisx0213_1_emp and
 *     jisx0213_pair;
 *   - the remaining rows are remapped to plane 2 row numbers and
 *     resolved via the PLANE2 hook, jisx0213_2_bmp and jisx0213_2_emp.
 * Invalid or unmapped input returns 2.
 */
DECODER(shift_jis_2004)
{
	while (inleft > 0) {
		unsigned char c = IN1;

		REQUIRE_OUTBUF(1)
		JISX0201_DECODE(c, **outbuf)
		else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
			unsigned char c1, c2;
			ucs4_t code;

			/* check that the trail byte exists before reading
			 * it, so a dangling lead byte at the end of the
			 * input never reads past the buffer */
			REQUIRE_INBUF(2)
			c2 = IN2;
			/* valid trail bytes are 0x40-0x7e and 0x80-0xfc */
			if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
				return 2;

			c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
			c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
			c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
			c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;

			if (c1 < 0x5e) { /* Plane 1 */
				c1 += 0x21;
				EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf,
						c1, c2)
				else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
					NEXT_OUT(1)
				}
				else TRYMAP_DEC(jisx0213_1_bmp, **outbuf,
						c1, c2) {
					NEXT_OUT(1)
				}
				else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
					WRITEUCS4(EMPBASE | code)
				}
				else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
					/* one code decodes to two units */
					WRITE2(code >> 16, code & 0xffff)
					NEXT_OUT(2)
				}
				else
					return 2;
				NEXT_IN(2)
			}
			else { /* Plane 2 */
				/* zero-based row -> plane 2 row number */
				if (c1 >= 0x67) c1 += 0x07;
				else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
				else c1 -= 0x3d;

				EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf,
						c1, c2)
				else TRYMAP_DEC(jisx0213_2_bmp, **outbuf,
						c1, c2) ;
				else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
					WRITEUCS4(EMPBASE | code)
					NEXT_IN(2)
					continue;
				}
				else
					return 2;
				NEXT(2, 1)
			}
			continue;
		}
		else
			return 2;

		NEXT(1, 1) /* JIS X 0201 */
	}

	return 0;
}


/*
 * Module tables for the "jp" codec module: the mapping tables it lists,
 * followed by the codecs it provides.  The last two codec entries,
 * euc_jisx0213 and shift_jisx0213, reuse the euc_jis_2004 and
 * shift_jis_2004 method sets with (void *)2000 as their second field.
 * NOTE(review): that value presumably selects the JIS X 0213:2000
 * emulation done by the EMULATE_JISX0213_2000_* hooks -- confirm in
 * emu_jisx0213_2000.h.
 */
BEGIN_MAPPINGS_LIST
  MAPPING_DECONLY(jisx0208)
  MAPPING_DECONLY(jisx0212)
  MAPPING_ENCONLY(jisxcommon)
  MAPPING_DECONLY(jisx0213_1_bmp)
  MAPPING_DECONLY(jisx0213_2_bmp)
  MAPPING_ENCONLY(jisx0213_bmp)
  MAPPING_DECONLY(jisx0213_1_emp)
  MAPPING_DECONLY(jisx0213_2_emp)
  MAPPING_ENCONLY(jisx0213_emp)
  MAPPING_ENCDEC(jisx0213_pair)
  MAPPING_ENCDEC(cp932ext)
END_MAPPINGS_LIST

BEGIN_CODECS_LIST
  CODEC_STATELESS(shift_jis)
  CODEC_STATELESS(cp932)
  CODEC_STATELESS(euc_jp)
  CODEC_STATELESS(shift_jis_2004)
  CODEC_STATELESS(euc_jis_2004)
  { "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
  { "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
END_CODECS_LIST

I_AM_A_MODULE_FOR(jp)

--- NEW FILE: _codecs_kr.c ---
/*
 * _codecs_kr.c: Codecs collection for Korean encodings
 *
 * Written by Hye-Shik Chang <perky at FreeBSD.org>
 * $CJKCodecs: _codecs_kr.c,v 1.8 2004/07/07 14:59:26 perky Exp $
 */

#include "cjkcodecs.h"
#include "mappings_kr.h"

/*
 * EUC-KR codec
 */

/*
 * EUC-KR encoder.
 *
 * Code points below 0x80 are copied through as one byte.  Everything
 * else is looked up in the cp949 table and emitted as two bytes with the
 * high bit set on each; entries flagged with 0x8000 (CP949-only) and
 * unmapped characters make the function return 1.
 */
ENCODER(euc_kr)
{
	while (inleft > 0) {
		Py_UNICODE c = IN1;
		DBCHAR code;

		if (c >= 0x80) {
			UCS4INVALID(c)

			REQUIRE_OUTBUF(2)
			TRYMAP_ENC(cp949, code, c) {
				/* MSB set: entry exists only in CP949 */
				if (code & 0x8000)
					return 1;

				OUT1((code >> 8) | 0x80)
				OUT2((code & 0xFF) | 0x80)
				NEXT(1, 2)
			}
			else
				return 1;
		}
		else {
			/* plain ASCII */
			WRITE1((unsigned char)c)
			NEXT(1, 1)
		}
	}

	return 0;
}

/*
 * EUC-KR decoder.
 *
 * Bytes below 0x80 are emitted unchanged.  Any other byte starts a
 * two-byte sequence; both bytes have their high bit flipped before the
 * ksx1001 table lookup.  A failed lookup returns 2.
 */
DECODER(euc_kr)
{
	while (inleft > 0) {
		unsigned char c = IN1;

		REQUIRE_OUTBUF(1)

		if (c >= 0x80) {
			REQUIRE_INBUF(2)

			TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
				NEXT(2, 1)
			}
			else
				return 2;
		}
		else {
			/* plain ASCII */
			OUT1(c)
			NEXT(1, 1)
		}
	}

	return 0;
}


/*
 * CP949 codec
 */

/*
 * CP949 encoder.
 *
 * Code points below 0x80 are copied through as one byte.  Everything
 * else is looked up in the cp949 table; unmapped characters return 1.
 * The lead byte is always written with its high bit set.  The trail byte
 * gets the high bit only for entries without the 0x8000 flag.
 */
ENCODER(cp949)
{
	while (inleft > 0) {
		Py_UNICODE c = IN1;
		DBCHAR code;

		if (c >= 0x80) {
			UCS4INVALID(c)

			REQUIRE_OUTBUF(2)
			TRYMAP_ENC(cp949, code, c) {
				OUT1((code >> 8) | 0x80)
				if (!(code & 0x8000))
					/* MSB unset: ks x 1001 */
					OUT2((code & 0xFF) | 0x80)
				else
					/* MSB set: CP949 */
					OUT2(code & 0xFF)
				NEXT(1, 2)
			}
			else
				return 1;
		}
		else {
			/* plain ASCII */
			WRITE1((unsigned char)c)
			NEXT(1, 1)
		}
	}

	return 0;
}

/*
 * CP949 decoder.
 *
 * Bytes below 0x80 are emitted unchanged.  Any other byte starts a
 * two-byte sequence that is tried against ksx1001 (both bytes with the
 * high bit flipped) and then against cp949ext (raw bytes).  If neither
 * table has the pair, the function returns 2.
 */
DECODER(cp949)
{
	while (inleft > 0) {
		unsigned char c = IN1;

		REQUIRE_OUTBUF(1)

		if (c >= 0x80) {
			REQUIRE_INBUF(2)
			TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
				NEXT(2, 1)
			}
			else TRYMAP_DEC(cp949ext, **outbuf, c, IN2) {
				NEXT(2, 1)
			}
			else
				return 2;
		}
		else {
			/* plain ASCII */
			OUT1(c)
			NEXT(1, 1)
		}
	}

	return 0;
}


/*
 * JOHAB codec
 */

/*
 * Lookup tables used by ENCODER(johab).
 *
 * The three u2johabidx_* tables give the 5-bit JOHAB field value for a
 * Hangul syllable's three components; ENCODER(johab) indexes them with
 * c / 588, (c / 28) % 21 and c % 28 respectively, where c is the
 * syllable's offset from U+AC00.  They are declared with 32 entries but
 * only the first 19 / 21 / 28 are initialised explicitly; the remaining
 * entries are zero.
 */
static const unsigned char u2johabidx_choseong[32] = {
                0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14,
};
static const unsigned char u2johabidx_jungseong[32] = {
                      0x03, 0x04, 0x05, 0x06, 0x07,
                0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
                0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
                0x1a, 0x1b, 0x1c, 0x1d,
};
static const unsigned char u2johabidx_jongseong[32] = {
          0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11,       0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
};
/* JOHAB codes for U+3131..U+3163, indexed by (c - 0x3131); 51 entries. */
static const DBCHAR u2johabjamo[] = {
            0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
    0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
    0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
    0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
    0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
    0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
    0x8741, 0x8761, 0x8781, 0x87a1,
};

/*
 * Encode Unicode to JOHAB.
 *
 * Four cases are handled, in this order:
 *   - c < 0x80: written as a single byte;
 *   - U+AC00..U+D7A3: the code is computed arithmetically from the three
 *     u2johabidx_* tables, with bit 15 set;
 *   - U+3131..U+3163: the code is taken from u2johabjamo;
 *   - anything else: looked up in the cp949 encode map and, if the result
 *     lies in the accepted row/column ranges, converted to a JOHAB byte
 *     pair.
 *
 * Returns 0 when the input is exhausted, 1 for an unencodable character,
 * or MBERR_TOOSMALL from the output-space checks.
 */
ENCODER(johab)
{
	while (inleft > 0) {
		Py_UNICODE c = IN1;
		DBCHAR code;

		if (c < 0x80) {
			WRITE1((unsigned char)c)
			NEXT(1, 1)
			continue;
		}
		UCS4INVALID(c)

		REQUIRE_OUTBUF(2)

		if (c >= 0xac00 && c <= 0xd7a3) {
			/* precomposed syllable: pack three 5-bit fields */
			c -= 0xac00;
			code = 0x8000 |
				(u2johabidx_choseong[c / 588] << 10) |
				(u2johabidx_jungseong[(c / 28) % 21] << 5) |
				u2johabidx_jongseong[c % 28];
		}
		else if (c >= 0x3131 && c <= 0x3163)
			code = u2johabjamo[c - 0x3131];
		else TRYMAP_ENC(cp949, code, c) {
			unsigned char c1, c2, t2;
			unsigned short t1;

			/* the syllable range was handled above, so only codes
			 * with the MSB clear are expected from the table here */
			assert((code & 0x8000) == 0);
			c1 = code >> 8;
			c2 = code & 0xff;
			/* only rows 0x21-0x2c and 0x4a-0x7d, columns 0x21-0x7e,
			 * are converted; anything else is unencodable */
			if (((c1 >= 0x21 && c1 <= 0x2c) ||
			    (c1 >= 0x4a && c1 <= 0x7d)) &&
			    (c2 >= 0x21 && c2 <= 0x7e)) {
				/* t1 >> 1 becomes the lead byte; the low bit of
				 * t1 selects the upper half (+0x5e) of the
				 * trail-byte range */
				t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
						  (c1 - 0x21 + 0x197));
				t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
				OUT1(t1 >> 1)
				OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
				NEXT(1, 2)
				continue;
			}
			else
				return 1;
		}
		else
			return 1;

		/* syllable and jamo paths: emit the 16-bit code big-endian */
		OUT1(code >> 8)
		OUT2(code & 0xff)
		NEXT(1, 2)
	}

	return 0;
}

/*
 * Markers used inside the JOHAB decode tables below.  DECODER(johab)
 * rejects the sequence (returns 2) when any component maps to NONE, and
 * treats FILL as "this component is absent".  Both macros are #undef'ed
 * after DECODER(johab).
 */
#define FILL 0xfd
#define NONE 0xff

/*
 * johabidx_*: indexed by the 5-bit component field extracted from the
 * two input bytes; the value is the component index used to compute the
 * precomposed syllable (0xac00 + cho * 588 + jung * 28 + jong).
 */
static const unsigned char johabidx_choseong[32] = {
    NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
    0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
    NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabidx_jungseong[32] = {
    NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
    NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
    NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
    NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
};
static const unsigned char johabidx_jongseong[32] = {
    NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
    0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
};

/*
 * johabjamo_*: indexed by the same 5-bit fields; the value is OR'ed with
 * 0x3100 by DECODER(johab) when only one component is present.
 */
static const unsigned char johabjamo_choseong[32] = {
    NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
    0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
    0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
    NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabjamo_jungseong[32] = {
    NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
    NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
    NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
};
static const unsigned char johabjamo_jongseong[32] = {
    NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
    0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
    0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
};

/*
 * Decode JOHAB bytes to Unicode.
 *
 * Bytes below 0x80 are copied through.  For a two-byte sequence:
 *   - lead byte < 0xd8: the 16-bit value is split into three 5-bit
 *     component fields, which produce a precomposed syllable, a single
 *     U+31xx jamo, or U+3000 when all three components are fillers;
 *   - otherwise the pair is converted to a row/column pair and looked up
 *     in the ksx1001 decode map.
 *
 * Returns 0 when the input is exhausted, 2 for an invalid two-byte
 * sequence, or MBERR_TOOSMALL / MBERR_TOOFEW from the REQUIRE_* macros.
 */
DECODER(johab)
{
	while (inleft > 0) {
		unsigned char    c = IN1, c2;

		REQUIRE_OUTBUF(1)

		if (c < 0x80) {
			OUT1(c)
			NEXT(1, 1)
			continue;
		}

		REQUIRE_INBUF(2)
		c2 = IN2;

		if (c < 0xd8) {
			/* johab hangul */
			unsigned char c_cho, c_jung, c_jong;
			unsigned char i_cho, i_jung, i_jong;

			/* three 5-bit fields: bits 10-14, 5-9 and 0-4 of the
			 * 16-bit value (c << 8) | c2 */
			c_cho = (c >> 2) & 0x1f;
			c_jung = ((c << 3) | c2 >> 5) & 0x1f;
			c_jong = c2 & 0x1f;

			i_cho = johabidx_choseong[c_cho];
			i_jung = johabidx_jungseong[c_jung];
			i_jong = johabidx_jongseong[c_jong];

			if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
				return 2;

			/* we don't use U+1100 hangul jamo yet. */
			if (i_cho == FILL) {
				if (i_jung == FILL) {
					if (i_jong == FILL)
						/* all three components are fillers */
						OUT1(0x3000)
					else
						/* final consonant only */
						OUT1(0x3100 |
						  johabjamo_jongseong[c_jong])
				}
				else {
					if (i_jong == FILL)
						/* vowel only */
						OUT1(0x3100 |
						  johabjamo_jungseong[c_jung])
					else
						/* vowel + final without an initial
						 * is rejected */
						return 2;
				}
			} else {
				if (i_jung == FILL) {
					if (i_jong == FILL)
						/* initial consonant only */
						OUT1(0x3100 |
						  johabjamo_choseong[c_cho])
					else
						/* initial + final without a vowel
						 * is rejected */
						return 2;
				}
				else
					/* full syllable; a filler final counts
					 * as index 0 */
					OUT1(0xac00 +
					     i_cho * 588 +
					     i_jung * 28 +
					     (i_jong == FILL ? 0 : i_jong))
			}
			NEXT(2, 1)
		} else {
			/* KS X 1001 except hangul jamos and syllables */
			if (c == 0xdf || c > 0xf9 ||
			    c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
			    (c2 & 0x7f) == 0x7f ||
			    (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
				return 2;
			else {
				unsigned char t1, t2;

				/* undo the row/column packing: each lead byte
				 * covers two rows, and the trail byte selects
				 * which one (t2 >= 0x5e means the second) */
				t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
						 2 * c - 0x197);
				t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
				t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
				t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;

				TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
				else return 2;
				NEXT(2, 1)
			}
		}
	}

	return 0;
}
#undef NONE
#undef FILL


/*
 * Mapping tables exported by this module.  ksx1001 and cp949ext are
 * exported as decode maps only, cp949 as an encode map only.
 */
BEGIN_MAPPINGS_LIST
  MAPPING_DECONLY(ksx1001)
  MAPPING_ENCONLY(cp949)
  MAPPING_DECONLY(cp949ext)
END_MAPPINGS_LIST

/* Codecs provided by this module; none of them carries state. */
BEGIN_CODECS_LIST
  CODEC_STATELESS(euc_kr)
  CODEC_STATELESS(cp949)
  CODEC_STATELESS(johab)
END_CODECS_LIST

/* Expands to the init_codecs_kr() module initialization function. */
I_AM_A_MODULE_FOR(kr)

--- NEW FILE: _codecs_tw.c ---
/*
 * _codecs_tw.c: Codecs collection for Taiwan's encodings
 *
 * Written by Hye-Shik Chang <perky at FreeBSD.org>
 * $CJKCodecs: _codecs_tw.c,v 1.10 2004/07/07 14:59:26 perky Exp $
 */

#include "cjkcodecs.h"
#include "mappings_tw.h"

/*
 * BIG5 codec
 */

ENCODER(big5)
{
	while (inleft > 0) {
		Py_UNICODE c = **inbuf;
		DBCHAR code;

		if (c < 0x80) {
			REQUIRE_OUTBUF(1)
			**outbuf = (unsigned char)c;
			NEXT(1, 1)
			continue;
		}
		UCS4INVALID(c)

		REQUIRE_OUTBUF(2)

		TRYMAP_ENC(big5, code, c);
		else return 1;

		OUT1(code >> 8)
		OUT2(code & 0xFF)
		NEXT(1, 2)
	}

	return 0;
}

/*
 * Decode Big5 bytes to Unicode.
 *
 * Bytes below 0x80 are copied through; any other byte starts a two-byte
 * sequence that is looked up in the big5 decode map.
 *
 * Returns 0 when the input is exhausted, 2 when a two-byte sequence has
 * no mapping, or MBERR_TOOSMALL / MBERR_TOOFEW from the REQUIRE_* macros.
 */
DECODER(big5)
{
	while (inleft > 0) {
		unsigned char lead = IN1;

		REQUIRE_OUTBUF(1)

		if (lead >= 0x80) {
			REQUIRE_INBUF(2)
			TRYMAP_DEC(big5, **outbuf, lead, IN2);
			else return 2;

			NEXT(2, 1)
		}
		else {
			OUT1(lead)
			NEXT(1, 1)
		}
	}

	return 0;
}


/*
 * CP950 codec
 */

/*
 * Encode Unicode to CP950.
 *
 * Code points below 0x80 are written as one byte.  Everything else is
 * looked up in the cp950ext encode map first and in the big5 encode map
 * second; the result is written as two bytes, high byte first.
 *
 * Returns 0 when the input is exhausted, 1 for an unencodable
 * character, or MBERR_TOOSMALL from the output-space checks.
 */
ENCODER(cp950)
{
	while (inleft > 0) {
		Py_UNICODE ch = IN1;
		DBCHAR dbc;

		if (ch < 0x80) {
			WRITE1((unsigned char)ch)
			NEXT(1, 1)
			continue;
		}
		UCS4INVALID(ch)

		REQUIRE_OUTBUF(2)
		TRYMAP_ENC(cp950ext, dbc, ch) {
			/* found in the CP950 extension table */
		}
		else TRYMAP_ENC(big5, dbc, ch) {
			/* found in the plain Big5 table */
		}
		else return 1;

		OUT1(dbc >> 8)
		OUT2(dbc & 0xFF)
		NEXT(1, 2)
	}

	return 0;
}

/*
 * Decode CP950 bytes to Unicode.
 *
 * Bytes below 0x80 are copied through.  A two-byte sequence is looked up
 * in the cp950ext decode map first and in the big5 decode map second.
 *
 * Returns 0 when the input is exhausted, 2 when neither map has the
 * sequence, or MBERR_TOOSMALL / MBERR_TOOFEW from the REQUIRE_* macros.
 */
DECODER(cp950)
{
	while (inleft > 0) {
		unsigned char lead = IN1;

		REQUIRE_OUTBUF(1)

		if (lead < 0x80) {
			OUT1(lead)
			NEXT(1, 1)
			continue;
		}

		REQUIRE_INBUF(2)

		TRYMAP_DEC(cp950ext, **outbuf, lead, IN2) {
			NEXT(2, 1)
		}
		else TRYMAP_DEC(big5, **outbuf, lead, IN2) {
			NEXT(2, 1)
		}
		else return 2;
	}

	return 0;
}



/* Mapping tables exported by this module, each in both directions. */
BEGIN_MAPPINGS_LIST
  MAPPING_ENCDEC(big5)
  MAPPING_ENCDEC(cp950ext)
END_MAPPINGS_LIST

/* Codecs provided by this module; none of them carries state. */
BEGIN_CODECS_LIST
  CODEC_STATELESS(big5)
  CODEC_STATELESS(cp950)
END_CODECS_LIST

/* Expands to the init_codecs_tw() module initialization function. */
I_AM_A_MODULE_FOR(tw)

--- NEW FILE: alg_jisx0201.h ---
/* $CJKCodecs: alg_jisx0201.h,v 1.2 2004/06/29 05:42:08 perky Exp $ */

/*
 * JIS X 0201 conversion helpers.
 *
 * Every macro expands to an if / else-if chain that stores the converted
 * value in `assi` when `c` is convertible and does nothing otherwise.
 * The chain is left open on purpose so that a caller can append its own
 * `else` branch for the "not convertible" case.
 */

/* Unicode -> JIS X 0201 Roman: 0x5c and 0x7e are replaced by U+00A5 and
 * U+203E; every other value below 0x80 maps to itself. */
#define JISX0201_R_ENCODE(c, assi)				\
	if ((c) == 0x00a5)					\
		(assi) = 0x5c;					\
	else if ((c) == 0x203e)					\
		(assi) = 0x7e;					\
	else if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e)	\
		(assi) = (c);
/* Unicode -> JIS X 0201 Katakana: U+FF61..U+FF9F become 0xa1..0xdf. */
#define JISX0201_K_ENCODE(c, assi)				\
	if (!((c) < 0xff61 || (c) > 0xff9f))			\
		(assi) = (c) - 0xfec0;
/* Roman first, then Katakana. */
#define JISX0201_ENCODE(c, assi)				\
	JISX0201_R_ENCODE(c, assi)				\
	else JISX0201_K_ENCODE(c, assi)

/* JIS X 0201 Roman -> Unicode: inverse of JISX0201_R_ENCODE for values
 * up to 0x7f. */
#define JISX0201_R_DECODE(c, assi)				\
	if ((c) == 0x5c)					\
		(assi) = 0x00a5;				\
	else if ((c) == 0x7e)					\
		(assi) = 0x203e;				\
	else if ((c) <= 0x7f)					\
		(assi) = (c);
/* JIS X 0201 Katakana -> Unicode: 0xa1..0xdf become U+FF61..U+FF9F. */
#define JISX0201_K_DECODE(c, assi)				\
	if (!((c) < 0xa1 || (c) > 0xdf))			\
		(assi) = 0xfec0 + (c);
/* Roman first, then Katakana. */
#define JISX0201_DECODE(c, assi)				\
	JISX0201_R_DECODE(c, assi)				\
	else JISX0201_K_DECODE(c, assi)

--- NEW FILE: cjkcodecs.h ---
/*
 * cjkcodecs.h: common header for cjkcodecs
 *
 * Written by Hye-Shik Chang <perky at FreeBSD.org>
 * $CJKCodecs: cjkcodecs.h,v 1.6 2004/07/18 15:22:31 perky Exp $
 */

#ifndef _CJKCODECS_H_
#define _CJKCODECS_H_

#include "Python.h"
#include "multibytecodec.h"


/* Marker values stored in the mapping tables. */
/* UNIINV: decode-table entry with no mapping (tested by _TRYMAP_DEC). */
#define UNIINV	Py_UNICODE_REPLACEMENT_CHARACTER
/* NOCHAR: encode-table entry with no mapping (tested by _TRYMAP_ENC). */
#define NOCHAR	0xFFFF
/* MULTIC: NOTE(review): not used in this part of the file; presumably
 * marks entries that need multi-character handling -- confirm. */
#define MULTIC	0xFFFE
/* DBCINV: returned by find_pairencmap() when the pair is not found. */
#define DBCINV	0xFFFD

/* shorter macros to save source size of mapping tables */
#define U UNIINV
#define N NOCHAR
#define M MULTIC
#define D DBCINV

/*
 * Index entries of the two-level mapping tables.  The _TRYMAP_* macros
 * select an entry with the first key, then accept a second key `val`
 * only when `map` is non-NULL and bottom <= val <= top, reading
 * map[val - bottom].
 */

/* Decode index entry: yields 16-bit Unicode values. */
struct dbcs_index {
	const ucs2_t *map;
	unsigned char bottom, top;
};
typedef struct dbcs_index decode_map;

/* Decode index entry whose values are ucs4_t. */
struct widedbcs_index {
	const ucs4_t *map;
	unsigned char bottom, top;
};
typedef struct widedbcs_index widedecode_map;

/* Encode index entry: yields DBCHAR codes. */
struct unim_index {
	const DBCHAR *map;
	unsigned char bottom, top;
};
typedef struct unim_index encode_map;

/* Encode index entry with byte-wide values; _TRYMAP_ENC_MPLANE reads
 * three consecutive bytes per key from this kind of map. */
struct unim_index_bytebased {
	const unsigned char *map;
	unsigned char bottom, top;
};

/* One named charset with its encode and/or decode index; either pointer
 * may be NULL (see MAPPING_ENCONLY / MAPPING_DECONLY). */
struct dbcs_map {
	const char *charset;
	const struct unim_index *encmap;
	const struct dbcs_index *decmap;
};

/* Entry of a sorted pair table searched by find_pairencmap(): uniseq
 * holds the first character in the upper 16 bits and the second in the
 * lower 16 bits. */
struct pair_encodemap {
	ucs4_t uniseq;
	DBCHAR code;
};

/* Tentative definitions; END_CODECS_LIST and END_MAPPINGS_LIST provide
 * the initialized definitions in each codec module. */
static const MultibyteCodec *codec_list;
static const struct dbcs_map *mapping_list;

/*
 * Function-header macros.  Each expands to the `static int` declarator of
 * one codec entry point named <encoding>_<role>; the function body
 * follows the macro invocation.  The parameter names introduced here
 * (state, config, inbuf, inleft, outbuf, outleft, flags) are the ones the
 * buffer macros below rely on.
 */
#define CODEC_INIT(encoding)						\
	static int encoding##_codec_init(const void *config)

#define ENCODER_INIT(encoding)						\
	static int encoding##_encode_init(				\
		MultibyteCodec_State *state, const void *config)
/* Encoder: reads Py_UNICODE from *inbuf, writes bytes to *outbuf. */
#define ENCODER(encoding)						\
	static int encoding##_encode(					\
		MultibyteCodec_State *state, const void *config,	\
		const Py_UNICODE **inbuf, size_t inleft,		\
		unsigned char **outbuf, size_t outleft, int flags)
#define ENCODER_RESET(encoding)						\
	static int encoding##_encode_reset(				\
		MultibyteCodec_State *state, const void *config,	\
		unsigned char **outbuf, size_t outleft)

#define DECODER_INIT(encoding)						\
	static int encoding##_decode_init(				\
		MultibyteCodec_State *state, const void *config)
/* Decoder: reads bytes from *inbuf, writes Py_UNICODE to *outbuf. */
#define DECODER(encoding)						\
	static int encoding##_decode(					\
		MultibyteCodec_State *state, const void *config,	\
		const unsigned char **inbuf, size_t inleft,		\
		Py_UNICODE **outbuf, size_t outleft)
#define DECODER_RESET(encoding)						\
	static int encoding##_decode_reset(				\
		MultibyteCodec_State *state, const void *config)

/*
 * UCS4INVALID(code): on builds with 4-byte Py_UNICODE, makes the
 * enclosing function return 1 for code points above 0xFFFF; on other
 * builds it expands to a no-op `if`.
 */
#if Py_UNICODE_SIZE == 4
#define UCS4INVALID(code)	\
	if ((code) > 0xFFFF)	\
	return 1;
#else
#define UCS4INVALID(code)	\
	if (0) ;
#endif

/* Advance the input / output cursors and shrink the remaining counts.
 * These expand to several statements, so they must not be used as the
 * unbraced body of an if/else. */
#define NEXT_IN(i)				\
	(*inbuf) += (i);			\
	(inleft) -= (i);
#define NEXT_OUT(o)				\
	(*outbuf) += (o);			\
	(outleft) -= (o);
#define NEXT(i, o)				\
	NEXT_IN(i) NEXT_OUT(o)

/* Return from the enclosing function when fewer than n input items
 * (MBERR_TOOFEW) or output slots (MBERR_TOOSMALL) remain. */
#define REQUIRE_INBUF(n)			\
	if (inleft < (n))			\
		return MBERR_TOOFEW;
#define REQUIRE_OUTBUF(n)			\
	if (outleft < (n))			\
		return MBERR_TOOSMALL;

/* Peek at the next input items without consuming them. */
#define IN1 ((*inbuf)[0])
#define IN2 ((*inbuf)[1])
#define IN3 ((*inbuf)[2])
#define IN4 ((*inbuf)[3])

/* Store into the next output slots without any space check and without
 * advancing; each expands to a complete statement. */
#define OUT1(c) ((*outbuf)[0]) = (c);
#define OUT2(c) ((*outbuf)[1]) = (c);
#define OUT3(c) ((*outbuf)[2]) = (c);
#define OUT4(c) ((*outbuf)[3]) = (c);

/* Like OUTn, but check for room first (REQUIRE_OUTBUF); the cursor is
 * still not advanced. */
#define WRITE1(c1)		\
	REQUIRE_OUTBUF(1)	\
	(*outbuf)[0] = (c1);
#define WRITE2(c1, c2)		\
	REQUIRE_OUTBUF(2)	\
	(*outbuf)[0] = (c1);	\
	(*outbuf)[1] = (c2);
#define WRITE3(c1, c2, c3)	\
	REQUIRE_OUTBUF(3)	\
	(*outbuf)[0] = (c1);	\
	(*outbuf)[1] = (c2);	\
	(*outbuf)[2] = (c3);
#define WRITE4(c1, c2, c3, c4)	\
	REQUIRE_OUTBUF(4)	\
	(*outbuf)[0] = (c1);	\
	(*outbuf)[1] = (c2);	\
	(*outbuf)[2] = (c3);	\
	(*outbuf)[3] = (c4);

/*
 * WRITEUCS4(c): write one code point to the decoder output and advance
 * the output cursor.  With 2-byte Py_UNICODE the value is written as a
 * surrogate pair (the arithmetic assumes c >= 0x10000); otherwise it is
 * stored directly.
 */
#if Py_UNICODE_SIZE == 2
# define WRITEUCS4(c)						\
	REQUIRE_OUTBUF(2)					\
	(*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10);	\
	(*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff);	\
	NEXT_OUT(2)
#else
# define WRITEUCS4(c)						\
	REQUIRE_OUTBUF(1)					\
	**outbuf = (Py_UNICODE)(c);				\
	NEXT_OUT(1)
#endif

/*
 * Table lookups.  Each TRYMAP_* macro expands to an `if (...)` header
 * with no body: the caller supplies the success statement and may add an
 * `else`.  Note that `assi` is assigned as part of the condition, so it
 * may have been overwritten with the "no mapping" marker even when the
 * lookup fails.
 */
#define _TRYMAP_ENC(m, assi, val)				\
	if ((m)->map != NULL && (val) >= (m)->bottom &&		\
	    (val)<= (m)->top && ((assi) = (m)->map[(val) -	\
	    (m)->bottom]) != NOCHAR)
/* Encode lookup: the index entry is chosen by the high byte of uni, the
 * value by its low byte. */
#define TRYMAP_ENC(charset, assi, uni)				\
	_TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)
#define _TRYMAP_DEC(m, assi, val)				\
	if ((m)->map != NULL && (val) >= (m)->bottom &&		\
	    (val)<= (m)->top && ((assi) = (m)->map[(val) -	\
	    (m)->bottom]) != UNIINV)
/* Decode lookup: the index entry is chosen by c1, the value by c2. */
#define TRYMAP_DEC(charset, assi, c1, c2)			\
	_TRYMAP_DEC(&charset##_decmap[c1], assi, c2)

/* Encode lookup in a byte-based map with three bytes per key; succeeds
 * only when the first byte (assplane) is non-zero. */
#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val)	\
	if ((m)->map != NULL && (val) >= (m)->bottom &&		\
	    (val)<= (m)->top &&					\
	    ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
	    (((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
	    (((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni)	\
	_TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
			   assplane, asshi, asslo, (uni) & 0xff)
/* Decode lookup in a decode map that has an extra leading plane index. */
#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2)		\
	_TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)

/*
 * DECODE_SURROGATE(c): with 2-byte Py_UNICODE, when c is a high
 * surrogate and the next input item is a low surrogate, replace c with
 * the combined code point (the input cursor is not advanced).
 * GET_INSIZE(c) gives the number of input items such a value occupies.
 * NOTE(review): `c` is not parenthesized in the expansion, so it should
 * be passed as a plain variable.
 */
#if Py_UNICODE_SIZE == 2
#define DECODE_SURROGATE(c)					\
	if (c >> 10 == 0xd800 >> 10) { /* high surrogate */	\
		REQUIRE_INBUF(2)				\
		if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
		    c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
			((ucs4_t)(IN2) - 0xdc00);		\
		}						\
	}
#define GET_INSIZE(c)	((c) > 0xffff ? 2 : 1)
#else
#define DECODE_SURROGATE(c) {;}
#define GET_INSIZE(c)	1
#endif

/*
 * Macros that build the per-module table of exported mappings.  The list
 * is terminated by an entry whose charset name is the empty string, and
 * END_MAPPINGS_LIST also defines `mapping_list` to point at the table.
 */
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
#define END_MAPPINGS_LIST				\
	{"", NULL, NULL} };				\
	static const struct dbcs_map *mapping_list =	\
		(const struct dbcs_map *)_mapping_list;

/*
 * Macros that build the per-module table of codecs.  Each CODEC_* entry
 * starts with the stringized encoding name followed by two further
 * fields and six function pointers; stateless codecs pass NULL for the
 * init and reset functions.  The list is terminated by an entry with an
 * empty name, and END_CODECS_LIST defines `codec_list`.
 */
#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {
#define _STATEFUL_METHODS(enc)		\
	enc##_encode,			\
	enc##_encode_init,		\
	enc##_encode_reset,		\
	enc##_decode,			\
	enc##_decode_init,		\
	enc##_decode_reset,
#define _STATELESS_METHODS(enc)		\
	enc##_encode, NULL, NULL,	\
	enc##_decode, NULL, NULL,
#define CODEC_STATEFUL(enc) {		\
	#enc, NULL, NULL,		\
	_STATEFUL_METHODS(enc)		\
},
#define CODEC_STATELESS(enc) {		\
	#enc, NULL, NULL,		\
	_STATELESS_METHODS(enc)		\
},
/* Stateless codec that additionally supplies <enc>_codec_init. */
#define CODEC_STATELESS_WINIT(enc) {	\
	#enc, NULL,			\
	enc##_codec_init,		\
	_STATELESS_METHODS(enc)		\
},
#define END_CODECS_LIST					\
	{"", NULL,} };					\
	static const MultibyteCodec *codec_list =	\
		(const MultibyteCodec *)_codec_list;

/*
 * Return the "__create_codec" attribute of the _multibytecodec module.
 *
 * The attribute is fetched on the first successful call and cached in a
 * function-local static for later calls.  Returns NULL when the import
 * or the attribute lookup fails; nothing is cached in that case, so the
 * next call tries again.
 */
static PyObject *
getmultibytecodec(void)
{
	static PyObject *cached = NULL;
	PyObject *mbc_module;

	if (cached != NULL)
		return cached;

	mbc_module = PyImport_ImportModule("_multibytecodec");
	if (mbc_module == NULL)
		return NULL;

	cached = PyObject_GetAttrString(mbc_module, "__create_codec");
	Py_DECREF(mbc_module);
	return cached;
}

/*
 * getcodec(encoding) -- module-level function.
 *
 * Looks up `encoding` (a Python string) in this module's codec_list,
 * wraps the matching MultibyteCodec entry in a CObject and passes it to
 * the factory returned by getmultibytecodec().
 *
 * Returns the factory's result, or NULL with an exception set:
 * TypeError when `encoding` is not a string, LookupError when the name
 * is not in the list.
 */
static PyObject *
getcodec(PyObject *self, PyObject *encoding)
{
	PyObject *wrapped, *result, *factory;
	const MultibyteCodec *entry;
	const char *wanted;

	if (!PyString_Check(encoding)) {
		PyErr_SetString(PyExc_TypeError,
				"encoding name must be a string.");
		return NULL;
	}

	factory = getmultibytecodec();
	if (factory == NULL)
		return NULL;

	/* walk the list up to its empty-name terminator */
	wanted = PyString_AS_STRING(encoding);
	entry = codec_list;
	while (entry->encoding[0] && strcmp(entry->encoding, wanted) != 0)
		entry++;

	if (entry->encoding[0] == '\0') {
		PyErr_SetString(PyExc_LookupError,
				"no such codec is supported.");
		return NULL;
	}

	wrapped = PyCObject_FromVoidPtr((void *)entry, NULL);
	if (wrapped == NULL)
		return NULL;

	result = PyObject_CallFunctionObjArgs(factory, wrapped, NULL);
	Py_DECREF(wrapped);

	return result;
}

/* Method table shared by every _codecs_* module: a single function,
 * getcodec(encoding), taking one object argument (METH_O). */
static struct PyMethodDef __methods[] = {
	{"getcodec", (PyCFunction)getcodec, METH_O, ""},
	{NULL, NULL},
};

/*
 * Publish every entry of mapping_list as a module attribute.
 *
 * For each mapping, an attribute named "__map_<charset>" is added to
 * `module`; its value is a CObject wrapping a pointer to the entry.
 *
 * Returns 0 on success and -1 on failure.  A NULL `module` (for example
 * from a failed Py_InitModule) is treated as a failure instead of being
 * dereferenced.
 */
static int
register_maps(PyObject *module)
{
	static const char prefix[] = "__map_";
	const size_t prefixlen = sizeof(prefix) - 1;
	const struct dbcs_map *h;

	if (module == NULL)
		return -1;

	for (h = mapping_list; h->charset[0] != '\0'; h++) {
		char mhname[256];
		PyObject *cobj;

		/* make sure prefix + name + NUL fits before copying */
		if (strlen(h->charset) >= sizeof(mhname) - prefixlen) {
			PyErr_SetString(PyExc_SystemError,
					"charset name of a mapping is too long.");
			return -1;
		}
		strcpy(mhname, prefix);
		strcpy(mhname + prefixlen, h->charset);

		cobj = PyCObject_FromVoidPtr((void *)h, NULL);
		if (cobj == NULL)
			return -1;

		/* PyModule_AddObject takes over the reference only when it
		 * succeeds, so release it ourselves on failure */
		if (PyModule_AddObject(module, mhname, cobj) == -1) {
			Py_DECREF(cobj);
			return -1;
		}
	}
	return 0;
}

#ifdef USING_BINARY_PAIR_SEARCH
/*
 * Binary-search a table of character pairs.
 *
 * `haystack` must hold `haystacksize` entries sorted by `uniseq` in
 * ascending order.  The search key is `body` in the upper 16 bits and
 * `modifier` in the lower 16 bits.
 *
 * Returns the `code` of the matching entry, or DBCINV when there is no
 * match or the table is empty.
 */
static DBCHAR
find_pairencmap(ucs2_t body, ucs2_t modifier,
		const struct pair_encodemap *haystack, int haystacksize)
{
	int pos, min, max;
	/* widen before shifting: a 16-bit operand is promoted to (signed)
	 * int, and shifting a value >= 0x8000 left by 16 would overflow */
	ucs4_t value = ((ucs4_t)body << 16) | modifier;

	/* nothing to search; also avoids reading haystack[0] below */
	if (haystacksize <= 0)
		return DBCINV;

	min = 0;
	max = haystacksize;

	for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1) {
		if (value < haystack[pos].uniseq) {
			if (max == pos)
				break;
			max = pos;
		}
		else if (value > haystack[pos].uniseq) {
			if (min == pos)
				break;
			min = pos;
		}
		else
			break;
	}

	if (value == haystack[pos].uniseq)
		return haystack[pos].code;
	return DBCINV;
}
#endif

#ifdef USING_IMPORTED_MAPS
#define IMPORT_MAP(locale, charset, encmap, decmap) \
	importmap("_codecs_" #locale, "__map_" #charset, \
		  (const void**)encmap, (const void**)decmap)

static int
importmap(const char *modname, const char *symbol,
	  const void **encmap, const void **decmap)
{
	PyObject *o, *mod;

	mod = PyImport_ImportModule((char *)modname);
	if (mod == NULL)
		return -1;

	o = PyObject_GetAttrString(mod, (char*)symbol);
	if (o == NULL)
		goto errorexit;
	else if (!PyCObject_Check(o)) {
		PyErr_SetString(PyExc_ValueError,
				"map data must be a CObject.");
		goto errorexit;
	}
	else {
		struct dbcs_map *map;
		map = PyCObject_AsVoidPtr(o);
		if (encmap != NULL)
			*encmap = map->encmap;
		if (decmap != NULL)
			*decmap = map->decmap;
		Py_DECREF(o);
	}

	Py_DECREF(mod);
	return 0;

errorexit:
	Py_DECREF(mod);
	return -1;
}
#endif

/*
 * I_AM_A_MODULE_FOR(loc) defines init_codecs_<loc>(), the initialization
 * function of the "_codecs_<loc>" extension module.  It creates the
 * module with the shared method table and then registers the module's
 * mapping tables.  When module creation fails, Py_InitModule returns
 * NULL and registration is skipped so that NULL is never passed on.
 */
#define I_AM_A_MODULE_FOR(loc)						\
	void								\
	init_codecs_##loc(void)						\
	{								\
		PyObject *m = Py_InitModule("_codecs_" #loc, __methods);\
		if (m != NULL)						\
			(void)register_maps(m);				\
	}

#endif

--- NEW FILE: emu_jisx0213_2000.h ---
/* $CJKCodecs: emu_jisx0213_2000.h,v 1.3 2004/07/08 02:53:37 perky Exp $ */

/* These routines may be quite inefficient, but they are used only to emulate
 * old standards. */

/* Value returned from the enclosing encoder when a character is rejected
 * by the emulation; it can be overridden before including this header. */
#ifndef EMULATE_JISX0213_2000_ENCODE_INVALID
#define EMULATE_JISX0213_2000_ENCODE_INVALID 1
#endif

/*
 * Encoder-side emulation for BMP characters.  It is active only when the
 * enclosing function's `config` equals (void *)2000.  In that case the
 * ten listed characters make the enclosing function return
 * EMULATE_JISX0213_2000_ENCODE_INVALID, and U+9B1D is assigned the code
 * 0x8000 | 0x7d3b.  The expansion is an open if / else-if chain, so the
 * caller may append an `else` for characters not handled here.
 */
#define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c)			\
	if (config == (void *)2000 && (					\
			(c) == 0x9B1C || (c) == 0x4FF1 ||		\
			(c) == 0x525D || (c) == 0x541E ||		\
			(c) == 0x5653 || (c) == 0x59F8 ||		\
			(c) == 0x5C5B || (c) == 0x5E77 ||		\
			(c) == 0x7626 || (c) == 0x7E6B))		\
		return EMULATE_JISX0213_2000_ENCODE_INVALID;		\
	else if (config == (void *)2000 && (c) == 0x9B1D)		\
		(assi) = 0x8000 | 0x7d3b;

/* Same for characters outside the BMP: U+20B9F is rejected when `config`
 * equals (void *)2000.  `assi` is accepted for symmetry but unused. */
#define EMULATE_JISX0213_2000_ENCODE_EMP(assi, c)			\
	if (config == (void *)2000 && (c) == 0x20B9F)			\
		return EMULATE_JISX0213_2000_ENCODE_INVALID;

/* Value returned from the enclosing decoder when a byte pair is rejected
 * by the emulation; it can be overridden before including this header. */
#ifndef EMULATE_JISX0213_2000_DECODE_INVALID
#define EMULATE_JISX0213_2000_DECODE_INVALID 2
#endif

/*
 * Decoder-side emulation for plane 1.  When the enclosing function's
 * `config` equals (void *)2000, the ten listed (c1, c2) pairs make the
 * enclosing function return EMULATE_JISX0213_2000_DECODE_INVALID.
 * `assi` is not used by this macro.
 */
#define EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2)		\
	if (config == (void *)2000 &&					\
			(((c1) == 0x2E && (c2) == 0x21) ||		\
			 ((c1) == 0x2F && (c2) == 0x7E) ||		\
			 ((c1) == 0x4F && (c2) == 0x54) ||		\
			 ((c1) == 0x4F && (c2) == 0x7E) ||		\
			 ((c1) == 0x74 && (c2) == 0x27) ||		\
			 ((c1) == 0x7E && (c2) == 0x7A) ||		\
			 ((c1) == 0x7E && (c2) == 0x7B) ||		\
			 ((c1) == 0x7E && (c2) == 0x7C) ||		\
			 ((c1) == 0x7E && (c2) == 0x7D) ||		\
			 ((c1) == 0x7E && (c2) == 0x7E)))		\
		return EMULATE_JISX0213_2000_DECODE_INVALID;

/* Plane 2: under the same `config`, the pair 0x7D 0x3B decodes to U+9B1D
 * (the counterpart of the U+9B1D case in the encoder-side macro). */
#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2)		\
	if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B)	\
		(assi) = 0x9B1D;

--- NEW FILE: mappings_cn.h ---
/*
 * $CJKCodecs: mappings_cn.h,v 1.1 2004/07/07 14:59:27 perky Exp $
 */

static const ucs2_t __gb2312_decmap[7482] = {
12288,12289,12290,12539,713,711,168,12291,12293,8213,65374,8214,8230,8216,
8217,8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303,
12310,12311,12304,12305,177,215,247,8758,8743,8744,8721,8719,8746,8745,8712,
8759,8730,8869,8741,8736,8978,8857,8747,8750,8801,8780,8776,8765,8733,8800,
8814,8815,8804,8805,8734,8757,8756,9794,9792,176,8242,8243,8451,65284,164,
65504,65505,8240,167,8470,9734,9733,9675,9679,9678,9671,9670,9633,9632,9651,
9650,8251,8594,8592,8593,8595,12307,9352,9353,9354,9355,9356,9357,9358,9359,
9360,9361,9362,9363,9364,9365,9366,9367,9368,9369,9370,9371,9332,9333,9334,
9335,9336,9337,9338,9339,9340,9341,9342,9343,9344,9345,9346,9347,9348,9349,
9350,9351,9312,9313,9314,9315,9316,9317,9318,9319,9320,9321,U,U,12832,12833,
12834,12835,12836,12837,12838,12839,12840,12841,U,U,8544,8545,8546,8547,8548,
8549,8550,8551,8552,8553,8554,8555,65281,65282,65283,65509,65285,65286,65287,
65288,65289,65290,65291,65292,65293,65294,65295,65296,65297,65298,65299,65300,
65301,65302,65303,65304,65305,65306,65307,65308,65309,65310,65311,65312,65313,
[...4068 lines suppressed...]
13268,12395},{13270,13382,12397},{13384,13426,12510},{13428,13725,12553},{
13727,13837,12851},{13839,13849,12962},{13851,14615,12973},{14617,14701,13738
},{14703,14798,13823},{14801,14814,13919},{14816,14962,13933},{14964,15181,
14080},{15183,15469,14298},{15471,15583,14585},{15585,16469,14698},{16471,
16734,15583},{16736,17206,15847},{17208,17323,16318},{17325,17328,16434},{
17330,17372,16438},{17374,17621,16481},{17623,17995,16729},{17997,18016,17102
},{18018,18210,17122},{18212,18216,17315},{18218,18299,17320},{18301,18316,
17402},{18318,18758,17418},{18760,18809,17859},{18811,18812,17909},{18814,
18817,17911},{18820,18820,17915},{18823,18842,17916},{18844,18846,17936},{
18848,18869,17939},{18872,19574,17961},{19576,19614,18664},{19620,19730,18703
},{19738,19885,18814},{19887,19967,18962},{40870,55295,19043},{59244,59244,
33469},{59336,59336,33470},{59367,59379,33471},{59413,59413,33484},{59417,
59421,33485},{59423,59429,33490},{59431,59434,33497},{59437,59440,33501},{
59443,59450,33505},{59452,59458,33513},{59460,59475,33520},{59478,59491,33536
},{59493,63787,33550},{63789,63864,37845},{63866,63892,37921},{63894,63974,
37948},{63976,63984,38029},{63986,64011,38038},{64016,64016,38064},{64018,
64018,38065},{64021,64023,38066},{64025,64030,38069},{64034,64034,38075},{
64037,64038,38076},{64042,65071,38078},{65074,65074,39108},{65093,65096,39109
},{65107,65107,39113},{65112,65112,39114},{65127,65127,39115},{65132,65280,
39116},{65375,65503,39265},{65510,65535,39394},{0,0,39420}};

--- NEW FILE: mappings_hk.h ---
/*
 * $CJKCodecs: mappings_hk.h,v 1.2 2004/07/07 15:07:23 perky Exp $
 */

static const ucs2_t __big5hkscs_decmap[6095] = {
62211,62212,62213,62214,62215,268,62217,209,205,62220,62221,203,8168,62224,
202,62226,62227,62228,62229,270,62231,62232,256,193,461,192,274,201,282,200,
332,211,465,210,62245,7870,62247,7872,202,257,225,462,224,593,275,233,283,232,
299,237,464,236,333,243,466,242,363,250,468,249,470,472,474,U,U,U,U,U,U,U,U,U,
U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,476,252,62276,7871,62278,
7873,234,609,62282,62283,41897,4421,U,25866,U,U,20029,28381,40270,37343,U,U,
30517,25745,20250,20264,20392,20822,20852,20892,20964,21153,21160,21307,21326,
21457,21464,22242,22768,22788,22791,22834,22836,23398,23454,23455,23706,24198,
24635,25993,26622,26628,26725,27982,28860,30005,32420,32428,32442,32455,32463,
32479,32518,32567,33402,33487,33647,35270,35774,35810,36710,36711,36718,U,U,U,
U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,29713,31996,
32205,26950,31433,21031,U,U,U,U,37260,30904,37214,32956,U,36107,33014,2535,U,
U,32927,40647,19661,40393,40460,19518,40438,28686,40458,41267,13761,U,28314,
33342,29977,U,18705,39532,39567,40857,31111,33900,7626,1488,10982,20004,20097,
[...2305 lines suppressed...]
__big5hkscs_nonbmp_encmap+25830,16,241},{__big5hkscs_nonbmp_encmap+26056,3,201
},{__big5hkscs_nonbmp_encmap+26255,40,77},{__big5hkscs_nonbmp_encmap+26293,5,
213},{__big5hkscs_nonbmp_encmap+26502,115,173},{__big5hkscs_nonbmp_encmap+
26561,62,246},{__big5hkscs_nonbmp_encmap+26746,6,248},{
__big5hkscs_nonbmp_encmap+26989,35,222},{__big5hkscs_nonbmp_encmap+27177,20,
254},{__big5hkscs_nonbmp_encmap+27412,7,245},{__big5hkscs_nonbmp_encmap+27651,
32,255},{__big5hkscs_nonbmp_encmap+27875,169,169},{__big5hkscs_nonbmp_encmap+
27876,52,91},{__big5hkscs_nonbmp_encmap+27916,198,203},{
__big5hkscs_nonbmp_encmap+27922,1,169},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
__big5hkscs_nonbmp_encmap+28091,37,205},{__big5hkscs_nonbmp_encmap+28260,148,
212},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
};

--- NEW FILE: mappings_jisx0213_pair.h ---
/*
 * $CJKCodecs: mappings_jisx0213_pair.h,v 1.2 2004/07/07 15:28:02 perky Exp $
 */

#define JISX0213_ENCPAIRS 46
#ifdef EXTERN_JISX0213_PAIR
static const struct widedbcs_index *jisx0213_pair_decmap;
static const struct pair_encodemap *jisx0213_pair_encmap;
#else
static const ucs4_t __jisx0213_pair_decmap[49] = {
810234010,810365082,810496154,810627226,810758298,816525466,816656538,
816787610,816918682,817049754,817574042,818163866,818426010,838283418,
15074048,U,U,U,39060224,39060225,42730240,42730241,39387904,39387905,39453440,
39453441,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,48825061,48562921,
};

static const struct widedbcs_index jisx0213_pair_decmap[256] = {
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__jisx0213_pair_decmap
+0,119,123},{__jisx0213_pair_decmap+5,119,126},{__jisx0213_pair_decmap+13,120,
120},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__jisx0213_pair_decmap+14,68,102},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
};

static const struct pair_encodemap jisx0213_pair_encmap[JISX0213_ENCPAIRS] = {
{0x00e60000,0x295c},{0x00e60300,0x2b44},{0x02540000,0x2b38},{0x02540300,0x2b48
},{0x02540301,0x2b49},{0x02590000,0x2b30},{0x02590300,0x2b4c},{0x02590301,
0x2b4d},{0x025a0000,0x2b43},{0x025a0300,0x2b4e},{0x025a0301,0x2b4f},{
0x028c0000,0x2b37},{0x028c0300,0x2b4a},{0x028c0301,0x2b4b},{0x02e50000,0x2b60
},{0x02e502e9,0x2b66},{0x02e90000,0x2b64},{0x02e902e5,0x2b65},{0x304b0000,
0x242b},{0x304b309a,0x2477},{0x304d0000,0x242d},{0x304d309a,0x2478},{
0x304f0000,0x242f},{0x304f309a,0x2479},{0x30510000,0x2431},{0x3051309a,0x247a
},{0x30530000,0x2433},{0x3053309a,0x247b},{0x30ab0000,0x252b},{0x30ab309a,
0x2577},{0x30ad0000,0x252d},{0x30ad309a,0x2578},{0x30af0000,0x252f},{
0x30af309a,0x2579},{0x30b10000,0x2531},{0x30b1309a,0x257a},{0x30b30000,0x2533
},{0x30b3309a,0x257b},{0x30bb0000,0x253b},{0x30bb309a,0x257c},{0x30c40000,
0x2544},{0x30c4309a,0x257d},{0x30c80000,0x2548},{0x30c8309a,0x257e},{
0x31f70000,0x2675},{0x31f7309a,0x2678},
};
#endif

--- NEW FILE: mappings_jp.h ---
/*
 * $CJKCodecs: mappings_jp.h,v 1.3 2004/07/07 17:40:27 perky Exp $
 */

static const ucs2_t __jisx0208_decmap[6956] = {
12288,12289,12290,65292,65294,12539,65306,65307,65311,65281,12443,12444,180,
65344,168,65342,65507,65343,12541,12542,12445,12446,12291,20189,12293,12294,
12295,12540,8213,8208,65295,92,12316,8214,65372,8230,8229,8216,8217,8220,8221,
65288,65289,12308,12309,65339,65341,65371,65373,12296,12297,12298,12299,12300,
12301,12302,12303,12304,12305,65291,8722,177,215,247,65309,8800,65308,65310,
8806,8807,8734,8756,9794,9792,176,8242,8243,8451,65509,65284,162,163,65285,
65283,65286,65290,65312,167,9734,9733,9675,9679,9678,9671,9670,9633,9632,9651,
9650,9661,9660,8251,12306,8594,8592,8593,8595,12307,U,U,U,U,U,U,U,U,U,U,U,
8712,8715,8838,8839,8834,8835,8746,8745,U,U,U,U,U,U,U,U,8743,8744,172,8658,
8660,8704,8707,U,U,U,U,U,U,U,U,U,U,U,8736,8869,8978,8706,8711,8801,8786,8810,
8811,8730,8765,8733,8757,8747,8748,U,U,U,U,U,U,U,8491,8240,9839,9837,9834,
8224,8225,182,U,U,U,U,9711,65296,65297,65298,65299,65300,65301,65302,65303,
65304,65305,U,U,U,U,U,U,U,65313,65314,65315,65316,65317,65318,65319,65320,
65321,65322,65323,65324,65325,65326,65327,65328,65329,65330,65331,65332,65333,
[...4730 lines suppressed...]
__jisx0213_emp_encmap+7993,144,144},{__jisx0213_emp_encmap+7994,207,207},{
__jisx0213_emp_encmap+7995,127,240},{__jisx0213_emp_encmap+8109,25,80},{
__jisx0213_emp_encmap+8165,198,198},{0,0,0},{__jisx0213_emp_encmap+8166,114,
114},{0,0,0},{0,0,0},{__jisx0213_emp_encmap+8167,219,219},{
__jisx0213_emp_encmap+8168,21,233},{__jisx0213_emp_encmap+8381,206,206},{
__jisx0213_emp_encmap+8382,26,249},{__jisx0213_emp_encmap+8606,144,144},{0,0,0
},{__jisx0213_emp_encmap+8607,140,140},{__jisx0213_emp_encmap+8608,55,55},{
__jisx0213_emp_encmap+8609,241,241},{__jisx0213_emp_encmap+8610,2,178},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},
};


--- NEW FILE: mappings_kr.h ---
/*
 * $CJKCodecs: mappings_kr.h,v 1.1 2004/07/07 14:59:27 perky Exp $
 */

static const ucs2_t __ksx1001_decmap[8264] = {
12288,12289,12290,183,8229,8230,168,12291,173,8213,8741,65340,8764,8216,8217,
8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303,12304,
12305,177,215,247,8800,8804,8805,8734,8756,176,8242,8243,8451,8491,65504,
65505,65509,9794,9792,8736,8869,8978,8706,8711,8801,8786,167,8251,9734,9733,
9675,9679,9678,9671,9670,9633,9632,9651,9650,9661,9660,8594,8592,8593,8595,
8596,12307,8810,8811,8730,8765,8733,8757,8747,8748,8712,8715,8838,8839,8834,
8835,8746,8745,8743,8744,65506,8658,8660,8704,8707,180,65374,711,728,733,730,
729,184,731,161,191,720,8750,8721,8719,164,8457,8240,9665,9664,9655,9654,9828,
9824,9825,9829,9831,9827,8857,9672,9635,9680,9681,9618,9636,9637,9640,9639,
9638,9641,9832,9743,9742,9756,9758,182,8224,8225,8597,8599,8601,8598,8600,
9837,9833,9834,9836,12927,12828,8470,13255,8482,13250,13272,8481,8364,174,
65281,65282,65283,65284,65285,65286,65287,65288,65289,65290,65291,65292,65293,
65294,65295,65296,65297,65298,65299,65300,65301,65302,65303,65304,65305,65306,
65307,65308,65309,65310,65311,65312,65313,65314,65315,65316,65317,65318,65319,
[...3216 lines suppressed...]
__cp949_encmap+24279,0,255},{__cp949_encmap+24535,0,255},{__cp949_encmap+24791
,0,255},{__cp949_encmap+25047,0,255},{__cp949_encmap+25303,0,255},{
__cp949_encmap+25559,0,255},{__cp949_encmap+25815,0,255},{__cp949_encmap+26071
,0,255},{__cp949_encmap+26327,0,255},{__cp949_encmap+26583,0,255},{
__cp949_encmap+26839,0,255},{__cp949_encmap+27095,0,255},{__cp949_encmap+27351
,0,255},{__cp949_encmap+27607,0,255},{__cp949_encmap+27863,0,255},{
__cp949_encmap+28119,0,255},{__cp949_encmap+28375,0,255},{__cp949_encmap+28631
,0,255},{__cp949_encmap+28887,0,255},{__cp949_encmap+29143,0,255},{
__cp949_encmap+29399,0,255},{__cp949_encmap+29655,0,255},{__cp949_encmap+29911
,0,255},{__cp949_encmap+30167,0,255},{__cp949_encmap+30423,0,255},{
__cp949_encmap+30679,0,255},{__cp949_encmap+30935,0,255},{__cp949_encmap+31191
,0,255},{__cp949_encmap+31447,0,255},{__cp949_encmap+31703,0,255},{
__cp949_encmap+31959,0,255},{__cp949_encmap+32215,0,255},{__cp949_encmap+32471
,0,163},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__cp949_encmap+32635,0,255},{
__cp949_encmap+32891,0,11},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__cp949_encmap+
32903,1,230},
};

--- NEW FILE: mappings_tw.h ---
/*
 * $CJKCodecs: mappings_tw.h,v 1.2 2004/07/07 15:07:23 perky Exp $
 */

static const ucs2_t __big5_decmap[16702] = {
12288,65292,12289,12290,65294,8226,65307,65306,65311,65281,65072,8230,8229,
65104,65380,65106,183,65108,65109,65110,65111,65372,8211,65073,8212,65075,
9588,65076,65103,65288,65289,65077,65078,65371,65373,65079,65080,12308,12309,
65081,65082,12304,12305,65083,65084,12298,12299,65085,65086,12296,12297,65087,
65088,12300,12301,65089,65090,12302,12303,65091,65092,65113,65114,U,U,U,U,U,U,
U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,65115,65116,65117,
65118,8216,8217,8220,8221,12317,12318,8245,8242,65283,65286,65290,8251,167,
12291,9675,9679,9651,9650,9678,9734,9733,9671,9670,9633,9632,9661,9660,12963,
8453,8254,65507,65343,717,65097,65098,65101,65102,65099,65100,65119,65120,
65121,65291,65293,215,247,177,8730,65308,65310,65309,8806,8807,8800,8734,8786,
8801,65122,65123,65124,65125,65126,8764,8745,8746,8869,8736,8735,8895,13266,
13265,8747,8750,8757,8756,9792,9794,9793,9737,8593,8595,8592,8594,8598,8599,
8601,8600,8741,8739,65295,65340,65295,65340,65284,165,12306,162,163,65285,
65312,8451,8457,65129,65130,65131,13269,13212,13213,13214,13262,13217,13198,
[...2598 lines suppressed...]
82,82},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__cp950ext_encmap+338,129,129},{0,0,0},{
0,0,0},{0,0,0},{__cp950ext_encmap+339,167,167},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__cp950ext_encmap+
340,207,207},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{__cp950ext_encmap+341,185,185},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__cp950ext_encmap+342,81,104},{
__cp950ext_encmap+366,15,229},
};

--- NEW FILE: multibytecodec.c ---
/*
 * multibytecodec.c: Common Multibyte Codec Implementation
 *
 * Written by Hye-Shik Chang <perky at FreeBSD.org>
 * $CJKCodecs: multibytecodec.c,v 1.13 2004/08/19 16:57:19 perky Exp $
 */

#include "Python.h"
#include "multibytecodec.h"


/*
 * Working buffer for an encode operation: three pointers into a
 * Py_UNICODE input buffer, two pointers into an unsigned char output
 * buffer, and two Python object references.
 *
 * NOTE(review): the code that uses this struct is not visible here.
 * Presumably inbuf is the current read position, inbuf_top the start of
 * the input and inbuf_end its end; outbuf/outbuf_end the write position
 * and end of the output; excobj an exception object and outobj the
 * object backing the output buffer -- confirm against the encoder code.
 */
typedef struct {
    const Py_UNICODE    *inbuf, *inbuf_top, *inbuf_end;
    unsigned char       *outbuf, *outbuf_end;
    PyObject            *excobj, *outobj;
} MultibyteEncodeBuffer;

typedef struct {
    const unsigned char *inbuf, *inbuf_top, *inbuf_end;
[...1233 lines suppressed...]
	return (PyObject *)self;

errorexit:
	Py_XDECREF(self);
	return NULL;
}

/*
 * Method table of the _multibytecodec module.  It exposes a single
 * function, __create_codec, which takes exactly one argument (METH_O)
 * and has no docstring.  The all-zero entry terminates the table.
 */
static struct PyMethodDef __methods[] = {
	{
		"__create_codec",		/* ml_name  */
		(PyCFunction)__create_codec,	/* ml_meth  */
		METH_O,				/* ml_flags */
		NULL				/* ml_doc   */
	},
	{NULL, NULL, 0, NULL}			/* sentinel */
};

/*
 * Initialization entry point of the "_multibytecodec" extension module.
 *
 * Registers the module together with its method table.  If a Python
 * error is pending afterwards, the process is aborted through
 * Py_FatalError(); otherwise the function simply returns.
 */
void
init_multibytecodec(void)
{
	(void)Py_InitModule("_multibytecodec", __methods);

	/* Nothing pending: initialization succeeded. */
	if (!PyErr_Occurred())
		return;

	Py_FatalError("can't initialize the _multibytecodec module");
}

--- NEW FILE: multibytecodec.h ---
/*
 * multibytecodec.h: Common Multibyte Codec Implementation
 *
 * Written by Hye-Shik Chang <perky at FreeBSD.org>
 * $CJKCodecs: multibytecodec.h,v 1.7 2004/06/27 10:39:28 perky Exp $
 */

#ifndef _PYTHON_MULTIBYTECODEC_H_
#define _PYTHON_MULTIBYTECODEC_H_
#ifdef __cplusplus
extern "C" {
#endif

/*
 * ucs4_t: unsigned integer type used for 32-bit (UCS-4) values.
 *
 * uint32_t is normally a typedef, which the preprocessor cannot see, so
 * a bare "#ifdef uint32_t" only succeeds on platforms that provide it as
 * a macro.  UINT32_MAX is the macro <stdint.h> defines exactly when
 * uint32_t exists, so it is tested as well.  When neither is visible the
 * original fallback to unsigned int is kept.
 */
#if defined(uint32_t) || defined(UINT32_MAX)
typedef uint32_t ucs4_t;
#else
typedef unsigned int ucs4_t;
#endif

/*
 * ucs2_t / DBCHAR: unsigned integer types used for 16-bit values
 * (ucs2_t is the element type of the decode mapping tables).
 *
 * uint16_t is normally a typedef, invisible to the preprocessor, so a
 * bare "#ifdef uint16_t" only succeeds where it is provided as a macro.
 * UINT16_MAX is the macro <stdint.h> defines exactly when uint16_t
 * exists, so it is tested as well.  When neither is visible the original
 * fallback to unsigned short is kept.
 */
#if defined(uint16_t) || defined(UINT16_MAX)
typedef uint16_t ucs2_t, DBCHAR;
#else
typedef unsigned short ucs2_t, DBCHAR;
#endif

/*
 * Per-conversion codec state.  The same storage can be viewed as a
 * pointer, an int, eight bytes, four ucs2_t values or two ucs4_t values.
 * It is passed as the first argument to every encode/decode callback
 * declared below and is embedded in the stream reader/writer objects.
 *
 * NOTE(review): which view is used is presumably decided by each
 * individual codec -- confirm against the codec implementations.
 */
typedef union {
	void *p;
	int i;
	unsigned char c[8];
	ucs2_t u2[4];
	ucs4_t u4[2];
} MultibyteCodec_State;

/*
 * Callback signatures that make up a codec (see MultibyteCodec below).
 * All of them return int.
 * NOTE(review): the return-value convention is not visible in this
 * header; presumably 0 means success and the MBERR_* codes defined
 * further down report failures -- confirm against multibytecodec.c.
 */

/* Codec-wide initialization; receives only the codec's config pointer. */
typedef int (*mbcodec_init)(const void *config);
/*
 * Encoder: reads Py_UNICODE input through *inbuf (inleft units available)
 * and writes bytes through *outbuf (outleft bytes available).  Both
 * buffers are passed by pointer-to-pointer, presumably so the callee can
 * advance them.  flags takes MBENC_* bits.
 */
typedef int (*mbencode_func)(MultibyteCodec_State *state, const void *config,
			     const Py_UNICODE **inbuf, size_t inleft,
			     unsigned char **outbuf, size_t outleft,
			     int flags);
/* Prepares an encoder state object before use. */
typedef int (*mbencodeinit_func)(MultibyteCodec_State *state,
				 const void *config);
/*
 * Resets an encoder state; it is given an output buffer, presumably so
 * it can emit closing bytes -- confirm.
 */
typedef int (*mbencodereset_func)(MultibyteCodec_State *state,
				  const void *config,
				  unsigned char **outbuf, size_t outleft);
/*
 * Decoder: reads bytes through *inbuf (inleft bytes available) and
 * writes Py_UNICODE output through *outbuf (outleft units available).
 */
typedef int (*mbdecode_func)(MultibyteCodec_State *state,
			     const void *config,
			     const unsigned char **inbuf, size_t inleft,
			     Py_UNICODE **outbuf, size_t outleft);
/* Prepares a decoder state object before use. */
typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state,
				 const void *config);
/* Resets a decoder state; unlike the encoder reset it takes no buffer. */
typedef int (*mbdecodereset_func)(MultibyteCodec_State *state,
				  const void *config);

/*
 * Description of one codec: a name string, an opaque configuration
 * pointer (the same pointer type every callback receives as "config"),
 * and one slot for each callback type declared above.
 *
 * NOTE(review): presumably some callback slots may be NULL for codecs
 * that do not need them -- confirm against the callers.
 */
typedef struct {
	const char *encoding;
	const void *config;
	mbcodec_init codecinit;
	mbencode_func encode;
	mbencodeinit_func encinit;
	mbencodereset_func encreset;
	mbdecode_func decode;
	mbdecodeinit_func decinit;
	mbdecodereset_func decreset;
} MultibyteCodec;

/*
 * Python object (starts with PyObject_HEAD) that wraps a pointer to a
 * MultibyteCodec description.
 */
typedef struct {
	PyObject_HEAD
	MultibyteCodec *codec;
} MultibyteCodecObject;

/* Capacity, in bytes, of the stream reader's pending[] buffer. */
#define MAXDECPENDING	8
/*
 * Python object for a stream reader: a codec pointer, the codec state,
 * a small buffer of raw bytes with its fill count, and two object
 * references (stream, errors).
 *
 * NOTE(review): pending[] presumably holds input bytes that could not be
 * decoded yet (e.g. an incomplete multibyte sequence), with pendingsize
 * giving how many are valid -- confirm against the reader code.
 */
typedef struct {
	PyObject_HEAD
	MultibyteCodec *codec;
	MultibyteCodec_State state;
	unsigned char pending[MAXDECPENDING];
	int pendingsize;
	PyObject *stream, *errors;
} MultibyteStreamReaderObject;

/* Capacity, in Py_UNICODE units, of the stream writer's pending[] buffer. */
#define MAXENCPENDING	2
/*
 * Python object for a stream writer: a codec pointer, the codec state,
 * a small buffer of Py_UNICODE units with its fill count, and two object
 * references (stream, errors).
 *
 * NOTE(review): pending[] presumably holds characters that have not been
 * encoded yet, with pendingsize giving how many are valid -- confirm
 * against the writer code.
 */
typedef struct {
	PyObject_HEAD
	MultibyteCodec *codec;
	MultibyteCodec_State state;
	Py_UNICODE pending[MAXENCPENDING];
	int pendingsize;
	PyObject *stream, *errors;
} MultibyteStreamWriterObject;

/*
 * Negative status codes.  Positive values are reserved for reporting
 * illegal sequences, so every code defined here is negative.
 * NOTE(review): presumably these are the return values of the
 * mbencode_func/mbdecode_func callbacks, and a positive return is the
 * length of the offending sequence -- confirm against multibytecodec.c.
 */
#define MBERR_TOOSMALL		(-1) /* insufficient output buffer space */
#define MBERR_TOOFEW		(-2) /* incomplete input buffer */
#define MBERR_INTERNAL		(-3) /* internal runtime error */

/*
 * Small integer constants cast to PyObject *; they are markers, not
 * pointers to real objects, and must never be dereferenced or
 * reference-counted.  ERROR_MAX aliases the largest marker.
 * NOTE(review): presumably they are stored in an "errors" slot in place
 * of a real object to select a built-in error policy -- confirm against
 * multibytecodec.c.
 *
 * Each expansion is fully parenthesized so the macro behaves as a single
 * primary expression in any context (e.g. "sizeof ERROR_STRICT" or when
 * followed by a postfix operator).
 */
#define ERROR_STRICT		((PyObject *)(1))
#define ERROR_IGNORE		((PyObject *)(2))
#define ERROR_REPLACE		((PyObject *)(3))
#define ERROR_MAX		ERROR_REPLACE

/*
 * Bit flags for encoding; MBENC_MAX aliases the highest flag defined.
 * NOTE(review): presumably passed in the "flags" argument of
 * mbencode_func -- confirm against the callers.
 */
#define MBENC_FLUSH		0x0001 /* encode all characters encodable */
#define MBENC_MAX		MBENC_FLUSH

#ifdef __cplusplus
}
#endif
#endif



More information about the Python-checkins mailing list