[Python-checkins] CVS: python/dist/src/Objects unicodeobject.c,2.106,2.107
Tim Peters
tim_one@users.sourceforge.net
Thu, 09 Aug 2001 15:21:58 -0700
Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv28410/python/dist/src/Objects
Modified Files:
unicodeobject.c
Log Message:
SF patch #438013 Remove 2-byte Py_UCS2 assumptions
Removed all instances of Py_UCS2 from the codebase, and so also (I hope)
the last remaining reliance on the platform having an integral type
with exactly 16 bits.
PyUnicode_DecodeUTF16() now reads its input, and PyUnicode_EncodeUTF16()
now writes its output, one byte at a time (via unsigned char pointers).
Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.106
retrieving revision 2.107
diff -C2 -d -r2.106 -r2.107
*** unicodeobject.c 2001/08/02 04:15:00 2.106
--- unicodeobject.c 2001/08/09 22:21:55 2.107
***************
*** 945,950 ****
static
! int utf16_decoding_error(const Py_UCS2 **source,
! Py_UNICODE **dest,
const char *errors,
const char *details)
--- 945,949 ----
static
! int utf16_decoding_error(Py_UNICODE **dest,
const char *errors,
const char *details)
***************
*** 976,996 ****
}
! PyObject *PyUnicode_DecodeUTF16(const char *s,
! int size,
! const char *errors,
! int *byteorder)
{
PyUnicodeObject *unicode;
Py_UNICODE *p;
! const Py_UCS2 *q, *e;
! int bo = 0;
const char *errmsg = "";
/* size should be an even number */
! if (size % sizeof(Py_UCS2) != 0) {
! if (utf16_decoding_error(NULL, NULL, errors, "truncated data"))
! return NULL;
! /* The remaining input chars are ignored if we fall through
! here... */
}
--- 975,1001 ----
}
! PyObject *
! PyUnicode_DecodeUTF16(const char *s,
! int size,
! const char *errors,
! int *byteorder)
{
PyUnicodeObject *unicode;
Py_UNICODE *p;
! const unsigned char *q, *e;
! int bo = 0; /* assume native ordering by default */
const char *errmsg = "";
+ /* Offsets from q for retrieving byte pairs in the right order. */
+ #ifdef BYTEORDER_IS_LITTLE_ENDIAN
+ int ihi = 1, ilo = 0;
+ #else
+ int ihi = 0, ilo = 1;
+ #endif
/* size should be an even number */
! if (size & 1) {
! if (utf16_decoding_error(NULL, errors, "truncated data"))
! return NULL;
! --size; /* else ignore the oddball byte */
}
***************
*** 1005,1013 ****
/* Unpack UTF-16 encoded data */
p = unicode->str;
! q = (Py_UCS2 *)s;
! e = q + (size / sizeof(Py_UCS2));
if (byteorder)
! bo = *byteorder;
/* Check for BOM marks (U+FEFF) in the input and adjust current
--- 1010,1018 ----
/* Unpack UTF-16 encoded data */
p = unicode->str;
! q = (unsigned char *)s;
! e = q + size;
if (byteorder)
! bo = *byteorder;
/* Check for BOM marks (U+FEFF) in the input and adjust current
***************
*** 1016,1050 ****
stream as-is (giving a ZWNBSP character). */
if (bo == 0) {
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
! if (*q == 0xFEFF) {
! q++;
bo = -1;
! } else if (*q == 0xFFFE) {
! q++;
bo = 1;
}
#else
! if (*q == 0xFEFF) {
! q++;
bo = 1;
! } else if (*q == 0xFFFE) {
! q++;
bo = -1;
}
#endif
}
!
while (q < e) {
! register Py_UCS2 ch = *q++;
- /* Swap input bytes if needed. (This assumes
- sizeof(Py_UNICODE) == 2 !) */
- #ifdef BYTEORDER_IS_LITTLE_ENDIAN
- if (bo == 1)
- ch = (ch >> 8) | (ch << 8);
- #else
- if (bo == -1)
- ch = (ch >> 8) | (ch << 8);
- #endif
if (ch < 0xD800 || ch > 0xDFFF) {
*p++ = ch;
--- 1021,1061 ----
stream as-is (giving a ZWNBSP character). */
if (bo == 0) {
+ const Py_UNICODE bom = (q[ihi] << 8) | q[ilo];
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
! if (bom == 0xFEFF) {
! q += 2;
bo = -1;
! }
! else if (bom == 0xFFFE) {
! q += 2;
bo = 1;
}
#else
! if (bom == 0xFEFF) {
! q += 2;
bo = 1;
! }
! else if (bom == 0xFFFE) {
! q += 2;
bo = -1;
}
#endif
}
!
! if (bo == -1) {
! /* force LE */
! ihi = 1;
! ilo = 0;
! }
! else if (bo == 1) {
! /* force BE */
! ihi = 0;
! ilo = 1;
! }
!
while (q < e) {
! Py_UNICODE ch = (q[ihi] << 8) | q[ilo];
! q += 2;
if (ch < 0xD800 || ch > 0xDFFF) {
*p++ = ch;
***************
*** 1058,1069 ****
}
if (0xD800 <= ch && ch <= 0xDBFF) {
! Py_UCS2 ch2 = *q++;
! #ifdef BYTEORDER_IS_LITTLE_ENDIAN
! if (bo == 1)
! ch2 = (ch2 >> 8) | (ch2 << 8);
! #else
! if (bo == -1)
! ch2 = (ch2 >> 8) | (ch2 << 8);
! #endif
if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
#ifndef Py_UNICODE_WIDE
--- 1069,1074 ----
}
if (0xD800 <= ch && ch <= 0xDBFF) {
! Py_UNICODE ch2 = (q[ihi] << 8) | q[ilo];
! q += 2;
if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
#ifndef Py_UNICODE_WIDE
***************
*** 1085,1089 ****
utf16Error:
! if (utf16_decoding_error(&q, &p, errors, errmsg))
goto onError;
}
--- 1090,1094 ----
utf16Error:
! if (utf16_decoding_error(&p, errors, errmsg))
goto onError;
}
***************
*** 1103,1158 ****
}
! #undef UTF16_ERROR
!
! PyObject *PyUnicode_EncodeUTF16(const Py_UNICODE *s,
! int size,
! const char *errors,
! int byteorder)
{
PyObject *v;
! Py_UCS2 *p;
! char *q;
! int i, pairs, doswap = 1;
for (i = pairs = 0; i < size; i++)
if (s[i] >= 0x10000)
pairs++;
v = PyString_FromStringAndSize(NULL,
! sizeof(Py_UCS2) * (size + pairs + (byteorder == 0)));
if (v == NULL)
return NULL;
! q = PyString_AS_STRING(v);
! p = (Py_UCS2 *)q;
if (byteorder == 0)
! *p++ = 0xFEFF;
if (size == 0)
return v;
! if (byteorder == 0 ||
! #ifdef BYTEORDER_IS_LITTLE_ENDIAN
! byteorder == -1
! #else
! byteorder == 1
! #endif
! )
! doswap = 0;
while (size-- > 0) {
Py_UNICODE ch = *s++;
Py_UNICODE ch2 = 0;
if (ch >= 0x10000) {
! ch2 = 0xDC00|((ch-0x10000) & 0x3FF);
! ch = 0xD800|((ch-0x10000)>>10);
! }
! if (doswap){
! *p++ = (ch >> 8) | (ch << 8);
! if (ch2)
! *p++ = (ch2 >> 8) | (ch2 << 8);
! }else{
! *p++ = ch;
! if(ch2)
! *p++ = ch2;
}
}
return v;
}
--- 1108,1172 ----
}
! PyObject *
! PyUnicode_EncodeUTF16(const Py_UNICODE *s,
! int size,
! const char *errors,
! int byteorder)
{
PyObject *v;
! unsigned char *p;
! int i, pairs;
! /* Offsets from p for storing byte pairs in the right order. */
! #ifdef BYTEORDER_IS_LITTLE_ENDIAN
! int ihi = 1, ilo = 0;
! #else
! int ihi = 0, ilo = 1;
! #endif
+ #define STORECHAR(CH) \
+ do { \
+ p[ihi] = ((CH) >> 8) & 0xff; \
+ p[ilo] = (CH) & 0xff; \
+ p += 2; \
+ } while(0)
+
for (i = pairs = 0; i < size; i++)
if (s[i] >= 0x10000)
pairs++;
v = PyString_FromStringAndSize(NULL,
! 2 * (size + pairs + (byteorder == 0)));
if (v == NULL)
return NULL;
! p = (unsigned char *)PyString_AS_STRING(v);
if (byteorder == 0)
! STORECHAR(0xFEFF);
if (size == 0)
return v;
!
! if (byteorder == -1) {
! /* force LE */
! ihi = 1;
! ilo = 0;
! }
! else if (byteorder == 1) {
! /* force BE */
! ihi = 0;
! ilo = 1;
! }
!
while (size-- > 0) {
Py_UNICODE ch = *s++;
Py_UNICODE ch2 = 0;
if (ch >= 0x10000) {
! ch2 = 0xDC00 | ((ch-0x10000) & 0x3FF);
! ch = 0xD800 | ((ch-0x10000) >> 10);
}
+ STORECHAR(ch);
+ if (ch2)
+ STORECHAR(ch2);
}
return v;
+ #undef STORECHAR
}