[Patches] Unicode Patch Set 2000-03-28
M.-A. Lemburg
mal@lemburg.com
Tue, 28 Mar 2000 11:30:11 +0200
This is a multi-part message in MIME format.
--------------5ABFD5148A2E5499A86B9C08
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
The attached patch set includes a workaround to get Python with
Unicode support to compile on BSDI 4.x (courtesy of Thomas Wouters;
the cause is a bug in the BSDI wchar.h header file) and Python
interfaces for the MBCS codec donated by Mark Hammond.
Also included are some minor corrections to the docs of
the new "es" and "es#" parser markers (use PyMem_Free() instead
of free(); thanks to Mark Hammond for finding these).
The unicodedata tests are now in a separate file
(test_unicodedata.py) to avoid problems if the unicodedata
module cannot be found.
--
Marc-Andre Lemburg
______________________________________________________________________
Business: http://www.lemburg.com/
Python Pages: http://www.lemburg.com/python/
--------------5ABFD5148A2E5499A86B9C08
Content-Type: text/plain; charset=iso-8859-1;
name="Unicode-Implementation-2000-03-28.patch"
Content-Transfer-Encoding: 8bit
Content-Disposition: inline;
filename="Unicode-Implementation-2000-03-28.patch"
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Include/unicodeobject.h Python+Unicode/Include/unicodeobject.h
--- CVS-Python/Include/unicodeobject.h Tue Mar 28 09:19:14 2000
+++ Python+Unicode/Include/unicodeobject.h Tue Mar 28 09:10:22 2000
@@ -82,6 +82,10 @@
#endif
#ifdef HAVE_WCHAR_H
+/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
+# ifdef _HAVE_BSDI
+# include <time.h>
+# endif
# include "wchar.h"
#endif
@@ -562,7 +566,9 @@
);
#ifdef MS_WIN32
+
/* --- MBCS codecs for Windows -------------------------------------------- */
+
extern DL_IMPORT(PyObject*) PyUnicode_DecodeMBCS(
const char *string, /* MBCS encoded string */
int length, /* size of string */
@@ -579,8 +585,8 @@
const char *errors /* error handling */
);
-
#endif /* MS_WIN32 */
+
/* --- Methods & Slots ----------------------------------------------------
These are capable of handling Unicode objects and strings on input
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Lib/encodings/mbcs.py Python+Unicode/Lib/encodings/mbcs.py
--- CVS-Python/Lib/encodings/mbcs.py Tue Mar 28 03:58:50 2000
+++ Python+Unicode/Lib/encodings/mbcs.py Tue Mar 28 08:57:31 2000
@@ -34,4 +34,3 @@
def getregentry():
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
-
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Lib/test/output/test_unicode Python+Unicode/Lib/test/output/test_unicode
--- CVS-Python/Lib/test/output/test_unicode Sat Mar 25 11:56:30 2000
+++ Python+Unicode/Lib/test/output/test_unicode Tue Mar 28 11:18:47 2000
@@ -1,4 +1,5 @@
test_unicode
Testing Unicode comparisons... done.
+Testing Unicode contains method... done.
Testing Unicode formatting strings... done.
-Testing unicodedata module... done.
+Testing builtin codecs... done.
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Lib/test/output/test_unicodedata Python+Unicode/Lib/test/output/test_unicodedata
--- CVS-Python/Lib/test/output/test_unicodedata Thu Jan 1 01:00:00 1970
+++ Python+Unicode/Lib/test/output/test_unicodedata Tue Mar 28 11:18:47 2000
@@ -0,0 +1,2 @@
+test_unicodedata
+Testing unicodedata module... done.
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Lib/test/test_unicode.py Python+Unicode/Lib/test/test_unicode.py
--- CVS-Python/Lib/test/test_unicode.py Sat Mar 25 11:56:30 2000
+++ Python+Unicode/Lib/test/test_unicode.py Sat Mar 25 11:30:52 2000
@@ -1,6 +1,5 @@
""" Test script for the Unicode implementation.
-
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
@@ -249,50 +248,6 @@
assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
assert u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"} == u'abc, def'
print 'done.'
-
-# Test Unicode database APIs
-try:
- import unicodedata
-except ImportError:
- pass
-else:
- print 'Testing unicodedata module...',
-
- assert unicodedata.digit(u'A',None) is None
- assert unicodedata.digit(u'9') == 9
- assert unicodedata.digit(u'\u215b',None) is None
- assert unicodedata.digit(u'\u2468') == 9
-
- assert unicodedata.numeric(u'A',None) is None
- assert unicodedata.numeric(u'9') == 9
- assert unicodedata.numeric(u'\u215b') == 0.125
- assert unicodedata.numeric(u'\u2468') == 9.0
-
- assert unicodedata.decimal(u'A',None) is None
- assert unicodedata.decimal(u'9') == 9
- assert unicodedata.decimal(u'\u215b',None) is None
- assert unicodedata.decimal(u'\u2468',None) is None
-
- assert unicodedata.category(u'\uFFFE') == 'Cn'
- assert unicodedata.category(u'a') == 'Ll'
- assert unicodedata.category(u'A') == 'Lu'
-
- assert unicodedata.bidirectional(u'\uFFFE') == ''
- assert unicodedata.bidirectional(u' ') == 'WS'
- assert unicodedata.bidirectional(u'A') == 'L'
-
- assert unicodedata.decomposition(u'\uFFFE') == ''
- assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
-
- assert unicodedata.mirrored(u'\uFFFE') == 0
- assert unicodedata.mirrored(u'a') == 0
- assert unicodedata.mirrored(u'\u2201') == 1
-
- assert unicodedata.combining(u'\uFFFE') == 0
- assert unicodedata.combining(u'a') == 0
- assert unicodedata.combining(u'\u20e1') == 230
-
- print 'done.'
# Test builtin codecs
print 'Testing builtin codecs...',
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Lib/test/test_unicodedata.py Python+Unicode/Lib/test/test_unicodedata.py
--- CVS-Python/Lib/test/test_unicodedata.py Thu Jan 1 01:00:00 1970
+++ Python+Unicode/Lib/test/test_unicodedata.py Sat Mar 25 11:31:16 2000
@@ -0,0 +1,50 @@
+""" Test script for the unicodedata module.
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+
+"""#"
+from test_support import verbose
+import sys
+
+# Test Unicode database APIs
+import unicodedata
+
+print 'Testing unicodedata module...',
+
+assert unicodedata.digit(u'A',None) is None
+assert unicodedata.digit(u'9') == 9
+assert unicodedata.digit(u'\u215b',None) is None
+assert unicodedata.digit(u'\u2468') == 9
+
+assert unicodedata.numeric(u'A',None) is None
+assert unicodedata.numeric(u'9') == 9
+assert unicodedata.numeric(u'\u215b') == 0.125
+assert unicodedata.numeric(u'\u2468') == 9.0
+
+assert unicodedata.decimal(u'A',None) is None
+assert unicodedata.decimal(u'9') == 9
+assert unicodedata.decimal(u'\u215b',None) is None
+assert unicodedata.decimal(u'\u2468',None) is None
+
+assert unicodedata.category(u'\uFFFE') == 'Cn'
+assert unicodedata.category(u'a') == 'Ll'
+assert unicodedata.category(u'A') == 'Lu'
+
+assert unicodedata.bidirectional(u'\uFFFE') == ''
+assert unicodedata.bidirectional(u' ') == 'WS'
+assert unicodedata.bidirectional(u'A') == 'L'
+
+assert unicodedata.decomposition(u'\uFFFE') == ''
+assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
+
+assert unicodedata.mirrored(u'\uFFFE') == 0
+assert unicodedata.mirrored(u'a') == 0
+assert unicodedata.mirrored(u'\u2201') == 1
+
+assert unicodedata.combining(u'\uFFFE') == 0
+assert unicodedata.combining(u'a') == 0
+assert unicodedata.combining(u'\u20e1') == 230
+
+print 'done.'
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Misc/unicode.txt Python+Unicode/Misc/unicode.txt
--- CVS-Python/Misc/unicode.txt Sat Mar 25 11:56:31 2000
+++ Python+Unicode/Misc/unicode.txt Tue Mar 28 11:18:46 2000
@@ -740,8 +740,8 @@
On output, a buffer of the needed size is allocated and
returned through *buffer as NULL-terminated string.
The encoded may not contain embedded NULL characters.
- The caller is responsible for free()ing the allocated *buffer
- after usage.
+ The caller is responsible for calling PyMem_Free()
+ to free the allocated *buffer after usage.
"es#":
Takes three parameters: encoding (const char *),
@@ -755,8 +755,9 @@
If *buffer is NULL, a buffer of the needed size is
allocated and output copied into it. *buffer is then
- updated to point to the allocated memory area. The caller
- is responsible for free()ing *buffer after usage.
+ updated to point to the allocated memory area.
+ The caller is responsible for calling PyMem_Free()
+ to free the allocated *buffer after usage.
In both cases *buffer_len is updated to the number of
characters written (excluding the trailing NULL-byte).
@@ -784,7 +785,7 @@
return NULL;
}
str = PyString_FromStringAndSize(buffer, buffer_len);
- free(buffer);
+ PyMem_Free(buffer);
return str;
}
@@ -807,7 +808,7 @@
return NULL;
}
str = PyString_FromString(buffer);
- free(buffer);
+ PyMem_Free(buffer);
return str;
}
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Modules/_codecsmodule.c Python+Unicode/Modules/_codecsmodule.c
--- CVS-Python/Modules/_codecsmodule.c Sat Mar 11 00:09:23 2000
+++ Python+Unicode/Modules/_codecsmodule.c Tue Mar 28 09:12:19 2000
@@ -286,6 +286,26 @@
size);
}
+#ifdef MS_WIN32
+
+static PyObject *
+mbcs_decode(PyObject *self,
+ PyObject *args)
+{
+ const char *data;
+ int size;
+ const char *errors = NULL;
+
+ if (!PyArg_ParseTuple(args, "t#|z:mbcs_decode",
+ &data, &size, &errors))
+ return NULL;
+
+ return codec_tuple(PyUnicode_DecodeMBCS(data, size, errors),
+ size);
+}
+
+#endif /* MS_WIN32 */
+
/* --- Encoder ------------------------------------------------------------ */
static PyObject *
@@ -491,6 +511,28 @@
PyUnicode_GET_SIZE(str));
}
+#ifdef MS_WIN32
+
+static PyObject *
+mbcs_encode(PyObject *self,
+ PyObject *args)
+{
+ PyObject *str;
+ const char *errors = NULL;
+
+ if (!PyArg_ParseTuple(args, "U|z:mbcs_encode",
+ &str, &errors))
+ return NULL;
+
+ return codec_tuple(PyUnicode_EncodeMBCS(
+ PyUnicode_AS_UNICODE(str),
+ PyUnicode_GET_SIZE(str),
+ errors),
+ PyUnicode_GET_SIZE(str));
+}
+
+#endif /* MS_WIN32 */
+
/* --- Module API --------------------------------------------------------- */
static PyMethodDef _codecs_functions[] = {
@@ -519,6 +561,10 @@
{"charmap_decode", charmap_decode, 1},
{"readbuffer_encode", readbuffer_encode, 1},
{"charbuffer_encode", charbuffer_encode, 1},
+#ifdef MS_WIN32
+ {"mbcs_encode", mbcs_encode, 1},
+ {"mbcs_decode", mbcs_decode, 1},
+#endif
{NULL, NULL} /* sentinel */
};
Only in CVS-Python/Objects: .#stringobject.c.2.59
Only in CVS-Python/Objects: stringobject.c.orig
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Python/getargs.c Python+Unicode/Python/getargs.c
--- CVS-Python/Python/getargs.c Tue Mar 28 09:19:18 2000
+++ Python+Unicode/Python/getargs.c Tue Mar 28 09:00:34 2000
@@ -704,7 +704,7 @@
the data copied into it; *buffer is
updated to point to the new buffer;
the caller is responsible for
- free()ing it after usage
+ PyMem_Free()ing it after usage
- if *buffer is not NULL, the data
is copied to *buffer; *buffer_len
@@ -752,7 +752,7 @@
is allocated and the data copied
into it; *buffer is updated to
point to the new buffer; the caller
- is responsible for free()ing it
+ is responsible for PyMem_Free()ing it
after usage
*/
--------------5ABFD5148A2E5499A86B9C08--