[Python-Dev] Unicode: When Things Get Hairy
M.-A. Lemburg
mal@lemburg.com
Sat, 11 Mar 2000 14:57:34 +0100
This is a multi-part message in MIME format.
--------------56A130F1FCAC300009B200AD
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
I couldn't resist :-) Here's the patch...
BTW, how should we proceed with future patches? Should I wrap
them together about once a week, or send them as soon as they
are done?
--
Marc-Andre Lemburg
______________________________________________________________________
Business: http://www.lemburg.com/
Python Pages: http://www.lemburg.com/python/
--------------56A130F1FCAC300009B200AD
Content-Type: text/plain; charset=us-ascii;
name="Unicode-Implementation-2000-03-11.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="Unicode-Implementation-2000-03-11.patch"
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Include/unicodeobject.h Python+Unicode/Include/unicodeobject.h
--- CVS-Python/Include/unicodeobject.h Fri Mar 10 23:33:05 2000
+++ Python+Unicode/Include/unicodeobject.h Sat Mar 11 14:45:59 2000
@@ -683,6 +683,17 @@
PyObject *args /* Argument tuple or dictionary */
);
+/* Checks whether element is contained in container and returns 1/0
+ accordingly.
+
+ element has to coerce to a one-element Unicode string. -1 is
+ returned in case of an error. */
+
+extern DL_IMPORT(int) PyUnicode_Contains(
+ PyObject *container, /* Container string */
+ PyObject *element /* Element string */
+ );
+
/* === Characters Type APIs =============================================== */
/* These should not be used directly. Use the Py_UNICODE_IS* and
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Lib/test/test_unicode.py Python+Unicode/Lib/test/test_unicode.py
--- CVS-Python/Lib/test/test_unicode.py Sat Mar 11 00:23:20 2000
+++ Python+Unicode/Lib/test/test_unicode.py Sat Mar 11 14:52:29 2000
@@ -219,6 +219,19 @@
test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
+# Contains:
+print 'Testing Unicode contains method...',
+assert ('a' in 'abdb') == 1
+assert ('a' in 'bdab') == 1
+assert ('a' in 'bdaba') == 1
+assert ('a' in 'bdba') == 1
+assert ('a' in u'bdba') == 1
+assert (u'a' in u'bdba') == 1
+assert (u'a' in u'bdb') == 0
+assert (u'a' in 'bdb') == 0
+assert (u'a' in 'bdba') == 1
+print 'done.'
+
# Formatting:
print 'Testing Unicode formatting strings...',
assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Misc/unicode.txt Python+Unicode/Misc/unicode.txt
--- CVS-Python/Misc/unicode.txt Sat Mar 11 00:14:11 2000
+++ Python+Unicode/Misc/unicode.txt Sat Mar 11 14:53:37 2000
@@ -743,8 +743,9 @@
stream codecs as available through the codecs module should
be used.
-XXX There should be a short-cut open(filename,mode,encoding) available which
- also assures that mode contains the 'b' character when needed.
+The codecs module should provide a short-cut open(filename,mode,encoding)
+which also ensures that mode contains the 'b' character when
+needed.
File/Stream Input:
@@ -810,6 +811,10 @@
Introduction to Unicode (a little outdated by still nice to read):
http://www.nada.kth.se/i18n/ucs/unicode-iso10646-oview.html
+For comparison:
+ Introducing Unicode to ECMAScript --
+ http://www-4.ibm.com/software/developer/library/internationalization-support.html
+
Encodings:
Overview:
@@ -832,7 +837,7 @@
History of this Proposal:
-------------------------
-1.2:
+1.2: Removed POD about codecs.open()
1.1: Added note about comparisons and hash values. Added note about
case mapping algorithms. Changed stream codecs .read() and
.write() method to match the standard file-like object methods
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Objects/stringobject.c Python+Unicode/Objects/stringobject.c
--- CVS-Python/Objects/stringobject.c Sat Mar 11 10:55:09 2000
+++ Python+Unicode/Objects/stringobject.c Sat Mar 11 14:47:45 2000
@@ -389,7 +389,9 @@
{
register char *s, *end;
register char c;
- if (!PyString_Check(el) || PyString_Size(el) != 1) {
+ if (!PyString_Check(el))
+ return PyUnicode_Contains(a, el);
+ if (PyString_Size(el) != 1) {
PyErr_SetString(PyExc_TypeError,
"string member test needs char left operand");
return -1;
diff -u -rP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Objects/unicodeobject.c Python+Unicode/Objects/unicodeobject.c
--- CVS-Python/Objects/unicodeobject.c Fri Mar 10 23:53:23 2000
+++ Python+Unicode/Objects/unicodeobject.c Sat Mar 11 14:48:52 2000
@@ -2737,6 +2737,49 @@
return -1;
}
+int PyUnicode_Contains(PyObject *container, /* object that is searched */
+ PyObject *element) /* object looked for; must coerce to length 1 */
+{
+ PyUnicodeObject *u = NULL, *v = NULL; /* NULL so onError can Py_XDECREF them */
+ int result; /* 1 if the character was found, 0 if not */
+ register const Py_UNICODE *p, *e; /* scan cursor and end of u's buffer */
+ register Py_UNICODE ch; /* the single character taken from v */
+
+ /* Coerce the two arguments to Unicode; a NULL result is treated as an error */
+ u = (PyUnicodeObject *)PyUnicode_FromObject(container);
+ if (u == NULL)
+ goto onError;
+ v = (PyUnicodeObject *)PyUnicode_FromObject(element);
+ if (v == NULL)
+ goto onError;
+
+ /* Check v in u: only a one-character v is accepted, else TypeError */
+ if (PyUnicode_GET_SIZE(v) != 1) {
+ PyErr_SetString(PyExc_TypeError,
+ "string member test needs char left operand");
+ goto onError;
+ }
+ ch = *PyUnicode_AS_UNICODE(v);
+ p = PyUnicode_AS_UNICODE(u);
+ e = p + PyUnicode_GET_SIZE(u); /* one past the last character of u */
+ result = 0;
+ while (p < e) { /* linear scan that stops at the first match */
+ if (*p++ == ch) {
+ result = 1;
+ break;
+ }
+ }
+
+ Py_DECREF(u); /* release the coerced temporaries before returning */
+ Py_DECREF(v);
+ return result;
+
+onError: /* u and/or v may still be NULL here, hence Py_XDECREF */
+ Py_XDECREF(u);
+ Py_XDECREF(v);
+ return -1; /* error indicator, distinct from the 0/1 results */
+}
+
/* Concat to string or Unicode object giving a new Unicode object. */
PyObject *PyUnicode_Concat(PyObject *left,
@@ -3817,6 +3860,7 @@
(intintargfunc) unicode_slice, /* sq_slice */
0, /* sq_ass_item */
0, /* sq_ass_slice */
+ (objobjproc)PyUnicode_Contains, /*sq_contains*/
};
static int
--------------56A130F1FCAC300009B200AD--