[Python-checkins] python/dist/src/Python ast.c,1.1.2.45,1.1.2.46
nascheme at users.sourceforge.net
nascheme at users.sourceforge.net
Sat Mar 20 17:38:24 EST 2004
Update of /cvsroot/python/python/dist/src/Python
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13848/Python
Modified Files:
Tag: ast-branch
ast.c
Log Message:
Refactor code in parsestr. Decoding of unicode strings with explicit
encoding is still broken.
Index: ast.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/Attic/ast.c,v
retrieving revision 1.1.2.45
retrieving revision 1.1.2.46
diff -C2 -d -r1.1.2.45 -r1.1.2.46
*** ast.c 20 Mar 2004 21:28:22 -0000 1.1.2.45
--- ast.c 20 Mar 2004 22:38:22 -0000 1.1.2.46
***************
*** 2298,2301 ****
--- 2298,2361 ----
}
+ static PyObject *
+ decode_unicode(const char *s, size_t len, int rawmode, const char *encoding)
+ {
+ PyObject *v, *u;
+ char *buf;
+ char *p;
+ const char *end;
+ if (encoding == NULL) {
+ u = NULL;
+ } else if (strcmp(encoding, "iso-8859-1") == 0) {
+ u = NULL;
+ } else {
+ /* "\XX" may become "\u005c\uHHLL" (12 bytes) */
+ u = PyString_FromStringAndSize((char *)NULL, len * 4);
+ if (u == NULL)
+ return NULL;
+ p = buf = PyString_AsString(u);
+ end = s + len;
+ while (s < end) {
+ if (*s == '\\') {
+ *p++ = *s++;
+ if (*s & 0x80) {
+ strcpy(p, "u005c");
+ p += 5;
+ }
+ }
+ if (*s & 0x80) { /* XXX inefficient */
+ PyObject *w;
+ char *r;
+ int rn, i;
+ w = decode_utf8(&s, end, "utf-16-be");
+ if (w == NULL) {
+ Py_DECREF(u);
+ return NULL;
+ }
+ r = PyString_AsString(w);
+ rn = PyString_Size(w);
+ assert(rn % 2 == 0);
+ for (i = 0; i < rn; i += 2) {
+ sprintf(p, "\\u%02x%02x",
+ r[i + 0] & 0xFF,
+ r[i + 1] & 0xFF);
+ p += 6;
+ }
+ Py_DECREF(w);
+ } else {
+ *p++ = *s++;
+ }
+ }
+ len = p - buf;
+ s = buf;
+ }
+ if (rawmode)
+ v = PyUnicode_DecodeRawUnicodeEscape(s, len, NULL);
+ else
+ v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
+ Py_XDECREF(u);
+ return v;
+ }
+
/* s is a Python string literal, including the bracketing quote characters,
* and r &/or u prefixes (if any), and embedded escape sequences (if any).
***************
*** 2347,2413 ****
#ifdef Py_USING_UNICODE
if (unicode || Py_UnicodeFlag) {
! PyObject *u;
! if (encoding == NULL) {
! u = NULL;
! } else if (strcmp(encoding, "iso-8859-1") == 0) {
! u = NULL;
! } else {
! #if 0 /* XXX still broken */
! PyObject *w;
! char *buf;
! char *p;
! const char *end;
! /* "\XX" may become "\u005c\uHHLL" (12 bytes) */
! u = PyString_FromStringAndSize((char *)NULL, len * 4);
! if (u == NULL)
! return NULL;
! p = buf = PyString_AsString(u);
! end = s + len;
! while (s < end) {
! if (*s == '\\') {
! *p++ = *s++;
! if (*s & 0x80) {
! strcpy(p, "u005c");
! p += 5;
! }
! }
! if (*s & 0x80) { /* XXX inefficient */
! char *r;
! int rn, i;
! w = decode_utf8(&s, end, "utf-16-be");
! if (w == NULL) {
! Py_DECREF(u);
! return NULL;
! }
! r = PyString_AsString(w);
! rn = PyString_Size(w);
! assert(rn % 2 == 0);
! for (i = 0; i < rn; i += 2) {
! sprintf(p, "\\u%02x%02x",
! r[i + 0] & 0xFF,
! r[i + 1] & 0xFF);
! p += 6;
! }
! Py_DECREF(w);
! } else {
! *p++ = *s++;
! }
! }
! len = p - buf;
! s = buf;
#else
- u = NULL;
- fprintf(stderr, "ignoring encoding = %s decl\n",
- encoding);
- #endif /* XXX */
- }
-
if (rawmode)
! v = PyUnicode_DecodeRawUnicodeEscape(s, len, NULL);
else
! v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
! Py_XDECREF(u);
! return v;
!
}
#endif
--- 2407,2419 ----
#ifdef Py_USING_UNICODE
if (unicode || Py_UnicodeFlag) {
! #if 0
! /* XXX currently broken */
! return decode_unicode(s, len, rawmode, encoding);
#else
if (rawmode)
! return PyUnicode_DecodeRawUnicodeEscape(s, len, NULL);
else
! return PyUnicode_DecodeUnicodeEscape(s, len, NULL);
! #endif
}
#endif
More information about the Python-checkins
mailing list