[Python-checkins] python/dist/src/Objects unicodectype.c, 2.16, 2.17

lemburg@users.sourceforge.net lemburg at users.sourceforge.net
Thu Oct 20 21:06:39 CEST 2005


Update of /cvsroot/python/python/dist/src/Objects
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21307

Modified Files:
	unicodectype.c 
Log Message:
Enhance the performance of two important Unicode character
type lookups: whitespace and linebreak.

These lookup tables are taken from the Python 1.6 version, with the
addition of code point U+205F (MEDIUM MATHEMATICAL SPACE), which has
been added to Unicode as a whitespace code point since then.



Index: unicodectype.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodectype.c,v
retrieving revision 2.16
retrieving revision 2.17
diff -u -d -r2.16 -r2.17
--- unicodectype.c	4 Aug 2004 07:38:34 -0000	2.16
+++ unicodectype.c	20 Oct 2005 19:06:35 -0000	2.17
@@ -49,14 +49,24 @@
     return &_PyUnicode_TypeRecords[index];
 }
 
-/* Returns 1 for Unicode characters having the category 'Zl' or type
-   'B', 0 otherwise. */
+/* Returns 1 for Unicode characters having the category 'Zl', 'Zp' or
+   type 'B', 0 otherwise. */
 
-int _PyUnicode_IsLinebreak(Py_UNICODE ch)
+int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)
 {
-    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
-
-    return (ctype->flags & LINEBREAK_MASK) != 0;
+    switch (ch) {
+    case 0x000A: /* LINE FEED */
+    case 0x000D: /* CARRIAGE RETURN */
+    case 0x001C: /* FILE SEPARATOR */
+    case 0x001D: /* GROUP SEPARATOR */
+    case 0x001E: /* RECORD SEPARATOR */
+    case 0x0085: /* NEXT LINE */
+    case 0x2028: /* LINE SEPARATOR */
+    case 0x2029: /* PARAGRAPH SEPARATOR */
+	return 1;
+    default:
+	return 0;
+    }
 }
 
 /* Returns the titlecase Unicode characters corresponding to ch or just
@@ -327,11 +337,43 @@
 /* Returns 1 for Unicode characters having the bidirectional type
    'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. */
 
-int _PyUnicode_IsWhitespace(Py_UNICODE ch)
+int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
 {
-    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
-
-    return (ctype->flags & SPACE_MASK) != 0;
+    switch (ch) {
+    case 0x0009: /* HORIZONTAL TABULATION */
+    case 0x000A: /* LINE FEED */
+    case 0x000B: /* VERTICAL TABULATION */
+    case 0x000C: /* FORM FEED */
+    case 0x000D: /* CARRIAGE RETURN */
+    case 0x001C: /* FILE SEPARATOR */
+    case 0x001D: /* GROUP SEPARATOR */
+    case 0x001E: /* RECORD SEPARATOR */
+    case 0x001F: /* UNIT SEPARATOR */
+    case 0x0020: /* SPACE */
+    case 0x0085: /* NEXT LINE */
+    case 0x00A0: /* NO-BREAK SPACE */
+    case 0x1680: /* OGHAM SPACE MARK */
+    case 0x2000: /* EN QUAD */
+    case 0x2001: /* EM QUAD */
+    case 0x2002: /* EN SPACE */
+    case 0x2003: /* EM SPACE */
+    case 0x2004: /* THREE-PER-EM SPACE */
+    case 0x2005: /* FOUR-PER-EM SPACE */
+    case 0x2006: /* SIX-PER-EM SPACE */
+    case 0x2007: /* FIGURE SPACE */
+    case 0x2008: /* PUNCTUATION SPACE */
+    case 0x2009: /* THIN SPACE */
+    case 0x200A: /* HAIR SPACE */
+    case 0x200B: /* ZERO WIDTH SPACE */
+    case 0x2028: /* LINE SEPARATOR */
+    case 0x2029: /* PARAGRAPH SEPARATOR */
+    case 0x202F: /* NARROW NO-BREAK SPACE */
+    case 0x205F: /* MEDIUM MATHEMATICAL SPACE */
+    case 0x3000: /* IDEOGRAPHIC SPACE */
+	return 1;
+    default:
+	return 0;
+    }
 }
 
 /* Returns 1 for Unicode characters having the category 'Ll', 0



More information about the Python-checkins mailing list