[Python-checkins] python/dist/src/Modules unicodedata.c,2.20,2.21
loewis@users.sourceforge.net
loewis@users.sourceforge.net
Sat, 23 Nov 2002 04:22:35 -0800
Update of /cvsroot/python/python/dist/src/Modules
In directory sc8-pr-cvs1:/tmp/cvs-serv13615/Modules
Modified Files:
unicodedata.c
Log Message:
Patch #626548: Support Hangul syllable names.
Index: unicodedata.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/unicodedata.c,v
retrieving revision 2.20
retrieving revision 2.21
diff -C2 -d -r2.20 -r2.21
*** unicodedata.c 18 Oct 2002 16:11:51 -0000 2.20
--- unicodedata.c 23 Nov 2002 12:22:32 -0000 2.21
***************
*** 1,10 ****
/* ------------------------------------------------------------------------
! unicodedata -- Provides access to the Unicode 3.0 data base.
! Data was extracted from the Unicode 3.0 UnicodeData.txt file.
Written by Marc-Andre Lemburg (mal@lemburg.com).
Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
Copyright (c) Corporation for National Research Initiatives.
--- 1,11 ----
/* ------------------------------------------------------------------------
! unicodedata -- Provides access to the Unicode 3.2 data base.
! Data was extracted from the Unicode 3.2 UnicodeData.txt file.
Written by Marc-Andre Lemburg (mal@lemburg.com).
Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
+ Modified by Martin v. Löwis (martin@v.loewis.de)
Copyright (c) Corporation for National Research Initiatives.
***************
*** 277,280 ****
--- 278,322 ----
}
+ /* Constants for the algorithmic treatment of Hangul syllables.
+    SBase is the first code point handled as a Hangul syllable and
+    SCount is the number of such code points.  LCount, VCount and TCount
+    are the number of valid entries in columns 0, 1 and 2 of
+    hangul_syllables below.  LBase, VBase and TBase are not referenced by
+    the code in this patch.
+    NOTE(review): names and values appear to follow the Hangul syllable
+    algorithm of the Unicode Standard -- confirm against the spec. */
+ #define SBase 0xAC00
+ #define LBase 0x1100
+ #define VBase 0x1161
+ #define TBase 0x11A7
+ #define LCount 19
+ #define VCount 21
+ #define TCount 28
+ /* Number of (V, T) combinations per L index. */
+ #define NCount (VCount*TCount)
+ /* Total number of (L, V, T) combinations. */
+ #define SCount (LCount*NCount)
+
+ /* Name fragments used to build and parse "HANGUL SYLLABLE ..." names.
+    Row i holds, per column, the fragment for index i:
+      column 0 -- L part (LCount entries),
+      column 1 -- V part (VCount entries),
+      column 2 -- T part (TCount entries).
+    The row order is significant: the row number is the index value.
+    Columns 0 and 1 are shorter than column 2 and are padded with 0
+    (null pointers); those padding entries must never be dereferenced,
+    so callers have to bound the row by the matching *Count constant.
+    An empty string is a valid fragment (row 11 of column 0, row 0 of
+    column 2). */
+ static char *hangul_syllables[][3] = {
+ { "G", "A", "" },
+ { "GG", "AE", "G" },
+ { "N", "YA", "GG" },
+ { "D", "YAE", "GS" },
+ { "DD", "EO", "N", },
+ { "R", "E", "NJ" },
+ { "M", "YEO", "NH" },
+ { "B", "YE", "D" },
+ { "BB", "O", "L" },
+ { "S", "WA", "LG" },
+ { "SS", "WAE", "LM" },
+ { "", "OE", "LB" },
+ { "J", "YO", "LS" },
+ { "JJ", "U", "LT" },
+ { "C", "WEO", "LP" },
+ { "K", "WE", "LH" },
+ { "T", "WI", "M" },
+ { "P", "YU", "B" },
+ { "H", "EU", "BS" },
+ { 0, "YI", "S" },
+ { 0, "I", "SS" },
+ { 0, 0, "NG" },
+ { 0, 0, "J" },
+ { 0, 0, "C" },
+ { 0, 0, "K" },
+ { 0, 0, "T" },
+ { 0, 0, "P" },
+ { 0, 0, "H" }
+ };
+
static int
_getucname(Py_UCS4 code, char* buffer, int buflen)
***************
*** 285,288 ****
--- 327,352 ----
unsigned char* w;
+ /* Hangul syllable names are generated from the syllable table rather
+    than looked up.  The range is half-open: SBase+SCount itself is NOT a
+    syllable.  Accepting it would give L == LCount, and the column-0
+    table entry for that row is a null pointer, which must not reach
+    strcpy()/strlen(). */
+ if (SBase <= code && code < SBase+SCount) {
+     /* Hangul syllable.  Split the offset from SBase into the row
+        indices of the three name-fragment columns. */
+     int SIndex = code - SBase;
+     int L = SIndex / NCount;
+     int V = (SIndex % NCount) / TCount;
+     int T = SIndex % TCount;
+
+     if (buflen < 27)
+         /* Worst case: HANGUL SYLLABLE <10chars>.
+            The 16-byte prefix plus 10 bytes plus the terminator; the
+            fragments in the table add up to at most 2+3+2 bytes, so
+            this bound is conservative. */
+         return 0;
+     strcpy(buffer, "HANGUL SYLLABLE ");
+     buffer += 16;
+     /* Append the three fragments back to back; any of them may be
+        the empty string. */
+     strcpy(buffer, hangul_syllables[L][0]);
+     buffer += strlen(hangul_syllables[L][0]);
+     strcpy(buffer, hangul_syllables[V][1]);
+     buffer += strlen(hangul_syllables[V][1]);
+     strcpy(buffer, hangul_syllables[T][2]);
+     buffer += strlen(hangul_syllables[T][2]);
+     *buffer = '\0';
+     return 1;
+ }
+
+
if (code >= 0x110000)
return 0;
***************
*** 344,347 ****
--- 408,432 ----
}
+ /* Longest-prefix match of str against one column of hangul_syllables.
+    Only rows 0 .. count-1 of the given column are examined.
+    On return *len is the length of the longest fragment that is a prefix
+    of str and *pos is its row; among fragments of equal length the first
+    row wins.  An empty fragment matches any input with length 0.
+    If no fragment matches at all, *len is 0 and *pos is -1. */
+ static void
+ find_syllable(const char *str, int *len, int *pos, int count, int column)
+ {
+     int row;
+     int best_len = -1;    /* -1 means "nothing matched yet" */
+     int best_row = -1;
+
+     for (row = 0; row < count; row++) {
+         char *fragment = hangul_syllables[row][column];
+         int fragment_len = strlen(fragment);
+
+         /* Only a strictly longer fragment can replace the current best. */
+         if (fragment_len > best_len
+             && strncmp(str, fragment, fragment_len) == 0) {
+             best_len = fragment_len;
+             best_row = row;
+         }
+     }
+
+     *len = (best_len == -1) ? 0 : best_len;
+     *pos = best_row;
+ }
+
+
static int
_getcode(const char* name, int namelen, Py_UCS4* code)
***************
*** 351,354 ****
--- 436,455 ----
unsigned int i, incr;
+ /* Check for hangul syllables.  Their names are not in the hash table;
+    they are parsed as "HANGUL SYLLABLE " followed by an L, a V and a T
+    fragment, each matched by longest prefix.  If the name does not
+    parse completely, fall through to the regular lookup below. */
+ if (strncmp(name, "HANGUL SYLLABLE ", 16) == 0) {
+     int L, V, T, len;
+     const char *pos = name + 16;
+     find_syllable(pos, &len, &L, LCount, 0);
+     pos += len;
+     find_syllable(pos, &len, &V, VCount, 1);
+     pos += len;
+     find_syllable(pos, &len, &T, TCount, 2);
+     pos += len;
+     /* All three parts must have matched, and the fragments must
+        consume the name exactly up to namelen. */
+     if (L != -1 && V != -1 && T != -1 && pos-name == namelen) {
+         *code = SBase + (L*VCount+V)*TCount + T;
+         return 1;
+     }
+ }
+
+
/* the following is the same as python's dictionary lookup, with
only minor changes. see the makeunicodedata script for more
***************
*** 476,477 ****
--- 577,584 ----
PyModule_AddObject(m, "ucnhash_CAPI", v);
}
+
+ /*
+ Local variables:
+ c-basic-offset: 4
+ End:
+ */