[Python-checkins] CVS: python/dist/src/Modules ucnhash.c,1.8,1.9 unicodename_db.h,1.2,1.3 unicodedata.c,2.6,2.7 unicodedatabase.c,2.6,2.7 unicodedatabase.h,2.5,2.6 unicodedata_db.h,1.4,1.5
Fredrik Lundh
effbot@users.sourceforge.net
Sun, 21 Jan 2001 14:41:10 -0800
Update of /cvsroot/python/python/dist/src/Modules
In directory usw-pr-cvs1:/tmp/cvs-serv4331/Modules
Modified Files:
ucnhash.c unicodename_db.h unicodedata.c unicodedatabase.c
unicodedatabase.h unicodedata_db.h
Log Message:
compress unicode decomposition tables (this saves another 55k)
Index: ucnhash.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/ucnhash.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -r1.8 -r1.9
*** ucnhash.c 2001/01/21 17:01:31 1.8
--- ucnhash.c 2001/01/21 22:41:07 1.9
***************
*** 39,43 ****
/* get offset into phrasebook */
offset = phrasebook_offset1[(code>>phrasebook_shift)];
! offset = phrasebook_offset2[(offset<<phrasebook_shift)+
(code&((1<<phrasebook_shift)-1))];
if (!offset)
--- 39,43 ----
/* get offset into phrasebook */
offset = phrasebook_offset1[(code>>phrasebook_shift)];
! offset = phrasebook_offset2[(offset<<phrasebook_shift) +
(code&((1<<phrasebook_shift)-1))];
if (!offset)
***************
*** 48,58 ****
for (;;) {
/* get word index */
! if (phrasebook[offset] & 128) {
! word = phrasebook[offset] & 127;
! offset++;
! } else {
! word = (phrasebook[offset]<<8) + phrasebook[offset+1];
! offset+=2;
! }
if (i) {
if (i > buflen)
--- 48,57 ----
for (;;) {
/* get word index */
! word = phrasebook[offset] - phrasebook_short;
! if (word >= 0) {
! word = (word << 8) + phrasebook[offset+1];
! offset += 2;
! } else
! word = phrasebook[offset++];
if (i) {
if (i > buflen)
Index: unicodename_db.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/unicodename_db.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -r1.2 -r1.3
*** unicodename_db.h 2001/01/21 17:01:31 1.2
--- unicodename_db.h 2001/01/21 22:41:07 1.3
***************
*** 2,5 ****
--- 2,6 ----
#define NAME_MAXLEN 256
+
/* lexicon */
static unsigned char lexicon[] = {
***************
*** 47,1654 ****
77, 66, 69, 210, 68, 73, 65, 69, 82, 69, 83, 73, 83, 128, 69, 81, 85, 65,
204, 71, 82, 65, 86, 69, 128, 78, 85, 77, 69, 82, 65, 204, 84, 72, 65,
[...12959 lines suppressed...]
! 52006, 52012, 52017, 52021, 52026, 0, 0, 52028, 52032, 52037, 52042,
! 52046, 52052, 52057, 52062, 52067, 52072, 52077, 52082, 52087, 52093,
! 52099, 52105, 52113, 52117, 52121, 52125, 52129, 52133, 52137, 52142,
! 52147, 52152, 52157, 52161, 52166, 52171, 52176, 52181, 52185, 52190,
! 52195, 52200, 52204, 52208, 52213, 52218, 52223, 52228, 52232, 52237,
! 52242, 52247, 52252, 52256, 52261, 52266, 52271, 52276, 52280, 52285,
! 52290, 52294, 52299, 52304, 52309, 52314, 52318, 52323, 52330, 52337,
! 52341, 52346, 52351, 52356, 52361, 52366, 52371, 52376, 52381, 52386,
! 52391, 52396, 52401, 52406, 52411, 52416, 52421, 52426, 52431, 52436,
! 52441, 52446, 52451, 52456, 52461, 52466, 52471, 52476, 52481, 52486, 0,
! 0, 0, 52491, 52495, 52500, 52504, 52509, 52514, 0, 0, 52518, 52523,
! 52528, 52532, 52536, 52541, 0, 0, 52546, 52551, 52555, 52560, 52565,
! 52570, 0, 0, 52575, 52580, 52585, 0, 0, 0, 52589, 52593, 52597, 52600,
! 52602, 52606, 52610, 0, 52614, 52620, 52623, 52626, 52629, 52633, 52637,
! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52641, 52647, 52653, 52659, 52665, 0, 0,
};
! /* name->code dictionary */
static unsigned short code_hash[] = {
0, 4851, 0, 0, 0, 0, 7929, 64584, 9518, 64811, 0, 0, 0, 1097, 0, 12064,
Index: unicodedata.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/unicodedata.c,v
retrieving revision 2.6
retrieving revision 2.7
diff -C2 -r2.6 -r2.7
*** unicodedata.c 2000/09/25 08:07:06 2.6
--- unicodedata.c 2001/01/21 22:41:07 2.7
***************
*** 15,23 ****
#include "unicodedatabase.h"
/* --- Module API --------------------------------------------------------- */
static PyObject *
! unicodedata_decimal(PyObject *self,
! PyObject *args)
{
PyUnicodeObject *v;
--- 15,52 ----
#include "unicodedatabase.h"
+ typedef struct {
+ const unsigned char category; /* index into
+ _PyUnicode_CategoryNames */
+ const unsigned char combining; /* combining class value 0 - 255 */
+ const unsigned char bidirectional; /* index into
+ _PyUnicode_BidirectionalNames */
+ const unsigned char mirrored; /* true if mirrored in bidir mode */
+ } _PyUnicode_DatabaseRecord;
+
+ /* data file generated by Tools/unicode/makeunicodedata.py */
+ #include "unicodedata_db.h"
+
+ static const _PyUnicode_DatabaseRecord*
+ getrecord(PyUnicodeObject* v)
+ {
+ int code;
+ int index;
+
+ code = (int) *PyUnicode_AS_UNICODE(v);
+
+ if (code < 0 || code >= 65536)
+ index = 0;
+ else {
+ index = index1[(code>>SHIFT)];
+ index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
+ }
+
+ return &_PyUnicode_Database_Records[index];
+ }
+
/* --- Module API --------------------------------------------------------- */
static PyObject *
! unicodedata_decimal(PyObject *self, PyObject *args)
{
PyUnicodeObject *v;
***************
*** 27,35 ****
if (!PyArg_ParseTuple(args, "O!|O:decimal",
&PyUnicode_Type, &v, &defobj))
! goto onError;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! goto onError;
}
rc = Py_UNICODE_TODECIMAL(*PyUnicode_AS_UNICODE(v));
--- 56,64 ----
if (!PyArg_ParseTuple(args, "O!|O:decimal",
&PyUnicode_Type, &v, &defobj))
! return NULL;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! return NULL;
}
rc = Py_UNICODE_TODECIMAL(*PyUnicode_AS_UNICODE(v));
***************
*** 38,42 ****
PyErr_SetString(PyExc_ValueError,
"not a decimal");
! goto onError;
}
else {
--- 67,71 ----
PyErr_SetString(PyExc_ValueError,
"not a decimal");
! return NULL;
}
else {
***************
*** 46,57 ****
}
return PyInt_FromLong(rc);
-
- onError:
- return NULL;
}
static PyObject *
! unicodedata_digit(PyObject *self,
! PyObject *args)
{
PyUnicodeObject *v;
--- 75,82 ----
}
return PyInt_FromLong(rc);
}
static PyObject *
! unicodedata_digit(PyObject *self, PyObject *args)
{
PyUnicodeObject *v;
***************
*** 61,69 ****
if (!PyArg_ParseTuple(args, "O!|O:digit",
&PyUnicode_Type, &v, &defobj))
! goto onError;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! goto onError;
}
rc = Py_UNICODE_TODIGIT(*PyUnicode_AS_UNICODE(v));
--- 86,94 ----
if (!PyArg_ParseTuple(args, "O!|O:digit",
&PyUnicode_Type, &v, &defobj))
! return NULL;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! return NULL;
}
rc = Py_UNICODE_TODIGIT(*PyUnicode_AS_UNICODE(v));
***************
*** 72,76 ****
PyErr_SetString(PyExc_ValueError,
"not a digit");
! goto onError;
}
else {
--- 97,101 ----
PyErr_SetString(PyExc_ValueError,
"not a digit");
! return NULL;
}
else {
***************
*** 80,91 ****
}
return PyInt_FromLong(rc);
-
- onError:
- return NULL;
}
static PyObject *
! unicodedata_numeric(PyObject *self,
! PyObject *args)
{
PyUnicodeObject *v;
--- 105,112 ----
}
return PyInt_FromLong(rc);
}
static PyObject *
! unicodedata_numeric(PyObject *self, PyObject *args)
{
PyUnicodeObject *v;
***************
*** 95,103 ****
if (!PyArg_ParseTuple(args, "O!|O:numeric",
&PyUnicode_Type, &v, &defobj))
! goto onError;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! goto onError;
}
rc = Py_UNICODE_TONUMERIC(*PyUnicode_AS_UNICODE(v));
--- 116,124 ----
if (!PyArg_ParseTuple(args, "O!|O:numeric",
&PyUnicode_Type, &v, &defobj))
! return NULL;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! return NULL;
}
rc = Py_UNICODE_TONUMERIC(*PyUnicode_AS_UNICODE(v));
***************
*** 106,110 ****
PyErr_SetString(PyExc_ValueError,
"not a numeric character");
! goto onError;
}
else {
--- 127,131 ----
PyErr_SetString(PyExc_ValueError,
"not a numeric character");
! return NULL;
}
else {
***************
*** 114,125 ****
}
return PyFloat_FromDouble(rc);
-
- onError:
- return NULL;
}
static PyObject *
! unicodedata_category(PyObject *self,
! PyObject *args)
{
PyUnicodeObject *v;
--- 135,142 ----
}
return PyFloat_FromDouble(rc);
}
static PyObject *
! unicodedata_category(PyObject *self, PyObject *args)
{
PyUnicodeObject *v;
***************
*** 128,149 ****
if (!PyArg_ParseTuple(args, "O!:category",
&PyUnicode_Type, &v))
! goto onError;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! goto onError;
}
! index = (int) _PyUnicode_Database_GetRecord(
! (int) *PyUnicode_AS_UNICODE(v)
! )->category;
return PyString_FromString(_PyUnicode_CategoryNames[index]);
-
- onError:
- return NULL;
}
static PyObject *
! unicodedata_bidirectional(PyObject *self,
! PyObject *args)
{
PyUnicodeObject *v;
--- 145,160 ----
if (!PyArg_ParseTuple(args, "O!:category",
&PyUnicode_Type, &v))
! return NULL;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! return NULL;
}
! index = (int) getrecord(v)->category;
return PyString_FromString(_PyUnicode_CategoryNames[index]);
}
static PyObject *
! unicodedata_bidirectional(PyObject *self, PyObject *args)
{
PyUnicodeObject *v;
***************
*** 152,240 ****
if (!PyArg_ParseTuple(args, "O!:bidirectional",
&PyUnicode_Type, &v))
! goto onError;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! goto onError;
}
! index = (int) _PyUnicode_Database_GetRecord(
! (int) *PyUnicode_AS_UNICODE(v)
! )->bidirectional;
return PyString_FromString(_PyUnicode_BidirectionalNames[index]);
-
- onError:
- return NULL;
}
static PyObject *
! unicodedata_combining(PyObject *self,
! PyObject *args)
{
PyUnicodeObject *v;
- int value;
if (!PyArg_ParseTuple(args, "O!:combining",
&PyUnicode_Type, &v))
! goto onError;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! goto onError;
}
! value = (int) _PyUnicode_Database_GetRecord(
! (int) *PyUnicode_AS_UNICODE(v)
! )->combining;
! return PyInt_FromLong(value);
!
! onError:
! return NULL;
}
static PyObject *
! unicodedata_mirrored(PyObject *self,
! PyObject *args)
{
PyUnicodeObject *v;
- int value;
if (!PyArg_ParseTuple(args, "O!:mirrored",
&PyUnicode_Type, &v))
! goto onError;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! goto onError;
}
! value = (int) _PyUnicode_Database_GetRecord(
! (int) *PyUnicode_AS_UNICODE(v)
! )->mirrored;
! return PyInt_FromLong(value);
!
! onError:
! return NULL;
}
static PyObject *
! unicodedata_decomposition(PyObject *self,
! PyObject *args)
{
PyUnicodeObject *v;
! const char *value;
if (!PyArg_ParseTuple(args, "O!:decomposition",
&PyUnicode_Type, &v))
! goto onError;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! goto onError;
}
! value = _PyUnicode_Database_GetDecomposition(
! (int) *PyUnicode_AS_UNICODE(v)
! );
! return PyString_FromString(value);
! onError:
! return NULL;
}
--- 163,255 ----
if (!PyArg_ParseTuple(args, "O!:bidirectional",
&PyUnicode_Type, &v))
! return NULL;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! return NULL;
}
! index = (int) getrecord(v)->bidirectional;
return PyString_FromString(_PyUnicode_BidirectionalNames[index]);
}
static PyObject *
! unicodedata_combining(PyObject *self, PyObject *args)
{
PyUnicodeObject *v;
if (!PyArg_ParseTuple(args, "O!:combining",
&PyUnicode_Type, &v))
! return NULL;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! return NULL;
}
! return PyInt_FromLong((int) getrecord(v)->combining);
}
static PyObject *
! unicodedata_mirrored(PyObject *self, PyObject *args)
{
PyUnicodeObject *v;
if (!PyArg_ParseTuple(args, "O!:mirrored",
&PyUnicode_Type, &v))
! return NULL;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! return NULL;
}
! return PyInt_FromLong((int) getrecord(v)->mirrored);
}
static PyObject *
! unicodedata_decomposition(PyObject *self, PyObject *args)
{
PyUnicodeObject *v;
! char decomp[256];
! int code, index, count, i;
if (!PyArg_ParseTuple(args, "O!:decomposition",
&PyUnicode_Type, &v))
! return NULL;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
! return NULL;
! }
!
! code = (int) *PyUnicode_AS_UNICODE(v);
!
! if (code < 0 || code >= 65536)
! index = 0;
! else {
! index = decomp_index1[(code>>DECOMP_SHIFT)];
! index = decomp_index2[(index<<DECOMP_SHIFT)+
! (code&((1<<DECOMP_SHIFT)-1))];
}
!
! /* high byte is the number of hex code values that follow (usually
! one or two), low byte is the prefix code (index into decomp_prefix) */
! count = decomp_data[index] >> 8;
!
! /* XXX: could allocate the PyString up front instead
! (strlen(prefix) + 5 * count + 1 bytes) */
!
! /* copy prefix */
! i = strlen(decomp_prefix[decomp_data[index] & 255]);
! memcpy(decomp, decomp_prefix[decomp_data[index] & 255], i);
!
! while (count-- > 0) {
! if (i)
! decomp[i++] = ' ';
! sprintf(decomp + i, "%04X", decomp_data[++index]);
! i += strlen(decomp + i);
! }
! decomp[i] = '\0';
!
! return PyString_FromString(decomp);
}
Index: unicodedatabase.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/unicodedatabase.c,v
retrieving revision 2.6
retrieving revision 2.7
diff -C2 -r2.6 -r2.7
*** unicodedatabase.c 2000/09/30 17:34:31 2.6
--- unicodedatabase.c 2001/01/21 22:41:07 2.7
***************
*** 1,47 ****
! /* ------------------------------------------------------------------------
!
! unicodedatabase -- The Unicode 3.0 data base.
!
! Data was extracted from the Unicode 3.0 UnicodeData.txt file.
!
! Written by Marc-Andre Lemburg (mal@lemburg.com).
! Rewritten for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
!
! Copyright (c) Corporation for National Research Initiatives.
!
! ------------------------------------------------------------------------ */
!
! #include "Python.h"
! #include "unicodedatabase.h"
!
! /* read the actual data from a separate file! */
! #include "unicodedata_db.h"
!
! const _PyUnicode_DatabaseRecord *
! _PyUnicode_Database_GetRecord(int code)
! {
! int index;
!
! if (code < 0 || code >= 65536)
! index = 0;
! else {
! index = index1[(code>>SHIFT)];
! index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
! }
! return &_PyUnicode_Database_Records[index];
! }
!
! const char *
! _PyUnicode_Database_GetDecomposition(int code)
! {
! int index;
!
! if (code < 0 || code >= 65536)
! index = 0;
! else {
! index = decomp_index1[(code>>DECOMP_SHIFT)];
! index = decomp_index2[(index<<DECOMP_SHIFT)+
! (code&((1<<DECOMP_SHIFT)-1))];
! }
! return decomp_data[index];
! }
--- 1 ----
! /* remove this file! */
Index: unicodedatabase.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/unicodedatabase.h,v
retrieving revision 2.5
retrieving revision 2.6
diff -C2 -r2.5 -r2.6
*** unicodedatabase.h 2000/09/25 08:07:06 2.5
--- unicodedatabase.h 2001/01/21 22:41:08 2.6
***************
*** 1,33 ****
! /* ------------------------------------------------------------------------
!
! unicodedatabase -- The Unicode 3.0 data base.
!
! Data was extracted from the Unicode 3.0 UnicodeData.txt file.
!
! Written by Marc-Andre Lemburg (mal@lemburg.com).
! Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
!
! Copyright (c) Corporation for National Research Initiatives.
!
! ------------------------------------------------------------------------ */
!
! /* --- Unicode database entry --------------------------------------------- */
!
! typedef struct {
! const unsigned char category; /* index into
! _PyUnicode_CategoryNames */
! const unsigned char combining; /* combining class value 0 - 255 */
! const unsigned char bidirectional; /* index into
! _PyUnicode_BidirectionalNames */
! const unsigned char mirrored; /* true if mirrored in bidir mode */
! } _PyUnicode_DatabaseRecord;
!
! /* --- Unicode category names --------------------------------------------- */
!
! extern const char *_PyUnicode_CategoryNames[];
! extern const char *_PyUnicode_BidirectionalNames[];
!
! /* --- Unicode Database --------------------------------------------------- */
!
! extern const _PyUnicode_DatabaseRecord *_PyUnicode_Database_GetRecord(int ch);
! extern const char *_PyUnicode_Database_GetDecomposition(int ch);
--- 1 ----
! /* remove this file! */
Index: unicodedata_db.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/unicodedata_db.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -r1.4 -r1.5
*** unicodedata_db.h 2001/01/21 17:01:31 1.4
--- unicodedata_db.h 2001/01/21 22:41:08 1.5
***************
*** 181,3636 ****
NULL
};
! static const char *decomp_data[] = {
"",
! "<noBreak> 0020",
! "<compat> 0020 0308",
! "<super> 0061",
! "<compat> 0020 0304",
! "<super> 0032",
! "<super> 0033",
[...4939 lines suppressed...]
! 9161, 9163, 9165, 9167, 9169, 9171, 9173, 9175, 9177, 9179, 9181, 9183,
! 9185, 9187, 9189, 9191, 9193, 9195, 9197, 9199, 9201, 9203, 9205, 9207,
! 9209, 9211, 9213, 9215, 9217, 9219, 9221, 9223, 9225, 9227, 9229, 9231,
! 9233, 9235, 9237, 9239, 9241, 9243, 9245, 9247, 9249, 9251, 9253, 9255,
! 9257, 9259, 9261, 9263, 9265, 9267, 9269, 9271, 9273, 9275, 9277, 9279,
! 0, 0, 9281, 9283, 9285, 9287, 9289, 9291, 9293, 9295, 9297, 9299, 9301,
! 9303, 9305, 9307, 9309, 9311, 9313, 9315, 9317, 9319, 9321, 9323, 9325,
! 9327, 9329, 9331, 9333, 9335, 9337, 9339, 9341, 9343, 9345, 9347, 9349,
! 9351, 9353, 9355, 9357, 9359, 9361, 9363, 9365, 9367, 9369, 9371, 9373,
! 9375, 9377, 9379, 9381, 9383, 9385, 9387, 9389, 9391, 9393, 9395, 9397,
! 9399, 9401, 9403, 9405, 9407, 9409, 9411, 9413, 9415, 9417, 9419, 9421,
! 9423, 9425, 9427, 9429, 9431, 9433, 9435, 9437, 9439, 9441, 9443, 9445,
! 9447, 9449, 9451, 9453, 9455, 9457, 9459, 9461, 9463, 9465, 9467, 0, 0,
! 0, 9469, 9471, 9473, 9475, 9477, 9479, 0, 0, 9481, 9483, 9485, 9487,
! 9489, 9491, 0, 0, 9493, 9495, 9497, 9499, 9501, 9503, 0, 0, 9505, 9507,
! 9509, 0, 0, 0, 9511, 9513, 9515, 9517, 9519, 9521, 9523, 0, 9525, 9527,
! 9529, 9531, 9533, 9535, 9537, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
! 0, 0, 0,
};