[Python-checkins] CVS: python/dist/src/Tools/unicode makeunicodedata.py,1.2,1.3
Fredrik Lundh
python-dev@python.org
Mon, 25 Sep 2000 01:07:09 -0700
- Previous message: [Python-checkins] CVS: python/dist/src/Tools/unicode makeunicodedata.py,1.1,1.2
- Next message: [Python-checkins] CVS: python/dist/src/Modules unicodedata_db.h,1.1,1.2 unicodedatabase.h,2.4,2.5 unicodedata.c,2.5,2.6 unicodedatabase.c,2.4,2.5
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/python/python/dist/src/Tools/unicode
In directory slayer.i.sourceforge.net:/tmp/cvs-serv23556/Tools/unicode
Modified Files:
makeunicodedata.py
Log Message:
unicode database compression, step 2:
- fixed attributions
- moved decomposition data to a separate table, in preparation
for step 3 (which won't happen before 2.0 final, promise!)
- use relative paths in the generator script
I have a lot more stuff in the works for 2.1, but let's leave
that for another day...
Index: makeunicodedata.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Tools/unicode/makeunicodedata.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -r1.2 -r1.3
*** makeunicodedata.py 2000/09/25 07:13:41 1.2
--- makeunicodedata.py 2000/09/25 08:07:06 1.3
***************
*** 1,13 ****
#
! # makeunidb.py -- generate a compact version of the unicode property
! # database (unicodedatabase.h)
#
import sys
SCRIPT = sys.argv[0]
! VERSION = "1.0"
! UNICODE_DATA = "c:/pythonware/modules/unidb/etc/UnicodeData-Latest.txt"
CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd",
--- 1,18 ----
#
! # generate a compact version of the unicode property database
#
+ # history:
+ # 2000-09-24 fl created (based on bits and pieces from unidb)
+ # 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table
+ #
+ # written by Fredrik Lundh (fredrik@pythonware.com), September 2000
+ #
import sys
SCRIPT = sys.argv[0]
! VERSION = "1.1"
! UNICODE_DATA = "../UnicodeData-Latest.txt"
CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd",
***************
*** 25,35 ****
# extract unicode properties
! dummy = (0, 0, 0, 0, "NULL")
table = [dummy]
cache = {0: dummy}
index = [0] * len(unicode.chars)
-
- DECOMPOSITION = [""]
for char in unicode.chars:
record = unicode.table[char]
--- 30,39 ----
# extract unicode properties
! dummy = (0, 0, 0, 0)
table = [dummy]
cache = {0: dummy}
index = [0] * len(unicode.chars)
+ # 1) database properties
for char in unicode.chars:
record = unicode.table[char]
***************
*** 40,49 ****
bidirectional = BIDIRECTIONAL_NAMES.index(record[4])
mirrored = record[9] == "Y"
- if record[5]:
- decomposition = '"%s"' % record[5]
- else:
- decomposition = "NULL"
item = (
! category, combining, bidirectional, mirrored, decomposition
)
# add entry to index and item tables
--- 44,49 ----
bidirectional = BIDIRECTIONAL_NAMES.index(record[4])
mirrored = record[9] == "Y"
item = (
! category, combining, bidirectional, mirrored
)
# add entry to index and item tables
***************
*** 54,59 ****
index[char] = i
! # FIXME: we really should compress the decomposition stuff
! # (see the unidb utilities for one way to do this)
FILE = "unicodedata_db.h"
--- 54,77 ----
index[char] = i
! # 2) decomposition data
!
! # FIXME: <fl> using the encoding stuff from unidb would save
! # another 50k or so, but I'll leave that for 2.1...
!
! decomp_data = [""]
! decomp_index = [0] * len(unicode.chars)
!
! for char in unicode.chars:
! record = unicode.table[char]
! if record:
! if record[5]:
! try:
! i = decomp_data.index(record[5])
! except ValueError:
! i = len(decomp_data)
! decomp_data.append(record[5])
! else:
! i = 0
! decomp_index[char] = i
FILE = "unicodedata_db.h"
***************
*** 66,70 ****
print "const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {"
for item in table:
! print " {%d, %d, %d, %d, %s}," % item
print "};"
print
--- 84,88 ----
print "const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {"
for item in table:
! print " {%d, %d, %d, %d}," % item
print "};"
print
***************
*** 83,86 ****
--- 101,110 ----
print "};"
+ print "static const char *decomp_data[] = {"
+ for name in decomp_data:
+ print " \"%s\"," % name
+ print " NULL"
+ print "};"
+
# split index table
index1, index2, shift = splitbins(index)
***************
*** 90,93 ****
--- 114,125 ----
Array("index1", index1).dump(sys.stdout)
Array("index2", index2).dump(sys.stdout)
+
+ # split decomposition index table
+ index1, index2, shift = splitbins(decomp_index)
+
+ print "/* same, for the decomposition data */"
+ print "#define DECOMP_SHIFT", shift
+ Array("decomp_index1", index1).dump(sys.stdout)
+ Array("decomp_index2", index2).dump(sys.stdout)
sys.stdout = sys.__stdout__
- Previous message: [Python-checkins] CVS: python/dist/src/Tools/unicode makeunicodedata.py,1.1,1.2
- Next message: [Python-checkins] CVS: python/dist/src/Modules unicodedata_db.h,1.1,1.2 unicodedatabase.h,2.4,2.5 unicodedata.c,2.5,2.6 unicodedatabase.c,2.4,2.5
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]