[Python-checkins] CVS: python/dist/src/Tools/unicode makeunicodedata.py,1.7,1.8
Fredrik Lundh
python-dev@python.org
Fri, 3 Nov 2000 12:24:17 -0800
Update of /cvsroot/python/python/dist/src/Tools/unicode
In directory slayer.i.sourceforge.net:/tmp/cvs-serv25791/tools/unicode
Modified Files:
makeunicodedata.py
Log Message:
Added 38,642 missing characters to the Unicode database by expanding its
first-last range entries -- but thanks to the 2.0 compression scheme, this
doesn't add a single byte to the resulting binaries (!)
Closes bug #117524
Index: makeunicodedata.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Tools/unicode/makeunicodedata.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -r1.7 -r1.8
*** makeunicodedata.py 2000/10/26 03:56:46 1.7
--- makeunicodedata.py 2000/11/03 20:24:15 1.8
***************
*** 10,13 ****
--- 10,14 ----
# 2000-09-25 fl added character type table
# 2000-09-26 fl added LINEBREAK, DECIMAL, and DIGIT flags/fields
+ # 2000-11-03 fl expand first/last ranges
#
# written by Fredrik Lundh (fredrik@pythonware.com), September 2000
***************
*** 40,47 ****
UPPER_MASK = 0x80
! def maketables():
unicode = UnicodeData(UNICODE_DATA)
# extract unicode properties
dummy = (0, 0, 0, 0)
--- 41,51 ----
UPPER_MASK = 0x80
! def maketables(trace=0):
unicode = UnicodeData(UNICODE_DATA)
+ print "--- Processing", UNICODE_DATA, "..."
+ print len(filter(None, unicode.table)), "characters"
+
# extract unicode properties
dummy = (0, 0, 0, 0)
***************
*** 92,95 ****
--- 96,104 ----
FILE = "Modules/unicodedata_db.h"
+ print "--- Writing", FILE, "..."
+
+ print len(table), "unique properties"
+ print len(decomp_data), "unique decomposition entries"
+
fp = open(FILE, "w")
print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
***************
*** 126,130 ****
# split record index table
! index1, index2, shift = splitbins(index)
print >>fp, "/* index tables for the database records */"
--- 135,139 ----
# split record index table
! index1, index2, shift = splitbins(index, trace)
print >>fp, "/* index tables for the database records */"
***************
*** 134,138 ****
# split decomposition index table
! index1, index2, shift = splitbins(decomp_index)
print >>fp, "/* index tables for the decomposition data */"
--- 143,147 ----
# split decomposition index table
! index1, index2, shift = splitbins(decomp_index, trace)
print >>fp, "/* index tables for the decomposition data */"
***************
*** 201,210 ****
index[char] = i
- print len(table), "ctype entries"
-
FILE = "Objects/unicodetype_db.h"
fp = open(FILE, "w")
print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
print >>fp
--- 210,221 ----
index[char] = i
FILE = "Objects/unicodetype_db.h"
fp = open(FILE, "w")
+ print "--- Writing", FILE, "..."
+
+ print len(table), "unique character type entries"
+
print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
print >>fp
***************
*** 217,221 ****
# split decomposition index table
! index1, index2, shift = splitbins(index)
print >>fp, "/* type indexes */"
--- 228,232 ----
# split decomposition index table
! index1, index2, shift = splitbins(index, trace)
print >>fp, "/* type indexes */"
***************
*** 234,238 ****
class UnicodeData:
! def __init__(self, filename):
file = open(filename)
table = [None] * 65536
--- 245,249 ----
class UnicodeData:
! def __init__(self, filename, expand=1):
file = open(filename)
table = [None] * 65536
***************
*** 245,248 ****
--- 256,275 ----
table[char] = s
+ # expand first-last ranges (ignore surrogates and private use)
+ if expand:
+ field = None
+ for i in range(0, 0xD800):
+ s = table[i]
+ if s:
+ if s[1][-6:] == "First>":
+ s[1] = ""
+ field = s[:]
+ elif s[1][-5:] == "Last>":
+ s[1] = ""
+ field = None
+ elif field:
+ field[0] = hex(i)
+ table[i] = field
+
# public attributes
self.filename = filename
***************
*** 307,312 ****
where mask is a bitmask isolating the last "shift" bits.
! If optional arg trace is true (default false), progress info is
! printed to sys.stderr.
"""
--- 334,340 ----
where mask is a bitmask isolating the last "shift" bits.
! If optional arg trace is non-zero (default zero), progress info
! is printed to sys.stderr. The higher the value, the more info
! you'll get.
"""
***************
*** 342,346 ****
# determine memory size
b = len(t1)*getsize(t1) + len(t2)*getsize(t2)
! if trace:
dump(t1, t2, shift, b)
if b < bytes:
--- 370,374 ----
# determine memory size
b = len(t1)*getsize(t1) + len(t2)*getsize(t2)
! if trace > 1:
dump(t1, t2, shift, b)
if b < bytes:
***************
*** 359,361 ****
if __name__ == "__main__":
! maketables()
--- 387,389 ----
if __name__ == "__main__":
! maketables(1)