[Python-checkins] CVS: python/dist/src/Tools/unicode makeunicodedata.py,1.7,1.8

Fredrik Lundh python-dev@python.org
Fri, 3 Nov 2000 12:24:17 -0800


Update of /cvsroot/python/python/dist/src/Tools/unicode
In directory slayer.i.sourceforge.net:/tmp/cvs-serv25791/tools/unicode

Modified Files:
	makeunicodedata.py 
Log Message:


Added 38,642 missing characters to the Unicode database by expanding
the first-last ranges -- but thanks to the 2.0 compression scheme, this
doesn't add a single byte to the resulting binaries (!)

Closes bug #117524 

Index: makeunicodedata.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Tools/unicode/makeunicodedata.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -r1.7 -r1.8
*** makeunicodedata.py	2000/10/26 03:56:46	1.7
--- makeunicodedata.py	2000/11/03 20:24:15	1.8
***************
*** 10,13 ****
--- 10,14 ----
  # 2000-09-25 fl   added character type table
  # 2000-09-26 fl   added LINEBREAK, DECIMAL, and DIGIT flags/fields
+ # 2000-11-03 fl   expand first/last ranges
  #
  # written by Fredrik Lundh (fredrik@pythonware.com), September 2000
***************
*** 40,47 ****
  UPPER_MASK = 0x80
  
! def maketables():
  
      unicode = UnicodeData(UNICODE_DATA)
  
      # extract unicode properties
      dummy = (0, 0, 0, 0)
--- 41,51 ----
  UPPER_MASK = 0x80
  
! def maketables(trace=0):
  
      unicode = UnicodeData(UNICODE_DATA)
  
+     print "--- Processing", UNICODE_DATA, "..."
+     print len(filter(None, unicode.table)), "characters"
+ 
      # extract unicode properties
      dummy = (0, 0, 0, 0)
***************
*** 92,95 ****
--- 96,104 ----
      FILE = "Modules/unicodedata_db.h"
  
+     print "--- Writing", FILE, "..."
+ 
+     print len(table), "unique properties"
+     print len(decomp_data), "unique decomposition entries"
+ 
      fp = open(FILE, "w")
      print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
***************
*** 126,130 ****
  
      # split record index table
!     index1, index2, shift = splitbins(index)
  
      print >>fp, "/* index tables for the database records */"
--- 135,139 ----
  
      # split record index table
!     index1, index2, shift = splitbins(index, trace)
  
      print >>fp, "/* index tables for the database records */"
***************
*** 134,138 ****
  
      # split decomposition index table
!     index1, index2, shift = splitbins(decomp_index)
  
      print >>fp, "/* index tables for the decomposition data */"
--- 143,147 ----
  
      # split decomposition index table
!     index1, index2, shift = splitbins(decomp_index, trace)
  
      print >>fp, "/* index tables for the decomposition data */"
***************
*** 201,210 ****
              index[char] = i
  
-     print len(table), "ctype entries"
- 
      FILE = "Objects/unicodetype_db.h"
  
      fp = open(FILE, "w")
  
      print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
      print >>fp
--- 210,221 ----
              index[char] = i
  
      FILE = "Objects/unicodetype_db.h"
  
      fp = open(FILE, "w")
  
+     print "--- Writing", FILE, "..."
+ 
+     print len(table), "unique character type entries"
+ 
      print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
      print >>fp
***************
*** 217,221 ****
  
      # split decomposition index table
!     index1, index2, shift = splitbins(index)
  
      print >>fp, "/* type indexes */"
--- 228,232 ----
  
      # split decomposition index table
!     index1, index2, shift = splitbins(index, trace)
  
      print >>fp, "/* type indexes */"
***************
*** 234,238 ****
  class UnicodeData:
  
!     def __init__(self, filename):
          file = open(filename)
          table = [None] * 65536
--- 245,249 ----
  class UnicodeData:
  
!     def __init__(self, filename, expand=1):
          file = open(filename)
          table = [None] * 65536
***************
*** 245,248 ****
--- 256,275 ----
              table[char] = s
  
+         # expand first-last ranges (ignore surrogates and private use)
+         if expand:
+             field = None
+             for i in range(0, 0xD800):
+                 s = table[i]
+                 if s:
+                     if s[1][-6:] == "First>":
+                         s[1] = ""
+                         field = s[:]
+                     elif s[1][-5:] == "Last>":
+                         s[1] = ""
+                         field = None
+                 elif field:
+                     field[0] = hex(i)
+                     table[i] = field
+ 
          # public attributes
          self.filename = filename
***************
*** 307,312 ****
      where mask is a bitmask isolating the last "shift" bits.
  
!     If optional arg trace is true (default false), progress info is
!     printed to sys.stderr.
      """
  
--- 334,340 ----
      where mask is a bitmask isolating the last "shift" bits.
  
!     If optional arg trace is non-zero (default zero), progress info
!     is printed to sys.stderr.  The higher the value, the more info
!     you'll get.
      """
  
***************
*** 342,346 ****
          # determine memory size
          b = len(t1)*getsize(t1) + len(t2)*getsize(t2)
!         if trace:
              dump(t1, t2, shift, b)
          if b < bytes:
--- 370,374 ----
          # determine memory size
          b = len(t1)*getsize(t1) + len(t2)*getsize(t2)
!         if trace > 1:
              dump(t1, t2, shift, b)
          if b < bytes:
***************
*** 359,361 ****
  
  if __name__ == "__main__":
!     maketables()
--- 387,389 ----
  
  if __name__ == "__main__":
!     maketables(1)