[Python-checkins] CVS: python/dist/src/Tools/unicode makeunicodedata.py,1.7,1.8

Fredrik Lundh python-dev@python.org
Fri, 3 Nov 2000 12:24:17 -0800


Update of /cvsroot/python/python/dist/src/Tools/unicode
In directory slayer.i.sourceforge.net:/tmp/cvs-serv25791/tools/unicode
Modified Files:
	makeunicodedata.py 
Log Message:
Added 38,642 missing characters to the Unicode database (first-last
ranges) -- but thanks to the 2.0 compression scheme, this doesn't add
a single byte to the resulting binaries (!)
Closes bug #117524 
Index: makeunicodedata.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Tools/unicode/makeunicodedata.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -r1.7 -r1.8
*** makeunicodedata.py	2000/10/26 03:56:46	1.7
--- makeunicodedata.py	2000/11/03 20:24:15	1.8
***************
*** 10,13 ****
--- 10,14 ----
 # 2000-09-25 fl added character type table
 # 2000-09-26 fl added LINEBREAK, DECIMAL, and DIGIT flags/fields
+ # 2000-11-03 fl expand first/last ranges
 #
 # written by Fredrik Lundh (fredrik@pythonware.com), September 2000
***************
*** 40,47 ****
 UPPER_MASK = 0x80
 
! def maketables():
 
 unicode = UnicodeData(UNICODE_DATA)
 
 # extract unicode properties
 dummy = (0, 0, 0, 0)
--- 41,51 ----
 UPPER_MASK = 0x80
 
! def maketables(trace=0):
 
 unicode = UnicodeData(UNICODE_DATA)
 
+ print "--- Processing", UNICODE_DATA, "..."
+ print len(filter(None, unicode.table)), "characters"
+ 
 # extract unicode properties
 dummy = (0, 0, 0, 0)
***************
*** 92,95 ****
--- 96,104 ----
 FILE = "Modules/unicodedata_db.h"
 
+ print "--- Writing", FILE, "..."
+ 
+ print len(table), "unique properties"
+ print len(decomp_data), "unique decomposition entries"
+ 
 fp = open(FILE, "w")
 print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
***************
*** 126,130 ****
 
 # split record index table
! index1, index2, shift = splitbins(index)
 
 print >>fp, "/* index tables for the database records */"
--- 135,139 ----
 
 # split record index table
! index1, index2, shift = splitbins(index, trace)
 
 print >>fp, "/* index tables for the database records */"
***************
*** 134,138 ****
 
 # split decomposition index table
! index1, index2, shift = splitbins(decomp_index)
 
 print >>fp, "/* index tables for the decomposition data */"
--- 143,147 ----
 
 # split decomposition index table
! index1, index2, shift = splitbins(decomp_index, trace)
 
 print >>fp, "/* index tables for the decomposition data */"
***************
*** 201,210 ****
 index[char] = i
 
- print len(table), "ctype entries"
- 
 FILE = "Objects/unicodetype_db.h"
 
 fp = open(FILE, "w")
 
 print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
 print >>fp
--- 210,221 ----
 index[char] = i
 
 FILE = "Objects/unicodetype_db.h"
 
 fp = open(FILE, "w")
 
+ print "--- Writing", FILE, "..."
+ 
+ print len(table), "unique character type entries"
+ 
 print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
 print >>fp
***************
*** 217,221 ****
 
 # split decomposition index table
! index1, index2, shift = splitbins(index)
 
 print >>fp, "/* type indexes */"
--- 228,232 ----
 
 # split decomposition index table
! index1, index2, shift = splitbins(index, trace)
 
 print >>fp, "/* type indexes */"
***************
*** 234,238 ****
 class UnicodeData:
 
! def __init__(self, filename):
 file = open(filename)
 table = [None] * 65536
--- 245,249 ----
 class UnicodeData:
 
! def __init__(self, filename, expand=1):
 file = open(filename)
 table = [None] * 65536
***************
*** 245,248 ****
--- 256,275 ----
 table[char] = s
 
+ # expand first-last ranges (ignore surrogates and private use)
+ if expand:
+ field = None
+ for i in range(0, 0xD800):
+ s = table[i]
+ if s:
+ if s[1][-6:] == "First>":
+ s[1] = ""
+ field = s[:]
+ elif s[1][-5:] == "Last>":
+ s[1] = ""
+ field = None
+ elif field:
+ field[0] = hex(i)
+ table[i] = field
+ 
 # public attributes
 self.filename = filename
***************
*** 307,312 ****
 where mask is a bitmask isolating the last "shift" bits.
 
! If optional arg trace is true (default false), progress info is
! printed to sys.stderr.
 """
 
--- 334,340 ----
 where mask is a bitmask isolating the last "shift" bits.
 
! If optional arg trace is non-zero (default zero), progress info
! is printed to sys.stderr. The higher the value, the more info
! you'll get.
 """
 
***************
*** 342,346 ****
 # determine memory size
 b = len(t1)*getsize(t1) + len(t2)*getsize(t2)
! if trace:
 dump(t1, t2, shift, b)
 if b < bytes:
--- 370,374 ----
 # determine memory size
 b = len(t1)*getsize(t1) + len(t2)*getsize(t2)
! if trace > 1:
 dump(t1, t2, shift, b)
 if b < bytes:
***************
*** 359,361 ****
 
 if __name__ == "__main__":
! maketables()
--- 387,389 ----
 
 if __name__ == "__main__":
! maketables(1)

AltStyle によって変換されたページ (->オリジナル) /