[Python-checkins] r86681 - in python/branches/py3k: Lib/test/test_ucn.py Misc/NEWS Modules/unicodedata.c Tools/unicode/makeunicodedata.py

martin.v.loewis python-checkins at python.org
Mon Nov 22 10:00:02 CET 2010


Author: martin.v.loewis
Date: Mon Nov 22 10:00:02 2010
New Revision: 86681
Log:
Issue #10459: Update CJK character names to Unicode 6.0.
Modified:
 python/branches/py3k/Lib/test/test_ucn.py
 python/branches/py3k/Misc/NEWS
 python/branches/py3k/Modules/unicodedata.c
 python/branches/py3k/Tools/unicode/makeunicodedata.py
Modified: python/branches/py3k/Lib/test/test_ucn.py
==============================================================================
--- python/branches/py3k/Lib/test/test_ucn.py	(original)
+++ python/branches/py3k/Lib/test/test_ucn.py	Mon Nov 22 10:00:02 2010
@@ -88,9 +88,13 @@
 self.checkletter("CJK UNIFIED IDEOGRAPH-3400", "\u3400")
 self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", "\u4db5")
 self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", "\u4e00")
- self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", "\u9fa5")
+ self.checkletter("CJK UNIFIED IDEOGRAPH-9FCB", "\u9fCB")
 self.checkletter("CJK UNIFIED IDEOGRAPH-20000", "\U00020000")
 self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", "\U0002a6d6")
+ self.checkletter("CJK UNIFIED IDEOGRAPH-2A700", "\U0002A700")
+ self.checkletter("CJK UNIFIED IDEOGRAPH-2B734", "\U0002B734")
+ self.checkletter("CJK UNIFIED IDEOGRAPH-2B740", "\U0002B740")
+ self.checkletter("CJK UNIFIED IDEOGRAPH-2B81D", "\U0002B81D")
 
 def test_bmp_characters(self):
 import unicodedata
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Mon Nov 22 10:00:02 2010
@@ -32,6 +32,8 @@
 Library
 -------
 
+- Issue #10459: Update CJK character names to Unicode 6.0.
+
 - Issue #4493: urllib.request adds '/' in front of path components which does not
 start with '/. Common behavior exhibited by browsers and other clients.
 
Modified: python/branches/py3k/Modules/unicodedata.c
==============================================================================
--- python/branches/py3k/Modules/unicodedata.c	(original)
+++ python/branches/py3k/Modules/unicodedata.c	Mon Nov 22 10:00:02 2010
@@ -866,13 +866,16 @@
 { 0, 0, "H" }
 };
 
+/* These ranges need to match makeunicodedata.py:cjk_ranges. */
 static int
 is_unified_ideograph(Py_UCS4 code)
 {
- return (
- (0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */
- (0x4E00 <= code && code <= 0x9FBB) || /* CJK Ideograph */
- (0x20000 <= code && code <= 0x2A6D6));/* CJK Ideograph Extension B */
+ return
+ (0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */
+ (0x4E00 <= code && code <= 0x9FCB) || /* CJK Ideograph */
+ (0x20000 <= code && code <= 0x2A6D6) || /* CJK Ideograph Extension B */
+ (0x2A700 <= code && code <= 0x2B734) || /* CJK Ideograph Extension C */
+ (0x2B740 <= code && code <= 0x2B81D); /* CJK Ideograph Extension D */
 }
 
 static int
Modified: python/branches/py3k/Tools/unicode/makeunicodedata.py
==============================================================================
--- python/branches/py3k/Tools/unicode/makeunicodedata.py	(original)
+++ python/branches/py3k/Tools/unicode/makeunicodedata.py	Mon Nov 22 10:00:02 2010
@@ -70,6 +70,15 @@
 NODELTA_MASK = 0x800
 NUMERIC_MASK = 0x1000
 
+# these ranges need to match unicodedata.c:is_unified_ideograph
+cjk_ranges = [
+ ('3400', '4DB5'),
+ ('4E00', '9FCB'),
+ ('20000', '2A6D6'),
+ ('2A700', '2B734'),
+ ('2B740', '2B81D')
+]
+
 def maketables(trace=0):
 
 print("--- Reading", UNICODE_DATA % "", "...")
@@ -81,7 +90,7 @@
 
 for version in old_versions:
 print("--- Reading", UNICODE_DATA % ("-"+version), "...")
- old_unicode = UnicodeData(version)
+ old_unicode = UnicodeData(version, cjk_check=False)
 print(len(list(filter(None, old_unicode.table))), "characters")
 merge_old_version(version, unicode, old_unicode)
 
@@ -804,7 +813,8 @@
 
 def __init__(self, version,
 linebreakprops=False,
- expand=1):
+ expand=1,
+ cjk_check=True):
 self.changed = []
 file = open_data(UNICODE_DATA, version)
 table = [None] * 0x110000
@@ -816,6 +826,8 @@
 char = int(s[0], 16)
 table[char] = s
 
+ cjk_ranges_found = []
+
 # expand first-last ranges
 if expand:
 field = None
@@ -826,12 +838,17 @@
 s[1] = ""
 field = s
 elif s[1][-5:] == "Last>":
+ if s[1].startswith("<CJK Ideograph"):
+ cjk_ranges_found.append((field[0],
+ s[0]))
 s[1] = ""
 field = None
 elif field:
 f2 = field[:]
 f2[0] = "%X" % i
 table[i] = f2
+ if cjk_check and cjk_ranges != cjk_ranges_found:
+ raise ValueError("CJK ranges deviate: have %r" % cjk_ranges_found)
 
 # public attributes
 self.filename = UNICODE_DATA % ''


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /