[Python-checkins] cpython: _PyUnicode_CheckConsistency() also checks maxchar maximum value, not only its minimum value

victor.stinner python-checkins at python.org
Sun Nov 20 22:53:24 CET 2011


http://hg.python.org/cpython/rev/26af48f65ef3
changeset: 73638:26af48f65ef3
user: Victor Stinner <victor.stinner at haypocalc.com>
date: Sun Nov 20 18:56:05 2011 +0100
summary:
 _PyUnicode_CheckConsistency() also checks maxchar maximum value,
not only its minimum value
files:
 Include/unicodeobject.h | 13 ++++++++-----
 Objects/unicodeobject.c | 12 +++++++++---
 2 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -303,19 +303,22 @@
 - PyUnicode_1BYTE_KIND (1):
 
 * character type = Py_UCS1 (8 bits, unsigned)
- * if ascii is set, all characters must be in range
- U+0000-U+007F, otherwise at least one character must be in range
+ * all characters are in the range U+0000-U+00FF (latin1)
+ * if ascii is set, all characters are in the range U+0000-U+007F
+ (ASCII), otherwise at least one character is in the range
 U+0080-U+00FF
 
 - PyUnicode_2BYTE_KIND (2):
 
 * character type = Py_UCS2 (16 bits, unsigned)
- * at least one character must be in range U+0100-U+FFFF
+ * all characters are in the range U+0000-U+FFFF (BMP)
+ * at least one character is in the range U+0100-U+FFFF
 
 - PyUnicode_4BYTE_KIND (4):
 
 * character type = Py_UCS4 (32 bits, unsigned)
- * at least one character must be in range U+10000-U+10FFFF
+ * all characters are in the range U+0000-U+10FFFF
+ * at least one character is in the range U+10000-U+10FFFF
 */
 unsigned int kind:3;
 /* Compact is with respect to the allocation scheme. Compact unicode
@@ -323,7 +326,7 @@
 one block for the PyUnicodeObject struct and another for its data
 buffer. */
 unsigned int compact:1;
- /* The string only contains characters in range U+0000-U+007F (ASCII)
+ /* The string only contains characters in the range U+0000-U+007F (ASCII)
 and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
 set, use the PyASCIIObject structure. */
 unsigned int ascii:1;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -392,15 +392,21 @@
 maxchar = ch;
 }
 if (kind == PyUnicode_1BYTE_KIND) {
- if (ascii->state.ascii == 0)
+ if (ascii->state.ascii == 0) {
 assert(maxchar >= 128);
+ assert(maxchar <= 255);
+ }
 else
 assert(maxchar < 128);
 }
- else if (kind == PyUnicode_2BYTE_KIND)
+ else if (kind == PyUnicode_2BYTE_KIND) {
 assert(maxchar >= 0x100);
- else
+ assert(maxchar <= 0xFFFF);
+ }
+ else {
 assert(maxchar >= 0x10000);
+ assert(maxchar <= 0x10FFFF);
+ }
 }
 if (check_content && !unicode_is_singleton(op))
 assert(ascii->hash == -1);
-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /