[Python-checkins] python/dist/src/Objects unicodeobject.c,2.141,2.142

loewis@sourceforge.net loewis@sourceforge.net
2002年4月21日 02:59:48 -0700


Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv22440
Modified Files:
	unicodeobject.c 
Log Message:
Back out 2.140.
Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.141
retrieving revision 2.142
diff -C2 -d -r2.141 -r2.142
*** unicodeobject.c	21 Apr 2002 03:26:37 -0000	2.141
--- unicodeobject.c	21 Apr 2002 09:59:45 -0000	2.142
***************
*** 1173,1176 ****
--- 1173,1182 ----
 #endif
 
+ /* Allocation strategy: we default to Latin-1, then do one resize
+ whenever we hit an order boundary. The assumption is that
+ characters from higher orders usually occur often enough to warrant
+ this.
+ */
+ 
 PyObject *
 PyUnicode_EncodeUTF8(const Py_UNICODE *s,
***************
*** 1180,1222 ****
 PyObject *v;
 char *p;
! int allocated = 0;
! int i;
! 
 /* Short-cut for emtpy strings */
 if (size == 0)
 	return PyString_FromStringAndSize(NULL, 0);
 
! for (i = 0; i < size; ) {
! Py_UCS4 ch = s[i++];
! if (ch < 0x80)
! 	 allocated += 1;
! else if (ch < 0x0800)
! allocated += 2;
! else if (ch < 0x10000) {
! /* Check for high surrogate */
! if (0xD800 <= ch && ch <= 0xDBFF &&
! i != size && 
! 		0xDC00 <= s[i] && s[i] <= 0xDFFF) {
! 		allocated += 1;
! 		i++;
! 	 }
! 	 allocated += 3;
! } else
! allocated += 4;
! }
! 
! v = PyString_FromStringAndSize(NULL, allocated);
 if (v == NULL)
 return NULL;
 
 p = PyString_AS_STRING(v);
! for (i = 0; i < size; ) {
 Py_UCS4 ch = s[i++];
 
! if (ch < 0x80) {
 *p++ = (char) ch;
- }
 
 else if (ch < 0x0800) {
 *p++ = (char)(0xc0 | (ch >> 6));
 *p++ = (char)(0x80 | (ch & 0x3f));
--- 1186,1212 ----
 PyObject *v;
 char *p;
! int i = 0;
! int overalloc = 2;
! int len;
! 
 /* Short-cut for emtpy strings */
 if (size == 0)
 	return PyString_FromStringAndSize(NULL, 0);
 
! v = PyString_FromStringAndSize(NULL, overalloc * size);
 if (v == NULL)
 return NULL;
 
 p = PyString_AS_STRING(v);
! 
! while (i < size) {
 Py_UCS4 ch = s[i++];
 
! if (ch < 0x80)
! 	 /* Encode ASCII */
 *p++ = (char) ch;
 
 else if (ch < 0x0800) {
+ 	 /* Encode Latin-1 */
 *p++ = (char)(0xc0 | (ch >> 6));
 *p++ = (char)(0x80 | (ch & 0x3f));
***************
*** 1224,1258 ****
 
 else {
! 	 
 	 if (ch < 0x10000) {
! 		/* Check for high surrogate */
 		if (0xD800 <= ch && ch <= 0xDBFF && i != size) {
 		 Py_UCS4 ch2 = s[i];
! 		 /* Check for low surrogate */
 		 if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
! ch = ((ch - 0xD800)<<10 | (ch2-0xDC00))+0x10000;
! *p++ = (char)((ch >> 18) | 0xf0);
! *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
! 			*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
! 			*p++ = (char)(0x80 | (ch & 0x3f));
! i++;
! 			continue;
 }
 		 /* Fall through: handles isolated high surrogates */
 }
 *p++ = (char)(0xe0 | (ch >> 12));
 		*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
 		*p++ = (char)(0x80 | (ch & 0x3f));
! 
! 	 } else {
! 		*p++ = (char)(0xf0 | (ch>>18));
! 		*p++ = (char)(0x80 | ((ch>>12) & 0x3f));
! 		*p++ = (char)(0x80 | ((ch>>6) & 0x3f));
! 		*p++ = (char)(0x80 | (ch & 0x3f));
 	 }
 	}
 }
! assert(p - PyString_AS_STRING(v) == allocated);
 return v;
 }
 
--- 1214,1270 ----
 
 else {
! 	 /* Encode UCS2 Unicode ordinals */
 	 if (ch < 0x10000) {
! 
! 		/* Special case: check for high surrogate */
 		if (0xD800 <= ch && ch <= 0xDBFF && i != size) {
 		 Py_UCS4 ch2 = s[i];
! 		 /* Check for low surrogate and combine the two to
! 		 form a UCS4 value */
 		 if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
! ch = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000;
! 			i++;
! 			goto encodeUCS4;
 }
 		 /* Fall through: handles isolated high surrogates */
 }
+ 
+ 		if (overalloc < 3) {
+ 		 len = (int)(p - PyString_AS_STRING(v));
+ 		 overalloc = 3;
+ 		 if (_PyString_Resize(&v, overalloc * size))
+ 			goto onError;
+ 		 p = PyString_AS_STRING(v) + len;
+ 		}
 *p++ = (char)(0xe0 | (ch >> 12));
 		*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
 		*p++ = (char)(0x80 | (ch & 0x3f));
! 		continue;
! 	 }
! 
! 	 /* Encode UCS4 Unicode ordinals */
! 	encodeUCS4:
! 	 if (overalloc < 4) {
! 		len = (int)(p - PyString_AS_STRING(v));
! 		overalloc = 4;
! 		if (_PyString_Resize(&v, overalloc * size))
! 		 goto onError;
! 		p = PyString_AS_STRING(v) + len;
 	 }
+ 	 *p++ = (char)(0xf0 | (ch >> 18));
+ 	 *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
+ 	 *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
+ 	 *p++ = (char)(0x80 | (ch & 0x3f));
 	}
 }
! *p = '\0';
! assert((p - PyString_AS_STRING(v)) <= overalloc*size);
! if (_PyString_Resize(&v, (int)(p - PyString_AS_STRING(v))))
! 	goto onError;
 return v;
+ 
+ onError:
+ Py_DECREF(v);
+ return NULL;
 }
 

AltStyle によって変換されたページ (->オリジナル) /