[Python-checkins] CVS: python/dist/src/Objects unicodeobject.c,2.41,2.42

Fredrik Lundh python-dev@python.org
Mon, 10 Jul 2000 11:27:50 -0700


Update of /cvsroot/python/python/dist/src/Objects
In directory slayer.i.sourceforge.net:/tmp/cvs-serv20359/objects
Modified Files:
	unicodeobject.c 
Log Message:
- Changed the hash calculation for Unicode strings. The new
 value is calculated from the character values, in a way
 that makes sure an 8-bit ASCII string and a Unicode string
 with the same contents get the same hash value.
 (As a side effect, this also works for ISO Latin 1 strings.)
 For more details, see the python-dev discussion.
Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.41
retrieving revision 2.42
diff -C2 -r2.41 -r2.42
*** unicodeobject.c	2000/07/07 17:51:08	2.41
--- unicodeobject.c	2000/07/10 18:27:47	2.42
***************
*** 3472,3495 ****
 unicode_hash(PyUnicodeObject *self)
 {
! long hash;
! PyObject *utf8;
 
! /* Since Unicode objects compare equal to their UTF-8 string
! counterparts, they should also use the UTF-8 strings as basis
! for their hash value. This is needed to assure that strings and
! Unicode objects behave in the same way as dictionary
! keys. Unfortunately, this costs some performance and also some
! memory if the cached UTF-8 representation is not used later
! on. */
 if (self->hash != -1)
 	return self->hash;
! utf8 = _PyUnicode_AsUTF8String((PyObject *)self, NULL);
! if (utf8 == NULL)
! 	return -1;
! hash = PyObject_Hash(utf8);
! if (hash == -1)
! 	return -1;
! self->hash = hash;
! return hash;
 }
 
--- 3472,3497 ----
 unicode_hash(PyUnicodeObject *self)
 {
! /* Since Unicode objects compare equal to their ASCII string
! counterparts, they should use the individual character values
! as basis for their hash value. This is needed to assure that
! strings and Unicode objects behave in the same way as
! dictionary keys. */
 
! register int len;
! register Py_UNICODE *p;
! register long x;
! 
 if (self->hash != -1)
 	return self->hash;
! len = PyUnicode_GET_SIZE(self);
! p = PyUnicode_AS_UNICODE(self);
! x = *p << 7;
! while (--len >= 0)
! 	x = (1000003*x) ^ *p++;
! x ^= PyUnicode_GET_SIZE(self);
! if (x == -1)
! 	x = -2;
! self->hash = x;
! return x;
 }
 

AltStyle によって変換されたページ (->オリジナル) /