[Python-checkins] CVS: python/dist/src/Objects abstract.c,2.33,2.34 floatobject.c,2.55,2.56 intobject.c,2.39,2.40 longobject.c,1.54,1.55 unicodeobject.c,2.5,2.6

Guido van Rossum python-dev@python.org
Wed, 5 Apr 2000 16:11:24 -0400 (EDT)


Update of /projects/cvsroot/python/dist/src/Objects
In directory eric:/home/guido/hp/mal/py-patched/Objects
Modified Files:
	abstract.c floatobject.c intobject.c longobject.c 
	unicodeobject.c 
Log Message:
Marc-Andre's third try at this bulk patch seems to work (except that
his copy of test_contains.py seems to be broken -- the lines he
deleted were already absent). Checkin messages:
New Unicode support for int(), float(), complex() and long().
- new APIs PyInt_FromUnicode() and PyLong_FromUnicode()
- added support for Unicode to PyFloat_FromString()
- new encoding API PyUnicode_EncodeDecimal() which converts
 Unicode to a decimal char* string (used in the above new
 APIs)
- shortcuts for calls like int(<int object>) and float(<float obj>)
- tests for all of the above
Unicode compares and contains checks:
- comparing Unicode and non-string types now works; TypeErrors
 are masked, all other errors such as ValueError during
 Unicode coercion are passed through (note that PyUnicode_Compare
 does not implement the masking -- PyObject_Compare does this)
- contains now works for non-string types too; TypeErrors are
 masked and 0 returned; all other errors are passed through
Better testing support for the standard codecs.
Misc minor enhancements, such as an alias dbcs for the mbcs codec.
Changes:
- PyLong_FromString() now applies the same error checks as
 does PyInt_FromString(): trailing garbage is reported
 as an error and no longer silently ignored. The only characters
 which may be trailing the digits are 'L' and 'l' -- these
 are still silently ignored.
- string.ato?() now directly interface to int(), long() and
 float(). The error strings are now a little different, but
 the type still remains the same. These functions are now
 ready to get declared obsolete ;-)
- PyNumber_Int() now also does a check for embedded NULL chars
 in the input string; PyNumber_Long() already did this (and
 still does)
Followed by:
Looks like I've gone a step too far there... (and test_contains.py
seems to have a bug too).
I've changed back to reporting all errors in PyUnicode_Contains()
and added a few more test cases to test_contains.py (plus corrected
the join() NameError).
Index: abstract.c
===================================================================
RCS file: /projects/cvsroot/python/dist/src/Objects/abstract.c,v
retrieving revision 2.33
retrieving revision 2.34
diff -C2 -r2.33 -r2.34
*** abstract.c	2000/03/10 22:55:18	2.33
--- abstract.c	2000/04/05 20:11:20	2.34
***************
*** 727,730 ****
--- 727,751 ----
 }
 
+ /* Add a check for embedded NULL-bytes in the argument. */
+ static PyObject *
+ int_from_string(s, len)
+ 	const char *s;
+ 	int len;
+ {
+ 	char *end;
+ 	PyObject *x;
+ 
+ 	x = PyInt_FromString((char*)s, &end, 10);
+ 	if (x == NULL)
+ 		return NULL;
+ 	if (end != s + len) {
+ 		PyErr_SetString(PyExc_ValueError,
+ 				"null byte in argument for int()");
+ 		Py_DECREF(x);
+ 		return NULL;
+ 	}
+ 	return x;
+ }
+ 
 PyObject *
 PyNumber_Int(o)
***************
*** 737,772 ****
 	if (o == NULL)
 		return null_error();
 	if (PyString_Check(o))
! 		return PyInt_FromString(PyString_AS_STRING(o), NULL, 10);
 	m = o->ob_type->tp_as_number;
 	if (m && m->nb_int)
 		return m->nb_int(o);
 	if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len))
! 		return PyInt_FromString((char*)buffer, NULL, 10);
 
 	return type_error("object can't be converted to int");
 }
 
! /* There are two C API functions for converting a string to a long,
! * PyNumber_Long() and PyLong_FromString(). Both are used in builtin_long, 
! * reachable from Python with the built-in function long().
! *
! * The difference is this: PyNumber_Long will raise an exception when the
! * string cannot be converted to a long. The most common situation is
! * where a float string is passed in; this raises a ValueError.
! * PyLong_FromString does not raise an exception; it silently truncates the 
! * float to an integer.
! *
! * You can see the different behavior from Python with the following:
! *
! * long('9.5')
! * => ValueError: invalid literal for long(): 9.5
! *
! * long('9.5', 10)
! * => 9L
! *
! * The first example ends up calling PyNumber_Long(), while the second one
! * calls PyLong_FromString().
! */
 static PyObject *
 long_from_string(s, len)
--- 758,782 ----
 	if (o == NULL)
 		return null_error();
+ 	if (PyInt_Check(o)) {
+ 		Py_INCREF(o);
+ 		return o;
+ 	}
 	if (PyString_Check(o))
! 		return int_from_string(PyString_AS_STRING(o), 
! 				 PyString_GET_SIZE(o));
! 	if (PyUnicode_Check(o))
! 		return PyInt_FromUnicode(PyUnicode_AS_UNICODE(o),
! 					 PyUnicode_GET_SIZE(o),
! 					 10);
 	m = o->ob_type->tp_as_number;
 	if (m && m->nb_int)
 		return m->nb_int(o);
 	if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len))
! 		return int_from_string((char*)buffer, buffer_len);
 
 	return type_error("object can't be converted to int");
 }
 
! /* Add a check for embedded NULL-bytes in the argument. */
 static PyObject *
 long_from_string(s, len)
***************
*** 774,803 ****
 	int len;
 {
- 	const char *start;
 	char *end;
 	PyObject *x;
- 	char buffer[256]; /* For errors */
 
- 	start = s;
- 	while (*s && isspace(Py_CHARMASK(*s)))
- 		s++;
 	x = PyLong_FromString((char*)s, &end, 10);
! 	if (x == NULL) {
! 		if (PyErr_ExceptionMatches(PyExc_ValueError))
! 			goto bad;
 		return NULL;
! 	}
! 	while (*end && isspace(Py_CHARMASK(*end)))
! 		end++;
! 	if (*end != '\0') {
! bad:
! 		sprintf(buffer, "invalid literal for long(): %.200s", s);
! 		PyErr_SetString(PyExc_ValueError, buffer);
! 		Py_XDECREF(x);
! 		return NULL;
! 	}
! 	else if (end != start + len) {
 		PyErr_SetString(PyExc_ValueError,
 				"null byte in argument for long()");
 		return NULL;
 	}
--- 784,797 ----
 	int len;
 {
 	char *end;
 	PyObject *x;
 
 	x = PyLong_FromString((char*)s, &end, 10);
! 	if (x == NULL)
 		return NULL;
! 	if (end != s + len) {
 		PyErr_SetString(PyExc_ValueError,
 				"null byte in argument for long()");
+ 		Py_DECREF(x);
 		return NULL;
 	}
***************
*** 815,818 ****
--- 809,816 ----
 	if (o == NULL)
 		return null_error();
+ 	if (PyLong_Check(o)) {
+ 		Py_INCREF(o);
+ 		return o;
+ 	}
 	if (PyString_Check(o))
 		/* need to do extra error checking that PyLong_FromString() 
***************
*** 822,825 ****
--- 820,828 ----
 		return long_from_string(PyString_AS_STRING(o),
 					PyString_GET_SIZE(o));
+ 	if (PyUnicode_Check(o))
+ 		/* The above check is done in PyLong_FromUnicode(). */
+ 		return PyLong_FromUnicode(PyUnicode_AS_UNICODE(o),
+ 					 PyUnicode_GET_SIZE(o),
+ 					 10);
 	m = o->ob_type->tp_as_number;
 	if (m && m->nb_long)
***************
*** 839,842 ****
--- 842,849 ----
 	if (o == NULL)
 		return null_error();
+ 	if (PyFloat_Check(o)) {
+ 		Py_INCREF(o);
+ 		return o;
+ 	}
 	if (!PyString_Check(o)) {
 		m = o->ob_type->tp_as_number;
Index: floatobject.c
===================================================================
RCS file: /projects/cvsroot/python/dist/src/Objects/floatobject.c,v
retrieving revision 2.55
retrieving revision 2.56
diff -C2 -r2.55 -r2.56
*** floatobject.c	2000/03/10 22:55:18	2.55
--- floatobject.c	2000/04/05 20:11:20	2.56
***************
*** 165,168 ****
--- 165,184 ----
 		len = PyString_GET_SIZE(v);
 	}
+ 	else if (PyUnicode_Check(v)) {
+ 		char s_buffer[256];
+ 
+ 		if (PyUnicode_GET_SIZE(v) >= sizeof(s_buffer)) {
+ 			PyErr_SetString(PyExc_ValueError,
+ 				 "float() literal too large to convert");
+ 			return NULL;
+ 		}
+ 		if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v), 
+ 					 PyUnicode_GET_SIZE(v),
+ 					 s_buffer, 
+ 					 NULL))
+ 			return NULL;
+ 		s = s_buffer;
+ 		len = strlen(s);
+ 	}
 	else if (PyObject_AsCharBuffer(v, &s, &len)) {
 		PyErr_SetString(PyExc_TypeError,
Index: intobject.c
===================================================================
RCS file: /projects/cvsroot/python/dist/src/Objects/intobject.c,v
retrieving revision 2.39
retrieving revision 2.40
diff -C2 -r2.39 -r2.40
*** intobject.c	2000/03/10 22:55:18	2.39
--- intobject.c	2000/04/05 20:11:20	2.40
***************
*** 262,265 ****
--- 262,283 ----
 }
 
+ PyObject *
+ PyInt_FromUnicode(s, length, base)
+ 	Py_UNICODE *s;
+ 	int length;
+ 	int base;
+ {
+ 	char buffer[256];
+ 	
+ 	if (length >= sizeof(buffer)) {
+ 		PyErr_SetString(PyExc_ValueError,
+ 				"int() literal too large to convert");
+ 		return NULL;
+ 	}
+ 	if (PyUnicode_EncodeDecimal(s, length, buffer, NULL))
+ 		return NULL;
+ 	return PyInt_FromString(buffer, NULL, base);
+ }
+ 
 /* Methods */
 
Index: longobject.c
===================================================================
RCS file: /projects/cvsroot/python/dist/src/Objects/longobject.c,v
retrieving revision 1.54
retrieving revision 1.55
diff -C2 -r1.54 -r1.55
*** longobject.c	1999/12/23 15:41:28	1.54
--- longobject.c	2000/04/05 20:11:20	1.55
***************
*** 725,729 ****
 {
 	int sign = 1;
! 	char *start;
 	PyLongObject *z;
 	
--- 725,729 ----
 {
 	int sign = 1;
! 	char *start, *orig_str = str;
 	PyLongObject *z;
 	
***************
*** 773,787 ****
 	if (z == NULL)
 		return NULL;
! 	if (str == start) {
! 		PyErr_SetString(PyExc_ValueError,
! 				"no digits in long int constant");
! 		Py_DECREF(z);
! 		return NULL;
! 	}
 	if (sign < 0 && z != NULL && z->ob_size != 0)
 		z->ob_size = -(z->ob_size);
 	if (pend)
 		*pend = str;
 	return (PyObject *) z;
 }
 
--- 773,814 ----
 	if (z == NULL)
 		return NULL;
! 	if (str == start)
! 		goto onError;
 	if (sign < 0 && z != NULL && z->ob_size != 0)
 		z->ob_size = -(z->ob_size);
+ 	if (*str == 'L' || *str == 'l')
+ 		str++;
+ 	while (*str && isspace(Py_CHARMASK(*str)))
+ 		str++;
+ 	if (*str != '\0')
+ 		goto onError;
 	if (pend)
 		*pend = str;
 	return (PyObject *) z;
+ 
+ onError:
+ 	PyErr_Format(PyExc_ValueError, 
+ 		 "invalid literal for long(): %.200s", orig_str);
+ 	Py_XDECREF(z);
+ 	return NULL;
+ }
+ 
+ PyObject *
+ PyLong_FromUnicode(u, length, base)
+ 	Py_UNICODE *u;
+ 	int length;
+ 	int base;
+ {
+ 	char buffer[256];
+ 
+ 	if (length >= sizeof(buffer)) {
+ 		PyErr_SetString(PyExc_ValueError,
+ 				"long() literal too large to convert");
+ 		return NULL;
+ 	}
+ 	if (PyUnicode_EncodeDecimal(u, length, buffer, NULL))
+ 		return NULL;
+ 
+ 	return PyLong_FromString(buffer, NULL, base);
 }
 
Index: unicodeobject.c
===================================================================
RCS file: /projects/cvsroot/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.5
retrieving revision 2.6
diff -C2 -r2.5 -r2.6
*** unicodeobject.c	2000/03/31 17:24:09	2.5
--- unicodeobject.c	2000/04/05 20:11:20	2.6
***************
*** 330,335 ****
 	len = PyString_GET_SIZE(obj);
 }
! else if (PyObject_AsCharBuffer(obj, &s, &len))
 	return NULL;
 if (len == 0) {
 	Py_INCREF(unicode_empty);
--- 330,341 ----
 	len = PyString_GET_SIZE(obj);
 }
! else if (PyObject_AsCharBuffer(obj, &s, &len)) {
! 	/* Overwrite the error message with something more useful in
! 	 case of a TypeError. */
! 	if (PyErr_ExceptionMatches(PyExc_TypeError))
! 	 PyErr_SetString(PyExc_TypeError,
! 			 "coercing to Unicode: need string or charbuffer");
 	return NULL;
+ }
 if (len == 0) {
 	Py_INCREF(unicode_empty);
***************
*** 1924,1927 ****
--- 1930,1987 ----
 }
 
+ /* --- Decimal Encoder ---------------------------------------------------- */
+ 
+ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
+ 			 int length,
+ 			 char *output,
+ 			 const char *errors)
+ {
+ Py_UNICODE *p, *end;
+ 
+ if (output == NULL) {
+ 	PyErr_BadArgument();
+ 	return -1;
+ }
+ 
+ p = s;
+ end = s + length;
+ while (p < end) {
+ 	register Py_UNICODE ch = *p++;
+ 	int decimal;
+ 	
+ 	if (Py_UNICODE_ISSPACE(ch)) {
+ 	 *output++ = ' ';
+ 	 continue;
+ 	}
+ 	decimal = Py_UNICODE_TODECIMAL(ch);
+ 	if (decimal >= 0) {
+ 	 *output++ = '0' + decimal;
+ 	 continue;
+ 	}
+ 	if (0 < ch < 256) {
+ 	 *output++ = ch;
+ 	 continue;
+ 	}
+ 	/* All other characters are considered invalid */
+ 	if (errors == NULL || strcmp(errors, "strict") == 0) {
+ 	 PyErr_SetString(PyExc_ValueError,
+ 			 "invalid decimal Unicode string");
+ 	 goto onError;
+ 	}
+ 	else if (strcmp(errors, "ignore") == 0)
+ 	 continue;
+ 	else if (strcmp(errors, "replace") == 0) {
+ 	 *output++ = '?';
+ 	 continue;
+ 	}
+ }
+ /* 0-terminate the output string */
+ *output++ = '\0';
+ return 0;
+ 
+ onError:
+ return -1;
+ }
+ 
 /* --- Helpers ------------------------------------------------------------ */
 
***************
*** 2812,2821 ****
 
 /* Coerce the two arguments */
- u = (PyUnicodeObject *)PyUnicode_FromObject(container);
- if (u == NULL)
- 	goto onError;
 v = (PyUnicodeObject *)PyUnicode_FromObject(element);
 if (v == NULL)
 	goto onError;
 
 /* Check v in u */
--- 2872,2883 ----
 
 /* Coerce the two arguments */
 v = (PyUnicodeObject *)PyUnicode_FromObject(element);
 if (v == NULL)
 	goto onError;
+ u = (PyUnicodeObject *)PyUnicode_FromObject(container);
+ if (u == NULL) {
+ 	Py_DECREF(v);
+ 	goto onError;
+ }
 
 /* Check v in u */

AltStyle によって変換されたページ (->オリジナル) /