[Python-checkins] python/dist/src/Objects stringobject.c, 2.214, 2.215 unicodeobject.c, 2.203, 2.204

Mon Dec 15 13:49:22 EST 2003

Update of /cvsroot/python/python/dist/src/Objects
In directory sc8-pr-cvs1:/tmp/cvs-serv28695/Objects
Modified Files:
	stringobject.c unicodeobject.c 
Log Message:
Add rsplit method for str and unicode builtin types. 
SF feature request #801847.
The original patch was written by Sean Reifschneider.
Index: stringobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v
retrieving revision 2.214
retrieving revision 2.215
diff -C2 -d -r2.214 -r2.215
*** stringobject.c	29 Nov 2003 23:52:13 -0000	2.214
--- stringobject.c	15 Dec 2003 18:49:19 -0000	2.215
***************
*** 1408,1411 ****
--- 1408,1534 ----
 }
 
+ /* Split the len bytes at s on runs of whitespace, scanning right to left.
+  *
+  * At most maxsplit splits are made.  Once they are used up, everything
+  * from the start of s up to the end of the next word becomes the first
+  * item (whitespace to the right of that word is dropped, whitespace to
+  * its left is kept).
+  *
+  * Returns a new list of string objects in left-to-right order, or NULL
+  * on failure.
+  */
+ static PyObject *
+ rsplit_whitespace(const char *s, int len, int maxsplit)
+ {
+ 	int i, j, err;
+ 	PyObject *item;
+ 	PyObject *list = PyList_New(0);
+ 
+ 	if (list == NULL)
+ 		return NULL;
+ 
+ 	/* i is the scan cursor; j is the index of the last character of
+ 	   the word currently being delimited (or of the unsplit rest). */
+ 	for (i = j = len - 1; i >= 0; ) {
+ 		while (i >= 0 && isspace(Py_CHARMASK(s[i])))
+ 			i--;
+ 		j = i;
+ 		while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
+ 			i--;
+ 		if (j > i) {
+ 			if (maxsplit-- <= 0)
+ 				break;
+ 			item = PyString_FromStringAndSize(s+i+1, (int)(j-i));
+ 			if (item == NULL)
+ 				goto finally;
+ 			/* Append now and reverse once at the end: inserting
+ 			   at index 0 for every item is quadratic. */
+ 			err = PyList_Append(list, item);
+ 			Py_DECREF(item);
+ 			if (err < 0)
+ 				goto finally;
+ 			while (i >= 0 && isspace(Py_CHARMASK(s[i])))
+ 				i--;
+ 			j = i;
+ 		}
+ 	}
+ 	if (j >= 0) {
+ 		/* Unsplit remainder: s[0..j]. */
+ 		item = PyString_FromStringAndSize(s, (int)(j + 1));
+ 		if (item == NULL)
+ 			goto finally;
+ 		err = PyList_Append(list, item);
+ 		Py_DECREF(item);
+ 		if (err < 0)
+ 			goto finally;
+ 	}
+ 	/* Items were collected right to left; restore string order. */
+ 	if (PyList_Reverse(list) < 0)
+ 		goto finally;
+ 	return list;
+ finally:
+ 	Py_DECREF(list);
+ 	return NULL;
+ }
+ 
+ 
+ PyDoc_STRVAR(rsplit__doc__,
+ "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
+ \n\
+ Return a list of the words in the string S, using sep as the\n\
+ delimiter string, starting at the end of the string and working\n\
+ to the front. If maxsplit is given, at most maxsplit splits are\n\
+ done. If sep is not specified or is None, any whitespace string\n\
+ is a separator.");
+ 
+ /* Implementation of the str "rsplit" method.
+  *
+  * args holds the optional separator object and the optional maxsplit
+  * count.  A None separator delegates to rsplit_whitespace(); a unicode
+  * separator delegates to PyUnicode_RSplit(); any other separator must be a
+  * string or expose a character buffer.  An empty separator raises
+  * ValueError.  Returns a new list, or NULL on failure.
+  */
+ static PyObject *
+ string_rsplit(PyStringObject *self, PyObject *args)
+ {
+ 	int len = PyString_GET_SIZE(self), n, i, j, err;
+ 	int maxsplit = -1;
+ 	const char *s = PyString_AS_STRING(self), *sub;
+ 	PyObject *list, *item, *subobj = Py_None;
+ 
+ 	if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
+ 		return NULL;
+ 	if (maxsplit < 0)
+ 		maxsplit = INT_MAX;	/* negative count: no limit */
+ 	if (subobj == Py_None)
+ 		return rsplit_whitespace(s, len, maxsplit);
+ 	if (PyString_Check(subobj)) {
+ 		sub = PyString_AS_STRING(subobj);
+ 		n = PyString_GET_SIZE(subobj);
+ 	}
+ #ifdef Py_USING_UNICODE
+ 	else if (PyUnicode_Check(subobj))
+ 		return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
+ #endif
+ 	else if (PyObject_AsCharBuffer(subobj, &sub, &n))
+ 		return NULL;
+ 	if (n == 0) {
+ 		PyErr_SetString(PyExc_ValueError, "empty separator");
+ 		return NULL;
+ 	}
+ 
+ 	list = PyList_New(0);
+ 	if (list == NULL)
+ 		return NULL;
+ 
+ 	/* j is one past the end of the piece still to be emitted; i is the
+ 	   rightmost offset at which a separator may still start. */
+ 	j = len;
+ 	i = j - n;
+ 	while (i >= 0) {
+ 		if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
+ 			if (maxsplit-- <= 0)
+ 				break;
+ 			item = PyString_FromStringAndSize(s+i+n, (int)(j-i-n));
+ 			if (item == NULL)
+ 				goto fail;
+ 			/* Append now and reverse once at the end: inserting
+ 			   at index 0 for every item is quadratic. */
+ 			err = PyList_Append(list, item);
+ 			Py_DECREF(item);
+ 			if (err < 0)
+ 				goto fail;
+ 			j = i;
+ 			/* Step over a whole separator so matches never
+ 			   overlap. */
+ 			i -= n;
+ 		}
+ 		else
+ 			i--;
+ 	}
+ 	/* Whatever is left of s[0..j) is the first field (may be empty). */
+ 	item = PyString_FromStringAndSize(s, j);
+ 	if (item == NULL)
+ 		goto fail;
+ 	err = PyList_Append(list, item);
+ 	Py_DECREF(item);
+ 	if (err < 0)
+ 		goto fail;
+ 
+ 	/* Items were collected right to left; restore string order. */
+ 	if (PyList_Reverse(list) < 0)
+ 		goto fail;
+ 
+ 	return list;
+ 
+ fail:
+ 	Py_DECREF(list);
+ 	return NULL;
+ }
+ 
 
 PyDoc_STRVAR(join__doc__,
***************
*** 3065,3068 ****
--- 3188,3192 ----
 	{"join", (PyCFunction)string_join, METH_O, join__doc__},
 	{"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
+ 	{"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
 	{"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
 	{"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.203
retrieving revision 2.204
diff -C2 -d -r2.203 -r2.204
*** unicodeobject.c	29 Nov 2003 23:52:13 -0000	2.203
--- unicodeobject.c	15 Dec 2003 18:49:19 -0000	2.204
***************
*** 4054,4058 ****
 
 #define SPLIT_APPEND(data, left, right)					\
! 	str = PyUnicode_FromUnicode(data + left, right - left);		\
 	if (!str)							\
 	 goto onError;						\
--- 4054,4058 ----
 
 #define SPLIT_APPEND(data, left, right)					\
! 	str = PyUnicode_FromUnicode((data) + (left), (right) - (left));	\
 	if (!str)							\
 	 goto onError;						\
***************
*** 4064,4067 ****
--- 4064,4078 ----
 Py_DECREF(str);
 
+ #define SPLIT_INSERT(data, left, right)					\
+ 	str = PyUnicode_FromUnicode((data) + (left), (right) - (left));	\
+ 	if (!str)							\
+ 	 goto onError;						\
+ 	if (PyList_Insert(list, 0, str)) {				\
+ 	 Py_DECREF(str);						\
+ 	 goto onError;						\
+ 	}								\
+ else								\
+ Py_DECREF(str);
+ 
 static
 PyObject *split_whitespace(PyUnicodeObject *self,
***************
*** 4215,4219 ****
--- 4226,4329 ----
 }
 
+ /* Whitespace worker for rsplit(): walks self from its last character
+    towards the first and puts each word found at the front of list, making
+    at most maxcount splits.  Returns list, or NULL after releasing list if
+    an item could not be created or inserted. */
+ static
+ PyObject *rsplit_whitespace(PyUnicodeObject *self,
+                             PyObject *list,
+                             int maxcount)
+ {
+     int pos = self->length - 1;     /* scan cursor, moves leftwards */
+     int end = pos;                  /* last index of the pending piece */
+     PyObject *str;
+ 
+     while (pos >= 0) {
+         /* drop the separator run to the right of the next word */
+         while (pos >= 0 && Py_UNICODE_ISSPACE(self->str[pos]))
+             pos--;
+         end = pos;
+         /* walk to the left edge of that word */
+         while (pos >= 0 && !Py_UNICODE_ISSPACE(self->str[pos]))
+             pos--;
+         if (end <= pos)
+             continue;               /* no word found; loop test ends the scan */
+         if (maxcount-- <= 0)
+             break;                  /* split budget used up */
+         SPLIT_INSERT(self->str, pos + 1, end + 1);
+         while (pos >= 0 && Py_UNICODE_ISSPACE(self->str[pos]))
+             pos--;
+         end = pos;
+     }
+     if (end >= 0) {
+         /* unsplit remainder: everything from the start up to end */
+         SPLIT_INSERT(self->str, 0, end + 1);
+     }
+     return list;
+ 
+  onError:
+     Py_DECREF(list);
+     return NULL;
+ }
+ 
+ /* Single-character worker for rsplit(): scans self from right to left
+    and inserts the fields delimited by ch at the front of list, making at
+    most maxcount splits.  Returns list, or NULL after releasing list if an
+    item could not be created or inserted. */
+ static
+ PyObject *rsplit_char(PyUnicodeObject *self,
+                       PyObject *list,
+                       Py_UNICODE ch,
+                       int maxcount)
+ {
+     register int i;
+     register int j;
+     int len = self->length;
+     PyObject *str;
+ 
+     /* i is the scan cursor; j is the index of the last character of the
+        field that has not been emitted yet (-1 when that field is empty
+        and starts at the beginning of the string). */
+     for (i = j = len - 1; i >= 0; ) {
+         if (self->str[i] == ch) {
+             if (maxcount-- <= 0)
+                 break;
+             SPLIT_INSERT(self->str, i + 1, j + 1);
+             j = i = i - 1;
+         } else
+             i--;
+     }
+     /* Always emit the leftmost field, even when it is empty: j is never
+        below -1 here, and j == -1 means the separator sat at index 0 or the
+        string itself is empty.  Testing j >= 0 would lose that field, e.g.
+        splitting ",a" on "," must yield ["", "a"], not ["a"]. */
+     SPLIT_INSERT(self->str, 0, j + 1);
+     return list;
+ 
+  onError:
+     Py_DECREF(list);
+     return NULL;
+ }
+ 
+ /* Multi-character worker for rsplit(): looks for substring in self from
+    the right and inserts the fields between matches at the front of list,
+    making at most maxcount splits.  Returns list, or NULL after releasing
+    list if an item could not be created or inserted. */
+ static
+ PyObject *rsplit_substring(PyUnicodeObject *self,
+                            PyObject *list,
+                            PyUnicodeObject *substring,
+                            int maxcount)
+ {
+     int sublen = substring->length;
+     int end = self->length;         /* one past the pending field's end */
+     int pos = end - sublen;         /* rightmost possible match offset */
+     PyObject *str;
+ 
+     while (pos >= 0) {
+         if (Py_UNICODE_MATCH(self, pos, substring)) {
+             if (maxcount-- <= 0)
+                 break;
+             SPLIT_INSERT(self->str, pos + sublen, end);
+             end = pos;
+             /* jump a whole separator so matches cannot overlap */
+             pos -= sublen;
+         } else {
+             pos--;
+         }
+     }
+     if (end >= 0) {
+         /* leftmost field: everything before the last match handled */
+         SPLIT_INSERT(self->str, 0, end);
+     }
+     return list;
+ 
+  onError:
+     Py_DECREF(list);
+     return NULL;
+ }
+ 
 #undef SPLIT_APPEND
+ #undef SPLIT_INSERT
 
 static
***************
*** 4247,4250 ****
--- 4357,4389 ----
 
 static
+ PyObject *rsplit(PyUnicodeObject *self,
+                  PyUnicodeObject *substring,
+                  int maxcount)
+ {
+     /* Dispatcher for right-to-left splitting: creates the result list and
+        hands it to the worker matching the separator.  A NULL substring
+        selects whitespace splitting; a negative maxcount means no limit. */
+     PyObject *list;
+ 
+     if (maxcount < 0)
+         maxcount = INT_MAX;
+ 
+     list = PyList_New(0);
+     if (!list)
+         return NULL;
+ 
+     if (substring == NULL)
+         return rsplit_whitespace(self, list, maxcount);
+ 
+     switch (substring->length) {
+     case 1:
+         return rsplit_char(self, list, substring->str[0], maxcount);
+     case 0:
+         /* the list was already created; release it before failing */
+         Py_DECREF(list);
+         PyErr_SetString(PyExc_ValueError, "empty separator");
+         return NULL;
+     default:
+         return rsplit_substring(self, list, substring, maxcount);
+     }
+ }
+ 
+ static
 PyObject *replace(PyUnicodeObject *self,
 		 PyUnicodeObject *str1,
***************
*** 5676,5679 ****
--- 5815,5868 ----
 }
 
+ /* Public entry point for splitting from the right.  Both s and (when not
+    NULL) sep are passed through PyUnicode_FromObject() before splitting; a
+    NULL sep is forwarded as NULL to rsplit().  Returns whatever rsplit()
+    returns, or NULL if either conversion fails. */
+ PyObject *PyUnicode_RSplit(PyObject *s,
+                            PyObject *sep,
+                            int maxsplit)
+ {
+     PyObject *ustr;
+     PyObject *usep = NULL;
+     PyObject *result;
+ 
+     ustr = PyUnicode_FromObject(s);
+     if (!ustr)
+         return NULL;
+ 
+     if (sep) {
+         usep = PyUnicode_FromObject(sep);
+         if (!usep) {
+             Py_DECREF(ustr);
+             return NULL;
+         }
+     }
+ 
+     result = rsplit((PyUnicodeObject *)ustr, (PyUnicodeObject *)usep,
+                     maxsplit);
+ 
+     /* drop the references obtained from the conversions above */
+     Py_DECREF(ustr);
+     Py_XDECREF(usep);
+     return result;
+ }
+ 
+ PyDoc_STRVAR(rsplit__doc__,
+ "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
+ \n\
+ Return a list of the words in S, using sep as the\n\
+ delimiter string, starting at the end of the string and\n\
+ working to the front. If maxsplit is given, at most maxsplit\n\
+ splits are done. If sep is not specified or is None, any\n\
+ whitespace string is a separator.");
+ 
+ /* Implementation of the unicode "rsplit" method.  args holds the optional
+    separator and the optional maximum number of splits (default -1).  An
+    omitted or None separator selects whitespace splitting; a unicode
+    separator is used directly; any other object goes through
+    PyUnicode_RSplit(), which converts it first. */
+ static PyObject*
+ unicode_rsplit(PyUnicodeObject *self, PyObject *args)
+ {
+     PyObject *substring = Py_None;
+     int maxcount = -1;
+ 
+     if (!PyArg_ParseTuple(args, "|Oi:rsplit", &substring, &maxcount))
+         return NULL;
+ 
+     if (substring == Py_None)
+         return rsplit(self, NULL, maxcount);
+     else if (PyUnicode_Check(substring))
+         return rsplit(self, (PyUnicodeObject *)substring, maxcount);
+     else
+         return PyUnicode_RSplit((PyObject *)self, substring, maxcount);
+ }
+ 
 PyDoc_STRVAR(splitlines__doc__,
 "S.splitlines([keepends]]) -> list of strings\n\
***************
*** 5871,5874 ****
--- 6060,6064 ----
 {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
 {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
+ {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
 {"join", (PyCFunction) unicode_join, METH_O, join__doc__},
 {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__},