[Python-Dev] very slow compare of recursive objects

Tim Peters tim.one@comcast.net
2003年1月19日 19:38:16 -0500


This is a multi-part message in MIME format.
--Boundary_(ID_LbvhV0B4RROIr+t2rcTs3Q)
Content-type: text/plain; charset=iso-8859-1
Content-transfer-encoding: 7BIT
[Neal Norwitz]
> Could someone take a look at the patch attached to this bug report?
>> http://python.org/sf/625698

-1: it doesn't really solve the problem, and it's not necessarily the case
anymore that
 x is y
implies
 x == y
> It fixes a problem comparing recursive objects by adding a check in
> PyObject_RichCompare.

The bug report is confusing: the original report claimed crashes, but
nobody was able to reproduce that, and jaw-dropping slowness isn't a bug.
> Seems to work fine for the specific problem,
> however, it still doesn't help this case:
>> a = []
> b = []
> for i in range(2): a.append(a)
> for i in range(2): b.append(b)
> print a == b

That runs in an eyeblink with the attached patch, but this is delicate stuff
and the attached is pure hackery. I think there are two "deep" problems
with the recursive compare gimmicks:
1. Entries in the inprogress dict are cleared too early, requiring
 exponential time to rebuild them over & over & ... & over again.
2. By the time check_recursion() is called, compare_nesting is already
 larger than NESTING_LIMIT. As a result, the tuple rich comparisons
 *implied by* the PyDict_GetItem and PyDict_SetItem calls within
 check_recursion also trigger the "recursive compare" gimmicks: the
 very machinery used to check whether recursion is in progress ends
 up exacerbating the problem by doing "hidden" comparisons on the
 (address#1, address#2, comparison_op) tuples it uses to index the
 dict. So, the addresses of those tuples also end up in the inprogress
 dict in new tuples of their own, and so on -- it's something of
 a miracle that it ever stops <0.9 wink>.
The attached worms around both of those without any grac, so I'm -1 on the
attached too. But seeing one way that seems to get close to solving the
problem "for real" may inspire someone to do it gracefully.
--Boundary_(ID_LbvhV0B4RROIr+t2rcTs3Q)
Content-type: text/plain; name=patch.txt
Content-transfer-encoding: 7BIT
Content-disposition: attachment; filename=patch.txt
Index: object.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/object.c,v
retrieving revision 2.195
diff -u -r2.195 object.c
--- object.c	13 Jan 2003 20:13:04 -0000	2.195
+++ object.c	20 Jan 2003 00:32:21 -0000
@@ -708,6 +708,7 @@
 #define NESTING_LIMIT 20
 
 static int compare_nesting = 0;
+static int use_dict = 0;
 
 static PyObject*
 get_inprogress_dict(void)
@@ -746,18 +747,21 @@
 check_recursion(PyObject *v, PyObject *w, int op)
 {
 	PyObject *inprogress;
-	PyObject *token;
+	PyObject *token = NULL;
 	Py_uintptr_t iv = (Py_uintptr_t)v;
 	Py_uintptr_t iw = (Py_uintptr_t)w;
 	PyObject *x, *y, *z;
+	int save_use_dict = use_dict;
+
+	use_dict = 0;
 
 	inprogress = get_inprogress_dict();
 	if (inprogress == NULL)
-		return NULL;
+		goto Done;
 
 	token = PyTuple_New(3);
 	if (token == NULL)
-		return NULL;
+		goto Done;
 
 	if (iv <= iw) {
 		PyTuple_SET_ITEM(token, 0, x = PyLong_FromVoidPtr((void *)v));
@@ -771,19 +775,24 @@
 	PyTuple_SET_ITEM(token, 2, z = PyInt_FromLong((long)op));
 	if (x == NULL || y == NULL || z == NULL) {
 		Py_DECREF(token);
-		return NULL;
+		token = NULL;
+		goto Done;;
 	}
 
 	if (PyDict_GetItem(inprogress, token) != NULL) {
 		Py_DECREF(token);
-		return Py_None; /* Without INCREF! */
+		token = Py_None; /* Without INCREF! */
+		goto Done;
 	}
 
 	if (PyDict_SetItem(inprogress, token, token) < 0) {
 		Py_DECREF(token);
-		return NULL;
+		token = NULL;
+		goto Done;
 	}
 
+Done:
+	use_dict = save_use_dict;
 	return token;
 }
 
@@ -802,6 +811,18 @@
 	Py_DECREF(token);
 }
 
+static void
+clear_inprogress_dict()
+{
+	PyObject *inprogress;
+
+	inprogress = get_inprogress_dict();
+	if (inprogress == NULL)
+		PyErr_Clear();
+	else
+		PyDict_Clear(inprogress);
+}
+
 /* Compare v to w. Return
 -1 if v < w or exception (PyErr_Occurred() true in latter case).
 0 if v == w.
@@ -926,18 +947,21 @@
 	assert(Py_LT <= op && op <= Py_GE);
 
 	compare_nesting++;
-	if (compare_nesting > NESTING_LIMIT &&
+	if ((use_dict || compare_nesting > NESTING_LIMIT) &&
 		(v->ob_type->tp_as_mapping
 		 || (v->ob_type->tp_as_sequence
 		 && !PyString_Check(v)
 		 && !PyTuple_Check(v)))) {
 
 		/* try to detect circular data structures */
-		PyObject *token = check_recursion(v, w, op);
-		if (token == NULL) {
+		int save_compare_nesting = compare_nesting;
+		PyObject *token;
+
+		use_dict = 1;
+		compare_nesting = NESTING_LIMIT - 2;
+		token = check_recursion(v, w, op);
+		if (token == NULL)
 			res = NULL;
-			goto Done;
-		}
 		else if (token == Py_None) {
 			/* already comparing these objects with this operator.
 			 assume they're equal until shown otherwise */
@@ -952,10 +976,9 @@
 			}
 			Py_XINCREF(res);
 		}
-		else {
+		else
 			res = do_richcmp(v, w, op);
-			delete_token(token);
-		}
+		compare_nesting = save_compare_nesting;
 		goto Done;
 	}
 
@@ -993,6 +1016,10 @@
 	res = do_richcmp(v, w, op);
 Done:
 	compare_nesting--;
+	if (use_dict && compare_nesting <= 0) {
+		clear_inprogress_dict();
+		use_dict = 0;
+	}
 	return res;
 }
 
--Boundary_(ID_LbvhV0B4RROIr+t2rcTs3Q)--

AltStyle によって変換されたページ (->オリジナル) /