[Python-checkins] python/dist/src/Objects longobject.c,1.131,1.132

tim_one@users.sourceforge.net tim_one@users.sourceforge.net
2002年8月12日 15:01:36 -0700


Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv21561/python/Objects
Modified Files:
	longobject.c 
Log Message:
Added new function k_lopsided_mul(), which is much more efficient than
k_mul() when inputs have vastly different sizes, and a little more
efficient when they're close to a factor of 2 out of whack.
I consider this done now, although I'll set up some more correctness
tests to run overnight.
Index: longobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/longobject.c,v
retrieving revision 1.131
retrieving revision 1.132
diff -C2 -d -r1.131 -r1.132
*** longobject.c	12 Aug 2002 19:43:49 -0000	1.131
--- longobject.c	12 Aug 2002 22:01:34 -0000	1.132
***************
*** 1593,1596 ****
--- 1593,1598 ----
 }
 
+ static PyLongObject *k_lopsided_mul(PyLongObject *a, PyLongObject *b);
+ 
 /* Karatsuba multiplication. Ignores the input signs, and returns the
 * absolute value of the product (or NULL if error).
***************
*** 1634,1640 ****
 	/* Use gradeschool math when either number is too small. */
 	if (asize <= KARATSUBA_CUTOFF) {
- 		/* 0 is inevitable if one kmul arg has more than twice
- 		 * the digits of another, so it's worth special-casing.
- 		 */
 		if (asize == 0)
 			return _PyLong_New(0);
--- 1636,1639 ----
***************
*** 1643,1646 ****
--- 1642,1654 ----
 	}
 
+ 	/* If a is small compared to b, splitting on b gives a degenerate
+ 	 * case with ah==0, and Karatsuba may be (even much) less efficient
+ 	 * than "grade school" then. However, we can still win, by viewing
+ 	 * b as a string of "big digits", each of width a->ob_size. That
+ 	 * leads to a sequence of balanced calls to k_mul.
+ 	 */
+ 	if (2 * asize <= bsize)
+ 		return k_lopsided_mul(a, b);
+ 
 	shift = bsize >> 1;
 	if (kmul_split(a, shift, &ah, &al) < 0) goto fail;
***************
*** 1751,1754 ****
--- 1759,1823 ----
 }
 
+ /* b has at least twice the digits of a, and a is big enough that Karatsuba
+ * would pay off *if* the inputs had balanced sizes. View b as a sequence
+ * of slices, each with a->ob_size digits, and multiply the slices by a,
+ * one at a time. This gives k_mul balanced inputs to work with, and is
+ * also cache-friendly (we compute one double-width slice of the result
+ * at a time, then move on, never bactracking except for the helpful
+ * single-width slice overlap between successive partial sums).
+ */
+ static PyLongObject *
+ k_lopsided_mul(PyLongObject *a, PyLongObject *b)
+ {
+ 	const int asize = ABS(a->ob_size);
+ 	int bsize = ABS(b->ob_size);
+ 	int nbdone;	/* # of b digits already multiplied */
+ 	PyLongObject *ret;
+ 	PyLongObject *bslice = NULL;
+ 
+ 	assert(asize > KARATSUBA_CUTOFF);
+ 	assert(2 * asize <= bsize);
+ 
+ 	/* Allocate result space, and zero it out. */
+ 	ret = _PyLong_New(asize + bsize);
+ 	if (ret == NULL)
+ 		return NULL;
+ 	memset(ret->ob_digit, 0, ret->ob_size * sizeof(digit));
+ 
+ 	/* Successive slices of b are copied into bslice. */
+ 	bslice = _PyLong_New(bsize);
+ 	if (bslice == NULL)
+ 		goto fail;
+ 
+ 	nbdone = 0;
+ 	while (bsize > 0) {
+ 		PyLongObject *product;
+ 		const int nbtouse = MIN(bsize, asize);
+ 
+ 		/* Multiply the next slice of b by a. */
+ 		memcpy(bslice->ob_digit, b->ob_digit + nbdone,
+ 		 nbtouse * sizeof(digit));
+ 		bslice->ob_size = nbtouse;
+ 		product = k_mul(a, bslice);
+ 		if (product == NULL)
+ 			goto fail;
+ 
+ 		/* Add into result. */
+ 		(void)v_iadd(ret->ob_digit + nbdone, ret->ob_size - nbdone,
+ 			 product->ob_digit, product->ob_size);
+ 		Py_DECREF(product);
+ 
+ 		bsize -= nbtouse;
+ 		nbdone += nbtouse;
+ 	}
+ 
+ 	Py_DECREF(bslice);
+ 	return long_normalize(ret);
+ 
+ fail:
+ 	Py_DECREF(ret);
+ 	Py_XDECREF(bslice);
+ 	return NULL;
+ }
 
 static PyObject *
***************
*** 1770,1781 ****
 	}
 
- #if 0
- 	if (Py_GETENV("KARAT") != NULL)
- 		z = k_mul(a, b);
- 	else
- 		z = x_mul(a, b);
- #else
 	z = k_mul(a, b);
- #endif
 	if(z == NULL) {
 		Py_DECREF(a);
--- 1839,1843 ----

AltStyle によって変換されたページ (->オリジナル) /