GC failure w/ THREAD_LOCAL_ALLOC ?

Boehm, Hans hans_boehm@hp.com
Thu Mar 28 18:53:00 GMT 2002


Here's a patch. It's bigger than it needs to be in that it includes some
fixes for related things that I wasn't quite comfortable with while
proofreading the code. Most of those were already in my version. Many of
them make it easier to debug any similar problems we might encounter in the
future. Unless someone wants to object, or I encounter further test
failures, I will check the whole thing into both the trunk and the branch
sometime tomorrow.
The crucial part of the patch is the reclaim.c fix.
I have seen no more failures with the patch, so far. And I did finally
reproduce them without it. It would be interesting if particularly Jeff and
Michael could confirm that. (Michael - you already had some of the other
changes, so this probably won't apply cleanly without backing those out.)
Hans
	* linux_threads.c (return_free_lists): Clear fl[i] unconditionally.
	(GC_local_gcj_malloc): Add assertion.
	(start_mark_threads): Fix abort message.
	* mark.c (GC_mark_from): Generalize assertion.
	* reclaim.c (GC_clear_fl_links): New function.
	(GC_start_reclaim): Must clear some freelist links.
	* specific.h, specific.c: Add assertions. Safer definition for
	INVALID_QTID, quick_thread_id. Fix/add comments.
	Rearrange tse fields.
	
Index: linux_threads.c
===================================================================
RCS file: /cvs/gcc/gcc/boehm-gc/linux_threads.c,v
retrieving revision 1.18.2.1
diff -u -r1.18.2.1 linux_threads.c
--- linux_threads.c	2002年03月25日 18:23:36	1.18.2.1
+++ linux_threads.c	2002年03月29日 02:15:18
@@ -231,15 +231,16 @@
 	nwords = i * (GRANULARITY/sizeof(word));
 qptr = fl + i;	
 	q = *qptr;
-	if ((word)q < HBLKSIZE) continue;
-	if (gfl[nwords] == 0) {
+	if ((word)q >= HBLKSIZE) {
+	 if (gfl[nwords] == 0) {
 	 gfl[nwords] = q;
-	} else {
+	 } else {
 	 /* Concatenate: */
 	 for (; (word)q >= HBLKSIZE; qptr = &(obj_link(q)), q = *qptr);
 	 GC_ASSERT(0 == q);
 	 *qptr = gfl[nwords];
 	 gfl[nwords] = fl[i];
+	 }
 	}
 	/* Clear fl[i], since the thread structure may hang around.	*/
 	/* Do it in a way that is likely to trap if we access it.	*/
@@ -412,6 +413,7 @@
 	 /* A memory barrier is probably never needed, since the 	*/
 	 /* action of stopping this thread will cause prior writes	*/
 	 /* to complete.						*/
+	 GC_ASSERT(((void * volatile *)result)[1] == 0); 
 	 *(void * volatile *)result = ptr_to_struct_containing_descr; 
 	 return result;
 	} else if ((word)my_entry - 1 < DIRECT_GRANULES) {
@@ -544,7 +546,7 @@
 	 ABORT("pthread_attr_getstacksize failed\n");
 	if (old_size < MIN_STACK_SIZE) {
 	 if (pthread_attr_setstacksize(&attr, MIN_STACK_SIZE) != 0)
-	 ABORT("pthread_attr_getstacksize failed\n");
+		 ABORT("pthread_attr_setstacksize failed\n");
 	}
 }
 # endif /* HPUX */
Index: mark.c
===================================================================
RCS file: /cvs/gcc/gcc/boehm-gc/mark.c,v
retrieving revision 1.12.2.1
diff -u -r1.12.2.1 mark.c
--- mark.c	2002年03月12日 18:31:12	1.12.2.1
+++ mark.c	2002年03月29日 02:15:19
@@ -546,13 +546,13 @@
 /* Large length.					 */
 /* Process part of the range to avoid pushing too much on the
*/
 /* stack.							*/
+	 GC_ASSERT(descr < GC_greatest_plausible_heap_addr
+			 - GC_least_plausible_heap_addr);
 #	 ifdef PARALLEL_MARK
 #	 define SHARE_BYTES 2048
 	 if (descr > SHARE_BYTES && GC_parallel
 		&& mark_stack_top < mark_stack_limit - 1) {
 	 int new_size = (descr/2) & ~(sizeof(word)-1);
-	 GC_ASSERT(descr < GC_greatest_plausible_heap_addr
-			 - GC_least_plausible_heap_addr);
 	 mark_stack_top -> mse_start = current_p;
 	 mark_stack_top -> mse_descr = new_size + sizeof(word);
 					/* makes sure we handle 	*/
Index: reclaim.c
===================================================================
RCS file: /cvs/gcc/gcc/boehm-gc/reclaim.c,v
retrieving revision 1.11
diff -u -r1.11 reclaim.c
--- reclaim.c	2002年02月12日 04:37:53	1.11
+++ reclaim.c	2002年03月29日 02:15:19
@@ -862,6 +862,25 @@
 #endif /* NO_DEBUGGING */
 
 /*
+ * Clear all obj_link pointers in the list of free objects *flp.
+ * Clear *flp.
+ * This must be done before dropping a list of free gcj-style objects,
+ * since may otherwise end up with dangling "descriptor" pointers.
+ * It may help for other pointer-containg objects.
+ */
+void GC_clear_fl_links(flp)
+ptr_t *flp;
+{
+ ptr_t next = *flp;
+
+ while (0 != next) {
+ *flp = 0;
+ flp = &(obj_link(next));
+ next = *flp;
+ }
+}
+
+/*
 * Perform GC_reclaim_block on the entire heap, after first clearing
 * small object free lists (if we are not just looking for leaks).
 */
@@ -875,17 +894,24 @@
 # endif
 /* Clear reclaim- and free-lists */
 for (kind = 0; kind < GC_n_kinds; kind++) {
- register ptr_t *fop;
- register ptr_t *lim;
- register struct hblk ** rlp;
- register struct hblk ** rlim;
- register struct hblk ** rlist = GC_obj_kinds[kind].ok_reclaim_list;
+ ptr_t *fop;
+ ptr_t *lim;
+ struct hblk ** rlp;
+ struct hblk ** rlim;
+ struct hblk ** rlist = GC_obj_kinds[kind].ok_reclaim_list;
+	GC_bool should_clobber = (GC_obj_kinds[kind].ok_descriptor != 0);
 
 if (rlist == 0) continue;	/* This kind not used.	*/
 if (!report_if_found) {
 lim = &(GC_obj_kinds[kind].ok_freelist[MAXOBJSZ+1]);
 	 for( fop = GC_obj_kinds[kind].ok_freelist; fop < lim; fop++ ) {
-	 *fop = 0;
+	 if (*fop != 0) {
+		if (should_clobber) {
+		 GC_clear_fl_links(fop);
+		} else {
+	 *fop = 0;
+		}
+	 }
 	 }
 	} /* otherwise free list objects are marked, 	*/
 	 /* and its safe to leave them			*/
Index: specific.c
===================================================================
RCS file: /cvs/gcc/gcc/boehm-gc/specific.c,v
retrieving revision 1.3
diff -u -r1.3 specific.c
--- specific.c	2001年10月16日 09:01:36	1.3
+++ specific.c	2002年03月29日 02:15:19
@@ -16,17 +16,27 @@
 #include "private/gc_priv.h" /* For GC_compare_and_exchange,
GC_memory_barrier */
 #include "private/specific.h"
 
-static tse invalid_tse; 	/* 0 qtid is guaranteed to be invalid	*/
+static tse invalid_tse = {INVALID_QTID, 0, 0, INVALID_THREADID};
+			/* A thread-specific data entry which will never
*/
+			/* appear valid to a reader. Used to fill in empty
*/
+			/* cache entries to avoid a check for 0.
*/
 
 int PREFIXED(key_create) (tsd ** key_ptr, void (* destructor)(void *)) {
 int i;
 tsd * result = (tsd *)MALLOC_CLEAR(sizeof (tsd));
 
+ /* A quick alignment check, since we need atomic stores */
+ GC_ASSERT((unsigned long)(&invalid_tse.next) % sizeof(tse *) == 0);
 if (0 == result) return ENOMEM;
 pthread_mutex_init(&(result -> lock), NULL);
 for (i = 0; i < TS_CACHE_SIZE; ++i) {
 	result -> cache[i] = &invalid_tse;
 }
+# ifdef GC_ASSERTIONS
+ for (i = 0; i < TS_HASH_SIZE; ++i) {
+	GC_ASSERT(result -> hash[i] == 0);
+ }
+# endif
 *key_ptr = result;
 return 0;
 }
@@ -36,12 +46,14 @@
 int hash_val = HASH(self);
 volatile tse * entry = (volatile tse *)MALLOC_CLEAR(sizeof (tse));
 
+ GC_ASSERT(self != INVALID_THREADID);
 if (0 == entry) return ENOMEM;
 pthread_mutex_lock(&(key -> lock));
 /* Could easily check for an existing entry here.	*/
 entry -> next = key -> hash[hash_val];
 entry -> thread = self;
 entry -> value = value;
+ GC_ASSERT(entry -> qtid == INVALID_QTID);
 /* There can only be one writer at a time, but this needs to be	*/
 /* atomic with respect to concurrent readers.			*/ 
 *(volatile tse **)(key -> hash + hash_val) = entry;
@@ -70,6 +82,10 @@
 	*link = entry -> next;
 	/* Atomic! concurrent accesses still work.	*/
 	/* They must, since readers don't lock.		*/
+	/* We shouldn't need a volatile access here,	*/
+	/* since both this and the preceding write 	*/
+	/* should become visible no later than		*/
+	/* the pthread_mutex_unlock() call.		*/
 }
 /* If we wanted to deallocate the entry, we'd first have to clear 	*/
 /* any cache entries pointing to it. That probably requires	*/
@@ -91,6 +107,7 @@
 unsigned hash_val = HASH(self);
 tse *entry = key -> hash[hash_val];
 
+ GC_ASSERT(qtid != INVALID_QTID);
 while (entry != NULL && entry -> thread != self) {
 	entry = entry -> next;
 } 
@@ -99,6 +116,8 @@
 entry -> qtid = qtid;
 		/* It's safe to do this asynchronously. Either value 	*/
 		/* is safe, though may produce spurious misses.		*/
+		/* We're replacing one qtid with another one for the	*/
+		/* same thread.						*/
 	*cache_ptr = entry;
 		/* Again this is safe since pointer assignments are 	*/
 		/* presumed atomic, and either pointer is valid.	*/
Index: include/private/specific.h
===================================================================
RCS file: /cvs/gcc/gcc/boehm-gc/include/private/specific.h,v
retrieving revision 1.2
diff -u -r1.2 specific.h
--- specific.h	2001年08月17日 18:30:50	1.2
+++ specific.h	2002年03月29日 02:15:19
@@ -27,16 +27,22 @@
 #define TS_HASH_SIZE 1024
 #define HASH(n) (((((long)n) >> 8) ^ (long)n) & (TS_HASH_SIZE - 1))
 
+/* An entry describing a thread-specific value for a given thread.	*/
+/* All such accessible structures preserve the invariant that if either
*/
+/* thread is a valid pthread id or qtid is a valid "quick tread id"	*/
+/* for a thread, then value holds the corresponding thread specific	*/
+/* value. This invariant must be preserved at ALL times, since
*/
+/* asynchronous reads are allowed.					*/
 typedef struct thread_specific_entry {
 	unsigned long qtid;	/* quick thread id, only for cache */
 	void * value;
-	pthread_t thread;
 	struct thread_specific_entry *next;
+	pthread_t thread;
 } tse;
 
 
 /* We represent each thread-specific datum as two tables. The first is
*/
-/* a cache, index by a "quick thread identifier". The "quick" thread	*/
+/* a cache, indexed by a "quick thread identifier". The "quick" thread
*/
 /* identifier is an easy to compute value, which is guaranteed to	*/
 /* determine the thread, though a thread may correspond to more than	*/
 /* one value. We typically use the address of a page in the stack.	*/
@@ -45,12 +51,15 @@
 
 /* Return the "quick thread id". Default version. Assumes page size,	*/
 /* or at least thread stack separation, is at least 4K.
*/
-static __inline__ long quick_thread_id() {
+/* Must be defined so that it never returns 0. (Page 0 can't really	*/
+/* be part of any stack, since that would make 0 a valid stack pointer.)*/
+static __inline__ unsigned long quick_thread_id() {
 int dummy;
- return (long)(&dummy) >> 12;
+ return (unsigned long)(&dummy) >> 12;
 }
 
-#define INVALID_QTID ((unsigned long)(-1))
+#define INVALID_QTID ((unsigned long)0)
+#define INVALID_THREADID ((pthread_t)0)
 
 typedef struct thread_specific_data {
 tse * volatile cache[TS_CACHE_SIZE];
@@ -76,7 +85,10 @@
 unsigned hash_val = CACHE_HASH(qtid);
 tse * volatile * entry_ptr = key -> cache + hash_val;
 tse * entry = *entry_ptr; /* Must be loaded only once.	*/
- if (entry -> qtid == qtid) return entry -> value;
+ if (entry -> qtid == qtid) {
+ GC_ASSERT(entry -> thread == pthread_self());
+ return entry -> value;
+ }
 return PREFIXED(slow_getspecific) (key, qtid, entry_ptr);
 }
 


More information about the Java mailing list

AltStyle によって変換されたページ (->オリジナル) /