NOTE: My implementation is based on the CodeProject article by Vladislav Gelfer (valdok).
Starting from valdok's code, I rewrote the critical section class. The difference is that my implementation integrates the recursive (reentrant) locking feature into a single critical section class. (FYI, valdok provides two classes: one without recursion and one with it.)
I just want to verify that my implementation is still correct.
Please do not argue that this implementation is pointless compared to the Win32 CRITICAL_SECTION object. I think it has advantages for several reasons; one of them is that I needed a try_enter()
feature on all versions of Windows (TryEnterCriticalSection is not available on the Windows 9x line).
#pragma once
#include <windows.h>
#include <intrin.h>
#include <string.h> // memset
#include <assert.h>

#pragma intrinsic(_WriteBarrier)
#pragma intrinsic(_ReadWriteBarrier)

// project-wide aliases (defined elsewhere in my project; repeated here so the snippet is self-contained)
typedef unsigned long u32;
typedef long s32;
#define null NULL

class critical_section
{
private:
    struct cpu_type
    {
        enum type
        {
            unknown,
            single,
            multiple
        };
    };
public:
    critical_section(u32 spin_count=0)
        : m_semaphore(null)
        , m_thread_id(0)
        , m_wait_count(0)
        , m_spin_count(0)
        , m_recur_count(0)
    {
        // determine the cpu type
        if( m_cpu_type == cpu_type::unknown )
        {
            SYSTEM_INFO sys_info;
            ::GetSystemInfo(&sys_info);
            m_cpu_type = (sys_info.dwNumberOfProcessors > 1) ? cpu_type::multiple : cpu_type::single;
        }
        // set the spin count.
        set_spin_count(spin_count);
    }
    ~critical_section()
    {
        if(m_semaphore != null)
        {
            CloseHandle(m_semaphore);
            m_semaphore = null;
        }
        ::memset(this, 0, sizeof(*this));
    }
    void set_spin_count(u32 count)
    {
        // on a single core, there should be no spinning at all.
        if(m_cpu_type == cpu_type::multiple)
        {
            m_spin_count = count;
        }
    }
public:
    bool enter(u32 timeout=INFINITE)
    {
        u32 cur_thread_id = ::GetCurrentThreadId();
        if(cur_thread_id == m_thread_id)
        {
            // already owned by the current thread.
            m_recur_count++;
        }
        else
        {
            if((!m_thread_id && lock_immediate(cur_thread_id))
                || (timeout && lock_internal(cur_thread_id, timeout)))
            {
                // successfully locked!
                m_recur_count = 1;
            }
            else
            {
                // failed to lock!
                return false;
            }
        }
        return true;
    }
    bool try_enter()
    {
        return enter(0);
    }
    void leave()
    {
        assert(m_recur_count > 0);
        if(--m_recur_count == 0)
        {
            unlock_internal();
        }
    }
    inline bool is_acquired() const
    {
        return (::GetCurrentThreadId() == m_thread_id);
    }
private:
    inline bool lock_immediate(u32 thread_id)
    {
        // return true only if m_thread_id was 0 (and, at the same time, replaced by thread_id).
        return (_InterlockedCompareExchange(reinterpret_cast<long volatile*>(&m_thread_id), thread_id, 0) == 0);
    }
    bool lock_kernel(u32 thread_id, u32 timeout)
    {
        bool waiter = false;
        for(u32 ticks = GetTickCount();;)
        {
            if(!waiter) _InterlockedIncrement(reinterpret_cast<long volatile*>(&m_wait_count));
            // try locking once again before going to kernel-mode.
            if(lock_immediate(thread_id)) return true;
            u32 wait;
            if(timeout == INFINITE)
            {
                wait = INFINITE;
            }
            else
            {
                // update the remaining time-out.
                wait = GetTickCount() - ticks;
                if(timeout <= wait) return false; // timed out
                wait = timeout - wait;
            }
            // go kernel
            assert(m_semaphore != null);
            switch(WaitForSingleObject(m_semaphore, wait))
            {
            case WAIT_OBJECT_0:
                // got a chance!
                waiter = false;
                break;
            case WAIT_TIMEOUT:
                // timed out,
                // but there's one more chance at the top of the loop.
                waiter = true;
                break;
            default:
                assert(false);
            }
        }
    }
    bool lock_internal(u32 thread_id, u32 timeout)
    {
        // try spinning and locking
        for(u32 spin = 0; spin < m_spin_count; spin++)
        {
            if(lock_immediate(thread_id)) return true;
            // give a chance to other waiting threads.
            // on a single core, it does nothing.
            YieldProcessor();
        }
        // prepare the semaphore object for kernel-mode waiting.
        allocate_semaphore();
        bool locked = lock_kernel(thread_id, timeout);
        _InterlockedDecrement(reinterpret_cast<long volatile*>(&m_wait_count));
        return locked;
    }
    void unlock_internal()
    {
        // changes done to the shared resource are committed.
        _WriteBarrier();
        // reset the owner thread id.
        m_thread_id = 0;
        // the critical section is now released.
        _ReadWriteBarrier();
        // if there are waiting threads:
        if(m_wait_count > 0)
        {
            _InterlockedDecrement(reinterpret_cast<long volatile*>(&m_wait_count));
            // wake up one of them by incrementing the semaphore count by 1.
            assert(m_semaphore);
            ReleaseSemaphore(m_semaphore, 1, null);
        }
    }
    void allocate_semaphore()
    {
        if(m_semaphore == null)
        {
            // create a semaphore object.
            HANDLE semaphore = CreateSemaphore(null, 0, 0x7FFFFFFF, null);
            assert(semaphore != null);
            // try to assign it to m_semaphore.
            if(InterlockedCompareExchangePointer(&m_semaphore, semaphore, null) != null)
            {
                // another thread already created and assigned the semaphore.
                CloseHandle(semaphore);
            }
        }
    }
private:
    // prevent copying
    critical_section(const critical_section&);
    void operator=(const critical_section&);
private:
    // type of cpu: single-core or multiple-core
    static cpu_type::type m_cpu_type;
    // owner thread's id
    volatile u32 m_thread_id;
    // number of waiting threads
    volatile u32 m_wait_count;
    // spin count
    volatile u32 m_spin_count;
    // recursion (reentrance) count
    s32 m_recur_count;
    // semaphore for kernel-mode waiting
    volatile HANDLE m_semaphore;
};
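For context, here is the intended call pattern (an illustrative sketch only; worker and the shared-data comments are made-up placeholders, not part of the class):

critical_section g_cs(4000); // spin 4000 times before falling back to a kernel-mode wait

void worker()
{
    g_cs.enter();            // blocks until owned
    g_cs.enter();            // the same thread may re-enter (recursion count becomes 2)
    // ... touch shared data ...
    g_cs.leave();
    g_cs.leave();            // the last leave() actually releases the lock

    if(g_cs.try_enter())     // non-blocking attempt, available on any Windows version
    {
        // ... touch shared data ...
        g_cs.leave();
    }
}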
Don't worry about the static member: critical_section::m_cpu_type is defined and initialized to unknown in another .cpp file.
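For reference, that out-of-class definition looks like this (a sketch; the actual .cpp file is not shown here):

// in some .cpp file of the project
critical_section::cpu_type::type critical_section::m_cpu_type = critical_section::cpu_type::unknown;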
1 Answer
To be honest, it is quite difficult to verify that this sort of thing works by dry-running the code.
I would take the original code and design some test cases which show it working, and run them against both the original and your version.
Then design some test cases which fail because of the problem you have spotted, or which do not work well enough without your enhancement.
Run all of those test cases (not just the newest ones) over your new code, and hey presto, you know whether your code works.
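As a sketch of what such a test case could look like (the header name, thread count, and iteration count are all arbitrary choices here, and a passing run does not by itself prove a lock correct):

#include <windows.h>
#include <assert.h>
#include "critical_section.h" // hypothetical header holding the class above

critical_section g_cs(4000);
long g_counter = 0; // protected by g_cs; deliberately not atomic

DWORD WINAPI hammer(void*)
{
    for(int i = 0; i < 100000; ++i)
    {
        g_cs.enter();
        g_cs.enter();   // exercise the recursion feature
        ++g_counter;    // relies entirely on the lock for correctness
        g_cs.leave();
        g_cs.leave();
    }
    return 0;
}

int main()
{
    HANDLE threads[8];
    for(int i = 0; i < 8; ++i)
        threads[i] = CreateThread(0, 0, hammer, 0, 0, 0);
    WaitForMultipleObjects(8, threads, TRUE, INFINITE);
    // if the lock serialized every increment, no updates were lost.
    assert(g_counter == 8 * 100000);
    for(int i = 0; i < 8; ++i)
        CloseHandle(threads[i]);
    return 0;
}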
Comments:

- Writing unit tests which reliably show faults in parallel code is hard, almost as hard as writing those implementations in the first place. That's not to say that you shouldn't write tests; they are still great for showing failure. But you mustn't rely on them. So I agree with this answer, except the last sentence. – Konrad Rudolph, Jun 29, 2012 at 12:19
- [Why not use] boost::threads and don't spend your time reinventing the wheel?
- [Why do you] memset in your code? Also, you are modifying m_recur_count concurrently without locking. This is not an atomic variable. The code will provoke a race condition. Also note that the CRITICAL_SECTION struct doesn't even bother declaring its members as volatile, since volatile does nothing here. In conclusion, no, this implementation isn't correct. I can't verify whether valdok's original code is, since I have to register at CP to download the source code.
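As a footnote to the "reinventing the wheel" comment: on compilers with C++11 support, the standard library already ships a type with the same surface, recursive locking plus non-blocking and timed acquisition. A minimal sketch, not part of the original discussion:

#include <mutex>
#include <chrono>

std::recursive_timed_mutex g_mutex;

void worker()
{
    g_mutex.lock();      // blocking acquire; the same thread may lock again
    g_mutex.lock();
    // ... touch shared data ...
    g_mutex.unlock();
    g_mutex.unlock();

    if(g_mutex.try_lock())                                     // like try_enter()
        g_mutex.unlock();

    if(g_mutex.try_lock_for(std::chrono::milliseconds(100)))   // like enter(timeout)
        g_mutex.unlock();
}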