Introduction
The code is a simple implementation of a readers/writers lock that supports reentrance and lock escalation: a thread holding a read lock can request write access and is granted it once no other thread holds the read lock, and a thread already holding the write lock is granted read access immediately.
Background
The Windows synchronization primitives do not include support for locking readers and writers. Sometimes it is useful to allow read access to multiple threads simultaneously, so that a thread that only needs to read data does not have to block just because other threads are reading too. The risk of data corruption arises only when the data is altered. Write access must be exclusive (with respect to other writers and to any reader), but read access can be shared between readers. Allowing multiple reader threads to share the lock allows for greater concurrency and reduces the risk of deadlocks. The existing implementations that I could find didn't support reentrancy, which was key to avoiding deadlocks in the application I was working on.
Using the code
The code is straightforward to use, either call ClaimReader
/ClaimWriter
and later ReleaseReader
/ReleaseWriter
, or use the auto-lock classes AutoLockReader
and AutoLockWriter
:
// Reentrant readers/writers lock for Win32.
// - Multiple threads may hold the read lock concurrently; the write lock is exclusive.
// - Both locks are reentrant per thread (per-thread read recursion is tracked in TLS).
// - A thread holding read locks that requests the write lock is "escalated": its read
//   locks are temporarily released, the write lock is acquired once no other readers
//   remain, and the read locks are restored when the write lock is fully released.
class ReadWriteLockTLSRelease {
public:
    // Thrown by the timed Claim* overloads when the timeout elapses.
    class TimeoutExpiredException : std::exception {};
    // Thrown by ClaimWriterNoEscalating when the caller already holds a read lock.
    class ImplicitEscalationException : std::exception {};
private:
    volatile int numReaders;        // total outstanding read claims across all threads
    int numWriters;                 // write-lock recursion depth (mutated under 'atomizer')
    CRITICAL_SECTION atomizer;      // serializes writer transitions and the slow paths
    HANDLE spinEvent;               // manual-reset event used to wake blocked waiters
    static const int WaitSpinTimeout=1000;        // ms; bounded so a missed pulse self-recovers
    static const int MaxSpinIterations=4000;      // reader spin iterations before blocking
    static const int MaxSpinIterationsWrite=4000; // writer spin iterations before blocking
    int writerThreadId;             // owner of the write lock, 0 when unowned
    int tlsSlot;                    // TLS slot storing this thread's read recursion count
    int anyThreadWaiting;           // number of threads blocked on 'spinEvent'

    // Policy: implicit read->write escalation is permitted.
    class EscalatingPolicyAllow
    {
    public:
        static bool AllowImplicitEscalation()
        {
            return true;
        }
    };
    // Policy: implicit escalation is an error (throws ImplicitEscalationException).
    class EscalatingPolicyDeny
    {
    public:
        static bool AllowImplicitEscalation()
        {
            return false;
        }
    };
    // Per-thread read recursion count kept in TLS. Negative values mean
    // "suspended for escalation" (see CheckUpgradingFromReaderToWriter).
    __forceinline int GetTLSReaderCount()
    {
        return (int)(INT_PTR)TlsGetValue(tlsSlot);
    }
    __forceinline void SetTLSReaderCount(int count)
    {
        TlsSetValue(tlsSlot, (LPVOID)(INT_PTR)count);
    }
    // Wake any blocked waiters. NOTE(review): PulseEvent is documented by
    // Microsoft as unreliable (a waiter can miss the pulse); correctness here
    // relies on the bounded WaitSpinTimeout, which makes waiters re-check.
    __forceinline void SpinThreads()
    {
        if(anyThreadWaiting>0) PulseEvent(spinEvent);
    }
    // If the calling thread already holds read locks, suspend them so the
    // write lock can be acquired (implicit escalation). Must be called while
    // holding 'atomizer'. Returns true when an escalation took place.
    // Throws ImplicitEscalationException when the policy forbids escalation.
    template<class EscalatingPolicy>
    inline bool CheckUpgradingFromReaderToWriter()
    {
        if(numReaders==0) return false;
        int readerCount=GetTLSReaderCount();
        if(readerCount>0) {
            if(!EscalatingPolicy::AllowImplicitEscalation())
                throw ImplicitEscalationException();
            // Negate the TLS count to mark "suspended by escalation".
            SetTLSReaderCount(-readerCount);
            while(true) {
                int old=numReaders;
                // BUGFIX: the exchange value must be computed from the snapshot
                // 'old'; the original re-read the volatile 'numReaders' here,
                // racing with concurrent readers and corrupting the count.
                if(old==InterlockedCompareExchange((LONG*)&numReaders,
                        old-readerCount, old))
                    break;
            }
            return true;
        }
        return false;
    }
    // Undo a previous escalation: restore this thread's suspended read locks.
    // Must be called under 'atomizer' after the last write lock is released.
    inline void CheckRestorePreviousReaderLock()
    {
        int previous=-GetTLSReaderCount();
        if(previous>0) {
            SetTLSReaderCount(previous);
            // Plain store is safe: no other readers can exist while the
            // writer was held, and we still own 'atomizer'.
            numReaders=previous;
        }
    }
    __forceinline void IncrementReaderCount()
    {
        SetTLSReaderCount(GetTLSReaderCount()+1);
    }
    // Yield the remainder of the time slice while spinning.
    __forceinline void Spin()
    {
        Sleep(0);
    }
    // Grant the read lock; caller must hold 'atomizer' (released here).
    __forceinline void AcquireReader()
    {
        _ASSERT(numReaders>=0);
        InterlockedIncrement((LONG*)&numReaders);
        IncrementReaderCount();
        _ASSERT(numWriters==0);
        LeaveCriticalSection(&atomizer);
    }
    // Grant the write lock; caller must hold 'atomizer' (released here).
    __forceinline void AcquireWriter(int myThreadId)
    {
        numWriters++;
        _ASSERT(numReaders==0);
        writerThreadId=myThreadId;
        LeaveCriticalSection(&atomizer);
    }
    // Timeout policy for the untimed overloads: never expires.
    class TimeoutIgnore
    {
    public:
        __forceinline void CheckExpired(int timeout) const {}
    };
    // Timeout policy for the timed overloads: throws TimeoutExpiredException
    // once 'timeout' milliseconds have elapsed since construction.
    class TimeoutChecker
    {
        unsigned long ticksAtStart;
    public:
        TimeoutChecker()
        {
            ticksAtStart=timeGetTime();
        }
        void CheckExpired(int timeout)
        {
            // Unsigned subtraction handles timeGetTime() wrap-around.
            if(int(timeGetTime()-ticksAtStart)>timeout)
                throw TimeoutExpiredException();
        }
    };
    template<class TimeoutPolicy>
    __forceinline void ClaimReaderInternal(int timeout)
    {
        _ASSERT(numReaders>=0);
        int myThreadId=GetCurrentThreadId();
        // A writer implicitly owns read access; claiming is a no-op for it
        // (ReleaseReader is symmetrically a no-op while the writer is held).
        if(myThreadId==writerThreadId) return;
        // Fast path: readers are already active, so no writer can hold the
        // lock; join them with a single CAS.
        int old=numReaders;
        if(old>0) {
            if(old==InterlockedCompareExchange((LONG*)&numReaders,
                    old+1, old))
            {
                _ASSERT(numReaders>=0);
                IncrementReaderCount();
                return;
            }
        }
        TimeoutPolicy t;
        // Spin phase: briefly poll for the writer to finish before blocking.
        for(int i=0; i<MaxSpinIterations; ++i)
        {
            if(numWriters==0) {
                EnterCriticalSection(&atomizer);
                if(numWriters==0) {
                    AcquireReader();    // leaves 'atomizer'
                    return;
                }
                LeaveCriticalSection(&atomizer);
            }
            t.CheckExpired(timeout);
            Spin();
        }
        // Block phase: wait on the event until no writer remains.
        while(true) {
            EnterCriticalSection(&atomizer);
            if(numWriters==0) break;
            InterlockedIncrement((LONG*)&anyThreadWaiting);
            LeaveCriticalSection(&atomizer);
            // BUGFIX: balance 'anyThreadWaiting' even when the timeout
            // throws; the original leaked the increment on
            // TimeoutExpiredException, pinning SpinThreads() on forever.
            try {
                t.CheckExpired(timeout);
                WaitForSingleObject(spinEvent, WaitSpinTimeout);
            } catch(...) {
                InterlockedDecrement((LONG*)&anyThreadWaiting);
                throw;
            }
            InterlockedDecrement((LONG*)&anyThreadWaiting);
        }
        AcquireReader();    // leaves 'atomizer'
    }
    template<class TimeoutPolicy, class EscalatingPolicy>
    __forceinline void ClaimWriterInternal(int timeout)
    {
        _ASSERT(numReaders>=0);
        int myThreadId=GetCurrentThreadId();
        TimeoutPolicy t;
        // Spin phase: handles reentrance and escalation, polls briefly.
        for(int i=0; i<MaxSpinIterationsWrite; ++i) {
            EnterCriticalSection(&atomizer);
            // BUGFIX: reentrant acquisition must work at any recursion depth;
            // the original tested numWriters==1 and deadlocked on the third
            // recursive write claim.
            if(numWriters>0&&myThreadId==writerThreadId) {
                AcquireWriter(myThreadId);    // leaves 'atomizer'
                return;
            }
            // BUGFIX: CheckUpgradingFromReaderToWriter can throw
            // ImplicitEscalationException; the original leaked 'atomizer'.
            try {
                CheckUpgradingFromReaderToWriter<EscalatingPolicy>();
            } catch(...) {
                LeaveCriticalSection(&atomizer);
                throw;
            }
            if(numReaders==0&&numWriters==0) {
                AcquireWriter(myThreadId);    // leaves 'atomizer'
                return;
            }
            LeaveCriticalSection(&atomizer);
            t.CheckExpired(timeout);
            Spin();
        }
        // Block phase. A reentrant writer never reaches this loop: the first
        // spin iteration always grants it the lock.
        while(true) {
            EnterCriticalSection(&atomizer);
            try {
                CheckUpgradingFromReaderToWriter<EscalatingPolicy>();
            } catch(...) {
                LeaveCriticalSection(&atomizer);
                throw;
            }
            if(numReaders==0&&numWriters==0) {
                AcquireWriter(myThreadId);    // leaves 'atomizer'
                return;
            }
            InterlockedIncrement((LONG*)&anyThreadWaiting);
            LeaveCriticalSection(&atomizer);
            // BUGFIX: the original checked the timeout while still holding
            // 'atomizer'; a TimeoutExpiredException left it locked forever.
            // Also balance 'anyThreadWaiting' on throw.
            try {
                t.CheckExpired(timeout);
                WaitForSingleObject(spinEvent, WaitSpinTimeout);
            } catch(...) {
                InterlockedDecrement((LONG*)&anyThreadWaiting);
                throw;
            }
            InterlockedDecrement((LONG*)&anyThreadWaiting);
        }
    }
public:
    ~ReadWriteLockTLSRelease()
    {
        DeleteCriticalSection(&atomizer);
        CloseHandle(spinEvent);
        TlsFree(tlsSlot);
    }
    // Initializes an unowned lock.
    // Throws std::exception when no TLS slot is available.
    ReadWriteLockTLSRelease()
    {
        InitializeCriticalSection(&atomizer);
        numReaders=numWriters=0;
        // BUGFIX: the original left these two uninitialized; a garbage
        // 'writerThreadId' that matched a live thread id made ClaimReader
        // silently bypass the lock for that thread.
        writerThreadId=0;
        anyThreadWaiting=0;
        spinEvent=CreateEvent(NULL,TRUE,FALSE,NULL);  // manual-reset, non-signaled
        tlsSlot=TlsAlloc();
        if(tlsSlot==TLS_OUT_OF_INDEXES) {
            // BUGFIX: release the already-acquired resources before throwing;
            // the original leaked the critical section and the event.
            CloseHandle(spinEvent);
            DeleteCriticalSection(&atomizer);
            throw std::exception("Out of TLS slots");
        }
    }
    // Acquires the write lock; a thread already holding read locks is
    // escalated (its read locks are temporarily released — see article text).
    void ClaimWriterAllowEscalating()
    {
        return ClaimWriterInternal<TimeoutIgnore,
            EscalatingPolicyAllow>(INFINITE);
    }
    // As above, but throws TimeoutExpiredException after 'timeout' ms.
    void ClaimWriterAllowEscalating(int timeout)
    {
        return ClaimWriterInternal<TimeoutChecker,
            EscalatingPolicyAllow>(timeout);
    }
    // Acquires the write lock; throws ImplicitEscalationException if the
    // calling thread already holds a read lock.
    void ClaimWriterNoEscalating()
    {
        return ClaimWriterInternal<TimeoutIgnore,
            EscalatingPolicyDeny>(INFINITE);
    }
    // As above, but throws TimeoutExpiredException after 'timeout' ms.
    void ClaimWriterNoEscalating(int timeout)
    {
        return ClaimWriterInternal<TimeoutChecker,
            EscalatingPolicyDeny>(timeout);
    }
    // Acquires a (reentrant) read lock; a no-op for the current writer.
    void ClaimReader()
    {
        return ClaimReaderInternal<TimeoutIgnore>(INFINITE);
    }
    // As above, but throws TimeoutExpiredException after 'timeout' ms.
    void ClaimReader(int timeout)
    {
        return ClaimReaderInternal<TimeoutChecker>(timeout);
    }
    // Releases one recursive write claim; on the outermost release the
    // caller's suspended read locks (if escalated) are restored and any
    // blocked waiters are woken.
    void ReleaseWriter()
    {
        _ASSERT(numReaders>=0);
        EnterCriticalSection(&atomizer);
        _ASSERT(numWriters>0);
        numWriters--;
        if(0==numWriters) {
            writerThreadId=0;
            // BUGFIX: only restore suspended read locks once the outermost
            // write lock is released; the original restored them while
            // numWriters>0, violating "no readers while writing".
            CheckRestorePreviousReaderLock();
            SpinThreads();
        }
        LeaveCriticalSection(&atomizer);
    }
    // Releases one recursive read claim. A no-op while the write lock is held
    // (matching the no-op read claim the writer thread received).
    void ReleaseReader()
    {
        _ASSERT(numReaders>=0);
        EnterCriticalSection(&atomizer);
        if(numWriters==0)
        {
            InterlockedDecrement((LONG*)&numReaders);
            SetTLSReaderCount(GetTLSReaderCount()-1);
            _ASSERT(numReaders>=0);
            _ASSERT(numWriters==0);
            SpinThreads();
        }
        LeaveCriticalSection(&atomizer);
    }
};
Points of interest
When a thread requests a write lock after having obtained a read lock, the read lock is released and the thread waits until there are no readers before the write lock is granted. This is needed to avoid deadlocks caused by two threads holding the read lock and simultaneously requesting a write lock. The release-and-reacquire upon lock escalation can possibly lead to data corruption: the information the thread gathered while holding the read lock may no longer be consistent with the state of the data protected by the lock once the write lock is granted, because other writer threads might have acquired the lock while the thread was upgrading from read to write. To avoid this, the escalated thread must be aware that, after requesting the write lock, it has to re-read whatever protected state its write operation depends on. If this behavior is not desired, ClaimWriterNoEscalating()
can be used, in which case the lock throws an ImplicitEscalationException
exception if it finds that the thread already holds the read lock.
Performance
I've run a simple benchmark program (included in the source example code) and the result is shown in the following graph:
The fully reentrant lock is about 3 times slower than a simple Windows critical section, which is not too bad. It's faster than .NET ReaderWriterLock
. The biggest performance gain comes from avoiding the use of heavy-weight OS synchronization primitives and instead using the active wait plus Sleep(0).
History
- 1-10-06
- 1-12-06
- Measured .NET
ReaderWriterLock
.
- 1-20-06
- Use of spin lock and thread local storage for 8x speedup, timeouts, release reader lock when escalating with option to allow or disallow implicit escalation.