#ifndef __HIPLOCKSMUTEXEBO_H__
#define __HIPLOCKSMUTEXEBO_H__
#include "hip/hip_runtime.h"
#include "hipLocks.h"
inline __host__ hipError_t hipMutexCreateEBO(hipMutex_t * const handle,
                                             const int mutexNumber)
{
  *handle = mutexNumber;
  return hipSuccess;
}
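
// Host-side sizing sketch (illustrative, not part of the original interface):
// each EBO mutex owns NUM_CU consecutive unsigned ints in mutexBufferHeads
// (the device-scope lock spins on word mutex * NUM_CU, the local-scope lock
// on word mutex * NUM_CU + cuID), and every word must start at 0 (unlocked).
// The helper name and parameters below are hypothetical.
inline __host__ hipError_t hipMutexEBOAllocHeadsExample(unsigned int ** mutexBufferHeads,
                                                        const int numMutexes,
                                                        const int NUM_CU)
{
  const size_t bytes = (size_t)numMutexes * NUM_CU * sizeof(unsigned int);
  hipError_t err = hipMalloc((void **)mutexBufferHeads, bytes);
  if (err != hipSuccess) return err;
  // 0 means unlocked for both the device-scope and local-scope variants
  return hipMemset(*mutexBufferHeads, 0, bytes);
}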
inline __device__ void hipMutexEBOLock(const hipMutex_t mutex,
                                       unsigned int * mutexBufferHeads,
                                       const int NUM_CU)
{
  // local variables
  __shared__ int done, backoff;
  const bool isMasterThread = (hipThreadIdx_x == 0 && hipThreadIdx_y == 0 &&
                               hipThreadIdx_z == 0);
  unsigned int * mutexHeadPtr = NULL;

  if (isMasterThread)
  {
    backoff = 1;
    done = 0;
    mutexHeadPtr = (mutexBufferHeads + (mutex * NUM_CU));
  }
  __syncthreads();

  while (!done)
  {
    __syncthreads();
    if (isMasterThread)
    {
      // try to acquire the lock
      if (atomicCAS(mutexHeadPtr, 0, 1) == 0) {
        // atomicCAS acts as a load acquire, need TF to enforce ordering
        __threadfence();
        done = 1;
      }
      else
      {
        // if we failed to acquire the lock, wait for a little while before
        // trying again
        //sleepFunc(backoff);
        for (int j = 0; j < backoff; ++j) { ; }
        // (capped) exponential backoff
        backoff = (((backoff << 1) + 1) & (MAX_BACKOFF-1));
      }
    }
    __syncthreads();
  }
}
inline __device__ void hipMutexEBOUnlock(const hipMutex_t mutex,
                                         unsigned int * mutexBufferHeads,
                                         const int NUM_CU)
{
  __syncthreads();
  if (hipThreadIdx_x == 0 && hipThreadIdx_y == 0 && hipThreadIdx_z == 0) {
    // atomicExch acts as a store release, need TF to enforce ordering
    __threadfence();
    atomicExch(mutexBufferHeads + (mutex * NUM_CU), 0); // release the lock
  }
  __syncthreads();
}
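
// Usage sketch (illustrative, not part of the original interface): wraps a
// critical section with the device-scope EBO mutex. All blocks contending on
// the same hipMutex_t must pass the same mutexBufferHeads array and NUM_CU
// value, and every thread in the block must reach the lock/unlock calls,
// since both use __syncthreads() internally. The helper name and the counter
// parameter are hypothetical.
inline __device__ void hipMutexEBOCriticalSectionExample(const hipMutex_t mutex,
                                                         unsigned int * mutexBufferHeads,
                                                         const int NUM_CU,
                                                         unsigned int * counter)
{
  hipMutexEBOLock(mutex, mutexBufferHeads, NUM_CU);
  // critical section: only one block holds the mutex at a time, so a single
  // thread may update the shared state without further atomics
  if (hipThreadIdx_x == 0 && hipThreadIdx_y == 0 && hipThreadIdx_z == 0) {
    *counter += 1;
  }
  hipMutexEBOUnlock(mutex, mutexBufferHeads, NUM_CU);
}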
// same locking algorithm but with local scope
inline __device__ void hipMutexEBOLockLocal(const hipMutex_t mutex,
                                            const unsigned int cuID,
                                            unsigned int * mutexBufferHeads,
                                            const int NUM_CU)
{
  // local variables
  __shared__ int done, backoff;
  const bool isMasterThread = (hipThreadIdx_x == 0 && hipThreadIdx_y == 0 &&
                               hipThreadIdx_z == 0);
  unsigned int * mutexHeadPtr = NULL;

  if (isMasterThread)
  {
    backoff = 1;
    done = 0;
    mutexHeadPtr = (mutexBufferHeads + ((mutex * NUM_CU) + cuID));
  }
  __syncthreads();

  while (!done)
  {
    __syncthreads();
    if (isMasterThread)
    {
      // try to acquire the lock
      if (atomicCAS(mutexHeadPtr, 0, 1) == 0) {
        // atomicCAS acts as a load acquire, need TF to enforce ordering locally
        __threadfence_block();
        done = 1;
      }
      else
      {
        // if we failed to acquire the lock, wait for a little while before
        // trying again
        //sleepFunc(backoff);
        for (int j = 0; j < backoff; ++j) { ; }
        // (capped) exponential backoff
        backoff = (((backoff << 1) + 1) & (MAX_BACKOFF-1));
      }
    }
    __syncthreads();
  }
}
// same unlock algorithm but with local scope
inline __device__ void hipMutexEBOUnlockLocal(const hipMutex_t mutex,
                                              const unsigned int cuID,
                                              unsigned int * mutexBufferHeads,
                                              const int NUM_CU)
{
  __syncthreads();
  if (hipThreadIdx_x == 0 && hipThreadIdx_y == 0 && hipThreadIdx_z == 0) {
    // atomicExch acts as a store release, need TF to enforce ordering locally
    __threadfence_block();
    atomicExch(mutexBufferHeads + ((mutex * NUM_CU) + cuID), 0); // release the lock
  }
  __syncthreads();
}
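
// Usage sketch (illustrative): the local-scope variants index the head array
// by (mutex * NUM_CU) + cuID, so blocks only exclude other blocks that pass
// the same cuID, and only __threadfence_block() ordering is provided. The
// helper name and the per-CU counter array are hypothetical; how the caller
// obtains cuID (e.g. a block-to-CU mapping) is assumed to be handled elsewhere.
inline __device__ void hipMutexEBOCriticalSectionLocalExample(const hipMutex_t mutex,
                                                              const unsigned int cuID,
                                                              unsigned int * mutexBufferHeads,
                                                              const int NUM_CU,
                                                              unsigned int * perCUCounters)
{
  hipMutexEBOLockLocal(mutex, cuID, mutexBufferHeads, NUM_CU);
  // exclusion only holds among blocks on the same CU, so only per-CU state
  // should be touched inside this critical section
  if (hipThreadIdx_x == 0 && hipThreadIdx_y == 0 && hipThreadIdx_z == 0) {
    perCUCounters[cuID] += 1;
  }
  hipMutexEBOUnlockLocal(mutex, cuID, mutexBufferHeads, NUM_CU);
}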
#endif