blob: 783689dd5fe5ce619b68cbcf1b7515fddaf73710 [file] [log] [blame]
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Steve Reinhardt
*/
#include "arch/hsail/insts/decl.hh"
#include "debug/GPUExec.hh"
#include "gpu-compute/dispatcher.hh"
#include "gpu-compute/simple_pool_manager.hh"
namespace HsailISA
{
template<> const char *B1::label = "b1";
template<> const char *B8::label = "b8";
template<> const char *B16::label = "b16";
template<> const char *B32::label = "b32";
template<> const char *B64::label = "b64";
template<> const char *S8::label = "s8";
template<> const char *S16::label = "s16";
template<> const char *S32::label = "s32";
template<> const char *S64::label = "s64";
template<> const char *U8::label = "u8";
template<> const char *U16::label = "u16";
template<> const char *U32::label = "u32";
template<> const char *U64::label = "u64";
template<> const char *F32::label = "f32";
template<> const char *F64::label = "f64";
const char*
cmpOpToString(Brig::BrigCompareOperation cmpOp)
{
using namespace Brig;
switch (cmpOp) {
case BRIG_COMPARE_EQ:
return "eq";
case BRIG_COMPARE_NE:
return "ne";
case BRIG_COMPARE_LT:
return "lt";
case BRIG_COMPARE_LE:
return "le";
case BRIG_COMPARE_GT:
return "gt";
case BRIG_COMPARE_GE:
return "ge";
case BRIG_COMPARE_EQU:
return "equ";
case BRIG_COMPARE_NEU:
return "neu";
case BRIG_COMPARE_LTU:
return "ltu";
case BRIG_COMPARE_LEU:
return "leu";
case BRIG_COMPARE_GTU:
return "gtu";
case BRIG_COMPARE_GEU:
return "geu";
case BRIG_COMPARE_NUM:
return "num";
case BRIG_COMPARE_NAN:
return "nan";
case BRIG_COMPARE_SEQ:
return "seq";
case BRIG_COMPARE_SNE:
return "sne";
case BRIG_COMPARE_SLT:
return "slt";
case BRIG_COMPARE_SLE:
return "sle";
case BRIG_COMPARE_SGT:
return "sgt";
case BRIG_COMPARE_SGE:
return "sge";
case BRIG_COMPARE_SGEU:
return "sgeu";
case BRIG_COMPARE_SEQU:
return "sequ";
case BRIG_COMPARE_SNEU:
return "sneu";
case BRIG_COMPARE_SLTU:
return "sltu";
case BRIG_COMPARE_SLEU:
return "sleu";
case BRIG_COMPARE_SNUM:
return "snum";
case BRIG_COMPARE_SNAN:
return "snan";
case BRIG_COMPARE_SGTU:
return "sgtu";
default:
return "unknown";
}
}
void
Ret::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
const VectorMask &mask = w->getPred();
// mask off completed work-items
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
w->initMask[lane] = 0;
}
}
// delete extra instructions fetched for completed work-items
w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
w->instructionBuffer.end());
if (w->pendingFetch) {
w->dropFetch = true;
}
// if all work-items have completed, then wave-front is done
if (w->initMask.none()) {
w->status = Wavefront::S_STOPPED;
int32_t refCount = w->computeUnit->getLds().
decreaseRefCounter(w->dispatchId, w->wgId);
DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
w->computeUnit->cu_id, w->wgId, refCount);
// free the vector registers of the completed wavefront
w->computeUnit->vectorRegsReserved[w->simdId] -=
w->reservedVectorRegs;
assert(w->computeUnit->vectorRegsReserved[w->simdId] >= 0);
uint32_t endIndex = (w->startVgprIndex +
w->reservedVectorRegs - 1) %
w->computeUnit->vrf[w->simdId]->numRegs();
w->computeUnit->vrf[w->simdId]->manager->
freeRegion(w->startVgprIndex, endIndex);
w->reservedVectorRegs = 0;
w->startVgprIndex = 0;
w->computeUnit->completedWfs++;
DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId);
if (!refCount) {
setFlag(SystemScope);
setFlag(Release);
setFlag(GlobalSegment);
// Notify Memory System of Kernel Completion
// Kernel End = isKernel + isRelease
w->status = Wavefront::S_RETURNING;
GPUDynInstPtr local_mempacket = gpuDynInst;
local_mempacket->useContinuation = false;
local_mempacket->simdId = w->simdId;
local_mempacket->wfSlotId = w->wfSlotId;
local_mempacket->wfDynId = w->wfDynId;
w->computeUnit->injectGlobalMemFence(local_mempacket, true);
} else {
w->computeUnit->shader->dispatcher->scheduleDispatch();
}
}
}
void
Barrier::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
assert(w->barrierCnt == w->oldBarrierCnt);
w->barrierCnt = w->oldBarrierCnt + 1;
w->stalledAtBarrier = true;
}
} // namespace HsailISA