blob: 0c729b784e49dc524224cb5b1a3739298e571d20 [file] [log] [blame]
/*
* Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "gpu-compute/gpu_dyn_inst.hh"
#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"
GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
GPUStaticInst *static_inst, uint64_t instSeqNum)
: GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0),
n_reg(0), useContinuation(false),
statusBitVector(0), _staticInst(static_inst), _seqNum(instSeqNum)
{
tlbHitLevel.assign(computeUnit()->wfSize(), -1);
d_data = new uint8_t[computeUnit()->wfSize() * 16];
a_data = new uint8_t[computeUnit()->wfSize() * 8];
x_data = new uint8_t[computeUnit()->wfSize() * 8];
for (int i = 0; i < (computeUnit()->wfSize() * 8); ++i) {
a_data[i] = 0;
x_data[i] = 0;
}
for (int i = 0; i < (computeUnit()->wfSize() * 16); ++i) {
d_data[i] = 0;
}
}
GPUDynInst::~GPUDynInst()
{
delete[] d_data;
delete[] a_data;
delete[] x_data;
}
void
GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
{
_staticInst->execute(gpuDynInst);
}
int
GPUDynInst::numSrcRegOperands()
{
return _staticInst->numSrcRegOperands();
}
int
GPUDynInst::numDstRegOperands()
{
return _staticInst->numDstRegOperands();
}
int
GPUDynInst::getNumOperands()
{
return _staticInst->getNumOperands();
}
bool
GPUDynInst::isVectorRegister(int operandIdx)
{
return _staticInst->isVectorRegister(operandIdx);
}
bool
GPUDynInst::isScalarRegister(int operandIdx)
{
return _staticInst->isScalarRegister(operandIdx);
}
bool
GPUDynInst::isCondRegister(int operandIdx)
{
return _staticInst->isCondRegister(operandIdx);
}
int
GPUDynInst::getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst)
{
return _staticInst->getRegisterIndex(operandIdx, gpuDynInst);
}
int
GPUDynInst::getOperandSize(int operandIdx)
{
return _staticInst->getOperandSize(operandIdx);
}
bool
GPUDynInst::isDstOperand(int operandIdx)
{
return _staticInst->isDstOperand(operandIdx);
}
bool
GPUDynInst::isSrcOperand(int operandIdx)
{
return _staticInst->isSrcOperand(operandIdx);
}
const std::string&
GPUDynInst::disassemble() const
{
return _staticInst->disassemble();
}
uint64_t
GPUDynInst::seqNum() const
{
return _seqNum;
}
Enums::StorageClassType
GPUDynInst::executedAs()
{
return _staticInst->executed_as;
}
// Process a memory instruction and (if necessary) submit timing request
void
GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
{
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
cu->cu_id, simdId, wfSlotId, exec_mask);
_staticInst->initiateAcc(gpuDynInst);
time = 0;
}
void
GPUDynInst::completeAcc(GPUDynInstPtr gpuDynInst)
{
_staticInst->completeAcc(gpuDynInst);
}
/**
* accessor methods for the attributes of
* the underlying GPU static instruction
*/
bool
GPUDynInst::isALU() const
{
return _staticInst->isALU();
}
bool
GPUDynInst::isBranch() const
{
return _staticInst->isBranch();
}
bool
GPUDynInst::isNop() const
{
return _staticInst->isNop();
}
bool
GPUDynInst::isReturn() const
{
return _staticInst->isReturn();
}
bool
GPUDynInst::isUnconditionalJump() const
{
return _staticInst->isUnconditionalJump();
}
bool
GPUDynInst::isSpecialOp() const
{
return _staticInst->isSpecialOp();
}
bool
GPUDynInst::isWaitcnt() const
{
return _staticInst->isWaitcnt();
}
bool
GPUDynInst::isBarrier() const
{
return _staticInst->isBarrier();
}
bool
GPUDynInst::isMemFence() const
{
return _staticInst->isMemFence();
}
bool
GPUDynInst::isMemRef() const
{
return _staticInst->isMemRef();
}
bool
GPUDynInst::isFlat() const
{
return _staticInst->isFlat();
}
bool
GPUDynInst::isLoad() const
{
return _staticInst->isLoad();
}
bool
GPUDynInst::isStore() const
{
return _staticInst->isStore();
}
bool
GPUDynInst::isAtomic() const
{
return _staticInst->isAtomic();
}
bool
GPUDynInst::isAtomicNoRet() const
{
return _staticInst->isAtomicNoRet();
}
bool
GPUDynInst::isAtomicRet() const
{
return _staticInst->isAtomicRet();
}
bool
GPUDynInst::isScalar() const
{
return _staticInst->isScalar();
}
bool
GPUDynInst::readsSCC() const
{
return _staticInst->readsSCC();
}
bool
GPUDynInst::writesSCC() const
{
return _staticInst->writesSCC();
}
bool
GPUDynInst::readsVCC() const
{
return _staticInst->readsVCC();
}
bool
GPUDynInst::writesVCC() const
{
return _staticInst->writesVCC();
}
bool
GPUDynInst::isAtomicAnd() const
{
return _staticInst->isAtomicAnd();
}
bool
GPUDynInst::isAtomicOr() const
{
return _staticInst->isAtomicOr();
}
bool
GPUDynInst::isAtomicXor() const
{
return _staticInst->isAtomicXor();
}
bool
GPUDynInst::isAtomicCAS() const
{
return _staticInst->isAtomicCAS();
}
bool GPUDynInst::isAtomicExch() const
{
return _staticInst->isAtomicExch();
}
bool
GPUDynInst::isAtomicAdd() const
{
return _staticInst->isAtomicAdd();
}
bool
GPUDynInst::isAtomicSub() const
{
return _staticInst->isAtomicSub();
}
bool
GPUDynInst::isAtomicInc() const
{
return _staticInst->isAtomicInc();
}
bool
GPUDynInst::isAtomicDec() const
{
return _staticInst->isAtomicDec();
}
bool
GPUDynInst::isAtomicMax() const
{
return _staticInst->isAtomicMax();
}
bool
GPUDynInst::isAtomicMin() const
{
return _staticInst->isAtomicMin();
}
bool
GPUDynInst::isArgLoad() const
{
return _staticInst->isArgLoad();
}
bool
GPUDynInst::isGlobalMem() const
{
return _staticInst->isGlobalMem();
}
bool
GPUDynInst::isLocalMem() const
{
return _staticInst->isLocalMem();
}
bool
GPUDynInst::isArgSeg() const
{
return _staticInst->isArgSeg();
}
bool
GPUDynInst::isGlobalSeg() const
{
return _staticInst->isGlobalSeg();
}
bool
GPUDynInst::isGroupSeg() const
{
return _staticInst->isGroupSeg();
}
bool
GPUDynInst::isKernArgSeg() const
{
return _staticInst->isKernArgSeg();
}
bool
GPUDynInst::isPrivateSeg() const
{
return _staticInst->isPrivateSeg();
}
bool
GPUDynInst::isReadOnlySeg() const
{
return _staticInst->isReadOnlySeg();
}
bool
GPUDynInst::isSpillSeg() const
{
return _staticInst->isSpillSeg();
}
bool
GPUDynInst::isWorkitemScope() const
{
return _staticInst->isWorkitemScope();
}
bool
GPUDynInst::isWavefrontScope() const
{
return _staticInst->isWavefrontScope();
}
bool
GPUDynInst::isWorkgroupScope() const
{
return _staticInst->isWorkgroupScope();
}
bool
GPUDynInst::isDeviceScope() const
{
return _staticInst->isDeviceScope();
}
bool
GPUDynInst::isSystemScope() const
{
return _staticInst->isSystemScope();
}
bool
GPUDynInst::isNoScope() const
{
return _staticInst->isNoScope();
}
bool
GPUDynInst::isRelaxedOrder() const
{
return _staticInst->isRelaxedOrder();
}
bool
GPUDynInst::isAcquire() const
{
return _staticInst->isAcquire();
}
bool
GPUDynInst::isRelease() const
{
return _staticInst->isRelease();
}
bool
GPUDynInst::isAcquireRelease() const
{
return _staticInst->isAcquireRelease();
}
bool
GPUDynInst::isNoOrder() const
{
return _staticInst->isNoOrder();
}
bool
GPUDynInst::isGloballyCoherent() const
{
return _staticInst->isGloballyCoherent();
}
bool
GPUDynInst::isSystemCoherent() const
{
return _staticInst->isSystemCoherent();
}
void
GPUDynInst::updateStats()
{
if (_staticInst->isLocalMem()) {
// access to LDS (shared) memory
cu->dynamicLMemInstrCnt++;
} else {
// access to global memory
// update PageDivergence histogram
int number_pages_touched = cu->pagesTouched.size();
assert(number_pages_touched);
cu->pageDivergenceDist.sample(number_pages_touched);
std::pair<ComputeUnit::pageDataStruct::iterator, bool> ret;
for (auto it : cu->pagesTouched) {
// see if this page has been touched before. if not, this also
// inserts the page into the table.
ret = cu->pageAccesses
.insert(ComputeUnit::pageDataStruct::value_type(it.first,
std::make_pair(1, it.second)));
// if yes, then update the stats
if (!ret.second) {
ret.first->second.first++;
ret.first->second.second += it.second;
}
}
cu->pagesTouched.clear();
// total number of memory instructions (dynamic)
// Atomics are counted as a single memory instruction.
// this is # memory instructions per wavefronts, not per workitem
cu->dynamicGMemInstrCnt++;
}
}