blob: f973f2f4301ce88d559eed3017f115089ff17a78 [file] [log] [blame]
/*
* Copyright (c) 2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __GPU_STATIC_INST_HH__
#define __GPU_STATIC_INST_HH__
/*
* @file gpu_static_inst.hh
*
* Defines the base class representing static instructions for the GPU. The
* instructions are "static" because they contain no dynamic instruction
* information. GPUStaticInst corresponds to the StaticInst class for the CPU
* models.
*/
#include <cstdint>
#include <string>
#include "enums/GPUStaticInstFlags.hh"
#include "enums/StorageClassType.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/misc.hh"
class BaseOperand;
class BaseRegOperand;
class Wavefront;
class GPUStaticInst : public GPUStaticInstFlags
{
public:
GPUStaticInst(const std::string &opcode);
virtual ~GPUStaticInst() { }
void instAddr(int inst_addr) { _instAddr = inst_addr; }
int instAddr() const { return _instAddr; }
int nextInstAddr() const { return _instAddr + instSize(); }
void instNum(int num) { _instNum = num; }
int instNum() { return _instNum; }
void ipdInstNum(int num) { _ipdInstNum = num; }
int ipdInstNum() const { return _ipdInstNum; }
virtual TheGpuISA::ScalarRegU32 srcLiteral() const { return 0; }
virtual void execute(GPUDynInstPtr gpuDynInst) = 0;
virtual void generateDisassembly() = 0;
const std::string& disassemble();
virtual int getNumOperands() = 0;
virtual bool isScalarRegister(int operandIndex) = 0;
virtual bool isVectorRegister(int operandIndex) = 0;
virtual bool isSrcOperand(int operandIndex) = 0;
virtual bool isDstOperand(int operandIndex) = 0;
virtual bool isFlatScratchRegister(int opIdx) = 0;
virtual bool isExecMaskRegister(int opIdx) = 0;
virtual int getOperandSize(int operandIndex) = 0;
virtual int getRegisterIndex(int operandIndex,
GPUDynInstPtr gpuDynInst) = 0;
virtual int numDstRegOperands() = 0;
virtual int numSrcRegOperands() = 0;
virtual int coalescerTokenCount() const { return 0; }
int numDstVecOperands();
int numSrcVecOperands();
int numDstVecDWORDs();
int numSrcVecDWORDs();
int numOpdDWORDs(int operandIdx);
bool isALU() const { return _flags[ALU]; }
bool isBranch() const { return _flags[Branch]; }
bool isCondBranch() const { return _flags[CondBranch]; }
bool isNop() const { return _flags[Nop]; }
bool isReturn() const { return _flags[Return]; }
bool isEndOfKernel() const { return _flags[EndOfKernel]; }
bool isKernelLaunch() const { return _flags[KernelLaunch]; }
bool isSDWAInst() const { return _flags[IsSDWA]; }
bool isDPPInst() const { return _flags[IsDPP]; }
bool
isUnconditionalJump() const
{
return _flags[UnconditionalJump];
}
bool isSpecialOp() const { return _flags[SpecialOp]; }
bool isWaitcnt() const { return _flags[Waitcnt]; }
bool isSleep() const { return _flags[Sleep]; }
bool isBarrier() const { return _flags[MemBarrier]; }
bool isMemSync() const { return _flags[MemSync]; }
bool isMemRef() const { return _flags[MemoryRef]; }
bool isFlat() const { return _flags[Flat]; }
bool isLoad() const { return _flags[Load]; }
bool isStore() const { return _flags[Store]; }
bool
isAtomic() const
{
return _flags[AtomicReturn] || _flags[AtomicNoReturn];
}
bool isAtomicNoRet() const { return _flags[AtomicNoReturn]; }
bool isAtomicRet() const { return _flags[AtomicReturn]; }
bool isScalar() const { return _flags[Scalar]; }
bool readsSCC() const { return _flags[ReadsSCC]; }
bool writesSCC() const { return _flags[WritesSCC]; }
bool readsVCC() const { return _flags[ReadsVCC]; }
bool writesVCC() const { return _flags[WritesVCC]; }
// Identify instructions that implicitly read the Execute mask
// as a source operand but not to dictate which threads execute.
bool readsEXEC() const { return _flags[ReadsEXEC]; }
bool writesEXEC() const { return _flags[WritesEXEC]; }
bool readsMode() const { return _flags[ReadsMode]; }
bool writesMode() const { return _flags[WritesMode]; }
bool ignoreExec() const { return _flags[IgnoreExec]; }
bool isAtomicAnd() const { return _flags[AtomicAnd]; }
bool isAtomicOr() const { return _flags[AtomicOr]; }
bool isAtomicXor() const { return _flags[AtomicXor]; }
bool isAtomicCAS() const { return _flags[AtomicCAS]; }
bool isAtomicExch() const { return _flags[AtomicExch]; }
bool isAtomicAdd() const { return _flags[AtomicAdd]; }
bool isAtomicSub() const { return _flags[AtomicSub]; }
bool isAtomicInc() const { return _flags[AtomicInc]; }
bool isAtomicDec() const { return _flags[AtomicDec]; }
bool isAtomicMax() const { return _flags[AtomicMax]; }
bool isAtomicMin() const { return _flags[AtomicMin]; }
bool
isArgLoad() const
{
return (_flags[KernArgSegment] || _flags[ArgSegment]) && _flags[Load];
}
bool
isGlobalMem() const
{
return _flags[MemoryRef] && (_flags[GlobalSegment] ||
_flags[PrivateSegment] || _flags[ReadOnlySegment] ||
_flags[SpillSegment]);
}
bool
isLocalMem() const
{
return _flags[MemoryRef] && _flags[GroupSegment];
}
bool isArgSeg() const { return _flags[ArgSegment]; }
bool isGlobalSeg() const { return _flags[GlobalSegment]; }
bool isGroupSeg() const { return _flags[GroupSegment]; }
bool isKernArgSeg() const { return _flags[KernArgSegment]; }
bool isPrivateSeg() const { return _flags[PrivateSegment]; }
bool isReadOnlySeg() const { return _flags[ReadOnlySegment]; }
bool isSpillSeg() const { return _flags[SpillSegment]; }
/**
* Coherence domain of a memory instruction. The coherence domain
* specifies where it is possible to perform memory synchronization
* (e.g., acquire or release) from the shader kernel.
*
* isGloballyCoherent(): returns true if WIs share same device
* isSystemCoherent(): returns true if WIs or threads in different
* devices share memory
*
*/
bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
bool isSystemCoherent() const { return _flags[SystemCoherent]; }
// Floating-point instructions
bool isF16() const { return _flags[F16]; }
bool isF32() const { return _flags[F32]; }
bool isF64() const { return _flags[F64]; }
// FMA, MAC, MAD instructions
bool isFMA() const { return _flags[FMA]; }
bool isMAC() const { return _flags[MAC]; }
bool isMAD() const { return _flags[MAD]; }
virtual int instSize() const = 0;
// only used for memory instructions
virtual void
initiateAcc(GPUDynInstPtr gpuDynInst)
{
fatal("calling initiateAcc() on a non-memory instruction.\n");
}
// only used for memory instructions
virtual void
completeAcc(GPUDynInstPtr gpuDynInst)
{
fatal("calling completeAcc() on a non-memory instruction.\n");
}
virtual uint32_t getTargetPc() { return 0; }
static uint64_t dynamic_id_count;
// For flat memory accesses
Enums::StorageClassType executed_as;
void setFlag(Flags flag) {
_flags[flag] = true;
if (isGroupSeg()) {
executed_as = Enums::SC_GROUP;
} else if (isGlobalSeg()) {
executed_as = Enums::SC_GLOBAL;
} else if (isPrivateSeg()) {
executed_as = Enums::SC_PRIVATE;
} else if (isSpillSeg()) {
executed_as = Enums::SC_SPILL;
} else if (isReadOnlySeg()) {
executed_as = Enums::SC_READONLY;
} else if (isKernArgSeg()) {
executed_as = Enums::SC_KERNARG;
} else if (isArgSeg()) {
executed_as = Enums::SC_ARG;
}
}
const std::string& opcode() const { return _opcode; }
protected:
const std::string _opcode;
std::string disassembly;
int _instNum;
int _instAddr;
int srcVecOperands;
int dstVecOperands;
int srcVecDWORDs;
int dstVecDWORDs;
/**
* Identifier of the immediate post-dominator instruction.
*/
int _ipdInstNum;
std::bitset<Num_Flags> _flags;
};
class KernelLaunchStaticInst : public GPUStaticInst
{
public:
KernelLaunchStaticInst() : GPUStaticInst("kernel_launch")
{
setFlag(Nop);
setFlag(KernelLaunch);
setFlag(MemSync);
setFlag(Scalar);
setFlag(GlobalSegment);
}
void
execute(GPUDynInstPtr gpuDynInst) override
{
fatal("kernel launch instruction should not be executed\n");
}
void
generateDisassembly() override
{
disassembly = _opcode;
}
int getNumOperands() override { return 0; }
bool isFlatScratchRegister(int opIdx) override { return false; }
// return true if the Execute mask is explicitly used as a source
// register operand
bool isExecMaskRegister(int opIdx) override { return false; }
bool isScalarRegister(int operandIndex) override { return false; }
bool isVectorRegister(int operandIndex) override { return false; }
bool isSrcOperand(int operandIndex) override { return false; }
bool isDstOperand(int operandIndex) override { return false; }
int getOperandSize(int operandIndex) override { return 0; }
int
getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
{
return 0;
}
int numDstRegOperands() override { return 0; }
int numSrcRegOperands() override { return 0; }
int instSize() const override { return 0; }
};
#endif // __GPU_STATIC_INST_HH__