arch-vega: Implement V_LSHL_ADD_U32
Change-Id: I986f82e8c6c02b0d62e55fbaed1c3f9e5b2b4a43
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/53865
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/arch/amdgpu/vega/decoder.cc b/src/arch/amdgpu/vega/decoder.cc
index cc24d96..54fcda9 100644
--- a/src/arch/amdgpu/vega/decoder.cc
+++ b/src/arch/amdgpu/vega/decoder.cc
@@ -6927,8 +6927,7 @@
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_LSHL_ADD_U32(MachInst iFmt)
{
- fatal("Trying to decode instruction without a class\n");
- return nullptr;
+ return new Inst_VOP3__V_LSHL_ADD_U32(&iFmt->iFmt_VOP3A); // build the instruction object from the iFmt_VOP3A member of the encoding
}
GPUStaticInst*
diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index be3966d..d699a9a 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -32004,6 +32004,52 @@
vcc.write();
vdst.write();
} // execute
+ // --- Inst_VOP3__V_LSHL_ADD_U32 class methods ---
+
+ Inst_VOP3__V_LSHL_ADD_U32::Inst_VOP3__V_LSHL_ADD_U32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_lshl_add_u32", false) // forwards the encoding and the name string to the VOP3A base; NOTE(review): the meaning of the `false` argument is defined by Inst_VOP3A, which is not visible here
+ {
+ setFlag(ALU); // tag this instruction with the ALU flag
+ } // Inst_VOP3__V_LSHL_ADD_U32
+
+ Inst_VOP3__V_LSHL_ADD_U32::~Inst_VOP3__V_LSHL_ADD_U32()
+ { // empty body: this destructor performs no work of its own
+ } // ~Inst_VOP3__V_LSHL_ADD_U32
+
+ // --- description from .arch file ---
+ // D.u = (S0.u << S1.u[4:0]) + S2.u.
+ void
+ Inst_VOP3__V_LSHL_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront(); // wavefront whose exec mask gates the per-lane updates below
+ ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); // S0: value to be shifted left
+ ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); // S1: shift amount (only bits [4:0] are used)
+ ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); // S2: addend
+ VecOperandU32 vdst(gpuDynInst, instData.VDST); // D: destination operand
+
+ src0.readSrc(); // load all three source operands before computing
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only, so this integer op asserts that the low three ABS and NEG bits (presumably one bit per source operand) are all clear
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { // visit every lane index
+ if (wf->execMask(lane)) { // only lanes enabled in the exec mask get a new value
+ vdst[lane] = (src0[lane] << bits(src1[lane], 4, 0)) // shift count is S1[4:0], i.e. 0..31
+ + src2[lane];
+ }
+ }
+
+ vdst.write(); // write the destination operand back
+ } // execute
// --- Inst_VOP3__V_LSHL_OR_B32 class methods ---
Inst_VOP3__V_LSHL_OR_B32::Inst_VOP3__V_LSHL_OR_B32(InFmt_VOP3A *iFmt)
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index 0c30ca4..e3e390b 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -29496,6 +29496,42 @@
void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_MAD_I64_I32
+ class Inst_VOP3__V_LSHL_ADD_U32 : public Inst_VOP3A // VOP3A instruction "v_lshl_add_u32": one destination, three sources
+ {
+ public:
+ Inst_VOP3__V_LSHL_ADD_U32(InFmt_VOP3A*);
+ ~Inst_VOP3__V_LSHL_ADD_U32();
+
+ int
+ getNumOperands() override
+ {
+ return numDstRegOperands() + numSrcRegOperands(); // 1 destination + 3 sources = 4
+ } // getNumOperands
+
+ int numDstRegOperands() override { return 1; } // vdst
+ int numSrcRegOperands() override { return 3; } // src_0, src_1, src_2
+
+ int
+ getOperandSize(int opIdx) override // every operand reports size 4; NOTE(review): units are presumably bytes — confirm
+ {
+ switch (opIdx) {
+ case 0: //src_0
+ return 4;
+ case 1: //src_1
+ return 4;
+ case 2: //src_2
+ return 4;
+ case 3: //vdst
+ return 4;
+ default:
+ fatal("op idx %i out of bounds\n", opIdx); // any index outside 0..3 is rejected
+ return -1; // NOTE(review): presumably unreachable if fatal() does not return — confirm
+ }
+ } // getOperandSize
+
+ void execute(GPUDynInstPtr) override; // per-lane semantics are defined out of line
+ }; // Inst_VOP3__V_LSHL_ADD_U32
+
class Inst_VOP3__V_LSHL_OR_B32 : public Inst_VOP3A
{
public: