arch-vega: Implement V_ADD3_U32
Change-Id: I4d01265f946e289cbff56090c2dd193ea66d5c70
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/53867
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/arch/amdgpu/vega/decoder.cc b/src/arch/amdgpu/vega/decoder.cc
index e616a7f..b798bd0 100644
--- a/src/arch/amdgpu/vega/decoder.cc
+++ b/src/arch/amdgpu/vega/decoder.cc
@@ -6939,8 +6939,7 @@
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_ADD3_U32(MachInst iFmt)
{
- fatal("Trying to decode instruction without a class\n");
- return nullptr;
+ return new Inst_VOP3__V_ADD3_U32(&iFmt->iFmt_VOP3A);
}
GPUStaticInst*
diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index 114859c..c60ba62 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -32096,6 +32096,51 @@
vdst.write();
} // execute
+ // --- Inst_VOP3__V_ADD3_U32 class methods ---
+
+ Inst_VOP3__V_ADD3_U32::Inst_VOP3__V_ADD3_U32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_add3_u32", false)
+ {
+ setFlag(ALU);
+ } // Inst_VOP3__V_ADD3_U32
+
+ Inst_VOP3__V_ADD3_U32::~Inst_VOP3__V_ADD3_U32()
+ {
+ } // ~Inst_VOP3__V_ADD3_U32
+
+ // --- description from .arch file ---
+ // D.u = S0.u + S1.u + S2.u.
+ void
+ Inst_VOP3__V_ADD3_U32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
+ VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = src0[lane] + src1[lane] + src2[lane];
+ }
+ }
+
+ vdst.write();
+ } // execute
// --- Inst_VOP3__V_LSHL_OR_B32 class methods ---
Inst_VOP3__V_LSHL_OR_B32::Inst_VOP3__V_LSHL_OR_B32(InFmt_VOP3A *iFmt)
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index 1107c24..91edc10 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -29568,6 +29568,42 @@
void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_ADD_LSHL_U32
+ class Inst_VOP3__V_ADD3_U32 : public Inst_VOP3A
+ {
+ public:
+ Inst_VOP3__V_ADD3_U32(InFmt_VOP3A*);
+ ~Inst_VOP3__V_ADD3_U32();
+
+ int
+ getNumOperands() override
+ {
+ return numDstRegOperands() + numSrcRegOperands();
+ } // getNumOperands
+
+ int numDstRegOperands() override { return 1; }
+ int numSrcRegOperands() override { return 3; }
+
+ int
+ getOperandSize(int opIdx) override
+ {
+ switch (opIdx) {
+ case 0: //src_0
+ return 4;
+ case 1: //src_1
+ return 4;
+ case 2: //src_2
+ return 4;
+ case 3: //vdst
+ return 4;
+ default:
+ fatal("op idx %i out of bounds\n", opIdx);
+ return -1;
+ }
+ } // getOperandSize
+
+ void execute(GPUDynInstPtr) override;
+ }; // Inst_VOP3__V_ADD3_U32
+
class Inst_VOP3__V_LSHL_OR_B32 : public Inst_VOP3A
{
public: