arch-vega: Fix sign extension in SOPK instructions

See: https://gem5-review.googlesource.com/c/public/gem5/+/37495

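This is the same patch as the one linked above, but for Vega: the
16-bit SIMM16 immediate of the affected SOPK instructions is now
sign-extended with sext<16>() before it is used as a 32-bit value.
This fixes issues with lulesh, and probably with rodinia's heartwall
as well, in full-system mode.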

Change-Id: I3af36bb9b60d32dc96cc3b439bb1167be1b0945d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65432
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
(cherry picked from commit 8693d725e202002893aafc4ac814bfa87c86ae76)
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65453
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index 76bb8aa..f5b08b7 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -1553,7 +1553,7 @@
     void
     Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
 
         sdst = simm16;
@@ -1579,7 +1579,7 @@
     void
     Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
         ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
 
@@ -1607,7 +1607,7 @@
     void
     Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
         ScalarOperandU32 scc(gpuDynInst, REG_SCC);
 
@@ -1634,7 +1634,7 @@
     void
     Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
         ScalarOperandU32 scc(gpuDynInst, REG_SCC);
 
@@ -1661,7 +1661,7 @@
     void
     Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
         ScalarOperandU32 scc(gpuDynInst, REG_SCC);
 
@@ -1688,7 +1688,7 @@
     void
     Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
         ScalarOperandU32 scc(gpuDynInst, REG_SCC);
 
@@ -1715,7 +1715,7 @@
     void
     Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
         ScalarOperandU32 scc(gpuDynInst, REG_SCC);
 
@@ -1742,7 +1742,7 @@
     void
     Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
         ScalarOperandU32 scc(gpuDynInst, REG_SCC);
 
@@ -1939,7 +1939,7 @@
 
         src.read();
 
-        sdst = src.rawData() + (ScalarRegI32)simm16;
+        sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16);
         scc = (bits(src.rawData(), 31) == bits(simm16, 15)
             && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;
 
@@ -1969,7 +1969,7 @@
 
         src.read();
 
-        sdst = src.rawData() * (ScalarRegI32)simm16;
+        sdst = src.rawData() * (ScalarRegI32)sext<16>(simm16);
 
         sdst.write();
     } // execute