arch-riscv: Fix disassembling of immediate for c.lui instruction

For the compressed instruction c.lui, the 6-bit immediate is left-shifted by
12 bits during decoding. However, gem5 originally printed the left-shifted
value directly in the disassembly.
This patch fixes the problem by adding a new template, CILuiExecute, which
restores the immediate before outputting it in the disassembly.
Note: The immediate is sign-extended to 20 bits to be compatible with GCC.

Change-Id: If73f72d3e8f85a8b10ce7a323379d8ad6c4c3085
Signed-off-by: Ian Jiang <ianjiang.ict@gmail.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/22567
Reviewed-by: Alec Roelke <alec.roelke@gmail.com>
Maintainer: Alec Roelke <alec.roelke@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/arch/riscv/isa/formats/compressed.isa b/src/arch/riscv/isa/formats/compressed.isa
index 9c812e8..62d5b75 100644
--- a/src/arch/riscv/isa/formats/compressed.isa
+++ b/src/arch/riscv/isa/formats/compressed.isa
@@ -54,7 +54,10 @@
     header_output = ImmDeclare.subst(iop)
     decoder_output = ImmConstructor.subst(iop)
     decode_block = BasicDecode.subst(iop)
-    exec_output = ImmExecute.subst(iop)
+    if (name == "c_lui"):
+        exec_output = CILuiExecute.subst(iop)
+    else:
+        exec_output = ImmExecute.subst(iop)
 }};
 
 def format CJOp(code, *opt_flags) {{
diff --git a/src/arch/riscv/isa/formats/standard.isa b/src/arch/riscv/isa/formats/standard.isa
index a689c57..2ffa2de 100644
--- a/src/arch/riscv/isa/formats/standard.isa
+++ b/src/arch/riscv/isa/formats/standard.isa
@@ -90,6 +90,40 @@
     }
 }};
 
+def template CILuiExecute {{
+    Fault
+    %(class_name)s::execute(
+        ExecContext *xc, Trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        if (fault == NoFault) {
+            %(code)s;
+            if (fault == NoFault) {
+                %(op_wb)s;
+            }
+        }
+        return fault;
+    }
+
+    std::string
+    %(class_name)s::generateDisassembly(Addr pc,
+            const SymbolTable *symtab) const
+    {
+        std::vector<RegId> indices = {%(regs)s};
+        std::stringstream ss;
+        ss << mnemonic << ' ';
+        for (const RegId& idx: indices)
+            ss << registerName(idx) << ", ";
+        // To be compliant with GCC, the immediate is shifted back by 12 bits
+        // and formatted as an unsigned 20-bit value (upper bits masked off).
+        ss << ((((uint64_t)imm) >> 12) & 0xFFFFF);
+        return ss.str();
+    }
+}};
+
 def template FenceExecute {{
     Fault
     %(class_name)s::execute(