cpu, arch, arch-arm: Wire unused VecElem code in the O3 model

VecElem code had been introduced in order to simulate changes of renaming
mode for vector registers, but it was left unused. Most of the work
happens in the rename map (switchMode/switchFreeList), driven by the new
FullO3CPU::switchRenameMode. A change of renaming mode can happen after a
squash in the pipeline.
This patch also changes the interface to the ISA part so that a PCState
is used instead of an ISA object in order to check whether the rename
mode has changed.

Change-Id: I8af795d771b958e0a0d459abfeceff5f16b4b5d4
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/15601
diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh
index 60c5728..a3e89b5 100644
--- a/src/arch/arm/isa.hh
+++ b/src/arch/arm/isa.hh
@@ -699,15 +699,28 @@
 }
 
 template<>
-struct initRenameMode<ArmISA::ISA>
+struct RenameMode<ArmISA::ISA>
 {
-    static Enums::VecRegRenameMode mode(const ArmISA::ISA* isa)
+    static Enums::VecRegRenameMode
+    init(const ArmISA::ISA* isa)
     {
         return isa->vecRegRenameMode();
     }
-    static bool equals(const ArmISA::ISA* isa1, const ArmISA::ISA* isa2)
+
+    static Enums::VecRegRenameMode
+    mode(const ArmISA::PCState& pc)
     {
-        return mode(isa1) == mode(isa2);
+        if (pc.aarch64()) {
+            return Enums::Full;
+        } else {
+            return Enums::Elem;
+        }
+    }
+
+    static bool
+    equalsInit(const ArmISA::ISA* isa1, const ArmISA::ISA* isa2)
+    {
+        return init(isa1) == init(isa2);
     }
 };
 
diff --git a/src/arch/arm/utility.cc b/src/arch/arm/utility.cc
index 175fd7c..11c3479 100644
--- a/src/arch/arm/utility.cc
+++ b/src/arch/arm/utility.cc
@@ -145,6 +145,24 @@
     }
 }
 
+static void
+copyVecRegs(ThreadContext *src, ThreadContext *dest)
+{
+    auto src_mode = RenameMode<ArmISA::ISA>::mode(src->pcState());
+
+    // The way vector registers are copied (VecReg vs VecElem) is relevant
+    // in the O3 model only.
+    if (src_mode == Enums::Full) {
+        for (auto idx = 0; idx < NumVecRegs; idx++)
+            dest->setVecRegFlat(idx, src->readVecRegFlat(idx));
+    } else {
+        for (auto idx = 0; idx < NumVecRegs; idx++)
+            for (auto elem_idx = 0; elem_idx < NumVecElemPerVecReg; elem_idx++)
+                dest->setVecElemFlat(
+                    idx, elem_idx, src->readVecElemFlat(idx, elem_idx));
+    }
+}
+
 void
 copyRegs(ThreadContext *src, ThreadContext *dest)
 {
@@ -154,15 +172,14 @@
     for (int i = 0; i < NumFloatRegs; i++)
         dest->setFloatRegBitsFlat(i, src->readFloatRegBitsFlat(i));
 
-    for (int i = 0; i < NumVecRegs; i++)
-        dest->setVecRegFlat(i, src->readVecRegFlat(i));
-
     for (int i = 0; i < NumCCRegs; i++)
         dest->setCCReg(i, src->readCCReg(i));
 
     for (int i = 0; i < NumMiscRegs; i++)
         dest->setMiscRegNoEffect(i, src->readMiscRegNoEffect(i));
 
+    copyVecRegs(src, dest);
+
     // setMiscReg "with effect" will set the misc register mapping correctly.
     // e.g. updateRegMap(val)
     dest->setMiscReg(MISCREG_CPSR, src->readMiscRegNoEffect(MISCREG_CPSR));
diff --git a/src/arch/generic/traits.hh b/src/arch/generic/traits.hh
index 3dc6b30..c95c5ab 100644
--- a/src/arch/generic/traits.hh
+++ b/src/arch/generic/traits.hh
@@ -42,6 +42,7 @@
 #ifndef __ARCH_COMMON_TRAITS_HH__
 #define __ARCH_COMMON_TRAITS_HH__
 
+#include "arch/types.hh"
 #include "enums/VecRegRenameMode.hh"
 
 /** Helper structure to get the vector register mode for a given ISA.
@@ -50,14 +51,19 @@
  * appropriate member of the ISA.
  */
 template <typename ISA>
-struct initRenameMode
+struct RenameMode
 {
-    static Enums::VecRegRenameMode mode(const ISA*) { return Enums::Full; }
+    static Enums::VecRegRenameMode init(const ISA*) { return Enums::Full; }
+
+    static Enums::VecRegRenameMode
+    mode(const TheISA::PCState&)
+    { return Enums::Full; }
+
     /**
      * Compare the initial rename mode of two instances of the ISA.
      * Result is true by definition, as the default mode is Full.
      * */
-    static bool equals(const ISA*, const ISA*) { return true; }
+    static bool equalsInit(const ISA*, const ISA*) { return true; }
 };
 
 #endif /* __ARCH_COMMON_TRAITS_HH__ */
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 9e1efa1..5d92d92 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012, 2014, 2016, 2017 ARM Limited
+ * Copyright (c) 2011-2012, 2014, 2016, 2017, 2019 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -155,7 +155,7 @@
 
       /* It is mandatory that all SMT threads use the same renaming mode as
        * they are sharing registers and rename */
-      vecMode(initRenameMode<TheISA::ISA>::mode(params->isa[0])),
+      vecMode(RenameMode<TheISA::ISA>::init(params->isa[0])),
       regFile(params->numPhysIntRegs,
               params->numPhysFloatRegs,
               params->numPhysVecRegs,
@@ -266,7 +266,7 @@
     // Setup the rename map for whichever stages need it.
     for (ThreadID tid = 0; tid < numThreads; tid++) {
         isa[tid] = params->isa[tid];
-        assert(initRenameMode<TheISA::ISA>::equals(isa[tid], isa[0]));
+        assert(RenameMode<TheISA::ISA>::equalsInit(isa[tid], isa[0]));
 
         // Only Alpha has an FP zero register, so for other ISAs we
         // use an invalid FP register index to avoid special treatment
@@ -961,6 +961,25 @@
 }
 
 template <class Impl>
+void
+FullO3CPU<Impl>::switchRenameMode(ThreadID tid, UnifiedFreeList* freelist)
+{
+    auto pc = this->pcState(tid);
+
+    // new_mode is the new vector renaming mode
+    auto new_mode = RenameMode<TheISA::ISA>::mode(pc);
+
+    // We update vecMode only if there has been a change
+    if (new_mode != vecMode) {
+        vecMode = new_mode;
+
+        renameMap[tid].switchMode(vecMode);
+        commitRenameMap[tid].switchMode(vecMode);
+        renameMap[tid].switchFreeList(freelist);
+    }
+}
+
+template <class Impl>
 Fault
 FullO3CPU<Impl>::getInterrupts()
 {
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 1159850..b5f7540 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2013, 2016-2018 ARM Limited
+ * Copyright (c) 2011-2013, 2016-2019 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -371,6 +371,14 @@
 
     bool simPalCheck(int palFunc, ThreadID tid);
 
+    /** Check if a change in renaming is needed for vector registers.
+     * The vecMode variable is updated and propagated to rename maps.
+     *
+     * @param tid ThreadID
+     * @param freelist list of free registers
+     */
+    void switchRenameMode(ThreadID tid, UnifiedFreeList* freelist);
+
     /** Returns the Fault for any valid interrupt. */
     Fault getInterrupts();
 
@@ -409,6 +417,13 @@
      */
     VecRegContainer& getWritableVecReg(PhysRegIdPtr reg_idx);
 
+    /** Returns current vector renaming mode */
+    Enums::VecRegRenameMode vecRenameMode() const { return vecMode; }
+
+    /** Sets the current vector renaming mode */
+    void vecRenameMode(Enums::VecRegRenameMode vec_mode)
+    { vecMode = vec_mode; }
+
     /**
      * Read physical vector register lane
      */
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index c5be404..ed5dfb6 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -927,10 +927,6 @@
 
     // After a syscall squashes everything, the history buffer may be empty
     // but the ROB may still be squashing instructions.
-    if (historyBuffer[tid].empty()) {
-        return;
-    }
-
     // Go through the most recent instructions, undoing the mappings
     // they did and freeing up the registers.
     while (!historyBuffer[tid].empty() &&
@@ -965,6 +961,9 @@
 
         ++renameUndoneMaps;
     }
+
+    // Check if we need to change vector renaming mode after squashing
+    cpu->switchRenameMode(tid, freeList);
 }
 
 template<class Impl>
diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc
index 1b831d9..86c4393 100644
--- a/src/cpu/o3/rename_map.cc
+++ b/src/cpu/o3/rename_map.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016 ARM Limited
+ * Copyright (c) 2016,2019 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -125,45 +125,68 @@
 }
 
 void
-UnifiedRenameMap::switchMode(VecMode newVecMode, UnifiedFreeList* freeList)
+UnifiedRenameMap::switchFreeList(UnifiedFreeList* freeList)
 {
-    if (newVecMode == Enums::Elem && vecMode == Enums::Full) {
-        /* Switch to vector element rename mode. */
+    if (vecMode == Enums::Elem) {
+
         /* The free list should currently be tracking full registers. */
         panic_if(freeList->hasFreeVecElems(),
                 "The free list is already tracking Vec elems");
         panic_if(freeList->numFreeVecRegs() !=
                 regFile->numVecPhysRegs() - TheISA::NumVecRegs,
                 "The free list has lost vector registers");
-        /* Split the mapping of each arch reg. */
-        int reg = 0;
-        for (auto &e: vecMap) {
-            PhysRegFile::IdRange range = this->regFile->getRegElemIds(e);
-            uint32_t i;
-            for (i = 0; range.first != range.second; i++, range.first++) {
-                vecElemMap.setEntry(RegId(VecElemClass, reg, i),
-                                    &(*range.first));
-            }
-            panic_if(i != NVecElems,
-                "Wrong name of elems: expecting %u, got %d\n",
-                TheISA::NumVecElemPerVecReg, i);
-            reg++;
-        }
+
         /* Split the free regs. */
         while (freeList->hasFreeVecRegs()) {
             auto vr = freeList->getVecReg();
             auto range = this->regFile->getRegElemIds(vr);
             freeList->addRegs(range.first, range.second);
         }
-        vecMode = Enums::Elem;
-    } else if (newVecMode == Enums::Full && vecMode == Enums::Elem) {
-        /* Switch to full vector register rename mode. */
+
+    } else if (vecMode == Enums::Full) {
+
         /* The free list should currently be tracking register elems. */
         panic_if(freeList->hasFreeVecRegs(),
                 "The free list is already tracking full Vec");
         panic_if(freeList->numFreeVecElems() !=
                 regFile->numVecElemPhysRegs() - TheISA::NumFloatRegs,
                 "The free list has lost vector register elements");
+
+        auto range = regFile->getRegIds(VecRegClass);
+        freeList->addRegs(range.first + TheISA::NumVecRegs, range.second);
+
+        /* We remove the elems from the free list. */
+        while (freeList->hasFreeVecElems())
+            freeList->getVecElem();
+    }
+}
+
+void
+UnifiedRenameMap::switchMode(VecMode newVecMode)
+{
+    if (newVecMode == Enums::Elem && vecMode == Enums::Full) {
+
+        /* Switch to vector element rename mode. */
+        vecMode = Enums::Elem;
+
+        /* Split the mapping of each arch reg. */
+        int vec_idx = 0;
+        for (auto &vec: vecMap) {
+            PhysRegFile::IdRange range = this->regFile->getRegElemIds(vec);
+            auto idx = 0;
+            for (auto phys_elem = range.first;
+                 phys_elem < range.second; idx++, phys_elem++) {
+
+                setEntry(RegId(VecElemClass, vec_idx, idx), &(*phys_elem));
+            }
+            vec_idx++;
+        }
+
+    } else if (newVecMode == Enums::Full && vecMode == Enums::Elem) {
+
+        /* Switch to full vector register rename mode. */
+        vecMode = Enums::Full;
+
         /* To rebuild the arch regs we take the easy road:
          *  1.- Stitch the elems together into vectors.
          *  2.- Replace the contents of the register file with the vectors
@@ -184,12 +207,5 @@
             regFile->setVecReg(regFile->getTrueId(&pregId), new_RF[i]);
         }
 
-        auto range = regFile->getRegIds(VecRegClass);
-        freeList->addRegs(range.first + TheISA::NumVecRegs, range.second);
-
-        /* We remove the elems from the free list. */
-        while (freeList->hasFreeVecElems())
-            freeList->getVecElem();
-        vecMode = Enums::Full;
     }
 }
diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh
index ab909f0..d306680 100644
--- a/src/cpu/o3/rename_map.hh
+++ b/src/cpu/o3/rename_map.hh
@@ -160,7 +160,6 @@
     /** @} */
 };
 
-
 /**
  * Unified register rename map for all classes of registers.  Wraps a
  * set of class-specific rename maps.  Methods that do not specify a
@@ -377,8 +376,16 @@
     /**
      * Set vector mode to Full or Elem.
      * Ignore 'silent' modifications.
+     *
+     * @param newVecMode new vector renaming mode
      */
-    void switchMode(VecMode newVecMode, UnifiedFreeList* freeList);
+    void switchMode(VecMode newVecMode);
+
+    /**
+     * Switch freeList of registers from Full to Elem or vice versa
+     * depending on vecMode (vector renaming mode).
+     */
+    void switchFreeList(UnifiedFreeList* freeList);
 
 };
 
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index e1d7717..bd2bf63 100644
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -45,6 +45,7 @@
 #ifndef __CPU_O3_THREAD_CONTEXT_IMPL_HH__
 #define __CPU_O3_THREAD_CONTEXT_IMPL_HH__
 
+#include "arch/generic/traits.hh"
 #include "arch/kernel_stats.hh"
 #include "arch/registers.hh"
 #include "config/the_isa.hh"
@@ -176,6 +177,9 @@
 void
 O3ThreadContext<Impl>::copyArchRegs(ThreadContext *tc)
 {
+    // Set vector renaming mode before copying registers
+    cpu->vecRenameMode(RenameMode<TheISA::ISA>::mode(tc->pcState()));
+
     // Prevent squashing
     thread->noSquashFromTC = true;
     TheISA::copyRegs(tc, this);