cpu: Added parameters to enable/disable features in LTAGE

The new parameters control the following optional features of the LTAGE
loop predictor (all of them are disabled by default):
 - Hashing when calculating the loop table index
 - Direction information in the loop table entries
 - Speculative iteration count information

Change-Id: I395f4526163ee0d0229d1e87cde2bb046f1dd43a
Signed-off-by: Pau Cabre <pau.cabre@metempsy.com>
Reviewed-on: https://gem5-review.googlesource.com/c/14597
Reviewed-by: Ilias Vougioukas <ilias.vougioukas@arm.com>
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-by: Louis Delhez <ldelhez@ucla.edu>
Maintainer: Jason Lowe-Power <jason@lowepower.com>
diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py
index 2c622cd..0c1e9c2 100644
--- a/src/cpu/pred/BranchPredictor.py
+++ b/src/cpu/pred/BranchPredictor.py
@@ -143,3 +143,19 @@
     loopTableIterBits = Param.Unsigned(14, "Nuber of iteration bits per loop")
     logLoopTableAssoc = Param.Unsigned(2, "Log loop predictor associativity")
 
+    # Parameters for enabling modifications to the loop predictor
+    # They have been copied from ISL-TAGE
+    # (https://www.jilp.org/jwac-2/program/03_seznec.tgz)
+    #
+    # All of them should be disabled to match the original LTAGE implementation
+    # (http://hpca23.cse.tamu.edu/taco/camino/cbp2/cbp-src/realistic-seznec.h)
+
+    # Use the speculative iteration count when making loop predictions
+    useSpeculation = Param.Bool(False, "Use speculation")
+
+    # Add hashing for calculating the loop table index
+    useHashing = Param.Bool(False, "Use hashing")
+
+    # Add a direction bit to the loop table entries
+    useDirectionBit = Param.Bool(False, "Use direction info")
+
diff --git a/src/cpu/pred/ltage.cc b/src/cpu/pred/ltage.cc
index 73f4777..8f332b3 100644
--- a/src/cpu/pred/ltage.cc
+++ b/src/cpu/pred/ltage.cc
@@ -58,8 +58,12 @@
     confidenceThreshold((1 << loopTableConfidenceBits) - 1),
     loopTagMask((1 << loopTableTagBits) - 1),
     loopNumIterMask((1 << loopTableIterBits) - 1),
+    loopSetMask((1 << (logSizeLoopPred - logLoopTableAssoc)) - 1),
     loopUseCounter(0),
-    withLoopBits(params->withLoopBits)
+    withLoopBits(params->withLoopBits),
+    useDirectionBit(params->useDirectionBit),
+    useSpeculation(params->useSpeculation),
+    useHashing(params->useHashing)
 {
     // we use uint16_t type for these vales, so they cannot be more than
     // 16 bits
@@ -82,12 +86,28 @@
     // by logLoopTableAssoc in order to return the index of the first of the
     // N entries of the set
     Addr mask = (ULL(1) << (logSizeLoopPred - logLoopTableAssoc)) - 1;
-    return (((pc_in >> instShiftAmt) & mask) << logLoopTableAssoc);
+    Addr pc = pc_in >> instShiftAmt;
+    if (useHashing) {
+        // copied from TAGE-SC-L
+        // (http://www.jilp.org/cbp2016/code/AndreSeznecLimited.tar.gz)
+        pc ^= (pc_in >> (instShiftAmt + logLoopTableAssoc));
+    }
+    return ((pc & mask) << logLoopTableAssoc);
+}
+
+int
+LTAGE::finallindex(int index, int lowPcBits, int way) const
+{
+    // copied from TAGE-SC-L
+    // (http://www.jilp.org/cbp2016/code/AndreSeznecLimited.tar.gz)
+    return (useHashing ? (index ^ ((lowPcBits >> way) << logLoopTableAssoc)) :
+                         (index))
+           + way;
 }
 
 //loop prediction: only used if high confidence
 bool
-LTAGE::getLoop(Addr pc, LTageBranchInfo* bi) const
+LTAGE::getLoop(Addr pc, LTageBranchInfo* bi, bool speculative) const
 {
     bi->loopHit = -1;
     bi->loopPredValid = false;
@@ -95,17 +115,25 @@
     unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc;
     bi->loopTag = ((pc) >> pcShift) & loopTagMask;
 
+    if (useHashing) {
+        bi->loopTag ^= ((pc >> (pcShift + logSizeLoopPred)) & loopTagMask);
+        bi->loopLowPcBits = (pc >> pcShift) & loopSetMask;
+    }
+
     for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
-        if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
+        int idx = finallindex(bi->loopIndex, bi->loopLowPcBits, i);
+        if (ltable[idx].tag == bi->loopTag) {
             bi->loopHit = i;
             bi->loopPredValid =
-                ltable[bi->loopIndex + i].confidence == confidenceThreshold;
-            bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
-            if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
-                ltable[bi->loopIndex + i].numIter) {
-                return !(ltable[bi->loopIndex + i].dir);
-            }else {
-                return (ltable[bi->loopIndex + i].dir);
+                ltable[idx].confidence == confidenceThreshold;
+
+            uint16_t iter = speculative ? ltable[idx].currentIterSpec
+                                        : ltable[idx].currentIter;
+
+            if ((iter + 1) == ltable[idx].numIter) {
+                return useDirectionBit ? !(ltable[idx].dir) : false;
+            } else {
+                return useDirectionBit ? (ltable[idx].dir) : true;
             }
         }
     }
@@ -113,10 +141,10 @@
 }
 
 void
-LTAGE::specLoopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
+LTAGE::specLoopUpdate(bool taken, LTageBranchInfo* bi)
 {
     if (bi->loopHit>=0) {
-        int index = lindex(pc);
+        int index = finallindex(bi->loopIndex, bi->loopLowPcBits, bi->loopHit);
         if (taken != ltable[index].dir) {
             ltable[index].currentIterSpec = 0;
         } else {
@@ -129,7 +157,7 @@
 void
 LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
 {
-    int idx = bi->loopIndex + bi->loopHit;
+    int idx = finallindex(bi->loopIndex, bi->loopLowPcBits, bi->loopHit);
     if (bi->loopHit >= 0) {
         //already a hit
         if (bi->loopPredValid) {
@@ -158,7 +186,7 @@
             }
         }
 
-        if (taken != ltable[idx].dir) {
+        if (taken != (useDirectionBit ? ltable[idx].dir : true)) {
             if (ltable[idx].currentIter == ltable[idx].numIter) {
                 DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc);
 
@@ -167,7 +195,7 @@
                 //just do not predict when the loop count is 1 or 2
                 if (ltable[idx].numIter < 3) {
                     // free the entry
-                    ltable[idx].dir = taken;
+                    ltable[idx].dir = taken; // ignored if no useDirectionBit
                     ltable[idx].numIter = 0;
                     ltable[idx].age = 0;
                     ltable[idx].confidence = 0;
@@ -189,7 +217,9 @@
             ltable[idx].currentIter = 0;
         }
 
-    } else if (taken) {
+    } else if (useDirectionBit ?
+                ((bi->loopPredValid ? bi->loopPred : bi->tagePred) != taken) :
+                taken) {
         //try to allocate an entry on taken branch
         int nrand = random_mt.random<int>();
         for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
@@ -198,7 +228,7 @@
             if (ltable[idx].age == 0) {
                 DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n",
                         pc);
-                ltable[idx].dir = !taken;
+                ltable[idx].dir = !taken; // ignored if no useDirectionBit
                 ltable[idx].tag = bi->loopTag;
                 ltable[idx].numIter = 0;
                 ltable[idx].age = (1 << loopTableAgeBits) - 1;
@@ -224,7 +254,8 @@
     bool pred_taken = tagePredict(tid, branch_pc, cond_branch, bi);
 
     if (cond_branch) {
-        bi->loopPred = getLoop(branch_pc, bi);	// loop prediction
+        // loop prediction
+        bi->loopPred = getLoop(branch_pc, bi, useSpeculation);
 
         if ((loopUseCounter >= 0) && bi->loopPredValid) {
             pred_taken = bi->loopPred;
@@ -234,9 +265,12 @@
                 "loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n",
                 branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
                 loopUseCounter, bi->tagePred, bi->altTaken);
+
+        if (useSpeculation) {
+            specLoopUpdate(pred_taken, bi);
+        }
     }
 
-    specLoopUpdate(branch_pc, pred_taken, bi);
     return pred_taken;
 }
 
@@ -246,8 +280,13 @@
 {
     LTageBranchInfo* bi = static_cast<LTageBranchInfo*>(tage_bi);
 
-    // first update the loop predictor
-    loopUpdate(branch_pc, taken, bi);
+    if (useSpeculation) {
+        // recalculate loop prediction without speculation
+        // It is ok to overwrite the loop prediction fields in bi
+        // as the stats have already been updated with the previous
+        // values
+        bi->loopPred = getLoop(branch_pc, bi, false);
+    }
 
     if (bi->loopPredValid) {
         if (bi->tagePred != bi->loopPred) {
@@ -257,6 +296,8 @@
         }
     }
 
+    loopUpdate(branch_pc, taken, bi);
+
     TAGE::condBranchUpdate(branch_pc, taken, bi, nrand);
 }
 
@@ -269,7 +310,9 @@
 
     if (bi->condBranch) {
         if (bi->loopHit >= 0) {
-            int idx = bi->loopIndex + bi->loopHit;
+            int idx = finallindex(bi->loopIndex,
+                                  bi->loopLowPcBits,
+                                  bi->loopHit);
             ltable[idx].currentIterSpec = bi->currentIter;
         }
     }
@@ -281,7 +324,9 @@
     LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history);
     if (bi->condBranch) {
         if (bi->loopHit >= 0) {
-            int idx = bi->loopIndex + bi->loopHit;
+            int idx = finallindex(bi->loopIndex,
+                                  bi->loopLowPcBits,
+                                  bi->loopHit);
             ltable[idx].currentIterSpec = bi->currentIter;
         }
     }
diff --git a/src/cpu/pred/ltage.hh b/src/cpu/pred/ltage.hh
index e9e34b7..94ff968 100644
--- a/src/cpu/pred/ltage.hh
+++ b/src/cpu/pred/ltage.hh
@@ -76,11 +76,11 @@
     {
         uint16_t numIter;
         uint16_t currentIter;
-        uint16_t currentIterSpec;
+        uint16_t currentIterSpec; // only for useSpeculation
         uint8_t confidence;
         uint16_t tag;
         uint8_t age;
-        bool dir;
+        bool dir; // only for useDirectionBit
 
         LoopEntry() : numIter(0), currentIter(0), currentIterSpec(0),
                       confidence(0), tag(0), age(0), dir(0) { }
@@ -100,13 +100,14 @@
         bool loopPred;
         bool loopPredValid;
         int  loopIndex;
+        int  loopLowPcBits;  // only for useHashing
         int loopHit;
 
         LTageBranchInfo(int sz)
             : TageBranchInfo(sz),
               loopTag(0), currentIter(0),
               loopPred(false),
-              loopPredValid(false), loopIndex(0), loopHit(0)
+              loopPredValid(false), loopIndex(0), loopLowPcBits(0), loopHit(0)
         {}
     };
 
@@ -118,13 +119,24 @@
     int lindex(Addr pc_in) const;
 
     /**
+     * Computes the index used to access the
+     * ltable structures.
+     * It may take hashing into account
+     * @param lindex Result of lindex function
+     * @param lowPcBits PC bits masked with set size
+     * @param way Way to be used
+     */
+    int finallindex(int lindex, int lowPcBits, int way) const;
+
+    /**
      * Get a branch prediction from the loop
      * predictor.
      * @param pc The unshifted branch PC.
      * @param bi Pointer to information on the
      * prediction.
+     * @param speculative Use speculative number of iterations
      */
-    bool getLoop(Addr pc, LTageBranchInfo* bi) const;
+    bool getLoop(Addr pc, LTageBranchInfo* bi, bool speculative) const;
 
    /**
     * Updates the loop predictor.
@@ -137,13 +149,12 @@
 
     /**
      * Speculatively updates the loop predictor
-     * iteration count.
-     * @param pc The unshifted branch PC.
+     * iteration count (only for useSpeculation).
      * @param taken The predicted branch outcome.
      * @param bi Pointer to information on the prediction
      * recorded at prediction time.
      */
-    void specLoopUpdate(Addr pc, bool taken, LTageBranchInfo* bi);
+    void specLoopUpdate(bool taken, LTageBranchInfo* bi);
 
     /**
      * Update LTAGE for conditional branches.
@@ -201,12 +212,17 @@
     const uint8_t confidenceThreshold;
     const uint16_t loopTagMask;
     const uint16_t loopNumIterMask;
+    const int loopSetMask;
 
     LoopEntry *ltable;
 
     int8_t loopUseCounter;
     unsigned withLoopBits;
 
+    const bool useDirectionBit;
+    const bool useSpeculation;
+    const bool useHashing;
+
     // stats
     Stats::Scalar loopPredictorCorrect;
     Stats::Scalar loopPredictorWrong;