cpu: Made LTAGE parameters configurable

This includes the TAGE tag sizes, the TAGE table sizes, the U counter
reset period, the loop predictor associativity, the path history size,
and the sizes of the USE_ALT_ON_NA and WITHLOOP counters.

Change-Id: I935823f0a5794f5d55b744263798897a813dc1bd
Signed-off-by: Pau Cabre <pau.cabre@metempsy.com>
Reviewed-on: https://gem5-review.googlesource.com/c/14417
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Maintainer: Jason Lowe-Power <jason@lowepower.com>
diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py
index aa6cd4a..9d83abb 100644
--- a/src/cpu/pred/BranchPredictor.py
+++ b/src/cpu/pred/BranchPredictor.py
@@ -92,11 +92,9 @@
     cxx_class = 'LTAGE'
     cxx_header = "cpu/pred/ltage.hh"
 
-    logSizeBiMP = Param.Unsigned(14, "Log size of Bimodal predictor in bits")
     logRatioBiModalHystEntries = Param.Unsigned(2,
         "Log num of prediction entries for a shared hysteresis bit " \
         "for the Bimodal")
-    logSizeTagTables = Param.Unsigned(11, "Log size of tag table in LTAGE")
     logSizeLoopPred = Param.Unsigned(8, "Log size of the loop predictor")
     nHistoryTables = Param.Unsigned(12, "Number of history tables")
     tagTableCounterBits = Param.Unsigned(3, "Number of tag table counter bits")
@@ -105,11 +103,22 @@
             "A large number to track all branch histories(2MEntries default)")
     minHist = Param.Unsigned(4, "Minimum history size of LTAGE")
     maxHist = Param.Unsigned(640, "Maximum history size of LTAGE")
-    minTagWidth = Param.Unsigned(7, "Minimum tag size in tag tables")
+    pathHistBits = Param.Unsigned(16, "Path history size")
+    tagTableTagWidths = VectorParam.Unsigned(
+        [0, 7, 7, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15],
+        "Tag size in TAGE tag tables")
+    logTagTableSizes = VectorParam.Int(
+        [14, 10, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9],
+        "Log2 of TAGE table sizes")
+    logUResetPeriod = Param.Unsigned(19,
+        "Log period in number of branches to reset TAGE useful counters")
+    useAltOnNaBits = Param.Unsigned(4, "Size of the USE_ALT_ON_NA counter")
+    withLoopBits = Param.Unsigned(7, "Size of the WITHLOOP counter")
 
     loopTableAgeBits = Param.Unsigned(8, "Number of age bits per loop entry")
     loopTableConfidenceBits = Param.Unsigned(2,
             "Number of confidence bits per loop entry")
     loopTableTagBits = Param.Unsigned(14, "Number of tag bits per loop entry")
     loopTableIterBits = Param.Unsigned(14, "Nuber of iteration bits per loop")
+    logLoopTableAssoc = Param.Unsigned(2, "Log loop predictor associativity")
 
diff --git a/src/cpu/pred/ltage.cc b/src/cpu/pred/ltage.cc
index b049d4f..d6cc087 100644
--- a/src/cpu/pred/ltage.cc
+++ b/src/cpu/pred/ltage.cc
@@ -49,9 +49,7 @@
 
 LTAGE::LTAGE(const LTAGEParams *params)
   : BPredUnit(params),
-    logSizeBiMP(params->logSizeBiMP),
     logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
-    logSizeTagTables(params->logSizeTagTables),
     logSizeLoopPred(params->logSizeLoopPred),
     nHistoryTables(params->nHistoryTables),
     tagTableCounterBits(params->tagTableCounterBits),
@@ -59,15 +57,21 @@
     histBufferSize(params->histBufferSize),
     minHist(params->minHist),
     maxHist(params->maxHist),
-    minTagWidth(params->minTagWidth),
+    pathHistBits(params->pathHistBits),
     loopTableAgeBits(params->loopTableAgeBits),
     loopTableConfidenceBits(params->loopTableConfidenceBits),
     loopTableTagBits(params->loopTableTagBits),
     loopTableIterBits(params->loopTableIterBits),
+    logLoopTableAssoc(params->logLoopTableAssoc),
     confidenceThreshold((1 << loopTableConfidenceBits) - 1),
     loopTagMask((1 << loopTableTagBits) - 1),
     loopNumIterMask((1 << loopTableIterBits) - 1),
-    threadHistory(params->numThreads)
+    tagTableTagWidths(params->tagTableTagWidths),
+    logTagTableSizes(params->logTagTableSizes),
+    threadHistory(params->numThreads),
+    logUResetPeriod(params->logUResetPeriod),
+    useAltOnNaBits(params->useAltOnNaBits),
+    withLoopBits(params->withLoopBits)
 {
     // Current method for periodically resetting the u counter bits only
     // works for 1 or 2 bits
@@ -79,10 +83,18 @@
     assert(loopTableTagBits <= 16);
     assert(loopTableIterBits <= 16);
 
+    assert(logSizeLoopPred >= logLoopTableAssoc);
+
+    // we use int type for the path history, so it cannot be more than
+    // its size
+    assert(pathHistBits <= (sizeof(int)*8));
+
+    // initialize the counter to half of the period
+    assert(logUResetPeriod != 0);
+    tCounter = ULL(1) << (logUResetPeriod - 1);
+
     assert(params->histBufferSize > params->maxHist * 2);
     useAltPredForNewlyAllocated = 0;
-    logTick = 19;
-    tCounter = ULL(1) << (logTick - 1);
 
     for (auto& history : threadHistory) {
         history.pathHist = 0;
@@ -103,27 +115,12 @@
                     + 0.5);
     }
 
-    tagWidths[1] = minTagWidth;
-    tagWidths[2] = minTagWidth;
-    tagWidths[3] = minTagWidth + 1;
-    tagWidths[4] = minTagWidth + 1;
-    tagWidths[5] = minTagWidth + 2;
-    tagWidths[6] = minTagWidth + 3;
-    tagWidths[7] = minTagWidth + 4;
-    tagWidths[8] = minTagWidth + 5;
-    tagWidths[9] = minTagWidth + 5;
-    tagWidths[10] = minTagWidth + 6;
-    tagWidths[11] = minTagWidth + 7;
-    tagWidths[12] = minTagWidth + 8;
+    assert(tagTableTagWidths.size() == (nHistoryTables+1));
+    assert(logTagTableSizes.size() == (nHistoryTables+1));
 
-    for (int i = 1; i <= 2; i++)
-        tagTableSizes[i] = logSizeTagTables - 1;
-    for (int i = 3; i <= 6; i++)
-        tagTableSizes[i] = logSizeTagTables;
-    for (int i = 7; i <= 10; i++)
-        tagTableSizes[i] = logSizeTagTables - 1;
-    for (int i = 11; i <= 12; i++)
-        tagTableSizes[i] = logSizeTagTables - 2;
+    // First entry is for the Bimodal table and it is untagged in this
+    // implementation
+    assert(tagTableTagWidths[0] == 0);
 
     for (auto& history : threadHistory) {
         history.computeIndices = new FoldedHistory[nHistoryTables+1];
@@ -131,17 +128,18 @@
         history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
 
         for (int i = 1; i <= nHistoryTables; i++) {
-            history.computeIndices[i].init(histLengths[i], (tagTableSizes[i]));
+            history.computeIndices[i].init(
+                histLengths[i], (logTagTableSizes[i]));
             history.computeTags[0][i].init(
-                history.computeIndices[i].origLength, tagWidths[i]);
+                history.computeIndices[i].origLength, tagTableTagWidths[i]);
             history.computeTags[1][i].init(
-                history.computeIndices[i].origLength, tagWidths[i] - 1);
+                history.computeIndices[i].origLength, tagTableTagWidths[i]-1);
             DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
-                    histLengths[i], tagTableSizes[i], tagWidths[i]);
+                    histLengths[i], logTagTableSizes[i], tagTableTagWidths[i]);
         }
     }
 
-    const uint64_t bimodalTableSize = ULL(1) << logSizeBiMP;
+    const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0];
     btablePrediction.resize(bimodalTableSize, false);
     btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
                             true);
@@ -149,7 +147,7 @@
     ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
     gtable = new TageEntry*[nHistoryTables + 1];
     for (int i = 1; i <= nHistoryTables; i++) {
-        gtable[i] = new TageEntry[1<<(tagTableSizes[i])];
+        gtable[i] = new TageEntry[1<<(logTagTableSizes[i])];
     }
 
     tableIndices = new int [nHistoryTables+1];
@@ -161,14 +159,21 @@
 int
 LTAGE::bindex(Addr pc_in) const
 {
-    return ((pc_in >> instShiftAmt) & ((ULL(1) << (logSizeBiMP)) - 1));
+    return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) - 1));
 }
 
 int
 LTAGE::lindex(Addr pc_in) const
 {
-    return (((pc_in >> instShiftAmt) &
-             ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2);
+    // The loop table is implemented as a linear table
+    // If associativity is N (N being 1 << logLoopTableAssoc),
+    // the first N entries are for set 0, the next N entries are for set 1,
+    // and so on.
+    // Thus, this function calculates the set and then it gets left shifted
+    // by logLoopTableAssoc in order to return the index of the first of the
+    // N entries of the set
+    Addr mask = (ULL(1) << (logSizeLoopPred - logLoopTableAssoc)) - 1;
+    return (((pc_in >> instShiftAmt) & mask) << logLoopTableAssoc);
 }
 
 int
@@ -177,13 +182,13 @@
     int A1, A2;
 
     A = A & ((ULL(1) << size) - 1);
-    A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1));
-    A2 = (A >> tagTableSizes[bank]);
-    A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
-       + (A2 >> (tagTableSizes[bank] - bank));
+    A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1));
+    A2 = (A >> logTagTableSizes[bank]);
+    A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
+       + (A2 >> (logTagTableSizes[bank] - bank));
     A = A1 ^ A2;
-    A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
-      + (A >> (tagTableSizes[bank] - bank));
+    A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
+      + (A >> (logTagTableSizes[bank] - bank));
     return (A);
 }
 
@@ -193,14 +198,16 @@
 LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
 {
     int index;
-    int hlen = (histLengths[bank] > 16) ? 16 : histLengths[bank];
+    int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits :
+                                                    histLengths[bank];
+    const Addr shiftedPc = pc >> instShiftAmt;
     index =
-        (pc >> instShiftAmt) ^
-        ((pc >> instShiftAmt) >> ((int) abs(tagTableSizes[bank] - bank) + 1)) ^
+        shiftedPc ^
+        (shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^
         threadHistory[tid].computeIndices[bank].comp ^
         F(threadHistory[tid].pathHist, hlen, bank);
 
-    return (index & ((ULL(1) << (tagTableSizes[bank])) - 1));
+    return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1));
 }
 
 
@@ -212,7 +219,7 @@
               threadHistory[tid].computeTags[0][bank].comp ^
               (threadHistory[tid].computeTags[1][bank].comp << 1);
 
-    return (tag & ((ULL(1) << tagWidths[bank]) - 1));
+    return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1));
 }
 
 
@@ -280,9 +287,10 @@
     bi->loopHit = -1;
     bi->loopPredValid = false;
     bi->loopIndex = lindex(pc);
-    bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2)) & loopTagMask;
+    unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc;
+    bi->loopTag = ((pc) >> pcShift) & loopTagMask;
 
-    for (int i = 0; i < 4; i++) {
+    for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
         if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
             bi->loopHit = i;
             bi->loopPredValid =
@@ -379,8 +387,8 @@
     } else if (taken) {
         //try to allocate an entry on taken branch
         int nrand = random_mt.random<int>();
-        for (int i = 0; i < 4; i++) {
-            int loop_hit = (nrand + i) & 3;
+        for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
+            int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1);
             idx = bi->loopIndex + loop_hit;
             if (ltable[idx].age == 0) {
                 DPRINTF(LTage, "Allocating loop pred entry for branch %lx\n",
@@ -552,7 +560,9 @@
 
         if (bi->loopPredValid) {
             if (bi->tagePred != bi->loopPred) {
-                ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7);
+                ctrUpdate(loopUseCounter,
+                          (bi->loopPred == taken),
+                          withLoopBits);
             }
         }
 
@@ -575,7 +585,7 @@
                 // allocate new entry even if the overall prediction was false
                 if (longest_match_pred != bi->altTaken) {
                     ctrUpdate(useAltPredForNewlyAllocated,
-                         bi->altTaken == taken, 4);
+                         bi->altTaken == taken, useAltOnNaBits);
                 }
             }
         }
@@ -617,11 +627,11 @@
         }
         //periodic reset of u: reset is not complete but bit by bit
         tCounter++;
-        if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) {
+        if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) {
             // reset least significant bit
             // most significant bit becomes least significant bit
             for (int i = 1; i <= nHistoryTables; i++) {
-                for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) {
+                for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) {
                     gtable[i][j].u = gtable[i][j].u >> 1;
                 }
             }
@@ -674,7 +684,7 @@
     //update user history
     updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
     tHist.pathHist = (tHist.pathHist << 1) + pathbit;
-    tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1));
+    tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1));
 
     bi->ptGhist = tHist.ptGhist;
     bi->pathHist = tHist.pathHist;
diff --git a/src/cpu/pred/ltage.hh b/src/cpu/pred/ltage.hh
index 68aef1c..8b417d4 100644
--- a/src/cpu/pred/ltage.hh
+++ b/src/cpu/pred/ltage.hh
@@ -354,9 +354,7 @@
      */
     void specLoopUpdate(Addr pc, bool taken, BranchInfo* bi);
 
-    const unsigned logSizeBiMP;
     const unsigned logRatioBiModalHystEntries;
-    const unsigned logSizeTagTables;
     const unsigned logSizeLoopPred;
     const unsigned nHistoryTables;
     const unsigned tagTableCounterBits;
@@ -364,16 +362,19 @@
     const unsigned histBufferSize;
     const unsigned minHist;
     const unsigned maxHist;
-    const unsigned minTagWidth;
+    const unsigned pathHistBits;
     const unsigned loopTableAgeBits;
     const unsigned loopTableConfidenceBits;
     const unsigned loopTableTagBits;
     const unsigned loopTableIterBits;
-
+    const unsigned logLoopTableAssoc;
     const uint8_t confidenceThreshold;
     const uint16_t loopTagMask;
     const uint16_t loopNumIterMask;
 
+    const std::vector<unsigned> tagTableTagWidths;
+    const std::vector<int> logTagTableSizes;
+
     std::vector<bool> btablePrediction;
     std::vector<bool> btableHysteresis;
     TageEntry **gtable;
@@ -404,16 +405,16 @@
 
     std::vector<ThreadHistory> threadHistory;
 
-    int tagWidths[15];
-    int tagTableSizes[15];
     int *histLengths;
     int *tableIndices;
     int *tableTags;
 
     int8_t loopUseCounter;
     int8_t useAltPredForNewlyAllocated;
-    int tCounter;
-    int logTick;
+    uint64_t tCounter;
+    uint64_t logUResetPeriod;
+    unsigned useAltOnNaBits;
+    unsigned withLoopBits;
 };
 
 #endif // __CPU_PRED_LTAGE