cpu: Proposal for changing the indirect branch predictor interface

Now the indirect branch predictor handles its own GHR instead of getting
the one from the direction predictor.

Also, now the commit method of the indirect predictor is called for every
pending branch on an update, as the indirect predictors may want to update
their interal structures/histories with the information of each branch.

Change-Id: I7053fbea42a53960a3bc1ba32912cc99c160511e
Reviewed-on: https://gem5-review.googlesource.com/c/15318
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
diff --git a/src/cpu/pred/bi_mode.cc b/src/cpu/pred/bi_mode.cc
index 18286a7..e3a1e51 100644
--- a/src/cpu/pred/bi_mode.cc
+++ b/src/cpu/pred/bi_mode.cc
@@ -229,12 +229,6 @@
     delete history;
 }
 
-unsigned
-BiModeBP::getGHR(ThreadID tid, void *bp_history) const
-{
-    return static_cast<BPHistory*>(bp_history)->globalHistoryReg;
-}
-
 void
 BiModeBP::updateGlobalHistReg(ThreadID tid, bool taken)
 {
diff --git a/src/cpu/pred/bi_mode.hh b/src/cpu/pred/bi_mode.hh
index 34766b5..e24c5ee 100644
--- a/src/cpu/pred/bi_mode.hh
+++ b/src/cpu/pred/bi_mode.hh
@@ -63,7 +63,6 @@
     void btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history);
     void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
                 bool squashed, const StaticInstPtr & inst, Addr corrTarget);
-    unsigned getGHR(ThreadID tid, void *bp_history) const;
 
   private:
     void updateGlobalHistReg(ThreadID tid, bool taken);
diff --git a/src/cpu/pred/bpred_unit.cc b/src/cpu/pred/bpred_unit.cc
index 455773c..a768cc1 100644
--- a/src/cpu/pred/bpred_unit.cc
+++ b/src/cpu/pred/bpred_unit.cc
@@ -192,6 +192,7 @@
     ppBranches->notify(1);
 
     void *bp_history = NULL;
+    void *indirect_history = NULL;
 
     if (inst->isUncondCtrl()) {
         DPRINTF(Branch, "[tid:%i]: Unconditional control.\n", tid);
@@ -206,11 +207,15 @@
                 " predicted %i for PC %s\n", tid, seqNum,  pred_taken, pc);
     }
 
+    if (useIndirect) {
+        iPred.updateDirectionInfo(tid, pred_taken, indirect_history);
+    }
+
     DPRINTF(Branch, "[tid:%i]: [sn:%i] Creating prediction history "
             "for PC %s\n", tid, seqNum, pc);
 
-    PredictorHistory predict_record(seqNum, pc.instAddr(),
-                                    pred_taken, bp_history, tid, inst);
+    PredictorHistory predict_record(seqNum, pc.instAddr(), pred_taken,
+                                    bp_history, indirect_history, tid, inst);
 
     // Now lookup in the BTB or RAS.
     if (pred_taken) {
@@ -280,8 +285,7 @@
                 predict_record.wasIndirect = true;
                 ++indirectLookups;
                 //Consult indirect predictor on indirect control
-                if (iPred.lookup(pc.instAddr(), getGHR(tid, bp_history),
-                        target, tid)) {
+                if (iPred.lookup(pc.instAddr(), target, tid)) {
                     // Indirect predictor hit
                     ++indirectHits;
                     DPRINTF(Branch, "[tid:%i]: Instruction %s predicted "
@@ -327,7 +331,6 @@
     DPRINTF(Branch, "[tid:%i]: Committing branches until "
             "[sn:%lli].\n", tid, done_sn);
 
-    iPred.commit(done_sn, tid);
     while (!predHist[tid].empty() &&
            predHist[tid].back().seqNum <= done_sn) {
         // Update the branch predictor with the correct results.
@@ -337,6 +340,8 @@
                     predHist[tid].back().inst,
                     predHist[tid].back().target);
 
+        iPred.commit(done_sn, tid, predHist[tid].back().indirectHistory);
+
         predHist[tid].pop_back();
     }
 }
@@ -366,6 +371,7 @@
 
         // This call should delete the bpHistory.
         squash(tid, pred_hist.front().bpHistory);
+        iPred.deleteDirectionInfo(tid, pred_hist.front().indirectHistory);
 
         DPRINTF(Branch, "[tid:%i]: Removing history for [sn:%i] "
                 "PC %s.\n", tid, pred_hist.front().seqNum,
@@ -429,9 +435,6 @@
                     tid, hist_it->seqNum);
         }
 
-        // Get the underlying Global History Register
-        unsigned ghr = getGHR(tid, hist_it->bpHistory);
-
         // There are separate functions for in-order and out-of-order
         // branch prediction, but not for update. Therefore, this
         // call should take into account that the mispredicted branch may
@@ -449,6 +452,9 @@
                pred_hist.front().bpHistory, true, pred_hist.front().inst,
                corrTarget.instAddr());
 
+        iPred.changeDirectionPrediction(tid, pred_hist.front().indirectHistory,
+                                        actually_taken);
+
         if (actually_taken) {
             if (hist_it->wasReturn && !hist_it->usedRAS) {
                  DPRINTF(Branch, "[tid: %i] Incorrectly predicted"
@@ -459,7 +465,7 @@
             }
             if (hist_it->wasIndirect) {
                 ++indirectMispredicted;
-                iPred.recordTarget(hist_it->seqNum, ghr, corrTarget, tid);
+                iPred.recordTarget(hist_it->seqNum, corrTarget, tid);
             } else {
                 DPRINTF(Branch,"[tid: %i] BTB Update called for [sn:%i]"
                         " PC: %s\n", tid,hist_it->seqNum, hist_it->pc);
diff --git a/src/cpu/pred/bpred_unit.hh b/src/cpu/pred/bpred_unit.hh
index 9a75bbd..0ad6867 100644
--- a/src/cpu/pred/bpred_unit.hh
+++ b/src/cpu/pred/bpred_unit.hh
@@ -193,8 +193,6 @@
     { BTB.update(instPC, target, 0); }
 
 
-    virtual unsigned getGHR(ThreadID tid, void* bp_history) const { return 0; }
-
     void dump();
 
   private:
@@ -205,11 +203,13 @@
          */
         PredictorHistory(const InstSeqNum &seq_num, Addr instPC,
                          bool pred_taken, void *bp_history,
-                         ThreadID _tid, const StaticInstPtr & inst)
-            : seqNum(seq_num), pc(instPC), bpHistory(bp_history), RASTarget(0),
-              RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0), pushedRAS(0),
-              wasCall(0), wasReturn(0), wasIndirect(0),
-              target(MaxAddr), inst(inst)
+                         void *indirect_history, ThreadID _tid,
+                         const StaticInstPtr & inst)
+            : seqNum(seq_num), pc(instPC), bpHistory(bp_history),
+              indirectHistory(indirect_history), RASTarget(0), RASIndex(0),
+              tid(_tid), predTaken(pred_taken), usedRAS(0), pushedRAS(0),
+              wasCall(0), wasReturn(0), wasIndirect(0), target(MaxAddr),
+              inst(inst)
         {}
 
         bool operator==(const PredictorHistory &entry) const {
@@ -228,6 +228,8 @@
          */
         void *bpHistory;
 
+        void *indirectHistory;
+
         /** The RAS target (only valid if a return). */
         TheISA::PCState RASTarget;
 
diff --git a/src/cpu/pred/indirect.cc b/src/cpu/pred/indirect.cc
index a8934d5..6690abb 100644
--- a/src/cpu/pred/indirect.cc
+++ b/src/cpu/pred/indirect.cc
@@ -53,11 +53,31 @@
     }
 }
 
+void
+IndirectPredictor::updateDirectionInfo(ThreadID tid, bool taken,
+    void* & indirect_history)
+{
+    // record the GHR as it was before this prediction
+    // It will be used to recover the history in case this prediction is
+    // wrong or belongs to bad path
+    indirect_history = new unsigned(threadInfo[tid].ghr);
+
+    threadInfo[tid].ghr <<= taken;
+}
+
+void
+IndirectPredictor::changeDirectionPrediction(ThreadID tid,
+    void * indirect_history, bool actually_taken)
+{
+    unsigned * previousGhr = static_cast<unsigned *>(indirect_history);
+    threadInfo[tid].ghr = ((*previousGhr) << 1) + actually_taken;
+}
+
 bool
-IndirectPredictor::lookup(Addr br_addr, unsigned ghr, TheISA::PCState& target,
+IndirectPredictor::lookup(Addr br_addr, TheISA::PCState& target,
     ThreadID tid)
 {
-    Addr set_index = getSetIndex(br_addr, ghr, tid);
+    Addr set_index = getSetIndex(br_addr, threadInfo[tid].ghr, tid);
     Addr tag = getTag(br_addr);
 
     assert(set_index < numSets);
@@ -85,11 +105,16 @@
 }
 
 void
-IndirectPredictor::commit(InstSeqNum seq_num, ThreadID tid)
+IndirectPredictor::commit(InstSeqNum seq_num, ThreadID tid,
+                          void * indirect_history)
 {
     DPRINTF(Indirect, "Committing seq:%d\n", seq_num);
     ThreadInfo &t_info = threadInfo[tid];
 
+    // we do not need to recover the GHR, so delete the information
+    unsigned * previousGhr = static_cast<unsigned *>(indirect_history);
+    delete previousGhr;
+
     if (t_info.pathHist.empty()) return;
 
     if (t_info.headHistEntry < t_info.pathHist.size() &&
@@ -121,9 +146,17 @@
     t_info.pathHist.erase(squash_itr, t_info.pathHist.end());
 }
 
+void
+IndirectPredictor::deleteDirectionInfo(ThreadID tid, void * indirect_history)
+{
+    unsigned * previousGhr = static_cast<unsigned *>(indirect_history);
+    threadInfo[tid].ghr = *previousGhr;
+
+    delete previousGhr;
+}
 
 void
-IndirectPredictor::recordTarget(InstSeqNum seq_num, unsigned ghr,
+IndirectPredictor::recordTarget(InstSeqNum seq_num,
         const TheISA::PCState& target, ThreadID tid)
 {
     ThreadInfo &t_info = threadInfo[tid];
@@ -132,7 +165,7 @@
     auto hist_entry = *(t_info.pathHist.rbegin());
     // Temporarily pop it off the history so we can calculate the set
     t_info.pathHist.pop_back();
-    Addr set_index = getSetIndex(hist_entry.pcAddr, ghr, tid);
+    Addr set_index = getSetIndex(hist_entry.pcAddr, t_info.ghr, tid);
     Addr tag = getTag(hist_entry.pcAddr);
     hist_entry.targetAddr = target.instAddr();
     t_info.pathHist.push_back(hist_entry);
diff --git a/src/cpu/pred/indirect.hh b/src/cpu/pred/indirect.hh
index 8945053..226a778 100644
--- a/src/cpu/pred/indirect.hh
+++ b/src/cpu/pred/indirect.hh
@@ -44,14 +44,18 @@
                       unsigned num_sets, unsigned num_ways,
                       unsigned tag_bits, unsigned path_len,
                       unsigned inst_shift, unsigned num_threads);
-    bool lookup(Addr br_addr, unsigned ghr, TheISA::PCState& br_target,
-                ThreadID tid);
+    bool lookup(Addr br_addr, TheISA::PCState& br_target, ThreadID tid);
     void recordIndirect(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num,
                         ThreadID tid);
-    void commit(InstSeqNum seq_num, ThreadID tid);
+    void commit(InstSeqNum seq_num, ThreadID tid, void * indirect_history);
     void squash(InstSeqNum seq_num, ThreadID tid);
-    void recordTarget(InstSeqNum seq_num, unsigned ghr,
-                      const TheISA::PCState& target, ThreadID tid);
+    void recordTarget(InstSeqNum seq_num, const TheISA::PCState& target,
+                      ThreadID tid);
+    void updateDirectionInfo(ThreadID tid, bool taken,
+                             void* & indirect_history);
+    void changeDirectionPrediction(ThreadID tid, void * indirect_history,
+                                   bool actually_taken);
+    void deleteDirectionInfo(ThreadID tid, void * indirect_history);
 
   private:
     const bool hashGHR;
@@ -85,10 +89,11 @@
 
 
     struct ThreadInfo {
-        ThreadInfo() : headHistEntry(0) { }
+        ThreadInfo() : headHistEntry(0), ghr(0) { }
 
         std::deque<HistoryEntry> pathHist;
         unsigned headHistEntry;
+        unsigned ghr;
     };
 
     std::vector<ThreadInfo> threadInfo;
diff --git a/src/cpu/pred/tage.cc b/src/cpu/pred/tage.cc
index 50beb20..5702c04 100644
--- a/src/cpu/pred/tage.cc
+++ b/src/cpu/pred/tage.cc
@@ -51,16 +51,6 @@
 {
 }
 
-// Get GHR for hashing indirect predictor
-// Build history backwards from pointer in
-// bp_history.
-unsigned
-TAGE::getGHR(ThreadID tid, void *bp_history) const
-{
-    TageBranchInfo* bi = static_cast<TageBranchInfo*>(bp_history);
-    return tage->getGHR(tid, bi->tageBranchInfo);
-}
-
 // PREDICTOR UPDATE
 void
 TAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
diff --git a/src/cpu/pred/tage.hh b/src/cpu/pred/tage.hh
index b75051d..0c5cde2 100644
--- a/src/cpu/pred/tage.hh
+++ b/src/cpu/pred/tage.hh
@@ -89,7 +89,6 @@
                 bool squashed, const StaticInstPtr & inst,
                 Addr corrTarget) override;
     virtual void squash(ThreadID tid, void *bp_history) override;
-    unsigned getGHR(ThreadID tid, void *bp_history) const override;
 };
 
 #endif // __CPU_PRED_TAGE
diff --git a/src/cpu/pred/tournament.cc b/src/cpu/pred/tournament.cc
index 6ed208b..67beb09 100644
--- a/src/cpu/pred/tournament.cc
+++ b/src/cpu/pred/tournament.cc
@@ -366,12 +366,6 @@
     return new TournamentBP(this);
 }
 
-unsigned
-TournamentBP::getGHR(ThreadID tid, void *bp_history) const
-{
-    return static_cast<BPHistory *>(bp_history)->globalHistory;
-}
-
 #ifdef DEBUG
 int
 TournamentBP::BPHistory::newCount = 0;
diff --git a/src/cpu/pred/tournament.hh b/src/cpu/pred/tournament.hh
index e1e5081..0eb69e1 100644
--- a/src/cpu/pred/tournament.hh
+++ b/src/cpu/pred/tournament.hh
@@ -115,8 +115,6 @@
      */
     void squash(ThreadID tid, void *bp_history);
 
-    unsigned getGHR(ThreadID tid, void *bp_history) const;
-
   private:
     /**
      * Returns if the branch should be taken or not, given a counter