cpu-o3: fix IQ missing mem barriers

After commit e2a5063e5f18f902833c84894b0ff103e3371493 some
memory references now tracked as barriers were not having
their completion properly notified to the MemDepUnit.

This patch fixes InstructionQueue and changes MemDepUnit's
completeBarrier to completeInst, which now should be called
for both memory references and barrier instructions.

Change-Id: I28b5f112b45778f6272e71bb3766b364c3d2e7db
Signed-off-by: Tiago Mück <tiago.muck@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29654
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh
index f9e2966..b8878f3 100644
--- a/src/cpu/o3/inst_queue.hh
+++ b/src/cpu/o3/inst_queue.hh
@@ -240,9 +240,6 @@
     /** Replays a memory instruction. It must be rescheduled first. */
     void replayMemInst(const DynInstPtr &replay_inst);
 
-    /** Completes a memory operation. */
-    void completeMemInst(const DynInstPtr &completed_inst);
-
     /**
      * Defers a memory instruction when its DTB translation incurs a hw
      * page table walk.
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 67b1108..ff5b3be 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2014, 2017-2019 ARM Limited
+ * Copyright (c) 2011-2014, 2017-2020 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved.
  *
@@ -1004,15 +1004,20 @@
     // Tell the memory dependence unit to wake any dependents on this
     // instruction if it is a memory instruction.  Also complete the memory
     // instruction at this point since we know it executed without issues.
-    // @todo: Might want to rename "completeMemInst" to something that
-    // indicates that it won't need to be replayed, and call this
-    // earlier.  Might not be a big deal.
+    ThreadID tid = completed_inst->threadNumber;
     if (completed_inst->isMemRef()) {
-        memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst);
-        completeMemInst(completed_inst);
+        memDepUnit[tid].completeInst(completed_inst);
+
+        DPRINTF(IQ, "Completing mem instruction PC: %s [sn:%llu]\n",
+            completed_inst->pcState(), completed_inst->seqNum);
+
+        ++freeEntries;
+        completed_inst->memOpDone(true);
+        count[tid]--;
     } else if (completed_inst->isMemBarrier() ||
                completed_inst->isWriteBarrier()) {
-        memDepUnit[completed_inst->threadNumber].completeBarrier(completed_inst);
+        // Completes a non mem ref barrier
+        memDepUnit[tid].completeInst(completed_inst);
     }
 
     for (int dest_reg_idx = 0;
@@ -1123,23 +1128,6 @@
 
 template <class Impl>
 void
-InstructionQueue<Impl>::completeMemInst(const DynInstPtr &completed_inst)
-{
-    ThreadID tid = completed_inst->threadNumber;
-
-    DPRINTF(IQ, "Completing mem instruction PC: %s [sn:%llu]\n",
-            completed_inst->pcState(), completed_inst->seqNum);
-
-    ++freeEntries;
-
-    completed_inst->memOpDone(true);
-
-    memDepUnit[tid].completed(completed_inst);
-    count[tid]--;
-}
-
-template <class Impl>
-void
 InstructionQueue<Impl>::deferMemInst(const DynInstPtr &deferred_inst)
 {
     deferredMemInsts.push_back(deferred_inst);
diff --git a/src/cpu/o3/mem_dep_unit.hh b/src/cpu/o3/mem_dep_unit.hh
index 3d24b1f..685f649 100644
--- a/src/cpu/o3/mem_dep_unit.hh
+++ b/src/cpu/o3/mem_dep_unit.hh
@@ -139,14 +139,8 @@
      */
     void replay();
 
-    /** Completes a memory instruction. */
-    void completed(const DynInstPtr &inst);
-
-    /** Completes a barrier instruction. */
-    void completeBarrier(const DynInstPtr &inst);
-
-    /** Wakes any dependents of a memory instruction. */
-    void wakeDependents(const DynInstPtr &inst);
+    /** Notifies completion of an instruction. */
+    void completeInst(const DynInstPtr &inst);
 
     /** Squashes all instructions up until a given sequence number for a
      *  specific thread.
@@ -164,6 +158,13 @@
     void dumpLists();
 
   private:
+
+    /** Completes a memory instruction. */
+    void completed(const DynInstPtr &inst);
+
+    /** Wakes any dependents of a memory instruction. */
+    void wakeDependents(const DynInstPtr &inst);
+
     typedef typename std::list<DynInstPtr>::iterator ListIt;
 
     class MemDepEntry;
diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh
index d1eac29..3a7ad36 100644
--- a/src/cpu/o3/mem_dep_unit_impl.hh
+++ b/src/cpu/o3/mem_dep_unit_impl.hh
@@ -435,7 +435,7 @@
 
 template <class MemDepPred, class Impl>
 void
-MemDepUnit<MemDepPred, Impl>::completeBarrier(const DynInstPtr &inst)
+MemDepUnit<MemDepPred, Impl>::completeInst(const DynInstPtr &inst)
 {
     wakeDependents(inst);
     completed(inst);