cpu: fix how a thread starts up in MinorCPU

When a thread is activated by another thread calling a clone system
call, the child thread's context is initialized in the middle of the
clone system call and before the context is fully initialized.
Therefore, the child thread starts fetching an unitialized PC, which
could lead to a page fault.

This patch adds a pipeline wakeup event that is scheduled later in the
cycle when the thread is activated. This event ensures that the first
fetch only happens after the thread context is fully initialized
(e.g., in case of clone syscall, it is when the parent thread copies
its context over to the child thread).

When a thread first starts or wakes up, input queue to the Fetch2 stage
needs to be drained since the execution flow is likely to change and
previously fetched instructions in the queue may no longer be in the
correct flow. This patch dumps/drains all inputs in the input queue
of a thread context in the Fetch2 stage when the associated thread wakes
up.

Change-Id: Iad970638e435858b7289cd471158cc0afdbbb0e5
Reviewed-on: https://gem5-review.googlesource.com/c/8182
Reviewed-by: Brandon Potter <Brandon.Potter@amd.com>
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Maintainer: Brandon Potter <Brandon.Potter@amd.com>
diff --git a/src/cpu/minor/cpu.cc b/src/cpu/minor/cpu.cc
index 63efde2..484457b 100644
--- a/src/cpu/minor/cpu.cc
+++ b/src/cpu/minor/cpu.cc
@@ -49,6 +49,7 @@
 
 MinorCPU::MinorCPU(MinorCPUParams *params) :
     BaseCPU(params),
+    pipelineStartupEvent([this]{ wakeupPipeline(); }, name()),
     threadPolicy(params->threadPolicy)
 {
     /* This is only written for one thread at the moment */
@@ -279,20 +280,43 @@
 void
 MinorCPU::activateContext(ThreadID thread_id)
 {
-    DPRINTF(MinorCPU, "ActivateContext thread: %d\n", thread_id);
+    /* Remember to wake up this thread_id by scheduling the
+     * pipelineStartup event.
+     * We can't wakeupFetch the thread right away because its context may
+     * not have been fully initialized. For example, in the case of clone
+     * syscall, this activateContext function is called in the middle of
+     * the syscall and before the new thread context is initialized.
+     * If we start fetching right away, the new thread will fetch from an
+     * invalid address (i.e., pc is not initialized yet), which could lead
+     * to a page fault. Instead, we remember which threads to wake up and
+     * schedule an event to wake all them up after their contexts are
+     * fully initialized */
+    readyThreads.push_back(thread_id);
+    if (!pipelineStartupEvent.scheduled())
+        schedule(pipelineStartupEvent, clockEdge(Cycles(0)));
+}
 
-    /* Do some cycle accounting.  lastStopped is reset to stop the
-     *  wakeup call on the pipeline from adding the quiesce period
-     *  to BaseCPU::numCycles */
-    stats.quiesceCycles += pipeline->cyclesSinceLastStopped();
-    pipeline->resetLastStopped();
+void
+MinorCPU::wakeupPipeline()
+{
+    for (auto thread_id : readyThreads) {
+        DPRINTF(MinorCPU, "ActivateContext thread: %d\n", thread_id);
 
-    /* Wake up the thread, wakeup the pipeline tick */
-    threads[thread_id]->activate();
-    wakeupOnEvent(Minor::Pipeline::CPUStageId);
-    pipeline->wakeupFetch(thread_id);
+        /* Do some cycle accounting.  lastStopped is reset to stop the
+         *  wakeup call on the pipeline from adding the quiesce period
+         *  to BaseCPU::numCycles */
+        stats.quiesceCycles += pipeline->cyclesSinceLastStopped();
+        pipeline->resetLastStopped();
 
-    BaseCPU::activateContext(thread_id);
+        /* Wake up the thread, wakeup the pipeline tick */
+        threads[thread_id]->activate();
+        wakeupOnEvent(Minor::Pipeline::CPUStageId);
+
+        pipeline->wakeupFetch(thread_id);
+        BaseCPU::activateContext(thread_id);
+    }
+
+    readyThreads.clear();
 }
 
 void
diff --git a/src/cpu/minor/cpu.hh b/src/cpu/minor/cpu.hh
index 4e47623..606a401 100644
--- a/src/cpu/minor/cpu.hh
+++ b/src/cpu/minor/cpu.hh
@@ -83,6 +83,13 @@
      *  Elements of pipeline call TheISA to implement the model. */
     Minor::Pipeline *pipeline;
 
+    /** An event that wakes up the pipeline when a thread context is
+     * activated */
+    EventFunctionWrapper pipelineStartupEvent;
+
+    /** List of threads that are ready to wake up and run */
+    std::vector<ThreadID> readyThreads;
+
   public:
     /** Activity recording for pipeline.  This belongs to Pipeline but
      *  stages will access it through the CPU as the MinorCPU object
@@ -165,6 +172,9 @@
     void activateContext(ThreadID thread_id) override;
     void suspendContext(ThreadID thread_id) override;
 
+    /** Wake up ready-to-run threads */
+    void wakeupPipeline();
+
     /** Thread scheduling utility functions */
     std::vector<ThreadID> roundRobinPriority(ThreadID priority)
     {
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 7b76ca2..93c0895 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -1054,7 +1054,8 @@
         !branch.isStreamChange() && /* No real branch */
         fault == NoFault && /* No faults */
         completed_inst && /* Still finding instructions to execute */
-        num_insts_committed != commitLimit /* Not reached commit limit */
+        num_insts_committed != commitLimit && /* Not reached commit limit */
+        cpu.getContext(thread_id)->status() != ThreadContext::Suspended
         )
     {
         if (only_commit_microops) {
diff --git a/src/cpu/minor/fetch2.cc b/src/cpu/minor/fetch2.cc
index ba898d9..09a06fc 100644
--- a/src/cpu/minor/fetch2.cc
+++ b/src/cpu/minor/fetch2.cc
@@ -120,6 +120,7 @@
         popInput(tid);
 
     fetchInfo[tid].inputIndex = 0;
+    fetchInfo[tid].havePC = false;
 }
 
 void
diff --git a/src/cpu/minor/fetch2.hh b/src/cpu/minor/fetch2.hh
index c66fbd8..2230560 100644
--- a/src/cpu/minor/fetch2.hh
+++ b/src/cpu/minor/fetch2.hh
@@ -172,6 +172,11 @@
     Stats::Scalar loadInstructions;
     Stats::Scalar storeInstructions;
 
+  public:
+    /** Dump the whole contents of the input buffer.  Useful after a
+     *  prediction changes control flow */
+    void dumpAllInput(ThreadID tid);
+
   protected:
     /** Get a piece of data to work on from the inputBuffer, or 0 if there
      *  is no data. */
@@ -180,10 +185,6 @@
     /** Pop an element off the input buffer, if there are any */
     void popInput(ThreadID tid);
 
-    /** Dump the whole contents of the input buffer.  Useful after a
-     *  prediction changes control flow */
-    void dumpAllInput(ThreadID tid);
-
     /** Update local branch prediction structures from feedback from
      *  Execute. */
     void updateBranchPrediction(const BranchData &branch);
diff --git a/src/cpu/minor/pipeline.cc b/src/cpu/minor/pipeline.cc
index b5659ac..3248d54 100644
--- a/src/cpu/minor/pipeline.cc
+++ b/src/cpu/minor/pipeline.cc
@@ -199,6 +199,7 @@
 Pipeline::wakeupFetch(ThreadID tid)
 {
     fetch1.wakeupFetch(tid);
+    fetch2.dumpAllInput(tid);
 }
 
 bool