src/cpu/minor/execute.cc - public/gem5 - Git at Google

 /*
  * Copyright (c) 2013-2014,2018 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
  * not be construed as granting a license to any other intellectual
  * property including but not limited to intellectual property relating
  * to a hardware implementation of the functionality of the software
  * licensed hereunder.  You may use the software subject to the license
  * terms below provided that you ensure that this notice is replicated
  * unmodified and in its entirety in all distributions of the software,
  * modified or unmodified, in source code or in binary form.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met: redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer;
  * redistributions in binary form must reproduce the above copyright
  * notice, this list of conditions and the following disclaimer in the
  * documentation and/or other materials provided with the distribution;
  * neither the name of the copyright holders nor the names of its
  * contributors may be used to endorse or promote products derived from
  * this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Andrew Bardsley
  */

 #include "cpu/minor/execute.hh"

 #include "arch/locked_mem.hh"
 #include "arch/registers.hh"
 #include "arch/utility.hh"
 #include "cpu/minor/cpu.hh"
 #include "cpu/minor/exec_context.hh"
 #include "cpu/minor/fetch1.hh"
 #include "cpu/minor/lsq.hh"
 #include "cpu/op_class.hh"
 #include "debug/Activity.hh"
 #include "debug/Branch.hh"
 #include "debug/Drain.hh"
 #include "debug/MinorExecute.hh"
 #include "debug/MinorInterrupt.hh"
 #include "debug/MinorMem.hh"
 #include "debug/MinorTrace.hh"
 #include "debug/PCEvent.hh"

 namespace Minor
 {

 Execute::Execute(const std::string &name_,
     MinorCPU &cpu_,
     MinorCPUParams &params,
     Latch<ForwardInstData>::Output inp_,
     Latch<BranchData>::Input out_) :
     Named(name_),
     inp(inp_),
     out(out_),
     cpu(cpu_),
     issueLimit(params.executeIssueLimit),
     memoryIssueLimit(params.executeMemoryIssueLimit),
     commitLimit(params.executeCommitLimit),
     memoryCommitLimit(params.executeMemoryCommitLimit),
     processMoreThanOneInput(params.executeCycleInput),
     fuDescriptions(*params.executeFuncUnits),
     numFuncUnits(fuDescriptions.funcUnits.size()),
     setTraceTimeOnCommit(params.executeSetTraceTimeOnCommit),
     setTraceTimeOnIssue(params.executeSetTraceTimeOnIssue),
     allowEarlyMemIssue(params.executeAllowEarlyMemoryIssue),
     noCostFUIndex(fuDescriptions.funcUnits.size() + 1),
     lsq(name_ + ".lsq", name_ + ".dcache_port",
         cpu_, *this,
         params.executeMaxAccessesInMemory,
         params.executeMemoryWidth,
         params.executeLSQRequestsQueueSize,
         params.executeLSQTransfersQueueSize,
         params.executeLSQStoreBufferSize,
         params.executeLSQMaxStoreBufferStoresPerCycle),
     executeInfo(params.numThreads, ExecuteThreadInfo(params.executeCommitLimit)),
     interruptPriority(0),
     issuePriority(0),
     commitPriority(0)
 {
     if (commitLimit < 1) {
         fatal("%s: executeCommitLimit must be >= 1 (%d)\n", name_,
             commitLimit);
     }

     if (issueLimit < 1) {
         fatal("%s: executeCommitLimit must be >= 1 (%d)\n", name_,
             issueLimit);
     }

     if (memoryIssueLimit < 1) {
         fatal("%s: executeMemoryIssueLimit must be >= 1 (%d)\n", name_,
             memoryIssueLimit);
     }

     if (memoryCommitLimit > commitLimit) {
         fatal("%s: executeMemoryCommitLimit (%d) must be <="
             " executeCommitLimit (%d)\n",
             name_, memoryCommitLimit, commitLimit);
     }

     if (params.executeInputBufferSize < 1) {
         fatal("%s: executeInputBufferSize must be >= 1 (%d)\n", name_,
         params.executeInputBufferSize);
     }

     if (params.executeInputBufferSize < 1) {
         fatal("%s: executeInputBufferSize must be >= 1 (%d)\n", name_,
         params.executeInputBufferSize);
     }

     /* This should be large enough to count all the in-FU instructions
      *  which need to be accounted for in the inFlightInsts
      *  queue */
     unsigned int total_slots = 0;

     /* Make FUPipelines for each MinorFU */
     for (unsigned int i = 0; i < numFuncUnits; i++) {
         std::ostringstream fu_name;
         MinorFU *fu_description = fuDescriptions.funcUnits[i];

         /* Note the total number of instruction slots (for sizing
          *  the inFlightInst queue) and the maximum latency of any FU
          *  (for sizing the activity recorder) */
         total_slots += fu_description->opLat;

         fu_name << name_ << ".fu." << i;

         FUPipeline *fu = new FUPipeline(fu_name.str(), *fu_description, cpu);

         funcUnits.push_back(fu);
     }

     /** Check that there is a functional unit for all operation classes */
     for (int op_class = No_OpClass + 1; op_class < Num_OpClasses; op_class++) {
         bool found_fu = false;
         unsigned int fu_index = 0;

         while (fu_index < numFuncUnits && !found_fu)
         {
             if (funcUnits[fu_index]->provides(
                 static_cast<OpClass>(op_class)))
             {
                 found_fu = true;
             }
             fu_index++;
         }

         if (!found_fu) {
             warn("No functional unit for OpClass %s\n",
                 Enums::OpClassStrings[op_class]);
         }
     }

     /* Per-thread structures */
     for (ThreadID tid = 0; tid < params.numThreads; tid++) {
         std::string tid_str = std::to_string(tid);

         /* Input Buffers */
         inputBuffer.push_back(
             InputBuffer<ForwardInstData>(
                 name_ + ".inputBuffer" + tid_str, "insts",
                 params.executeInputBufferSize));

         /* Scoreboards */
         scoreboard.push_back(Scoreboard(name_ + ".scoreboard" + tid_str));

         /* In-flight instruction records */
         executeInfo[tid].inFlightInsts =  new Queue<QueuedInst,
             ReportTraitsAdaptor<QueuedInst> >(
             name_ + ".inFlightInsts" + tid_str, "insts", total_slots);

         executeInfo[tid].inFUMemInsts = new Queue<QueuedInst,
             ReportTraitsAdaptor<QueuedInst> >(
             name_ + ".inFUMemInsts" + tid_str, "insts", total_slots);
     }
 }

 const ForwardInstData *
 Execute::getInput(ThreadID tid)
 {
     /* Get a line from the inputBuffer to work with */
     if (!inputBuffer[tid].empty()) {
         const ForwardInstData &head = inputBuffer[tid].front();

         return (head.isBubble() ? NULL : &(inputBuffer[tid].front()));
     } else {
         return NULL;
     }
 }

 void
 Execute::popInput(ThreadID tid)
 {
     if (!inputBuffer[tid].empty())
         inputBuffer[tid].pop();

     executeInfo[tid].inputIndex = 0;
 }

 void
 Execute::tryToBranch(MinorDynInstPtr inst, Fault fault, BranchData &branch)
 {
     ThreadContext *thread = cpu.getContext(inst->id.threadId);
     const TheISA::PCState &pc_before = inst->pc;
     TheISA::PCState target = thread->pcState();

     /* Force a branch for SerializeAfter/SquashAfter instructions
      * at the end of micro-op sequence when we're not suspended */
     bool force_branch = thread->status() != ThreadContext::Suspended &&
         !inst->isFault() &&
         inst->isLastOpInInst() &&
         (inst->staticInst->isSerializeAfter() ||
          inst->staticInst->isSquashAfter() ||
          inst->staticInst->isIprAccess());

     DPRINTF(Branch, "tryToBranch before: %s after: %s%s\n",
         pc_before, target, (force_branch ? " (forcing)" : ""));

     /* Will we change the PC to something other than the next instruction? */
     bool must_branch = pc_before != target ||
         fault != NoFault ||
         force_branch;

     /* The reason for the branch data we're about to generate, set below */
     BranchData::Reason reason = BranchData::NoBranch;

     if (fault == NoFault)
     {
         TheISA::advancePC(target, inst->staticInst);
         thread->pcState(target);

         DPRINTF(Branch, "Advancing current PC from: %s to: %s\n",
             pc_before, target);
     }

     if (inst->predictedTaken && !force_branch) {
         /* Predicted to branch */
         if (!must_branch) {
             /* No branch was taken, change stream to get us back to the
              *  intended PC value */
             DPRINTF(Branch, "Predicted a branch from 0x%x to 0x%x but"
                 " none happened inst: %s\n",
                 inst->pc.instAddr(), inst->predictedTarget.instAddr(), *inst);

             reason = BranchData::BadlyPredictedBranch;
         } else if (inst->predictedTarget == target) {
             /* Branch prediction got the right target, kill the branch and
              *  carry on.
              *  Note that this information to the branch predictor might get
              *  overwritten by a "real" branch during this cycle */
             DPRINTF(Branch, "Predicted a branch from 0x%x to 0x%x correctly"
                 " inst: %s\n",
                 inst->pc.instAddr(), inst->predictedTarget.instAddr(), *inst);

             reason = BranchData::CorrectlyPredictedBranch;
         } else {
             /* Branch prediction got the wrong target */
             DPRINTF(Branch, "Predicted a branch from 0x%x to 0x%x"
                     " but got the wrong target (actual: 0x%x) inst: %s\n",
                     inst->pc.instAddr(), inst->predictedTarget.instAddr(),
                     target.instAddr(), *inst);

             reason = BranchData::BadlyPredictedBranchTarget;
         }
     } else if (must_branch) {
         /* Unpredicted branch */
         DPRINTF(Branch, "Unpredicted branch from 0x%x to 0x%x inst: %s\n",
             inst->pc.instAddr(), target.instAddr(), *inst);

         reason = BranchData::UnpredictedBranch;
     } else {
         /* No branch at all */
         reason = BranchData::NoBranch;
     }

     updateBranchData(inst->id.threadId, reason, inst, target, branch);
 }

 void
 Execute::updateBranchData(
     ThreadID tid,
     BranchData::Reason reason,
     MinorDynInstPtr inst, const TheISA::PCState &target,
     BranchData &branch)
 {
     if (reason != BranchData::NoBranch) {
         /* Bump up the stream sequence number on a real branch*/
         if (BranchData::isStreamChange(reason))
             executeInfo[tid].streamSeqNum++;

         /* Branches (even mis-predictions) don't change the predictionSeqNum,
          *  just the streamSeqNum */
         branch = BranchData(reason, tid,
             executeInfo[tid].streamSeqNum,
             /* Maintaining predictionSeqNum if there's no inst is just a
              * courtesy and looks better on minorview */
             (inst->isBubble() ? executeInfo[tid].lastPredictionSeqNum
                 : inst->id.predictionSeqNum),
             target, inst);

         DPRINTF(Branch, "Branch data signalled: %s\n", branch);
     }
 }

 void
 Execute::handleMemResponse(MinorDynInstPtr inst,
     LSQ::LSQRequestPtr response, BranchData &branch, Fault &fault)
 {
     ThreadID thread_id = inst->id.threadId;
     ThreadContext *thread = cpu.getContext(thread_id);

     ExecContext context(cpu, *cpu.threads[thread_id], *this, inst);

     PacketPtr packet = response->packet;

     bool is_load = inst->staticInst->isLoad();
     bool is_store = inst->staticInst->isStore();
     bool is_atomic = inst->staticInst->isAtomic();
     bool is_prefetch = inst->staticInst->isDataPrefetch();

     /* If true, the trace's predicate value will be taken from the exec
      *  context predicate, otherwise, it will be set to false */
     bool use_context_predicate = true;

     if (inst->translationFault != NoFault) {
         /* Invoke memory faults. */
         DPRINTF(MinorMem, "Completing fault from DTLB access: %s\n",
             inst->translationFault->name());

         if (inst->staticInst->isPrefetch()) {
             DPRINTF(MinorMem, "Not taking fault on prefetch: %s\n",
                 inst->translationFault->name());

             /* Don't assign to fault */
         } else {
             /* Take the fault raised during the TLB/memory access */
             fault = inst->translationFault;

             fault->invoke(thread, inst->staticInst);
         }
     } else if (!packet) {
         DPRINTF(MinorMem, "Completing failed request inst: %s\n",
             *inst);
         use_context_predicate = false;
         if (!context.readMemAccPredicate())
             inst->staticInst->completeAcc(nullptr, &context, inst->traceData);
     } else if (packet->isError()) {
         DPRINTF(MinorMem, "Trying to commit error response: %s\n",
             *inst);

         fatal("Received error response packet for inst: %s\n", *inst);
     } else if (is_store || is_load || is_prefetch || is_atomic) {
         assert(packet);

         DPRINTF(MinorMem, "Memory response inst: %s addr: 0x%x size: %d\n",
             *inst, packet->getAddr(), packet->getSize());

         if (is_load && packet->getSize() > 0) {
             DPRINTF(MinorMem, "Memory data[0]: 0x%x\n",
                 static_cast<unsigned int>(packet->getConstPtr<uint8_t>()[0]));
         }

         /* Complete the memory access instruction */
         fault = inst->staticInst->completeAcc(packet, &context,
             inst->traceData);

         if (fault != NoFault) {
             /* Invoke fault created by instruction completion */
             DPRINTF(MinorMem, "Fault in memory completeAcc: %s\n",
                 fault->name());
             fault->invoke(thread, inst->staticInst);
         } else {
             /* Stores need to be pushed into the store buffer to finish
              *  them off */
             if (response->needsToBeSentToStoreBuffer())
                 lsq.sendStoreToStoreBuffer(response);
         }
     } else {
         fatal("There should only ever be reads, "
             "writes or faults at this point\n");
     }

     lsq.popResponse(response);

     if (inst->traceData) {
         inst->traceData->setPredicate((use_context_predicate ?
             context.readPredicate() : false));
     }

     doInstCommitAccounting(inst);

     /* Generate output to account for branches */
     tryToBranch(inst, fault, branch);
 }

 bool
 Execute::isInterrupted(ThreadID thread_id) const
 {
     return cpu.checkInterrupts(cpu.getContext(thread_id));
 }

 bool
 Execute::takeInterrupt(ThreadID thread_id, BranchData &branch)
 {
     DPRINTF(MinorInterrupt, "Considering interrupt status from PC: %s\n",
         cpu.getContext(thread_id)->pcState());

     Fault interrupt = cpu.getInterruptController(thread_id)->getInterrupt
         (cpu.getContext(thread_id));

     if (interrupt != NoFault) {
         /* The interrupt *must* set pcState */
         cpu.getInterruptController(thread_id)->updateIntrInfo
             (cpu.getContext(thread_id));
         interrupt->invoke(cpu.getContext(thread_id));

         assert(!lsq.accessesInFlight());

         DPRINTF(MinorInterrupt, "Invoking interrupt: %s to PC: %s\n",
             interrupt->name(), cpu.getContext(thread_id)->pcState());

         /* Assume that an interrupt *must* cause a branch.  Assert this? */

         updateBranchData(thread_id, BranchData::Interrupt,
             MinorDynInst::bubble(), cpu.getContext(thread_id)->pcState(),
             branch);
     }

     return interrupt != NoFault;
 }

 bool
 Execute::executeMemRefInst(MinorDynInstPtr inst, BranchData &branch,
     bool &passed_predicate, Fault &fault)
 {
     bool issued = false;

     /* Set to true if the mem op. is issued and sent to the mem system */
     passed_predicate = false;

     if (!lsq.canRequest()) {
         /* Not acting on instruction yet as the memory
          * queues are full */
         issued = false;
     } else {
         ThreadContext *thread = cpu.getContext(inst->id.threadId);
         TheISA::PCState old_pc = thread->pcState();

         ExecContext context(cpu, *cpu.threads[inst->id.threadId],
             *this, inst);

         DPRINTF(MinorExecute, "Initiating memRef inst: %s\n", *inst);

         Fault init_fault = inst->staticInst->initiateAcc(&context,
             inst->traceData);

         if (inst->inLSQ) {
             if (init_fault != NoFault) {
                 assert(inst->translationFault != NoFault);
                 // Translation faults are dealt with in handleMemResponse()
                 init_fault = NoFault;
             } else {
                 // If we have a translation fault then it got suppressed  by
                 // initateAcc()
                 inst->translationFault = NoFault;
             }
         }

         if (init_fault != NoFault) {
             DPRINTF(MinorExecute, "Fault on memory inst: %s"
                 " initiateAcc: %s\n", *inst, init_fault->name());
             fault = init_fault;
         } else {
             /* Only set this if the instruction passed its
              * predicate */
             if (!context.readMemAccPredicate()) {
                 DPRINTF(MinorMem, "No memory access for inst: %s\n", *inst);
                 assert(context.readPredicate());
             }
             passed_predicate = context.readPredicate();

             /* Set predicate in tracing */
             if (inst->traceData)
                 inst->traceData->setPredicate(passed_predicate);

             /* If the instruction didn't pass its predicate (and so will not
              *  progress from here)  Try to branch to correct and branch
              *  mis-prediction. */
             if (!passed_predicate) {
                 /* Leave it up to commit to handle the fault */
                 lsq.pushFailedRequest(inst);
             }
         }

         /* Restore thread PC */
         thread->pcState(old_pc);
         issued = true;
     }

     return issued;
 }

 /** Increment a cyclic buffer index for indices [0, cycle_size-1] */
 inline unsigned int
 cyclicIndexInc(unsigned int index, unsigned int cycle_size)
 {
     unsigned int ret = index + 1;

     if (ret == cycle_size)
         ret = 0;

     return ret;
 }

 /** Decrement a cyclic buffer index for indices [0, cycle_size-1] */
 inline unsigned int
 cyclicIndexDec(unsigned int index, unsigned int cycle_size)
 {
     int ret = index - 1;

     if (ret < 0)
         ret = cycle_size - 1;

     return ret;
 }

 unsigned int
 Execute::issue(ThreadID thread_id)
 {
     const ForwardInstData *insts_in = getInput(thread_id);
     ExecuteThreadInfo &thread = executeInfo[thread_id];

     /* Early termination if we have no instructions */
     if (!insts_in)
         return 0;

     /* Start from the first FU */
     unsigned int fu_index = 0;

     /* Remains true while instructions are still being issued.  If any
      *  instruction fails to issue, this is set to false and we exit issue.
      *  This strictly enforces in-order issue.  For other issue behaviours,
      *  a more complicated test in the outer while loop below is needed. */
     bool issued = true;

     /* Number of insts issues this cycle to check for issueLimit */
     unsigned num_insts_issued = 0;

     /* Number of memory ops issues this cycle to check for memoryIssueLimit */
     unsigned num_mem_insts_issued = 0;

     /* Number of instructions discarded this cycle in order to enforce a
      *  discardLimit. @todo, add that parameter? */
     unsigned num_insts_discarded = 0;

     do {
         MinorDynInstPtr inst = insts_in->insts[thread.inputIndex];
         Fault fault = inst->fault;
         bool discarded = false;
         bool issued_mem_ref = false;

         if (inst->isBubble()) {
             /* Skip */
             issued = true;
         } else if (cpu.getContext(thread_id)->status() ==
             ThreadContext::Suspended)
         {
             DPRINTF(MinorExecute, "Discarding inst: %s from suspended"
                 " thread\n", *inst);

             issued = true;
             discarded = true;
         } else if (inst->id.streamSeqNum != thread.streamSeqNum) {
             DPRINTF(MinorExecute, "Discarding inst: %s as its stream"
                 " state was unexpected, expected: %d\n",
                 *inst, thread.streamSeqNum);
             issued = true;
             discarded = true;
         } else {
             /* Try and issue an instruction into an FU, assume we didn't and
              * fix that in the loop */
             issued = false;

             /* Try FU from 0 each instruction */
             fu_index = 0;

             /* Try and issue a single instruction stepping through the
              *  available FUs */
             do {
                 FUPipeline *fu = funcUnits[fu_index];

                 DPRINTF(MinorExecute, "Trying to issue inst: %s to FU: %d\n",
                     *inst, fu_index);

                 /* Does the examined fu have the OpClass-related capability
                  *  needed to execute this instruction?  Faults can always
                  *  issue to any FU but probably should just 'live' in the
                  *  inFlightInsts queue rather than having an FU. */
                 bool fu_is_capable = (!inst->isFault() ?
                     fu->provides(inst->staticInst->opClass()) : true);

                 if (inst->isNoCostInst()) {
                     /* Issue free insts. to a fake numbered FU */
                     fu_index = noCostFUIndex;

                     /* And start the countdown on activity to allow
                      *  this instruction to get to the end of its FU */
                     cpu.activityRecorder->activity();

                     /* Mark the destinations for this instruction as
                      *  busy */
                     scoreboard[thread_id].markupInstDests(inst, cpu.curCycle() +
                         Cycles(0), cpu.getContext(thread_id), false);

                     DPRINTF(MinorExecute, "Issuing %s to %d\n", inst->id, noCostFUIndex);
                     inst->fuIndex = noCostFUIndex;
                     inst->extraCommitDelay = Cycles(0);
                     inst->extraCommitDelayExpr = NULL;

                     /* Push the instruction onto the inFlight queue so
                      *  it can be committed in order */
                     QueuedInst fu_inst(inst);
                     thread.inFlightInsts->push(fu_inst);

                     issued = true;

                 } else if (!fu_is_capable || fu->alreadyPushed()) {
                     /* Skip */
                     if (!fu_is_capable) {
                         DPRINTF(MinorExecute, "Can't issue as FU: %d isn't"
                             " capable\n", fu_index);
                     } else {
                         DPRINTF(MinorExecute, "Can't issue as FU: %d is"
                             " already busy\n", fu_index);
                     }
                 } else if (fu->stalled) {
                     DPRINTF(MinorExecute, "Can't issue inst: %s into FU: %d,"
                         " it's stalled\n",
                         *inst, fu_index);
                 } else if (!fu->canInsert()) {
                     DPRINTF(MinorExecute, "Can't issue inst: %s to busy FU"
                         " for another: %d cycles\n",
                         *inst, fu->cyclesBeforeInsert());
                 } else {
                     MinorFUTiming *timing = (!inst->isFault() ?
                         fu->findTiming(inst->staticInst) : NULL);

                     const std::vector<Cycles> *src_latencies =
                         (timing ? &(timing->srcRegsRelativeLats)
                             : NULL);

                     const std::vector<bool> *cant_forward_from_fu_indices =
                         &(fu->cantForwardFromFUIndices);

                     if (timing && timing->suppress) {
                         DPRINTF(MinorExecute, "Can't issue inst: %s as extra"
                             " decoding is suppressing it\n",
                             *inst);
                     } else if (!scoreboard[thread_id].canInstIssue(inst,
                         src_latencies, cant_forward_from_fu_indices,
                         cpu.curCycle(), cpu.getContext(thread_id)))
                     {
                         DPRINTF(MinorExecute, "Can't issue inst: %s yet\n",
                             *inst);
                     } else {
                         /* Can insert the instruction into this FU */
                         DPRINTF(MinorExecute, "Issuing inst: %s"
                             " into FU %d\n", *inst,
                             fu_index);

                         Cycles extra_dest_retire_lat = Cycles(0);
                         TimingExpr *extra_dest_retire_lat_expr = NULL;
                         Cycles extra_assumed_lat = Cycles(0);

                         /* Add the extraCommitDelay and extraAssumeLat to
                          *  the FU pipeline timings */
                         if (timing) {
                             extra_dest_retire_lat =
                                 timing->extraCommitLat;
                             extra_dest_retire_lat_expr =
                                 timing->extraCommitLatExpr;
                             extra_assumed_lat =
                                 timing->extraAssumedLat;
                         }

                         issued_mem_ref = inst->isMemRef();

                         QueuedInst fu_inst(inst);

                         /* Decorate the inst with FU details */
                         inst->fuIndex = fu_index;
                         inst->extraCommitDelay = extra_dest_retire_lat;
                         inst->extraCommitDelayExpr =
                             extra_dest_retire_lat_expr;

                         if (issued_mem_ref) {
                             /* Remember which instruction this memory op
                              *  depends on so that initiateAcc can be called
                              *  early */
                             if (allowEarlyMemIssue) {
                                 inst->instToWaitFor =
                                     scoreboard[thread_id].execSeqNumToWaitFor(inst,
                                         cpu.getContext(thread_id));

                                 if (lsq.getLastMemBarrier(thread_id) >
                                     inst->instToWaitFor)
                                 {
                                     DPRINTF(MinorExecute, "A barrier will"
                                         " cause a delay in mem ref issue of"
                                         " inst: %s until after inst"
                                         " %d(exec)\n", *inst,
                                         lsq.getLastMemBarrier(thread_id));

                                     inst->instToWaitFor =
                                         lsq.getLastMemBarrier(thread_id);
                                 } else {
                                     DPRINTF(MinorExecute, "Memory ref inst:"
                                         " %s must wait for inst %d(exec)"
                                         " before issuing\n",
                                         *inst, inst->instToWaitFor);
                                 }

                                 inst->canEarlyIssue = true;
                             }
                             /* Also queue this instruction in the memory ref
                              *  queue to ensure in-order issue to the LSQ */
                             DPRINTF(MinorExecute, "Pushing mem inst: %s\n",
                                 *inst);
                             thread.inFUMemInsts->push(fu_inst);
                         }

                         /* Issue to FU */
                         fu->push(fu_inst);
                         /* And start the countdown on activity to allow
                          *  this instruction to get to the end of its FU */
                         cpu.activityRecorder->activity();

                         /* Mark the destinations for this instruction as
                          *  busy */
                         scoreboard[thread_id].markupInstDests(inst, cpu.curCycle() +
                             fu->description.opLat +
                             extra_dest_retire_lat +
                             extra_assumed_lat,
                             cpu.getContext(thread_id),
                             issued_mem_ref && extra_assumed_lat == Cycles(0));

                         /* Push the instruction onto the inFlight queue so
                          *  it can be committed in order */
                         thread.inFlightInsts->push(fu_inst);

                         issued = true;
                     }
                 }

                 fu_index++;
             } while (fu_index != numFuncUnits && !issued);

             if (!issued)
                 DPRINTF(MinorExecute, "Didn't issue inst: %s\n", *inst);
         }

         if (issued) {
             /* Generate MinorTrace's MinorInst lines.  Do this at commit
              *  to allow better instruction annotation? */
             if (DTRACE(MinorTrace) && !inst->isBubble())
                 inst->minorTraceInst(*this);

             /* Mark up barriers in the LSQ */
             if (!discarded && inst->isInst() &&
                 inst->staticInst->isMemBarrier())
             {
                 DPRINTF(MinorMem, "Issuing memory barrier inst: %s\n", *inst);
                 lsq.issuedMemBarrierInst(inst);
             }

             if (inst->traceData && setTraceTimeOnIssue) {
                 inst->traceData->setWhen(curTick());
             }

             if (issued_mem_ref)
                 num_mem_insts_issued++;

             if (discarded) {
                 num_insts_discarded++;
             } else if (!inst->isBubble()) {
                 num_insts_issued++;

                 if (num_insts_issued == issueLimit)
                     DPRINTF(MinorExecute, "Reached inst issue limit\n");
             }

             thread.inputIndex++;
             DPRINTF(MinorExecute, "Stepping to next inst inputIndex: %d\n",
                 thread.inputIndex);
         }

         /* Got to the end of a line */
         if (thread.inputIndex == insts_in->width()) {
             popInput(thread_id);
             /* Set insts_in to null to force us to leave the surrounding
              *  loop */
             insts_in = NULL;

             if (processMoreThanOneInput) {
                 DPRINTF(MinorExecute, "Wrapping\n");
                 insts_in = getInput(thread_id);
             }
         }
     } while (insts_in && thread.inputIndex < insts_in->width() &&
         /* We still have instructions */
         fu_index != numFuncUnits && /* Not visited all FUs */
         issued && /* We've not yet failed to issue an instruction */
         num_insts_issued != issueLimit && /* Still allowed to issue */
         num_mem_insts_issued != memoryIssueLimit);

     return num_insts_issued;
 }

 bool
 Execute::tryPCEvents(ThreadID thread_id)
 {
     ThreadContext *thread = cpu.getContext(thread_id);
     unsigned int num_pc_event_checks = 0;

     /* Handle PC events on instructions */
     Addr oldPC;
     do {
         oldPC = thread->instAddr();
         cpu.threads[thread_id]->pcEventQueue.service(oldPC, thread);
         num_pc_event_checks++;
     } while (oldPC != thread->instAddr());

     if (num_pc_event_checks > 1) {
         DPRINTF(PCEvent, "Acting on PC Event to PC: %s\n",
             thread->pcState());
     }

     return num_pc_event_checks > 1;
 }

 void
 Execute::doInstCommitAccounting(MinorDynInstPtr inst)
 {
     assert(!inst->isFault());

     MinorThread *thread = cpu.threads[inst->id.threadId];

     /* Increment the many and various inst and op counts in the
      *  thread and system */
     if (!inst->staticInst->isMicroop() || inst->staticInst->isLastMicroop())
     {
         thread->numInst++;
         thread->numInsts++;
         cpu.stats.numInsts++;
         cpu.system->totalNumInsts++;

         /* Act on events related to instruction counts */
         thread->comInstEventQueue.serviceEvents(thread->numInst);
     }
     thread->numOp++;
     thread->numOps++;
     cpu.stats.numOps++;
     cpu.stats.committedInstType[inst->id.threadId]
                                [inst->staticInst->opClass()]++;

     /* Set the CP SeqNum to the numOps commit number */
     if (inst->traceData)
         inst->traceData->setCPSeq(thread->numOp);

     cpu.probeInstCommit(inst->staticInst, inst->pc.instAddr());
 }

 bool
 Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
     BranchData &branch, Fault &fault, bool &committed,
     bool &completed_mem_issue)
 {
     ThreadID thread_id = inst->id.threadId;
     ThreadContext *thread = cpu.getContext(thread_id);

     bool completed_inst = true;
     fault = NoFault;

     /* Is the thread for this instruction suspended?  In that case, just
      *  stall as long as there are no pending interrupts */
     if (thread->status() == ThreadContext::Suspended &&
         !isInterrupted(thread_id))
     {
         panic("We should never hit the case where we try to commit from a "
               "suspended thread as the streamSeqNum should not match");
     } else if (inst->isFault()) {
         ExecContext context(cpu, *cpu.threads[thread_id], *this, inst);

         DPRINTF(MinorExecute, "Fault inst reached Execute: %s\n",
             inst->fault->name());

         fault = inst->fault;
         inst->fault->invoke(thread, NULL);

         tryToBranch(inst, fault, branch);
     } else if (inst->staticInst->isMemRef()) {
         /* Memory accesses are executed in two parts:
          *  executeMemRefInst -- calculates the EA and issues the access
          *      to memory.  This is done here.
          *  handleMemResponse -- handles the response packet, done by
          *      Execute::commit
          *
          *  While the memory access is in its FU, the EA is being
          *  calculated.  At the end of the FU, when it is ready to
          *  'commit' (in this function), the access is presented to the
          *  memory queues.  When a response comes back from memory,
          *  Execute::commit will commit it.
          */
         bool predicate_passed = false;
         bool completed_mem_inst = executeMemRefInst(inst, branch,
             predicate_passed, fault);

         if (completed_mem_inst && fault != NoFault) {
             if (early_memory_issue) {
                 DPRINTF(MinorExecute, "Fault in early executing inst: %s\n",
                     fault->name());
                 /* Don't execute the fault, just stall the instruction
                  *  until it gets to the head of inFlightInsts */
                 inst->canEarlyIssue = false;
                 /* Not completed as we'll come here again to pick up
                  * the fault when we get to the end of the FU */
                 completed_inst = false;
             } else {
                 DPRINTF(MinorExecute, "Fault in execute: %s\n",
                     fault->name());
                 fault->invoke(thread, NULL);

                 tryToBranch(inst, fault, branch);
                 completed_inst = true;
             }
         } else {
             completed_inst = completed_mem_inst;
         }
         completed_mem_issue = completed_inst;
     } else if (inst->isInst() && inst->staticInst->isMemBarrier() &&
         !lsq.canPushIntoStoreBuffer())
     {
         DPRINTF(MinorExecute, "Can't commit data barrier inst: %s yet as"
             " there isn't space in the store buffer\n", *inst);

         completed_inst = false;
     } else if (inst->isInst() && inst->staticInst->isQuiesce()
             && !branch.isBubble()){
         /* This instruction can suspend, need to be able to communicate
          * backwards, so no other branches may evaluate this cycle*/
         completed_inst = false;
     } else {
         ExecContext context(cpu, *cpu.threads[thread_id], *this, inst);

         DPRINTF(MinorExecute, "Committing inst: %s\n", *inst);

         fault = inst->staticInst->execute(&context,
             inst->traceData);

         /* Set the predicate for tracing and dump */
         if (inst->traceData)
             inst->traceData->setPredicate(context.readPredicate());

         committed = true;

         if (fault != NoFault) {
             DPRINTF(MinorExecute, "Fault in execute of inst: %s fault: %s\n",
                 *inst, fault->name());
             fault->invoke(thread, inst->staticInst);
         }

         doInstCommitAccounting(inst);
         tryToBranch(inst, fault, branch);
     }

     if (completed_inst) {
         /* Keep a copy of this instruction's predictionSeqNum just in case
          * we need to issue a branch without an instruction (such as an
          * interrupt) */
         executeInfo[thread_id].lastPredictionSeqNum = inst->id.predictionSeqNum;

         /* Check to see if this instruction suspended the current thread. */
         if (!inst->isFault() &&
             thread->status() == ThreadContext::Suspended &&
             branch.isBubble() && /* It didn't branch too */
             !isInterrupted(thread_id)) /* Don't suspend if we have
                 interrupts */
         {
             TheISA::PCState resume_pc = cpu.getContext(thread_id)->pcState();

             assert(resume_pc.microPC() == 0);

             DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute"
                 " inst: %s\n", thread_id, *inst);

             cpu.stats.numFetchSuspends++;

             updateBranchData(thread_id, BranchData::SuspendThread, inst,
                 resume_pc, branch);
         }
     }

     return completed_inst;
 }

 void
 Execute::commit(ThreadID thread_id, bool only_commit_microops, bool discard,
     BranchData &branch)
 {
     Fault fault = NoFault;
     Cycles now = cpu.curCycle();
     ExecuteThreadInfo &ex_info = executeInfo[thread_id];

     /**
      * Try and execute as many instructions from the end of FU pipelines as
      *  possible.  This *doesn't* include actually advancing the pipelines.
      *
      * We do this by looping on the front of the inFlightInsts queue for as
      *  long as we can find the desired instruction at the end of the
      *  functional unit it was issued to without seeing a branch or a fault.
      *  In this function, these terms are used:
      *      complete -- The instruction has finished its passage through
      *          its functional unit and its fate has been decided
      *          (committed, discarded, issued to the memory system)
      *      commit -- The instruction is complete(d), not discarded and has
      *          its effects applied to the CPU state
      *      discard(ed) -- The instruction is complete but not committed
      *          as its streamSeqNum disagrees with the current
      *          Execute::streamSeqNum
      *
      *  Commits are also possible from two other places:
      *
      *  1) Responses returning from the LSQ
      *  2) Mem ops issued to the LSQ ('committed' from the FUs) earlier
      *      than their position in the inFlightInsts queue, but after all
      *      their dependencies are resolved.
      */

     /* Has an instruction been completed?  Once this becomes false, we stop
      *  trying to complete instructions. */
     bool completed_inst = true;

     /* Number of insts committed this cycle to check against commitLimit */
     unsigned int num_insts_committed = 0;

     /* Number of memory access instructions committed to check against
      *  memCommitLimit */
     unsigned int num_mem_refs_committed = 0;

     if (only_commit_microops && !ex_info.inFlightInsts->empty()) {
         DPRINTF(MinorInterrupt, "Only commit microops %s %d\n",
             *(ex_info.inFlightInsts->front().inst),
             ex_info.lastCommitWasEndOfMacroop);
     }

     while (!ex_info.inFlightInsts->empty() && /* Some more instructions to process */
         !branch.isStreamChange() && /* No real branch */
         fault == NoFault && /* No faults */
         completed_inst && /* Still finding instructions to execute */
         num_insts_committed != commitLimit /* Not reached commit limit */
         )
     {
         if (only_commit_microops) {
             DPRINTF(MinorInterrupt, "Committing tail of insts before"
                 " interrupt: %s\n",
                 *(ex_info.inFlightInsts->front().inst));
         }

         QueuedInst *head_inflight_inst = &(ex_info.inFlightInsts->front());

         InstSeqNum head_exec_seq_num =
             head_inflight_inst->inst->id.execSeqNum;

         /* The instruction we actually process if completed_inst
          *  remains true to the end of the loop body.
          *  Start by considering the the head of the in flight insts queue */
         MinorDynInstPtr inst = head_inflight_inst->inst;

         bool committed_inst = false;
         bool discard_inst = false;
         bool completed_mem_ref = false;
         bool issued_mem_ref = false;
         bool early_memory_issue = false;

         /* Must set this again to go around the loop */
         completed_inst = false;

         /* If we're just completing a macroop before an interrupt or drain,
          *  can we stil commit another microop (rather than a memory response)
          *  without crosing into the next full instruction? */
         bool can_commit_insts = !ex_info.inFlightInsts->empty() &&
             !(only_commit_microops && ex_info.lastCommitWasEndOfMacroop);

         /* Can we find a mem response for this inst */
         LSQ::LSQRequestPtr mem_response =
             (inst->inLSQ ? lsq.findResponse(inst) : NULL);

         DPRINTF(MinorExecute, "Trying to commit canCommitInsts: %d\n",
             can_commit_insts);

         /* Test for PC events after every instruction */
         if (isInbetweenInsts(thread_id) && tryPCEvents(thread_id)) {
             ThreadContext *thread = cpu.getContext(thread_id);

             /* Branch as there was a change in PC */
             updateBranchData(thread_id, BranchData::UnpredictedBranch,
                 MinorDynInst::bubble(), thread->pcState(), branch);
         } else if (mem_response &&
             num_mem_refs_committed < memoryCommitLimit)
         {
             /* Try to commit from the memory responses next */
             discard_inst = inst->id.streamSeqNum !=
                            ex_info.streamSeqNum || discard;

             DPRINTF(MinorExecute, "Trying to commit mem response: %s\n",
                 *inst);

             /* Complete or discard the response */
             if (discard_inst) {
                 DPRINTF(MinorExecute, "Discarding mem inst: %s as its"
                     " stream state was unexpected, expected: %d\n",
                     *inst, ex_info.streamSeqNum);

                 lsq.popResponse(mem_response);
             } else {
                 handleMemResponse(inst, mem_response, branch, fault);
                 committed_inst = true;
             }

             completed_mem_ref = true;
             completed_inst = true;
         } else if (can_commit_insts) {
             /* If true, this instruction will, subject to timing tweaks,
              *  be considered for completion.  try_to_commit flattens
              *  the `if' tree a bit and allows other tests for inst
              *  commit to be inserted here. */
             bool try_to_commit = false;

             /* Try and issue memory ops early if they:
              *  - Can push a request into the LSQ
              *  - Have reached the end of their FUs
              *  - Have had all their dependencies satisfied
              *  - Are from the right stream
              *
              *  For any other case, leave it to the normal instruction
              *  issue below to handle them.
              */
             if (!ex_info.inFUMemInsts->empty() && lsq.canRequest()) {
                 DPRINTF(MinorExecute, "Trying to commit from mem FUs\n");

                 const MinorDynInstPtr head_mem_ref_inst =
                     ex_info.inFUMemInsts->front().inst;
                 FUPipeline *fu = funcUnits[head_mem_ref_inst->fuIndex];
                 const MinorDynInstPtr &fu_inst = fu->front().inst;

                 /* Use this, possibly out of order, inst as the one
                  *  to 'commit'/send to the LSQ */
                 if (!fu_inst->isBubble() &&
                     !fu_inst->inLSQ &&
                     fu_inst->canEarlyIssue &&
                     ex_info.streamSeqNum == fu_inst->id.streamSeqNum &&
                     head_exec_seq_num > fu_inst->instToWaitFor)
                 {
                     DPRINTF(MinorExecute, "Issuing mem ref early"
                         " inst: %s instToWaitFor: %d\n",
                         *(fu_inst), fu_inst->instToWaitFor);

                     inst = fu_inst;
                     try_to_commit = true;
                     early_memory_issue = true;
                     completed_inst = true;
                 }
             }

             /* Try and commit FU-less insts */
             if (!completed_inst && inst->isNoCostInst()) {
                 DPRINTF(MinorExecute, "Committing no cost inst: %s", *inst);

                 try_to_commit = true;
                 completed_inst = true;
             }

             /* Try to issue from the ends of FUs and the inFlightInsts
              *  queue */
             if (!completed_inst && !inst->inLSQ) {
                 DPRINTF(MinorExecute, "Trying to commit from FUs\n");

                 /* Try to commit from a functional unit */
                 /* Is the head inst of the expected inst's FU actually the
                  *  expected inst? */
                 QueuedInst &fu_inst =
                     funcUnits[inst->fuIndex]->front();
                 InstSeqNum fu_inst_seq_num = fu_inst.inst->id.execSeqNum;

                 if (fu_inst.inst->isBubble()) {
                     /* No instruction ready */
                     completed_inst = false;
                 } else if (fu_inst_seq_num != head_exec_seq_num) {
                     /* Past instruction: we must have already executed it
                      * in the same cycle and so the head inst isn't
                      * actually at the end of its pipeline
                      * Future instruction: handled above and only for
                      * mem refs on their way to the LSQ */
                 } else if (fu_inst.inst->id == inst->id)  {
                     /* All instructions can be committed if they have the
                      *  right execSeqNum and there are no in-flight
                      *  mem insts before us */
                     try_to_commit = true;
                     completed_inst = true;
                 }
             }

             if (try_to_commit) {
                 discard_inst = inst->id.streamSeqNum !=
                     ex_info.streamSeqNum || discard;

                 /* Is this instruction discardable as its streamSeqNum
                  *  doesn't match? */
                 if (!discard_inst) {
                     /* Try to commit or discard a non-memory instruction.
                      *  Memory ops are actually 'committed' from this FUs
                      *  and 'issued' into the memory system so we need to
                      *  account for them later (commit_was_mem_issue gets
                      *  set) */
                     if (inst->extraCommitDelayExpr) {
                         DPRINTF(MinorExecute, "Evaluating expression for"
                             " extra commit delay inst: %s\n", *inst);

                         ThreadContext *thread = cpu.getContext(thread_id);

                         TimingExprEvalContext context(inst->staticInst,
                             thread, NULL);

                         uint64_t extra_delay = inst->extraCommitDelayExpr->
                             eval(context);

                         DPRINTF(MinorExecute, "Extra commit delay expr"
                             " result: %d\n", extra_delay);

                         if (extra_delay < 128) {
                             inst->extraCommitDelay += Cycles(extra_delay);
                         } else {
                             DPRINTF(MinorExecute, "Extra commit delay was"
                                 " very long: %d\n", extra_delay);
                         }
                         inst->extraCommitDelayExpr = NULL;
                     }

                     /* Move the extraCommitDelay from the instruction
                      *  into the minimumCommitCycle */
                     if (inst->extraCommitDelay != Cycles(0)) {
                         inst->minimumCommitCycle = cpu.curCycle() +
                             inst->extraCommitDelay;
                         inst->extraCommitDelay = Cycles(0);
                     }

                     /* @todo Think about making lastMemBarrier be
                      *  MAX_UINT_64 to avoid using 0 as a marker value */
                     if (!inst->isFault() && inst->isMemRef() &&
                         lsq.getLastMemBarrier(thread_id) <
                             inst->id.execSeqNum &&
                         lsq.getLastMemBarrier(thread_id) != 0)
                     {
                         DPRINTF(MinorExecute, "Not committing inst: %s yet"
                             " as there are incomplete barriers in flight\n",
                             *inst);
                         completed_inst = false;
                     } else if (inst->minimumCommitCycle > now) {
                         DPRINTF(MinorExecute, "Not committing inst: %s yet"
                             " as it wants to be stalled for %d more cycles\n",
                             *inst, inst->minimumCommitCycle - now);
                         completed_inst = false;
                     } else {
                         completed_inst = commitInst(inst,
                             early_memory_issue, branch, fault,
                             committed_inst, issued_mem_ref);
                     }
                 } else {
                     /* Discard instruction */
                     completed_inst = true;
                 }

                 if (completed_inst) {
                     /* Allow the pipeline to advance.  If the FU head
                      *  instruction wasn't the inFlightInsts head
                      *  but had already been committed, it would have
                      *  unstalled the pipeline before here */
                     if (inst->fuIndex != noCostFUIndex) {
                         DPRINTF(MinorExecute, "Unstalling %d for inst %s\n", inst->fuIndex, inst->id);
                         funcUnits[inst->fuIndex]->stalled = false;
                     }
                 }
             }
         } else {
             DPRINTF(MinorExecute, "No instructions to commit\n");
             completed_inst = false;
         }

         /* All discardable instructions must also be 'completed' by now */
         assert(!(discard_inst && !completed_inst));

         /* Instruction committed but was discarded due to streamSeqNum
          *  mismatch */
         if (discard_inst) {
             DPRINTF(MinorExecute, "Discarding inst: %s as its stream"
                 " state was unexpected, expected: %d\n",
                 *inst, ex_info.streamSeqNum);

             if (fault == NoFault)
                 cpu.stats.numDiscardedOps++;
         }

         /* Mark the mem inst as being in the LSQ */
         if (issued_mem_ref) {
             inst->fuIndex = 0;
             inst->inLSQ = true;
         }

         /* Pop issued (to LSQ) and discarded mem refs from the inFUMemInsts
          *  as they've *definitely* exited the FUs */
         if (completed_inst && inst->isMemRef()) {
             /* The MemRef could have been discarded from the FU or the memory
              *  queue, so just check an FU instruction */
             if (!ex_info.inFUMemInsts->empty() &&
                 ex_info.inFUMemInsts->front().inst == inst)
             {
                 ex_info.inFUMemInsts->pop();
             }
         }

         if (completed_inst && !(issued_mem_ref && fault == NoFault)) {
             /* Note that this includes discarded insts */
             DPRINTF(MinorExecute, "Completed inst: %s\n", *inst);

             /* Got to the end of a full instruction? */
             ex_info.lastCommitWasEndOfMacroop = inst->isFault() ||
                 inst->isLastOpInInst();

             /* lastPredictionSeqNum is kept as a convenience to prevent its
              *  value from changing too much on the minorview display */
             ex_info.lastPredictionSeqNum = inst->id.predictionSeqNum;

             /* Finished with the inst, remove it from the inst queue and
              *  clear its dependencies */
             ex_info.inFlightInsts->pop();

             /* Complete barriers in the LSQ/move to store buffer */
             if (inst->isInst() && inst->staticInst->isMemBarrier()) {
                 DPRINTF(MinorMem, "Completing memory barrier"
                     " inst: %s committed: %d\n", *inst, committed_inst);
                 lsq.completeMemBarrierInst(inst, committed_inst);
             }

             scoreboard[thread_id].clearInstDests(inst, inst->isMemRef());
         }

         /* Handle per-cycle instruction counting */
         if (committed_inst) {
             bool is_no_cost_inst = inst->isNoCostInst();

             /* Don't show no cost instructions as having taken a commit
              *  slot */
             if (DTRACE(MinorTrace) && !is_no_cost_inst)
                 ex_info.instsBeingCommitted.insts[num_insts_committed] = inst;

             if (!is_no_cost_inst)
                 num_insts_committed++;

             if (num_insts_committed == commitLimit)
                 DPRINTF(MinorExecute, "Reached inst commit limit\n");

             /* Re-set the time of the instruction if that's required for
              * tracing */
             if (inst->traceData) {
                 if (setTraceTimeOnCommit)
                     inst->traceData->setWhen(curTick());
                 inst->traceData->dump();
             }

             if (completed_mem_ref)
                 num_mem_refs_committed++;

             if (num_mem_refs_committed == memoryCommitLimit)
                 DPRINTF(MinorExecute, "Reached mem ref commit limit\n");
         }
     }
 }

 bool
 Execute::isInbetweenInsts(ThreadID thread_id) const
 {
     return executeInfo[thread_id].lastCommitWasEndOfMacroop &&
         !lsq.accessesInFlight();
 }

 void
 Execute::evaluate()
 {
     if (!inp.outputWire->isBubble())
         inputBuffer[inp.outputWire->threadId].setTail(*inp.outputWire);

     BranchData &branch = *out.inputWire;

     unsigned int num_issued = 0;

     /* Do all the cycle-wise activities for dcachePort here to potentially
      *  free up input spaces in the LSQ's requests queue */
     lsq.step();

     /* Check interrupts first.  Will halt commit if interrupt found */
     bool interrupted = false;
     ThreadID interrupt_tid = checkInterrupts(branch, interrupted);

     if (interrupt_tid != InvalidThreadID) {
         /* Signalling an interrupt this cycle, not issuing/committing from
          * any other threads */
     } else if (!branch.isBubble()) {
         /* It's important that this is here to carry Fetch1 wakeups to Fetch1
          *  without overwriting them */
         DPRINTF(MinorInterrupt, "Execute skipping a cycle to allow old"
             " branch to complete\n");
     } else {
         ThreadID commit_tid = getCommittingThread();

         if (commit_tid != InvalidThreadID) {
             ExecuteThreadInfo& commit_info = executeInfo[commit_tid];

             DPRINTF(MinorExecute, "Attempting to commit [tid:%d]\n",
                     commit_tid);
             /* commit can set stalled flags observable to issue and so *must* be
              *  called first */
             if (commit_info.drainState != NotDraining) {
                 if (commit_info.drainState == DrainCurrentInst) {
                     /* Commit only micro-ops, don't kill anything else */
                     commit(commit_tid, true, false, branch);

                     if (isInbetweenInsts(commit_tid))
                         setDrainState(commit_tid, DrainHaltFetch);

                     /* Discard any generated branch */
                     branch = BranchData::bubble();
                 } else if (commit_info.drainState == DrainAllInsts) {
                     /* Kill all instructions */
                     while (getInput(commit_tid))
                         popInput(commit_tid);
                     commit(commit_tid, false, true, branch);
                 }
             } else {
                 /* Commit micro-ops only if interrupted.  Otherwise, commit
                  *  anything you like */
                 DPRINTF(MinorExecute, "Committing micro-ops for interrupt[tid:%d]\n",
                         commit_tid);
                 bool only_commit_microops = interrupted &&
                                             hasInterrupt(commit_tid);
                 commit(commit_tid, only_commit_microops, false, branch);
             }

             /* Halt fetch, but don't do it until we have the current instruction in
              *  the bag */
             if (commit_info.drainState == DrainHaltFetch) {
                 updateBranchData(commit_tid, BranchData::HaltFetch,
                         MinorDynInst::bubble(), TheISA::PCState(0), branch);

                 cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
                 setDrainState(commit_tid, DrainAllInsts);
             }
         }
         ThreadID issue_tid = getIssuingThread();
         /* This will issue merrily even when interrupted in the sure and
          *  certain knowledge that the interrupt with change the stream */
         if (issue_tid != InvalidThreadID) {
             DPRINTF(MinorExecute, "Attempting to issue [tid:%d]\n",
                     issue_tid);
             num_issued = issue(issue_tid);
         }

     }

     /* Run logic to step functional units + decide if we are active on the next
      * clock cycle */
     std::vector<MinorDynInstPtr> next_issuable_insts;
     bool can_issue_next = false;

     for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
         /* Find the next issuable instruction for each thread and see if it can
            be issued */
         if (getInput(tid)) {
             unsigned int input_index = executeInfo[tid].inputIndex;
             MinorDynInstPtr inst = getInput(tid)->insts[input_index];
             if (inst->isFault()) {
                 can_issue_next = true;
             } else if (!inst->isBubble()) {
                 next_issuable_insts.push_back(inst);
             }
         }
     }

     bool becoming_stalled = true;

     /* Advance the pipelines and note whether they still need to be
      * advanced */
     for (unsigned int i = 0; i < numFuncUnits; i++) {
         FUPipeline *fu = funcUnits[i];
         fu->advance();

         /* If we need to tick again, the pipeline will have been left or set
          * to be unstalled */
         if (fu->occupancy !=0 && !fu->stalled)
             becoming_stalled = false;

         /* Could we possibly issue the next instruction from any thread?
          * This is quite an expensive test and is only used to determine
          * if the CPU should remain active, only run it if we aren't sure
          * we are active next cycle yet */
         for (auto inst : next_issuable_insts) {
             if (!fu->stalled && fu->provides(inst->staticInst->opClass()) &&
                 scoreboard[inst->id.threadId].canInstIssue(inst,
                     NULL, NULL, cpu.curCycle() + Cycles(1),
                     cpu.getContext(inst->id.threadId))) {
                 can_issue_next = true;
                 break;
             }
         }
     }

     bool head_inst_might_commit = false;

     /* Could the head in flight insts be committed */
     for (auto const &info : executeInfo) {
         if (!info.inFlightInsts->empty()) {
             const QueuedInst &head_inst = info.inFlightInsts->front();

             if (head_inst.inst->isNoCostInst()) {
                 head_inst_might_commit = true;
             } else {
                 FUPipeline *fu = funcUnits[head_inst.inst->fuIndex];
                 if ((fu->stalled &&
                      fu->front().inst->id == head_inst.inst->id) ||
                      lsq.findResponse(head_inst.inst))
                 {
                     head_inst_might_commit = true;
                     break;
                 }
             }
         }
     }

     DPRINTF(Activity, "Need to tick num issued insts: %s%s%s%s%s%s\n",
        (num_issued != 0 ? " (issued some insts)" : ""),
        (becoming_stalled ? "(becoming stalled)" : "(not becoming stalled)"),
        (can_issue_next ? " (can issued next inst)" : ""),
        (head_inst_might_commit ? "(head inst might commit)" : ""),
        (lsq.needsToTick() ? " (LSQ needs to tick)" : ""),
        (interrupted ? " (interrupted)" : ""));

     bool need_to_tick =
        num_issued != 0 || /* Issued some insts this cycle */
        !becoming_stalled || /* Some FU pipelines can still move */
        can_issue_next || /* Can still issue a new inst */
        head_inst_might_commit || /* Could possible commit the next inst */
        lsq.needsToTick() || /* Must step the dcache port */
        interrupted; /* There are pending interrupts */

     if (!need_to_tick) {
         DPRINTF(Activity, "The next cycle might be skippable as there are no"
             " advanceable FUs\n");
     }

     /* Wake up if we need to tick again */
     if (need_to_tick)
         cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

     /* Note activity of following buffer */
     if (!branch.isBubble())
         cpu.activityRecorder->activity();

     /* Make sure the input (if any left) is pushed */
     if (!inp.outputWire->isBubble())
         inputBuffer[inp.outputWire->threadId].pushTail();
 }

 ThreadID
 Execute::checkInterrupts(BranchData& branch, bool& interrupted)
 {
     ThreadID tid = interruptPriority;
     /* Evaluate interrupts in round-robin based upon service */
     do {
         /* Has an interrupt been signalled?  This may not be acted on
          *  straighaway so this is different from took_interrupt */
         bool thread_interrupted = false;

         if (FullSystem && cpu.getInterruptController(tid)) {
             /* This is here because it seems that after drainResume the
              * interrupt controller isn't always set */
             thread_interrupted = executeInfo[tid].drainState == NotDraining &&
                 isInterrupted(tid);
             interrupted = interrupted || thread_interrupted;
         } else {
             DPRINTF(MinorInterrupt, "No interrupt controller\n");
         }
         DPRINTF(MinorInterrupt, "[tid:%d] thread_interrupted?=%d isInbetweenInsts?=%d\n",
                 tid, thread_interrupted, isInbetweenInsts(tid));
         /* Act on interrupts */
         if (thread_interrupted && isInbetweenInsts(tid)) {
             if (takeInterrupt(tid, branch)) {
                 interruptPriority = tid;
                 return tid;
             }
         } else {
             tid = (tid + 1) % cpu.numThreads;
         }
     } while (tid != interruptPriority);

     return InvalidThreadID;
 }

 bool
 Execute::hasInterrupt(ThreadID thread_id)
 {
     if (FullSystem && cpu.getInterruptController(thread_id)) {
         return executeInfo[thread_id].drainState == NotDraining &&
                isInterrupted(thread_id);
     }

     return false;
 }

 void
 Execute::minorTrace() const
 {
     std::ostringstream insts;
     std::ostringstream stalled;

     executeInfo[0].instsBeingCommitted.reportData(insts);
     lsq.minorTrace();
     inputBuffer[0].minorTrace();
     scoreboard[0].minorTrace();

     /* Report functional unit stalling in one string */
     unsigned int i = 0;
     while (i < numFuncUnits)
     {
         stalled << (funcUnits[i]->stalled ? '1' : 'E');
         i++;
         if (i != numFuncUnits)
             stalled << ',';
     }

     MINORTRACE("insts=%s inputIndex=%d streamSeqNum=%d"
         " stalled=%s drainState=%d isInbetweenInsts=%d\n",
         insts.str(), executeInfo[0].inputIndex, executeInfo[0].streamSeqNum,
         stalled.str(), executeInfo[0].drainState, isInbetweenInsts(0));

     std::for_each(funcUnits.begin(), funcUnits.end(),
         std::mem_fun(&FUPipeline::minorTrace));

     executeInfo[0].inFlightInsts->minorTrace();
     executeInfo[0].inFUMemInsts->minorTrace();
 }

 inline ThreadID
 Execute::getCommittingThread()
 {
     std::vector<ThreadID> priority_list;

     switch (cpu.threadPolicy) {
       case Enums::SingleThreaded:
           return 0;
       case Enums::RoundRobin:
           priority_list = cpu.roundRobinPriority(commitPriority);
           break;
       case Enums::Random:
           priority_list = cpu.randomPriority();
           break;
       default:
           panic("Invalid thread policy");
     }

     for (auto tid : priority_list) {
         ExecuteThreadInfo &ex_info = executeInfo[tid];
         bool can_commit_insts = !ex_info.inFlightInsts->empty();
         if (can_commit_insts) {
             QueuedInst *head_inflight_inst = &(ex_info.inFlightInsts->front());
             MinorDynInstPtr inst = head_inflight_inst->inst;

             can_commit_insts = can_commit_insts &&
                 (!inst->inLSQ || (lsq.findResponse(inst) != NULL));

             if (!inst->inLSQ) {
                 bool can_transfer_mem_inst = false;
                 if (!ex_info.inFUMemInsts->empty() && lsq.canRequest()) {
                     const MinorDynInstPtr head_mem_ref_inst =
                         ex_info.inFUMemInsts->front().inst;
                     FUPipeline *fu = funcUnits[head_mem_ref_inst->fuIndex];
                     const MinorDynInstPtr &fu_inst = fu->front().inst;
                     can_transfer_mem_inst =
                         !fu_inst->isBubble() &&
                          fu_inst->id.threadId == tid &&
                          !fu_inst->inLSQ &&
                          fu_inst->canEarlyIssue &&
                          inst->id.execSeqNum > fu_inst->instToWaitFor;
                 }

                 bool can_execute_fu_inst = inst->fuIndex == noCostFUIndex;
                 if (can_commit_insts && !can_transfer_mem_inst &&
                         inst->fuIndex != noCostFUIndex)
                 {
                     QueuedInst& fu_inst = funcUnits[inst->fuIndex]->front();
                     can_execute_fu_inst = !fu_inst.inst->isBubble() &&
                         fu_inst.inst->id == inst->id;
                 }

                 can_commit_insts = can_commit_insts &&
                     (can_transfer_mem_inst || can_execute_fu_inst);
             }
         }


         if (can_commit_insts) {
             commitPriority = tid;
             return tid;
         }
     }

     return InvalidThreadID;
 }

 inline ThreadID
 Execute::getIssuingThread()
 {
     std::vector<ThreadID> priority_list;

     switch (cpu.threadPolicy) {
       case Enums::SingleThreaded:
           return 0;
       case Enums::RoundRobin:
           priority_list = cpu.roundRobinPriority(issuePriority);
           break;
       case Enums::Random:
           priority_list = cpu.randomPriority();
           break;
       default:
           panic("Invalid thread scheduling policy.");
     }

     for (auto tid : priority_list) {
         if (getInput(tid)) {
             issuePriority = tid;
             return tid;
         }
     }

     return InvalidThreadID;
 }

 void
 Execute::drainResume()
 {
     DPRINTF(Drain, "MinorExecute drainResume\n");

     for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
         setDrainState(tid, NotDraining);
     }

     cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
 }

 std::ostream &operator <<(std::ostream &os, Execute::DrainState state)
 {
     switch (state)
     {
         case Execute::NotDraining:
           os << "NotDraining";
           break;
         case Execute::DrainCurrentInst:
           os << "DrainCurrentInst";
           break;
         case Execute::DrainHaltFetch:
           os << "DrainHaltFetch";
           break;
         case Execute::DrainAllInsts:
           os << "DrainAllInsts";
           break;
         default:
           os << "Drain-" << static_cast<int>(state);
           break;
     }

     return os;
 }

 void
 Execute::setDrainState(ThreadID thread_id, DrainState state)
 {
     DPRINTF(Drain, "setDrainState[%d]: %s\n", thread_id, state);
     executeInfo[thread_id].drainState = state;
 }

 unsigned int
 Execute::drain()
 {
     DPRINTF(Drain, "MinorExecute drain\n");

     for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
         if (executeInfo[tid].drainState == NotDraining) {
             cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

             /* Go to DrainCurrentInst if we're between microops
              * or waiting on an unbufferable memory operation.
              * Otherwise we can go straight to DrainHaltFetch
              */
             if (isInbetweenInsts(tid))
                 setDrainState(tid, DrainHaltFetch);
             else
                 setDrainState(tid, DrainCurrentInst);
         }
     }
     return (isDrained() ? 0 : 1);
 }

 bool
 Execute::isDrained()
 {
     if (!lsq.isDrained())
         return false;

     for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
         if (!inputBuffer[tid].empty() ||
             !executeInfo[tid].inFlightInsts->empty()) {

             return false;
         }
     }

     return true;
 }

 Execute::~Execute()
 {
     for (unsigned int i = 0; i < numFuncUnits; i++)
         delete funcUnits[i];

     for (ThreadID tid = 0; tid < cpu.numThreads; tid++)
         delete executeInfo[tid].inFlightInsts;
 }

 bool
 Execute::instIsRightStream(MinorDynInstPtr inst)
 {
     return inst->id.streamSeqNum == executeInfo[inst->id.threadId].streamSeqNum;
 }

 bool
 Execute::instIsHeadInst(MinorDynInstPtr inst)
 {
     bool ret = false;

     if (!executeInfo[inst->id.threadId].inFlightInsts->empty())
         ret = executeInfo[inst->id.threadId].inFlightInsts->front().inst->id == inst->id;

     return ret;
 }

 MinorCPU::MinorCPUPort &
 Execute::getDcachePort()
 {
     return lsq.getDcachePort();
 }

 }