| /* |
| * Copyright (c) 2010-2014 ARM Limited |
| * Copyright (c) 2012-2013 AMD |
| * All rights reserved. |
| * |
| * The license below extends only to copyright in the software and shall |
| * not be construed as granting a license to any other intellectual |
| * property including but not limited to intellectual property relating |
| * to a hardware implementation of the functionality of the software |
| * licensed hereunder. You may use the software subject to the license |
| * terms below provided that you ensure that this notice is replicated |
| * unmodified and in its entirety in all distributions of the software, |
| * modified or unmodified, in source code or in binary form. |
| * |
| * Copyright (c) 2004-2006 The Regents of The University of Michigan |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer; |
| * redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution; |
| * neither the name of the copyright holders nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #ifndef __CPU_O3_FETCH_IMPL_HH__ |
| #define __CPU_O3_FETCH_IMPL_HH__ |
| |
| #include <algorithm> |
| #include <cstring> |
| #include <list> |
| #include <map> |
| #include <queue> |
| |
| #include "arch/generic/tlb.hh" |
| #include "arch/utility.hh" |
| #include "base/random.hh" |
| #include "base/types.hh" |
| #include "config/the_isa.hh" |
| #include "cpu/base.hh" |
| #include "cpu/o3/cpu.hh" |
| #include "cpu/o3/fetch.hh" |
| #include "cpu/exetrace.hh" |
| #include "debug/Activity.hh" |
| #include "debug/Drain.hh" |
| #include "debug/Fetch.hh" |
| #include "debug/O3CPU.hh" |
| #include "debug/O3PipeView.hh" |
| #include "mem/packet.hh" |
| #include "params/DerivO3CPU.hh" |
| #include "sim/byteswap.hh" |
| #include "sim/core.hh" |
| #include "sim/eventq.hh" |
| #include "sim/full_system.hh" |
| #include "sim/system.hh" |
| #include "cpu/o3/isa_specific.hh" |
| |
| template<class Impl> |
| DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, const DerivO3CPUParams ¶ms) |
| : fetchPolicy(params.smtFetchPolicy), |
| cpu(_cpu), |
| branchPred(nullptr), |
| decodeToFetchDelay(params.decodeToFetchDelay), |
| renameToFetchDelay(params.renameToFetchDelay), |
| iewToFetchDelay(params.iewToFetchDelay), |
| commitToFetchDelay(params.commitToFetchDelay), |
| fetchWidth(params.fetchWidth), |
| decodeWidth(params.decodeWidth), |
| retryPkt(NULL), |
| retryTid(InvalidThreadID), |
| cacheBlkSize(cpu->cacheLineSize()), |
| fetchBufferSize(params.fetchBufferSize), |
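      // Note: this mask assumes fetchBufferSize is a power of two; e.g. a
      // 64-byte buffer gives mask 0x3f, and addr & ~mask aligns addr down
      // to a 64-byte boundary.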
| fetchBufferMask(fetchBufferSize - 1), |
| fetchQueueSize(params.fetchQueueSize), |
| numThreads(params.numThreads), |
| numFetchingThreads(params.smtNumFetchingThreads), |
| icachePort(this, _cpu), |
      finishTranslationEvent(this),
      fetchStats(_cpu, this)
| { |
| if (numThreads > Impl::MaxThreads) |
| fatal("numThreads (%d) is larger than compiled limit (%d),\n" |
| "\tincrease MaxThreads in src/cpu/o3/impl.hh\n", |
| numThreads, static_cast<int>(Impl::MaxThreads)); |
| if (fetchWidth > Impl::MaxWidth) |
| fatal("fetchWidth (%d) is larger than compiled limit (%d),\n" |
| "\tincrease MaxWidth in src/cpu/o3/impl.hh\n", |
| fetchWidth, static_cast<int>(Impl::MaxWidth)); |
| if (fetchBufferSize > cacheBlkSize) |
| fatal("fetch buffer size (%u bytes) is greater than the cache " |
| "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize); |
| if (cacheBlkSize % fetchBufferSize) |
| fatal("cache block (%u bytes) is not a multiple of the " |
| "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize); |
| |
| // Get the size of an instruction. |
| instSize = sizeof(TheISA::MachInst); |
| |
| for (int i = 0; i < Impl::MaxThreads; i++) { |
| fetchStatus[i] = Idle; |
| decoder[i] = nullptr; |
| pc[i] = 0; |
| fetchOffset[i] = 0; |
| macroop[i] = nullptr; |
| delayedCommit[i] = false; |
| memReq[i] = nullptr; |
| stalls[i] = {false, false}; |
| fetchBuffer[i] = NULL; |
| fetchBufferPC[i] = 0; |
| fetchBufferValid[i] = false; |
| lastIcacheStall[i] = 0; |
| issuePipelinedIfetch[i] = false; |
| } |
| |
| branchPred = params.branchPred; |
| |
| for (ThreadID tid = 0; tid < numThreads; tid++) { |
| decoder[tid] = new TheISA::Decoder( |
| dynamic_cast<TheISA::ISA *>(params.isa[tid])); |
        // Create space to buffer the fetched data; the fetch buffer
        // may be smaller than a full cache line.
| fetchBuffer[tid] = new uint8_t[fetchBufferSize]; |
| } |
| } |
| |
| template <class Impl> |
| std::string |
| DefaultFetch<Impl>::name() const |
| { |
| return cpu->name() + ".fetch"; |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::regProbePoints() |
| { |
| ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch"); |
    ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),
                                                       "FetchRequest");
}
| |
| template <class Impl> |
| DefaultFetch<Impl>:: |
| FetchStatGroup::FetchStatGroup(O3CPU *cpu, DefaultFetch *fetch) |
| : Stats::Group(cpu, "fetch"), |
| ADD_STAT(icacheStallCycles, UNIT_CYCLE, |
| "Number of cycles fetch is stalled on an Icache miss"), |
| ADD_STAT(insts, UNIT_COUNT, "Number of instructions fetch has processed"), |
| ADD_STAT(branches, UNIT_COUNT, |
| "Number of branches that fetch encountered"), |
| ADD_STAT(predictedBranches, UNIT_COUNT, |
| "Number of branches that fetch has predicted taken"), |
| ADD_STAT(cycles, UNIT_CYCLE, |
| "Number of cycles fetch has run and was not squashing or " |
| "blocked"), |
| ADD_STAT(squashCycles, UNIT_CYCLE, |
| "Number of cycles fetch has spent squashing"), |
| ADD_STAT(tlbCycles, UNIT_CYCLE, |
| "Number of cycles fetch has spent waiting for tlb"), |
| ADD_STAT(idleCycles, UNIT_CYCLE, "Number of cycles fetch was idle"), |
| ADD_STAT(blockedCycles, UNIT_CYCLE, |
| "Number of cycles fetch has spent blocked"), |
      ADD_STAT(miscStallCycles, UNIT_CYCLE,
               "Number of cycles fetch has spent waiting on interrupts, bad "
               "addresses, or running out of MSHRs"),
| ADD_STAT(pendingDrainCycles, UNIT_CYCLE, |
| "Number of cycles fetch has spent waiting on pipes to drain"), |
| ADD_STAT(noActiveThreadStallCycles, UNIT_CYCLE, |
| "Number of stall cycles due to no active thread to fetch from"), |
| ADD_STAT(pendingTrapStallCycles, UNIT_CYCLE, |
| "Number of stall cycles due to pending traps"), |
| ADD_STAT(pendingQuiesceStallCycles, UNIT_CYCLE, |
| "Number of stall cycles due to pending quiesce instructions"), |
| ADD_STAT(icacheWaitRetryStallCycles, UNIT_CYCLE, |
| "Number of stall cycles due to full MSHR"), |
| ADD_STAT(cacheLines, UNIT_COUNT, "Number of cache lines fetched"), |
| ADD_STAT(icacheSquashes, UNIT_COUNT, |
| "Number of outstanding Icache misses that were squashed"), |
| ADD_STAT(tlbSquashes, UNIT_COUNT, |
| "Number of outstanding ITLB misses that were squashed"), |
| ADD_STAT(nisnDist, UNIT_COUNT, |
| "Number of instructions fetched each cycle (Total)"), |
| ADD_STAT(idleRate, UNIT_RATIO, "Ratio of cycles fetch was idle", |
| idleCycles / cpu->baseStats.numCycles), |
| ADD_STAT(branchRate, UNIT_RATIO, "Number of branch fetches per cycle", |
| branches / cpu->baseStats.numCycles), |
| ADD_STAT(rate, UNIT_RATE(Stats::Units::Count, Stats::Units::Cycle), |
| "Number of inst fetches per cycle", |
| insts / cpu->baseStats.numCycles) |
| { |
| icacheStallCycles |
| .prereq(icacheStallCycles); |
| insts |
| .prereq(insts); |
| branches |
| .prereq(branches); |
| predictedBranches |
| .prereq(predictedBranches); |
| cycles |
| .prereq(cycles); |
| squashCycles |
| .prereq(squashCycles); |
| tlbCycles |
| .prereq(tlbCycles); |
| idleCycles |
| .prereq(idleCycles); |
| blockedCycles |
| .prereq(blockedCycles); |
| cacheLines |
| .prereq(cacheLines); |
| miscStallCycles |
| .prereq(miscStallCycles); |
| pendingDrainCycles |
| .prereq(pendingDrainCycles); |
| noActiveThreadStallCycles |
| .prereq(noActiveThreadStallCycles); |
| pendingTrapStallCycles |
| .prereq(pendingTrapStallCycles); |
| pendingQuiesceStallCycles |
| .prereq(pendingQuiesceStallCycles); |
| icacheWaitRetryStallCycles |
| .prereq(icacheWaitRetryStallCycles); |
| icacheSquashes |
| .prereq(icacheSquashes); |
| tlbSquashes |
| .prereq(tlbSquashes); |
| nisnDist |
| .init(/* base value */ 0, |
| /* last value */ fetch->fetchWidth, |
| /* bucket size */ 1) |
| .flags(Stats::pdf); |
| idleRate |
| .prereq(idleRate); |
| branchRate |
| .flags(Stats::total); |
| rate |
| .flags(Stats::total); |
| } |

template<class Impl>
| void |
| DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer) |
| { |
| timeBuffer = time_buffer; |
| |
| // Create wires to get information from proper places in time buffer. |
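    // A wire created with a negative offset reads the entry that was
    // written that many cycles in the past.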
| fromDecode = timeBuffer->getWire(-decodeToFetchDelay); |
| fromRename = timeBuffer->getWire(-renameToFetchDelay); |
| fromIEW = timeBuffer->getWire(-iewToFetchDelay); |
| fromCommit = timeBuffer->getWire(-commitToFetchDelay); |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr) |
| { |
| activeThreads = at_ptr; |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *ftb_ptr) |
| { |
| // Create wire to write information to proper place in fetch time buf. |
| toDecode = ftb_ptr->getWire(0); |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::startupStage() |
| { |
| assert(priorityList.empty()); |
| resetStage(); |
| |
| // Fetch needs to start fetching instructions at the very beginning, |
| // so it must start up in active state. |
| switchToActive(); |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::clearStates(ThreadID tid) |
| { |
| fetchStatus[tid] = Running; |
| pc[tid] = cpu->pcState(tid); |
| fetchOffset[tid] = 0; |
| macroop[tid] = NULL; |
| delayedCommit[tid] = false; |
| memReq[tid] = NULL; |
| stalls[tid].decode = false; |
| stalls[tid].drain = false; |
| fetchBufferPC[tid] = 0; |
| fetchBufferValid[tid] = false; |
| fetchQueue[tid].clear(); |
| |
| // TODO not sure what to do with priorityList for now |
| // priorityList.push_back(tid); |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::resetStage() |
| { |
| numInst = 0; |
| interruptPending = false; |
| cacheBlocked = false; |
| |
| priorityList.clear(); |
| |
| // Setup PC and nextPC with initial state. |
| for (ThreadID tid = 0; tid < numThreads; ++tid) { |
| fetchStatus[tid] = Running; |
| pc[tid] = cpu->pcState(tid); |
| fetchOffset[tid] = 0; |
| macroop[tid] = NULL; |
| |
| delayedCommit[tid] = false; |
| memReq[tid] = NULL; |
| |
| stalls[tid].decode = false; |
| stalls[tid].drain = false; |
| |
| fetchBufferPC[tid] = 0; |
| fetchBufferValid[tid] = false; |
| |
| fetchQueue[tid].clear(); |
| |
| priorityList.push_back(tid); |
| } |
| |
| wroteToTimeBuffer = false; |
| _status = Inactive; |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) |
| { |
| ThreadID tid = cpu->contextToThread(pkt->req->contextId()); |
| |
| DPRINTF(Fetch, "[tid:%i] Waking up from cache miss.\n", tid); |
| assert(!cpu->switchedOut()); |
| |
| // Only change the status if it's still waiting on the icache access |
| // to return. |
| if (fetchStatus[tid] != IcacheWaitResponse || |
| pkt->req != memReq[tid]) { |
| ++fetchStats.icacheSquashes; |
| delete pkt; |
| return; |
| } |
| |
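    // Copy the returned data into this thread's fetch buffer.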
| memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize); |
| fetchBufferValid[tid] = true; |
| |
| // Wake up the CPU (if it went to sleep and was waiting on |
| // this completion event). |
| cpu->wakeCPU(); |
| |
| DPRINTF(Activity, "[tid:%i] Activating fetch due to cache completion\n", |
| tid); |
| |
| switchToActive(); |
| |
| // Only switch to IcacheAccessComplete if we're not stalled as well. |
| if (checkStall(tid)) { |
| fetchStatus[tid] = Blocked; |
| } else { |
| fetchStatus[tid] = IcacheAccessComplete; |
| } |
| |
| pkt->req->setAccessLatency(); |
| cpu->ppInstAccessComplete->notify(pkt); |
    // Free the packet and reset the mem req to NULL.
    delete pkt;
    memReq[tid] = NULL;
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::drainResume() |
| { |
| for (ThreadID i = 0; i < numThreads; ++i) { |
| stalls[i].decode = false; |
| stalls[i].drain = false; |
| } |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::drainSanityCheck() const |
| { |
| assert(isDrained()); |
| assert(retryPkt == NULL); |
| assert(retryTid == InvalidThreadID); |
| assert(!cacheBlocked); |
| assert(!interruptPending); |
| |
| for (ThreadID i = 0; i < numThreads; ++i) { |
| assert(!memReq[i]); |
| assert(fetchStatus[i] == Idle || stalls[i].drain); |
| } |
| |
| branchPred->drainSanityCheck(); |
| } |
| |
| template <class Impl> |
| bool |
| DefaultFetch<Impl>::isDrained() const |
| { |
    /* Make sure that threads are either idle or that the commit stage
| * has signaled that draining has completed by setting the drain |
| * stall flag. This effectively forces the pipeline to be disabled |
| * until the whole system is drained (simulation may continue to |
| * drain other components). |
| */ |
| for (ThreadID i = 0; i < numThreads; ++i) { |
| // Verify fetch queues are drained |
| if (!fetchQueue[i].empty()) |
| return false; |
| |
| // Return false if not idle or drain stalled |
| if (fetchStatus[i] != Idle) { |
| if (fetchStatus[i] == Blocked && stalls[i].drain) |
| continue; |
| else |
| return false; |
| } |
| } |
| |
| /* The pipeline might start up again in the middle of the drain |
| * cycle if the finish translation event is scheduled, so make |
| * sure that's not the case. |
| */ |
| return !finishTranslationEvent.scheduled(); |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::takeOverFrom() |
| { |
| assert(cpu->getInstPort().isConnected()); |
    resetStage();
}
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::drainStall(ThreadID tid) |
| { |
| assert(cpu->isDraining()); |
| assert(!stalls[tid].drain); |
| DPRINTF(Drain, "%i: Thread drained.\n", tid); |
| stalls[tid].drain = true; |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::wakeFromQuiesce() |
| { |
| DPRINTF(Fetch, "Waking up from quiesce\n"); |
| // Hopefully this is safe |
| // @todo: Allow other threads to wake from quiesce. |
| fetchStatus[0] = Running; |
| } |
| |
| template <class Impl> |
| inline void |
| DefaultFetch<Impl>::switchToActive() |
| { |
| if (_status == Inactive) { |
| DPRINTF(Activity, "Activating stage.\n"); |
| |
| cpu->activateStage(O3CPU::FetchIdx); |
| |
| _status = Active; |
| } |
| } |
| |
| template <class Impl> |
| inline void |
| DefaultFetch<Impl>::switchToInactive() |
| { |
| if (_status == Active) { |
| DPRINTF(Activity, "Deactivating stage.\n"); |
| |
| cpu->deactivateStage(O3CPU::FetchIdx); |
| |
| _status = Inactive; |
| } |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::deactivateThread(ThreadID tid) |
| { |
| // Update priority list |
| auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid); |
| if (thread_it != priorityList.end()) { |
| priorityList.erase(thread_it); |
| } |
| } |
| |
| template <class Impl> |
| bool |
| DefaultFetch<Impl>::lookupAndUpdateNextPC( |
| const DynInstPtr &inst, TheISA::PCState &nextPC) |
| { |
| // Do branch prediction check here. |
    // A bit of a misnomer: nextPC is actually the current PC until
    // this function updates it.
| bool predict_taken; |
| |
| if (!inst->isControl()) { |
| TheISA::advancePC(nextPC, inst->staticInst); |
| inst->setPredTarg(nextPC); |
| inst->setPredTaken(false); |
| return false; |
| } |
| |
| ThreadID tid = inst->threadNumber; |
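    // predict() returns the taken/not-taken decision and also updates
    // nextPC in place with the predicted next PC state.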
| predict_taken = branchPred->predict(inst->staticInst, inst->seqNum, |
| nextPC, tid); |
| |
| if (predict_taken) { |
| DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x " |
| "predicted to be taken to %s\n", |
| tid, inst->seqNum, inst->pcState().instAddr(), nextPC); |
| } else { |
| DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x " |
| "predicted to be not taken\n", |
| tid, inst->seqNum, inst->pcState().instAddr()); |
| } |
| |
| DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x " |
| "predicted to go to %s\n", |
| tid, inst->seqNum, inst->pcState().instAddr(), nextPC); |
| inst->setPredTarg(nextPC); |
| inst->setPredTaken(predict_taken); |
| |
| ++fetchStats.branches; |
| |
| if (predict_taken) { |
| ++fetchStats.predictedBranches; |
| } |
| |
| return predict_taken; |
| } |
| |
| template <class Impl> |
| bool |
| DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc) |
| { |
| Fault fault = NoFault; |
| |
| assert(!cpu->switchedOut()); |
| |
    // @todo: not sure if these should block translation.
    // AlphaDep
| if (cacheBlocked) { |
| DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n", |
| tid); |
| return false; |
| } else if (checkInterrupt(pc) && !delayedCommit[tid]) { |
        // Hold off fetch from getting new instructions while an
        // interrupt is pending and we are not in the middle of a
        // delayed-commit micro-op sequence, which must complete before
        // the interrupt can be taken.
| DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n", |
| tid); |
| return false; |
| } |
| |
| // Align the fetch address to the start of a fetch buffer segment. |
| Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr); |
| |
| DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n", |
| tid, fetchBufferBlockPC, vaddr); |
| |
| // Setup the memReq to do a read of the first instruction's address. |
| // Set the appropriate read size and flags as well. |
| // Build request here. |
| RequestPtr mem_req = std::make_shared<Request>( |
| fetchBufferBlockPC, fetchBufferSize, |
| Request::INST_FETCH, cpu->instRequestorId(), pc, |
| cpu->thread[tid]->contextId()); |
| |
| mem_req->taskId(cpu->taskId()); |
| |
| memReq[tid] = mem_req; |
| |
| // Initiate translation of the icache block |
| fetchStatus[tid] = ItlbWait; |
| FetchTranslation *trans = new FetchTranslation(this); |
| cpu->mmu->translateTiming(mem_req, cpu->thread[tid]->getTC(), |
| trans, BaseTLB::Execute); |
| return true; |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::finishTranslation(const Fault &fault, |
| const RequestPtr &mem_req) |
| { |
| ThreadID tid = cpu->contextToThread(mem_req->contextId()); |
| Addr fetchBufferBlockPC = mem_req->getVaddr(); |
| |
| assert(!cpu->switchedOut()); |
| |
| // Wake up CPU if it was idle |
| cpu->wakeCPU(); |
| |
| if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] || |
| mem_req->getVaddr() != memReq[tid]->getVaddr()) { |
| DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n", |
| tid); |
| ++fetchStats.tlbSquashes; |
| return; |
| } |
| |
| |
| // If translation was successful, attempt to read the icache block. |
| if (fault == NoFault) { |
| // Check that we're not going off into random memory |
| // If we have, just wait around for commit to squash something and put |
| // us on the right track |
| if (!cpu->system->isMemAddr(mem_req->getPaddr())) { |
| warn("Address %#x is outside of physical memory, stopping fetch\n", |
| mem_req->getPaddr()); |
| fetchStatus[tid] = NoGoodAddr; |
| memReq[tid] = NULL; |
| return; |
| } |
| |
| // Build packet here. |
| PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq); |
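        // dataDynamic() hands ownership of the buffer to the packet, which
        // frees it when the packet is destroyed.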
| data_pkt->dataDynamic(new uint8_t[fetchBufferSize]); |
| |
| fetchBufferPC[tid] = fetchBufferBlockPC; |
| fetchBufferValid[tid] = false; |
| DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); |
| |
| fetchStats.cacheLines++; |
| |
| // Access the cache. |
| if (!icachePort.sendTimingReq(data_pkt)) { |
| assert(retryPkt == NULL); |
| assert(retryTid == InvalidThreadID); |
| DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); |
| |
| fetchStatus[tid] = IcacheWaitRetry; |
| retryPkt = data_pkt; |
| retryTid = tid; |
| cacheBlocked = true; |
| } else { |
| DPRINTF(Fetch, "[tid:%i] Doing Icache access.\n", tid); |
| DPRINTF(Activity, "[tid:%i] Activity: Waiting on I-cache " |
| "response.\n", tid); |
| lastIcacheStall[tid] = curTick(); |
| fetchStatus[tid] = IcacheWaitResponse; |
| // Notify Fetch Request probe when a packet containing a fetch |
| // request is successfully sent |
| ppFetchRequestSent->notify(mem_req); |
| } |
| } else { |
| // Don't send an instruction to decode if we can't handle it. |
        if (!(numInst < fetchWidth) ||
            !(fetchQueue[tid].size() < fetchQueueSize)) {
| assert(!finishTranslationEvent.scheduled()); |
| finishTranslationEvent.setFault(fault); |
| finishTranslationEvent.setReq(mem_req); |
| cpu->schedule(finishTranslationEvent, |
| cpu->clockEdge(Cycles(1))); |
| return; |
| } |
        DPRINTF(Fetch, "[tid:%i] Translation faulted (vaddr: %#x)\n",
                tid, mem_req->getVaddr());
| // Translation faulted, icache request won't be sent. |
| memReq[tid] = NULL; |
| |
| // Send the fault to commit. This thread will not do anything |
| // until commit handles the fault. The only other way it can |
| // wake up is if a squash comes along and changes the PC. |
| TheISA::PCState fetchPC = pc[tid]; |
| |
| DPRINTF(Fetch, "[tid:%i] Translation faulted, building noop.\n", tid); |
        // We will use a nop in order to carry the fault.
| DynInstPtr instruction = buildInst(tid, StaticInst::nopStaticInstPtr, |
| NULL, fetchPC, fetchPC, false); |
| instruction->setNotAnInst(); |
| |
| instruction->setPredTarg(fetchPC); |
| instruction->fault = fault; |
| wroteToTimeBuffer = true; |
| |
| DPRINTF(Activity, "Activity this cycle.\n"); |
| cpu->activityThisCycle(); |
| |
| fetchStatus[tid] = TrapPending; |
| |
| DPRINTF(Fetch, "[tid:%i] Blocked, need to handle the trap.\n", tid); |
| DPRINTF(Fetch, "[tid:%i] fault (%s) detected @ PC %s.\n", |
| tid, fault->name(), pc[tid]); |
| } |
| _status = updateFetchStatus(); |
| } |
| |
| template <class Impl> |
| inline void |
| DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC, |
| const DynInstPtr squashInst, ThreadID tid) |
| { |
| DPRINTF(Fetch, "[tid:%i] Squashing, setting PC to: %s.\n", |
| tid, newPC); |
| |
| pc[tid] = newPC; |
| fetchOffset[tid] = 0; |
| if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr()) |
| macroop[tid] = squashInst->macroop; |
| else |
| macroop[tid] = NULL; |
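    // Discard any partially-decoded bytes the decoder may still hold.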
| decoder[tid]->reset(); |
| |
| // Clear the icache miss if it's outstanding. |
| if (fetchStatus[tid] == IcacheWaitResponse) { |
| DPRINTF(Fetch, "[tid:%i] Squashing outstanding Icache miss.\n", |
| tid); |
| memReq[tid] = NULL; |
| } else if (fetchStatus[tid] == ItlbWait) { |
| DPRINTF(Fetch, "[tid:%i] Squashing outstanding ITLB miss.\n", |
| tid); |
| memReq[tid] = NULL; |
| } |
| |
| // Get rid of the retrying packet if it was from this thread. |
| if (retryTid == tid) { |
| assert(cacheBlocked); |
| if (retryPkt) { |
| delete retryPkt; |
| } |
| retryPkt = NULL; |
| retryTid = InvalidThreadID; |
| } |
| |
| fetchStatus[tid] = Squashing; |
| |
| // Empty fetch queue |
| fetchQueue[tid].clear(); |
| |
    // Since microops are being squashed, it is not known whether the
    // youngest non-squashed microop was marked delayed commit
| // or not. Setting the flag to true ensures that the |
| // interrupts are not handled when they cannot be, though |
| // some opportunities to handle interrupts may be missed. |
| delayedCommit[tid] = true; |
| |
| ++fetchStats.squashCycles; |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC, |
| const DynInstPtr squashInst, |
| const InstSeqNum seq_num, ThreadID tid) |
| { |
| DPRINTF(Fetch, "[tid:%i] Squashing from decode.\n", tid); |
| |
| doSquash(newPC, squashInst, tid); |
| |
| // Tell the CPU to remove any instructions that are in flight between |
| // fetch and decode. |
| cpu->removeInstsUntil(seq_num, tid); |
| } |
| |
| template<class Impl> |
| bool |
| DefaultFetch<Impl>::checkStall(ThreadID tid) const |
| { |
| bool ret_val = false; |
| |
| if (stalls[tid].drain) { |
| assert(cpu->isDraining()); |
        DPRINTF(Fetch, "[tid:%i] Drain stall detected.\n", tid);
| ret_val = true; |
| } |
| |
| return ret_val; |
| } |
| |
| template<class Impl> |
| typename DefaultFetch<Impl>::FetchStatus |
| DefaultFetch<Impl>::updateFetchStatus() |
| { |
| //Check Running |
| std::list<ThreadID>::iterator threads = activeThreads->begin(); |
| std::list<ThreadID>::iterator end = activeThreads->end(); |
| |
| while (threads != end) { |
| ThreadID tid = *threads++; |
| |
| if (fetchStatus[tid] == Running || |
| fetchStatus[tid] == Squashing || |
| fetchStatus[tid] == IcacheAccessComplete) { |
| |
| if (_status == Inactive) { |
| DPRINTF(Activity, "[tid:%i] Activating stage.\n",tid); |
| |
| if (fetchStatus[tid] == IcacheAccessComplete) { |
                    DPRINTF(Activity, "[tid:%i] Activating fetch due to "
                            "cache completion\n", tid);
| } |
| |
| cpu->activateStage(O3CPU::FetchIdx); |
| } |
| |
| return Active; |
| } |
| } |
| |
| // Stage is switching from active to inactive, notify CPU of it. |
| if (_status == Active) { |
| DPRINTF(Activity, "Deactivating stage.\n"); |
| |
| cpu->deactivateStage(O3CPU::FetchIdx); |
| } |
| |
| return Inactive; |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::squash(const TheISA::PCState &newPC, |
| const InstSeqNum seq_num, DynInstPtr squashInst, |
| ThreadID tid) |
| { |
| DPRINTF(Fetch, "[tid:%i] Squash from commit.\n", tid); |
| |
| doSquash(newPC, squashInst, tid); |
| |
| // Tell the CPU to remove any instructions that are not in the ROB. |
| cpu->removeInstsNotInROB(tid); |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::tick() |
| { |
| std::list<ThreadID>::iterator threads = activeThreads->begin(); |
| std::list<ThreadID>::iterator end = activeThreads->end(); |
| bool status_change = false; |
| |
| wroteToTimeBuffer = false; |
| |
| for (ThreadID i = 0; i < numThreads; ++i) { |
| issuePipelinedIfetch[i] = false; |
| } |
| |
| while (threads != end) { |
| ThreadID tid = *threads++; |
| |
| // Check the signals for each thread to determine the proper status |
| // for each thread. |
| bool updated_status = checkSignalsAndUpdate(tid); |
| status_change = status_change || updated_status; |
| } |
| |
| DPRINTF(Fetch, "Running stage.\n"); |
| |
| if (FullSystem) { |
| if (fromCommit->commitInfo[0].interruptPending) { |
| interruptPending = true; |
| } |
| |
| if (fromCommit->commitInfo[0].clearInterrupt) { |
| interruptPending = false; |
| } |
| } |
| |
| for (threadFetched = 0; threadFetched < numFetchingThreads; |
| threadFetched++) { |
| // Fetch each of the actively fetching threads. |
| fetch(status_change); |
| } |
| |
| // Record number of instructions fetched this cycle for distribution. |
| fetchStats.nisnDist.sample(numInst); |
| |
| if (status_change) { |
| // Change the fetch stage status if there was a status change. |
| _status = updateFetchStatus(); |
| } |
| |
| // Issue the next I-cache request if possible. |
| for (ThreadID i = 0; i < numThreads; ++i) { |
| if (issuePipelinedIfetch[i]) { |
| pipelineIcacheAccesses(i); |
| } |
| } |
| |
| // Send instructions enqueued into the fetch queue to decode. |
| // Limit rate by fetchWidth. Stall if decode is stalled. |
| unsigned insts_to_decode = 0; |
| unsigned available_insts = 0; |
| |
| for (auto tid : *activeThreads) { |
| if (!stalls[tid].decode) { |
| available_insts += fetchQueue[tid].size(); |
| } |
| } |
| |
| // Pick a random thread to start trying to grab instructions from |
| auto tid_itr = activeThreads->begin(); |
    std::advance(tid_itr,
                 random_mt.random<uint8_t>(0, activeThreads->size() - 1));
| |
| while (available_insts != 0 && insts_to_decode < decodeWidth) { |
| ThreadID tid = *tid_itr; |
| if (!stalls[tid].decode && !fetchQueue[tid].empty()) { |
| const auto& inst = fetchQueue[tid].front(); |
| toDecode->insts[toDecode->size++] = inst; |
| DPRINTF(Fetch, "[tid:%i] [sn:%llu] Sending instruction to decode " |
| "from fetch queue. Fetch queue size: %i.\n", |
| tid, inst->seqNum, fetchQueue[tid].size()); |
| |
| wroteToTimeBuffer = true; |
| fetchQueue[tid].pop_front(); |
| insts_to_decode++; |
| available_insts--; |
| } |
| |
| tid_itr++; |
| // Wrap around if at end of active threads list |
| if (tid_itr == activeThreads->end()) |
| tid_itr = activeThreads->begin(); |
| } |
| |
| // If there was activity this cycle, inform the CPU of it. |
| if (wroteToTimeBuffer) { |
| DPRINTF(Activity, "Activity this cycle.\n"); |
| cpu->activityThisCycle(); |
| } |
| |
| // Reset the number of the instruction we've fetched. |
| numInst = 0; |
| } |
| |
| template <class Impl> |
| bool |
| DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid) |
| { |
| // Update the per thread stall statuses. |
| if (fromDecode->decodeBlock[tid]) { |
| stalls[tid].decode = true; |
| } |
| |
| if (fromDecode->decodeUnblock[tid]) { |
| assert(stalls[tid].decode); |
| assert(!fromDecode->decodeBlock[tid]); |
| stalls[tid].decode = false; |
| } |
| |
| // Check squash signals from commit. |
| if (fromCommit->commitInfo[tid].squash) { |
| |
| DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash " |
| "from commit.\n",tid); |
| // In any case, squash. |
| squash(fromCommit->commitInfo[tid].pc, |
| fromCommit->commitInfo[tid].doneSeqNum, |
| fromCommit->commitInfo[tid].squashInst, tid); |
| |
        // If it was a branch mispredict on a control instruction, update the
        // branch predictor with that instruction; otherwise just kill the
        // invalid state we generated after the squashing sequence number.
| if (fromCommit->commitInfo[tid].mispredictInst && |
| fromCommit->commitInfo[tid].mispredictInst->isControl()) { |
| branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum, |
| fromCommit->commitInfo[tid].pc, |
| fromCommit->commitInfo[tid].branchTaken, |
| tid); |
| } else { |
| branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum, |
| tid); |
| } |
| |
| return true; |
| } else if (fromCommit->commitInfo[tid].doneSeqNum) { |
        // Update the branch predictor if it wasn't a squashed instruction
        // that was broadcast.
| branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid); |
| } |
| |
| // Check squash signals from decode. |
| if (fromDecode->decodeInfo[tid].squash) { |
| DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash " |
| "from decode.\n",tid); |
| |
| // Update the branch predictor. |
| if (fromDecode->decodeInfo[tid].branchMispredict) { |
| branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum, |
| fromDecode->decodeInfo[tid].nextPC, |
| fromDecode->decodeInfo[tid].branchTaken, |
| tid); |
| } else { |
| branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum, |
| tid); |
| } |
| |
| if (fetchStatus[tid] != Squashing) { |
| |
| DPRINTF(Fetch, "Squashing from decode with PC = %s\n", |
| fromDecode->decodeInfo[tid].nextPC); |
| // Squash unless we're already squashing |
| squashFromDecode(fromDecode->decodeInfo[tid].nextPC, |
| fromDecode->decodeInfo[tid].squashInst, |
| fromDecode->decodeInfo[tid].doneSeqNum, |
| tid); |
| |
| return true; |
| } |
| } |
| |
| if (checkStall(tid) && |
| fetchStatus[tid] != IcacheWaitResponse && |
| fetchStatus[tid] != IcacheWaitRetry && |
| fetchStatus[tid] != ItlbWait && |
| fetchStatus[tid] != QuiescePending) { |
| DPRINTF(Fetch, "[tid:%i] Setting to blocked\n",tid); |
| |
| fetchStatus[tid] = Blocked; |
| |
| return true; |
| } |
| |
| if (fetchStatus[tid] == Blocked || |
| fetchStatus[tid] == Squashing) { |
| // Switch status to running if fetch isn't being told to block or |
| // squash this cycle. |
| DPRINTF(Fetch, "[tid:%i] Done squashing, switching to running.\n", |
| tid); |
| |
| fetchStatus[tid] = Running; |
| |
| return true; |
| } |
| |
| // If we've reached this point, we have not gotten any signals that |
| // cause fetch to change its status. Fetch remains the same as before. |
| return false; |
| } |
| |
| template<class Impl> |
| typename Impl::DynInstPtr |
| DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst, |
| StaticInstPtr curMacroop, TheISA::PCState thisPC, |
| TheISA::PCState nextPC, bool trace) |
| { |
| // Get a sequence number. |
| InstSeqNum seq = cpu->getAndIncrementInstSeq(); |
| |
| // Create a new DynInst from the instruction fetched. |
| DynInstPtr instruction = |
| new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu); |
| instruction->setTid(tid); |
| |
| instruction->setThreadState(cpu->thread[tid]); |
| |
| DPRINTF(Fetch, "[tid:%i] Instruction PC %#x (%d) created " |
| "[sn:%lli].\n", tid, thisPC.instAddr(), |
| thisPC.microPC(), seq); |
| |
| DPRINTF(Fetch, "[tid:%i] Instruction is: %s\n", tid, |
| instruction->staticInst-> |
| disassemble(thisPC.instAddr())); |
| |
| #if TRACING_ON |
| if (trace) { |
| instruction->traceData = |
| cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid), |
| instruction->staticInst, thisPC, curMacroop); |
| } |
| #else |
| instruction->traceData = NULL; |
| #endif |
| |
| // Add instruction to the CPU's list of instructions. |
| instruction->setInstListIt(cpu->addInst(instruction)); |
| |
| // Write the instruction to the first slot in the queue |
| // that heads to decode. |
| assert(numInst < fetchWidth); |
| fetchQueue[tid].push_back(instruction); |
| assert(fetchQueue[tid].size() <= fetchQueueSize); |
| DPRINTF(Fetch, "[tid:%i] Fetch queue entry created (%i/%i).\n", |
| tid, fetchQueue[tid].size(), fetchQueueSize); |
| |
| // Keep track of if we can take an interrupt at this boundary |
| delayedCommit[tid] = instruction->isDelayedCommit(); |
| |
| return instruction; |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::fetch(bool &status_change) |
| { |
| ////////////////////////////////////////// |
| // Start actual fetch |
| ////////////////////////////////////////// |
| ThreadID tid = getFetchingThread(); |
| |
| assert(!cpu->switchedOut()); |
| |
| if (tid == InvalidThreadID) { |
| // Breaks looping condition in tick() |
| threadFetched = numFetchingThreads; |
| |
| if (numThreads == 1) { // @todo Per-thread stats |
| profileStall(0); |
| } |
| |
| return; |
| } |
| |
| DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); |
| |
| // The current PC. |
| TheISA::PCState thisPC = pc[tid]; |
| |
| Addr pcOffset = fetchOffset[tid]; |
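    // BaseCPU::PCMask clears the low-order address bits so fetchAddr is
    // aligned to a machine-instruction boundary.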
| Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; |
| |
| bool inRom = isRomMicroPC(thisPC.microPC()); |
| |
| // If returning from the delay of a cache miss, then update the status |
| // to running, otherwise do the cache access. Possibly move this up |
| // to tick() function. |
| if (fetchStatus[tid] == IcacheAccessComplete) { |
| DPRINTF(Fetch, "[tid:%i] Icache miss is complete.\n", tid); |
| |
| fetchStatus[tid] = Running; |
| status_change = true; |
| } else if (fetchStatus[tid] == Running) { |
| // Align the fetch PC so its at the start of a fetch buffer segment. |
| Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); |
| |
| // If buffer is no longer valid or fetchAddr has moved to point |
| // to the next cache block, AND we have no remaining ucode |
| // from a macro-op, then start fetch from icache. |
| if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid]) |
| && !inRom && !macroop[tid]) { |
| DPRINTF(Fetch, "[tid:%i] Attempting to translate and read " |
| "instruction, starting at PC %s.\n", tid, thisPC); |
| |
| fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); |
| |
| if (fetchStatus[tid] == IcacheWaitResponse) |
| ++fetchStats.icacheStallCycles; |
| else if (fetchStatus[tid] == ItlbWait) |
| ++fetchStats.tlbCycles; |
| else |
| ++fetchStats.miscStallCycles; |
| return; |
| } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) { |
            // Stall CPU if an interrupt is posted and we're not issuing
            // a delayed-commit micro-op currently (delayed-commit
            // instructions are not interruptible by interrupts, only faults)
| ++fetchStats.miscStallCycles; |
| DPRINTF(Fetch, "[tid:%i] Fetch is stalled!\n", tid); |
| return; |
| } |
| } else { |
| if (fetchStatus[tid] == Idle) { |
| ++fetchStats.idleCycles; |
| DPRINTF(Fetch, "[tid:%i] Fetch is idle!\n", tid); |
| } |
| |
| // Status is Idle, so fetch should do nothing. |
| return; |
| } |
| |
| ++fetchStats.cycles; |
| |
| TheISA::PCState nextPC = thisPC; |
| |
| StaticInstPtr staticInst = NULL; |
| StaticInstPtr curMacroop = macroop[tid]; |
| |
| // If the read of the first instruction was successful, then grab the |
| // instructions from the rest of the cache line and put them into the |
| // queue heading to decode. |
| |
| DPRINTF(Fetch, "[tid:%i] Adding instructions to queue to " |
| "decode.\n", tid); |
| |
| // Need to keep track of whether or not a predicted branch |
| // ended this fetch block. |
| bool predictedBranch = false; |
| |
| // Need to halt fetch if quiesce instruction detected |
| bool quiesce = false; |
| |
| TheISA::MachInst *cacheInsts = |
| reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]); |
| |
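    // numInsts is how many fixed-size machine instructions fit in the
    // fetch buffer; blkOffset indexes into the buffer in those units.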
| const unsigned numInsts = fetchBufferSize / instSize; |
| unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize; |
| |
| // Loop through instruction memory from the cache. |
| // Keep issuing while fetchWidth is available and branch is not |
| // predicted taken |
| while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize |
| && !predictedBranch && !quiesce) { |
| // We need to process more memory if we aren't going to get a |
| // StaticInst from the rom, the current macroop, or what's already |
| // in the decoder. |
| bool needMem = !inRom && !curMacroop && |
| !decoder[tid]->instReady(); |
| fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; |
| Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); |
| |
| if (needMem) { |
| // If buffer is no longer valid or fetchAddr has moved to point |
| // to the next cache block then start fetch from icache. |
| if (!fetchBufferValid[tid] || |
| fetchBufferBlockPC != fetchBufferPC[tid]) |
| break; |
| |
| if (blkOffset >= numInsts) { |
| // We need to process more memory, but we've run out of the |
| // current block. |
| break; |
| } |
| |
| decoder[tid]->moreBytes(thisPC, fetchAddr, cacheInsts[blkOffset]); |
| |
| if (decoder[tid]->needMoreBytes()) { |
| blkOffset++; |
| fetchAddr += instSize; |
| pcOffset += instSize; |
| } |
| } |
| |
| // Extract as many instructions and/or microops as we can from |
| // the memory we've processed so far. |
| do { |
| if (!(curMacroop || inRom)) { |
| if (decoder[tid]->instReady()) { |
| staticInst = decoder[tid]->decode(thisPC); |
| |
| // Increment stat of fetched instructions. |
| ++fetchStats.insts; |
| |
| if (staticInst->isMacroop()) { |
| curMacroop = staticInst; |
| } else { |
| pcOffset = 0; |
| } |
| } else { |
| // We need more bytes for this instruction so blkOffset and |
| // pcOffset will be updated |
| break; |
| } |
| } |
| // Whether we're moving to a new macroop because we're at the |
| // end of the current one, or the branch predictor incorrectly |
| // thinks we are... |
| bool newMacro = false; |
| if (curMacroop || inRom) { |
| if (inRom) { |
| staticInst = decoder[tid]->fetchRomMicroop( |
| thisPC.microPC(), curMacroop); |
| } else { |
| staticInst = curMacroop->fetchMicroop(thisPC.microPC()); |
| } |
| newMacro |= staticInst->isLastMicroop(); |
| } |
| |
| DynInstPtr instruction = |
| buildInst(tid, staticInst, curMacroop, |
| thisPC, nextPC, true); |
| |
| ppFetch->notify(instruction); |
| numInst++; |
| |
| #if TRACING_ON |
| if (DTRACE(O3PipeView)) { |
| instruction->fetchTick = curTick(); |
| } |
| #endif |
| |
| nextPC = thisPC; |
| |
| // If we're branching after this instruction, quit fetching |
| // from the same block. |
| predictedBranch |= thisPC.branching(); |
| predictedBranch |= |
| lookupAndUpdateNextPC(instruction, nextPC); |
| if (predictedBranch) { |
| DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC); |
| } |
| |
| newMacro |= thisPC.instAddr() != nextPC.instAddr(); |
| |
| // Move to the next instruction, unless we have a branch. |
| thisPC = nextPC; |
| inRom = isRomMicroPC(thisPC.microPC()); |
| |
| if (newMacro) { |
| fetchAddr = thisPC.instAddr() & BaseCPU::PCMask; |
| blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize; |
| pcOffset = 0; |
| curMacroop = NULL; |
| } |
| |
| if (instruction->isQuiesce()) { |
| DPRINTF(Fetch, |
| "Quiesce instruction encountered, halting fetch!\n"); |
| fetchStatus[tid] = QuiescePending; |
| status_change = true; |
| quiesce = true; |
| break; |
| } |
| } while ((curMacroop || decoder[tid]->instReady()) && |
| numInst < fetchWidth && |
| fetchQueue[tid].size() < fetchQueueSize); |
| |
| // Re-evaluate whether the next instruction to fetch is in micro-op ROM |
| // or not. |
| inRom = isRomMicroPC(thisPC.microPC()); |
| } |
| |
| if (predictedBranch) { |
| DPRINTF(Fetch, "[tid:%i] Done fetching, predicted branch " |
| "instruction encountered.\n", tid); |
| } else if (numInst >= fetchWidth) { |
| DPRINTF(Fetch, "[tid:%i] Done fetching, reached fetch bandwidth " |
| "for this cycle.\n", tid); |
    } else if (blkOffset >= numInsts) {
        DPRINTF(Fetch, "[tid:%i] Done fetching, reached the end of the "
                "fetch buffer.\n", tid);
| } |
| |
| macroop[tid] = curMacroop; |
| fetchOffset[tid] = pcOffset; |
| |
| if (numInst > 0) { |
| wroteToTimeBuffer = true; |
| } |
| |
| pc[tid] = thisPC; |
| |
| // pipeline a fetch if we're crossing a fetch buffer boundary and not in |
| // a state that would preclude fetching |
| fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; |
| Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); |
| issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] && |
| fetchStatus[tid] != IcacheWaitResponse && |
| fetchStatus[tid] != ItlbWait && |
| fetchStatus[tid] != IcacheWaitRetry && |
| fetchStatus[tid] != QuiescePending && |
| !curMacroop; |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::recvReqRetry() |
| { |
| if (retryPkt != NULL) { |
| assert(cacheBlocked); |
| assert(retryTid != InvalidThreadID); |
| assert(fetchStatus[retryTid] == IcacheWaitRetry); |
| |
| if (icachePort.sendTimingReq(retryPkt)) { |
| fetchStatus[retryTid] = IcacheWaitResponse; |
| // Notify Fetch Request probe when a retryPkt is successfully sent. |
| // Note that notify must be called before retryPkt is set to NULL. |
| ppFetchRequestSent->notify(retryPkt->req); |
| retryPkt = NULL; |
| retryTid = InvalidThreadID; |
| cacheBlocked = false; |
| } |
| } else { |
| assert(retryTid == InvalidThreadID); |
| // Access has been squashed since it was sent out. Just clear |
| // the cache being blocked. |
| cacheBlocked = false; |
| } |
| } |
| |
| /////////////////////////////////////// |
| // // |
| // SMT FETCH POLICY MAINTAINED HERE // |
| // // |
| /////////////////////////////////////// |
| template<class Impl> |
| ThreadID |
| DefaultFetch<Impl>::getFetchingThread() |
| { |
| if (numThreads > 1) { |
| switch (fetchPolicy) { |
| case SMTFetchPolicy::RoundRobin: |
| return roundRobin(); |
| case SMTFetchPolicy::IQCount: |
| return iqCount(); |
| case SMTFetchPolicy::LSQCount: |
| return lsqCount(); |
| case SMTFetchPolicy::Branch: |
| return branchCount(); |
| default: |
| return InvalidThreadID; |
| } |
| } else { |
| std::list<ThreadID>::iterator thread = activeThreads->begin(); |
| if (thread == activeThreads->end()) { |
| return InvalidThreadID; |
| } |
| |
| ThreadID tid = *thread; |
| |
| if (fetchStatus[tid] == Running || |
| fetchStatus[tid] == IcacheAccessComplete || |
| fetchStatus[tid] == Idle) { |
| return tid; |
| } else { |
| return InvalidThreadID; |
| } |
| } |
| } |
| |
| |
| template<class Impl> |
| ThreadID |
| DefaultFetch<Impl>::roundRobin() |
| { |
| std::list<ThreadID>::iterator pri_iter = priorityList.begin(); |
| std::list<ThreadID>::iterator end = priorityList.end(); |
| |
| ThreadID high_pri; |
| |
| while (pri_iter != end) { |
| high_pri = *pri_iter; |
| |
        assert(high_pri < numThreads);
| |
| if (fetchStatus[high_pri] == Running || |
| fetchStatus[high_pri] == IcacheAccessComplete || |
| fetchStatus[high_pri] == Idle) { |
| |
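            // Rotate the chosen thread to the back of the priority list
            // so the other active threads are favored on later calls.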
| priorityList.erase(pri_iter); |
| priorityList.push_back(high_pri); |
| |
| return high_pri; |
| } |
| |
| pri_iter++; |
| } |
| |
| return InvalidThreadID; |
| } |
| |
| template<class Impl> |
| ThreadID |
| DefaultFetch<Impl>::iqCount() |
| { |
| //sorted from lowest->highest |
| std::priority_queue<unsigned, std::vector<unsigned>, |
| std::greater<unsigned> > PQ; |
| std::map<unsigned, ThreadID> threadMap; |
| |
| std::list<ThreadID>::iterator threads = activeThreads->begin(); |
| std::list<ThreadID>::iterator end = activeThreads->end(); |
| |
| while (threads != end) { |
| ThreadID tid = *threads++; |
| unsigned iqCount = fromIEW->iewInfo[tid].iqCount; |
| |
| //we can potentially get tid collisions if two threads |
| //have the same iqCount, but this should be rare. |
| PQ.push(iqCount); |
| threadMap[iqCount] = tid; |
| } |
| |
| while (!PQ.empty()) { |
| ThreadID high_pri = threadMap[PQ.top()]; |
| |
| if (fetchStatus[high_pri] == Running || |
| fetchStatus[high_pri] == IcacheAccessComplete || |
| fetchStatus[high_pri] == Idle) |
| return high_pri; |
| else |
| PQ.pop(); |
| |
| } |
| |
| return InvalidThreadID; |
| } |
| |
| template<class Impl> |
| ThreadID |
| DefaultFetch<Impl>::lsqCount() |
| { |
| //sorted from lowest->highest |
| std::priority_queue<unsigned, std::vector<unsigned>, |
| std::greater<unsigned> > PQ; |
| std::map<unsigned, ThreadID> threadMap; |
| |
| std::list<ThreadID>::iterator threads = activeThreads->begin(); |
| std::list<ThreadID>::iterator end = activeThreads->end(); |
| |
| while (threads != end) { |
| ThreadID tid = *threads++; |
| unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount; |
| |
        //we can potentially get tid collisions if two threads
        //have the same ldstqCount, but this should be rare.
| PQ.push(ldstqCount); |
| threadMap[ldstqCount] = tid; |
| } |
| |
| while (!PQ.empty()) { |
| ThreadID high_pri = threadMap[PQ.top()]; |
| |
| if (fetchStatus[high_pri] == Running || |
| fetchStatus[high_pri] == IcacheAccessComplete || |
| fetchStatus[high_pri] == Idle) |
| return high_pri; |
| else |
| PQ.pop(); |
| } |
| |
| return InvalidThreadID; |
| } |
| |
| template<class Impl> |
| ThreadID |
| DefaultFetch<Impl>::branchCount() |
| { |
| panic("Branch Count Fetch policy unimplemented\n"); |
| return InvalidThreadID; |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid) |
| { |
| if (!issuePipelinedIfetch[tid]) { |
| return; |
| } |
| |
| // The next PC to access. |
| TheISA::PCState thisPC = pc[tid]; |
| |
| if (isRomMicroPC(thisPC.microPC())) { |
| return; |
| } |
| |
| Addr pcOffset = fetchOffset[tid]; |
| Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; |
| |
| // Align the fetch PC so its at the start of a fetch buffer segment. |
| Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); |
| |
| // Unless buffer already got the block, fetch it from icache. |
| if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) { |
| DPRINTF(Fetch, "[tid:%i] Issuing a pipelined I-cache access, " |
| "starting at PC %s.\n", tid, thisPC); |
| |
| fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); |
| } |
| } |
| |
| template<class Impl> |
| void |
DefaultFetch<Impl>::profileStall(ThreadID tid)
{
    DPRINTF(Fetch, "There are no more threads available to fetch from.\n");
| |
| // @todo Per-thread stats |
| |
| if (stalls[tid].drain) { |
| ++fetchStats.pendingDrainCycles; |
| DPRINTF(Fetch, "Fetch is waiting for a drain!\n"); |
| } else if (activeThreads->empty()) { |
| ++fetchStats.noActiveThreadStallCycles; |
| DPRINTF(Fetch, "Fetch has no active thread!\n"); |
| } else if (fetchStatus[tid] == Blocked) { |
| ++fetchStats.blockedCycles; |
| DPRINTF(Fetch, "[tid:%i] Fetch is blocked!\n", tid); |
| } else if (fetchStatus[tid] == Squashing) { |
| ++fetchStats.squashCycles; |
| DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid); |
    } else if (fetchStatus[tid] == IcacheWaitResponse) {
        ++fetchStats.icacheStallCycles;
        DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a cache response!\n",
                tid);
    } else if (fetchStatus[tid] == ItlbWait) {
        ++fetchStats.tlbCycles;
        DPRINTF(Fetch, "[tid:%i] Fetch is waiting for an ITLB walk to "
                "finish!\n", tid);
| } else if (fetchStatus[tid] == TrapPending) { |
| ++fetchStats.pendingTrapStallCycles; |
| DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending trap!\n", |
| tid); |
| } else if (fetchStatus[tid] == QuiescePending) { |
| ++fetchStats.pendingQuiesceStallCycles; |
| DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending quiesce " |
| "instruction!\n", tid); |
| } else if (fetchStatus[tid] == IcacheWaitRetry) { |
| ++fetchStats.icacheWaitRetryStallCycles; |
| DPRINTF(Fetch, "[tid:%i] Fetch is waiting for an I-cache retry!\n", |
| tid); |
| } else if (fetchStatus[tid] == NoGoodAddr) { |
| DPRINTF(Fetch, "[tid:%i] Fetch predicted non-executable address\n", |
| tid); |
| } else { |
| DPRINTF(Fetch, "[tid:%i] Unexpected fetch stall reason " |
| "(Status: %i)\n", |
| tid, fetchStatus[tid]); |
| } |
| } |
| |
| template<class Impl> |
| bool |
| DefaultFetch<Impl>::IcachePort::recvTimingResp(PacketPtr pkt) |
| { |
    DPRINTF(O3CPU, "Fetch unit received timing response\n");
| // We shouldn't ever get a cacheable block in Modified state |
| assert(pkt->req->isUncacheable() || |
| !(pkt->cacheResponding() && !pkt->hasSharers())); |
| fetch->processCacheCompletion(pkt); |
| |
| return true; |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::IcachePort::recvReqRetry() |
| { |
| fetch->recvReqRetry(); |
| } |
| |
#endif // __CPU_O3_FETCH_IMPL_HH__