src/cpu/o3/fetch_impl.hh - arm/gem5 - Git at Google

 /*
  * Copyright (c) 2010-2012 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
  * not be construed as granting a license to any other intellectual
  * property including but not limited to intellectual property relating
  * to a hardware implementation of the functionality of the software
  * licensed hereunder.  You may use the software subject to the license
  * terms below provided that you ensure that this notice is replicated
  * unmodified and in its entirety in all distributions of the software,
  * modified or unmodified, in source code or in binary form.
  *
  * Copyright (c) 2004-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met: redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer;
  * redistributions in binary form must reproduce the above copyright
  * notice, this list of conditions and the following disclaimer in the
  * documentation and/or other materials provided with the distribution;
  * neither the name of the copyright holders nor the names of its
  * contributors may be used to endorse or promote products derived from
  * this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Kevin Lim
  *          Korey Sewell
  */

 #include <algorithm>
 #include <cstring>
 #include <list>
 #include <map>
 #include <queue>

 #include "arch/isa_traits.hh"
 #include "arch/tlb.hh"
 #include "arch/utility.hh"
 #include "arch/vtophys.hh"
 #include "base/types.hh"
 #include "config/the_isa.hh"
 #include "cpu/base.hh"
 //#include "cpu/checker/cpu.hh"
 #include "cpu/o3/fetch.hh"
 #include "cpu/exetrace.hh"
 #include "debug/Activity.hh"
 #include "debug/Drain.hh"
 #include "debug/Fetch.hh"
 #include "debug/O3PipeView.hh"
 #include "mem/packet.hh"
 #include "params/DerivO3CPU.hh"
 #include "sim/byteswap.hh"
 #include "sim/core.hh"
 #include "sim/eventq.hh"
 #include "sim/full_system.hh"
 #include "sim/system.hh"

 using namespace std;

 /**
  * Build the fetch stage for the given CPU.
  *
  * Copies the inter-stage delays, the fetch width and the SMT thread counts
  * out of @p params, selects the SMT fetch policy named by
  * params->smtFetchPolicy (compared after lower-casing), and allocates one
  * decoder per possible thread.  Configuration errors are reported through
  * fatal() / panic().
  *
  * @param _cpu   CPU this fetch stage belongs to.
  * @param params Configuration parameters of the CPU.
  */
 template<class Impl>
 DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
     : cpu(_cpu),
       decodeToFetchDelay(params->decodeToFetchDelay),
       renameToFetchDelay(params->renameToFetchDelay),
       iewToFetchDelay(params->iewToFetchDelay),
       commitToFetchDelay(params->commitToFetchDelay),
       fetchWidth(params->fetchWidth),
       retryPkt(NULL),
       retryTid(InvalidThreadID),
       numThreads(params->numThreads),
       numFetchingThreads(params->smtNumFetchingThreads),
       finishTranslationEvent(this)
 {
     // The per-thread and per-slot arrays are sized at compile time, so the
     // configured values must not exceed those limits.
     if (numThreads > Impl::MaxThreads)
         fatal("numThreads (%d) is larger than compiled limit (%d),\n"
               "\tincrease MaxThreads in src/cpu/o3/impl.hh\n",
               numThreads, static_cast<int>(Impl::MaxThreads));
     if (fetchWidth > Impl::MaxWidth)
         fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
              "\tincrease MaxWidth in src/cpu/o3/impl.hh\n",
              fetchWidth, static_cast<int>(Impl::MaxWidth));

     std::string policy = params->smtFetchPolicy;

     // Convert string to lowercase so the comparison below is
     // case-insensitive.
     std::transform(policy.begin(), policy.end(), policy.begin(),
                    (int(*)(int)) tolower);

     // Figure out fetch policy
     if (policy == "singlethread") {
         fetchPolicy = SingleThread;
         if (numThreads > 1)
             panic("Invalid Fetch Policy for a SMT workload.");
     } else if (policy == "roundrobin") {
         fetchPolicy = RoundRobin;
         DPRINTF(Fetch, "Fetch policy set to Round Robin\n");
     } else if (policy == "branch") {
         fetchPolicy = Branch;
         DPRINTF(Fetch, "Fetch policy set to Branch Count\n");
     } else if (policy == "iqcount") {
         fetchPolicy = IQ;
         DPRINTF(Fetch, "Fetch policy set to IQ count\n");
     } else if (policy == "lsqcount") {
         fetchPolicy = LSQ;
         DPRINTF(Fetch, "Fetch policy set to LSQ count\n");
     } else {
         // List every policy accepted above, including Branch.
         fatal("Invalid Fetch Policy. Options Are: {SingleThread,"
               " RoundRobin,Branch,LSQcount,IQcount}\n");
     }

     // Get the size of an instruction.
     instSize = sizeof(TheISA::MachInst);

     // The cache line buffers are allocated later, in setIcache(), once the
     // block size is known; the decoders can be created right away.
     for (int i = 0; i < Impl::MaxThreads; i++) {
         cacheData[i] = NULL;
         decoder[i] = new TheISA::Decoder;
     }

     branchPred = params->branchPred;
 }

 /** Name of this stage: the owning CPU's name followed by ".fetch". */
 template <class Impl>
 std::string
 DefaultFetch<Impl>::name() const
 {
     std::string stage_name = cpu->name();
     stage_name += ".fetch";
     return stage_name;
 }

 /**
  * Register the statistics of the fetch stage.
  *
  * Every statistic is named "<stage name>.<suffix>".  The three rate
  * statistics at the end are formulas over the counters registered above
  * and the CPU's cycle count.
  */
 template <class Impl>
 void
 DefaultFetch<Impl>::regStats()
 {
     // All statistics share the same prefix, so build it only once.
     const std::string stage_name = name();

     icacheStallCycles
         .name(stage_name + ".icacheStallCycles")
         .desc("Number of cycles fetch is stalled on an Icache miss")
         .prereq(icacheStallCycles);

     fetchedInsts
         .name(stage_name + ".Insts")
         .desc("Number of instructions fetch has processed")
         .prereq(fetchedInsts);

     fetchedBranches
         .name(stage_name + ".Branches")
         .desc("Number of branches that fetch encountered")
         .prereq(fetchedBranches);

     predictedBranches
         .name(stage_name + ".predictedBranches")
         .desc("Number of branches that fetch has predicted taken")
         .prereq(predictedBranches);

     fetchCycles
         .name(stage_name + ".Cycles")
         .desc("Number of cycles fetch has run and was not squashing or"
               " blocked")
         .prereq(fetchCycles);

     fetchSquashCycles
         .name(stage_name + ".SquashCycles")
         .desc("Number of cycles fetch has spent squashing")
         .prereq(fetchSquashCycles);

     fetchTlbCycles
         .name(stage_name + ".TlbCycles")
         .desc("Number of cycles fetch has spent waiting for tlb")
         .prereq(fetchTlbCycles);

     fetchIdleCycles
         .name(stage_name + ".IdleCycles")
         .desc("Number of cycles fetch was idle")
         .prereq(fetchIdleCycles);

     fetchBlockedCycles
         .name(stage_name + ".BlockedCycles")
         .desc("Number of cycles fetch has spent blocked")
         .prereq(fetchBlockedCycles);

     fetchedCacheLines
         .name(stage_name + ".CacheLines")
         .desc("Number of cache lines fetched")
         .prereq(fetchedCacheLines);

     fetchMiscStallCycles
         .name(stage_name + ".MiscStallCycles")
         .desc("Number of cycles fetch has spent waiting on interrupts, or "
               "bad addresses, or out of MSHRs")
         .prereq(fetchMiscStallCycles);

     fetchPendingDrainCycles
         .name(stage_name + ".PendingDrainCycles")
         .desc("Number of cycles fetch has spent waiting on pipes to drain")
         .prereq(fetchPendingDrainCycles);

     fetchNoActiveThreadStallCycles
         .name(stage_name + ".NoActiveThreadStallCycles")
         .desc("Number of stall cycles due to no active thread to fetch from")
         .prereq(fetchNoActiveThreadStallCycles);

     fetchPendingTrapStallCycles
         .name(stage_name + ".PendingTrapStallCycles")
         .desc("Number of stall cycles due to pending traps")
         .prereq(fetchPendingTrapStallCycles);

     fetchPendingQuiesceStallCycles
         .name(stage_name + ".PendingQuiesceStallCycles")
         .desc("Number of stall cycles due to pending quiesce instructions")
         .prereq(fetchPendingQuiesceStallCycles);

     fetchIcacheWaitRetryStallCycles
         .name(stage_name + ".IcacheWaitRetryStallCycles")
         .desc("Number of stall cycles due to full MSHR")
         .prereq(fetchIcacheWaitRetryStallCycles);

     fetchIcacheSquashes
         .name(stage_name + ".IcacheSquashes")
         .desc("Number of outstanding Icache misses that were squashed")
         .prereq(fetchIcacheSquashes);

     fetchTlbSquashes
         .name(stage_name + ".ItlbSquashes")
         .desc("Number of outstanding ITLB misses that were squashed")
         .prereq(fetchTlbSquashes);

     // One bucket per possible number of instructions fetched in a cycle.
     fetchNisnDist
         .init(/* base value */ 0,
               /* last value */ fetchWidth,
               /* bucket size */ 1)
         .name(stage_name + ".rateDist")
         .desc("Number of instructions fetched each cycle (Total)")
         .flags(Stats::pdf);

     // Derived statistics, all relative to the CPU's cycle count.
     idleRate
         .name(stage_name + ".idleRate")
         .desc("Percent of cycles fetch was idle")
         .prereq(idleRate);
     idleRate = fetchIdleCycles * 100 / cpu->numCycles;

     branchRate
         .name(stage_name + ".branchRate")
         .desc("Number of branch fetches per cycle")
         .flags(Stats::total);
     branchRate = fetchedBranches / cpu->numCycles;

     fetchRate
         .name(stage_name + ".rate")
         .desc("Number of inst fetches per cycle")
         .flags(Stats::total);
     fetchRate = fetchedInsts / cpu->numCycles;
 }

 /**
  * Remember the backwards-communication time buffer and create the wires
  * fetch uses to read what decode, rename, IEW and commit wrote into it.
  *
  * @param time_buffer Time buffer shared with the later pipeline stages.
  */
 template<class Impl>
 void
 DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
 {
     timeBuffer = time_buffer;

     // One wire per producing stage; each is requested at the negated
     // delay configured for that stage.
     fromDecode = time_buffer->getWire(-decodeToFetchDelay);
     fromRename = time_buffer->getWire(-renameToFetchDelay);
     fromIEW = time_buffer->getWire(-iewToFetchDelay);
     fromCommit = time_buffer->getWire(-commitToFetchDelay);
 }

 /**
  * Store the pointer to the list of active threads.
  *
  * @param at_ptr List of active thread ids; only the pointer is kept.
  */
 template<class Impl>
 void
 DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
 {
     this->activeThreads = at_ptr;
 }

 /**
  * Remember the fetch queue and create the wire used to hand fetched
  * instructions to decode.
  *
  * @param fq_ptr Queue between fetch and decode.
  */
 template<class Impl>
 void
 DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
 {
     fetchQueue = fq_ptr;

     // Fetch writes into the slot at index 0 of the queue.
     toDecode = fq_ptr->getWire(0);
 }

 /**
  * Bring the stage into its initial state and mark it active.
  *
  * The priority list must still be empty when this is called.
  */
 template<class Impl>
 void
 DefaultFetch<Impl>::startupStage()
 {
     assert(priorityList.empty());

     this->resetStage();

     // Instructions have to be fetched from the very first cycle on, so
     // the stage begins its life in the active state.
     this->switchToActive();
 }

 /**
  * Reset the stage-wide and per-thread fetch state.
  *
  * Every configured thread is set to Running with the PC currently held by
  * the CPU, all of its stall flags are cleared and it is appended to the
  * priority list.  The stage itself is left Inactive.
  */
 template<class Impl>
 void
 DefaultFetch<Impl>::resetStage()
 {
     // Stage-wide state.
     numInst = 0;
     interruptPending = false;
     cacheBlocked = false;

     priorityList.clear();

     // Per-thread state, starting from the PC the CPU currently holds.
     for (ThreadID tid = 0; tid < numThreads; ++tid) {
         fetchStatus[tid] = Running;
         pc[tid] = cpu->pcState(tid);
         fetchOffset[tid] = 0;
         macroop[tid] = NULL;

         delayedCommit[tid] = false;
         memReq[tid] = NULL;

         // No stage is stalling this thread after a reset.
         stalls[tid].decode = false;
         stalls[tid].rename = false;
         stalls[tid].iew = false;
         stalls[tid].commit = false;
         stalls[tid].drain = false;

         priorityList.push_back(tid);
     }

     wroteToTimeBuffer = false;
     _status = Inactive;

     // When restoring from a checkpoint with this CPU waiting to be
     // switched in, the instruction port may not be connected yet.  The
     // icache dependent setup is only done here if the port is connected;
     // otherwise it has to happen as part of takeOverFrom.
     if (cpu->getInstPort().isConnected()) {
         setIcache();
     }
 }

 /**
  * Read the block size from the instruction port and prepare the
  * per-thread cache line buffers.
  *
  * The instruction port has to be connected.  A buffer is only allocated
  * for a thread that does not have one yet; every thread's buffered line
  * is marked invalid.
  */
 template<class Impl>
 void
 DefaultFetch<Impl>::setIcache()
 {
     assert(cpu->getInstPort().isConnected());

     // Block size as reported by the peer of the instruction port, and the
     // mask that selects the offset bits within a block.
     cacheBlkSize = cpu->getInstPort().peerBlockSize();
     cacheBlkMask = (cacheBlkSize - 1);

     for (ThreadID tid = 0; tid < numThreads; ++tid) {
         // Nothing has been fetched into the buffer yet.
         cacheDataValid[tid] = false;
         cacheDataPC[tid] = 0;

         // Room for exactly one cache line per thread.
         if (cacheData[tid] == NULL) {
             cacheData[tid] = new uint8_t[cacheBlkSize];
         }
     }
 }

 /**
  * Handle the response of an instruction cache access.
  *
  * The packet and its request are deleted in every case.  If the thread is
  * no longer waiting for exactly this request, the response is counted as
  * a squashed icache access and dropped.  Otherwise the line is copied
  * into the thread's buffer and the thread continues as
  * IcacheAccessComplete, or Blocked if it is currently stalled.
  *
  * @param pkt Response packet coming back from the instruction cache.
  */
 template<class Impl>
 void
 DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
 {
     const ThreadID tid = pkt->req->threadId();

     DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid);
     assert(!cpu->switchedOut());

     // The response is only of interest while the thread still waits for
     // this very request.
     const bool still_expected =
         fetchStatus[tid] == IcacheWaitResponse && pkt->req == memReq[tid];

     if (!still_expected) {
         ++fetchIcacheSquashes;
         delete pkt->req;
         delete pkt;
         return;
     }

     // Keep a private copy of the line and mark it usable.
     memcpy(cacheData[tid], pkt->getPtr<uint8_t>(), cacheBlkSize);
     cacheDataValid[tid] = true;

     // The CPU may have gone to sleep while waiting for this completion.
     cpu->wakeCPU();

     DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
             tid);

     switchToActive();

     // A stalled thread stays blocked even though its data has arrived.
     fetchStatus[tid] = checkStall(tid) ? Blocked : IcacheAccessComplete;

     // The request has been served; release it and forget about it.
     delete pkt->req;
     delete pkt;
     memReq[tid] = NULL;
 }

 /** Clear the drain stall flag of every thread slot. */
 template <class Impl>
 void
 DefaultFetch<Impl>::drainResume()
 {
     // All Impl::MaxThreads slots are covered, not only the configured
     // threads.
     for (ThreadID tid = 0; tid < Impl::MaxThreads; ++tid) {
         stalls[tid].drain = false;
     }
 }

 /**
  * Assert that the stage is in the state expected after a drain.
  *
  * Checks that isDrained() holds, that no retry packet, cache block or
  * interrupt is pending, and that every configured thread has no
  * outstanding memory request and no stall other than the drain stall.
  * The branch predictor is asked to run its own check at the end.
  */
 template <class Impl>
 void
 DefaultFetch<Impl>::drainSanityCheck() const
 {
     // Stage-wide state.
     assert(isDrained());
     assert(retryPkt == NULL);
     assert(retryTid == InvalidThreadID);
     assert(cacheBlocked == false);
     assert(interruptPending == false);

     // Per-thread state.
     for (ThreadID i = 0; i < numThreads; ++i) {
         assert(!memReq[i]);
         assert(!stalls[i].decode);
         assert(!stalls[i].rename);
         assert(!stalls[i].iew);
         assert(!stalls[i].commit);
         // A thread is either idle or held back by the drain stall.
         assert(fetchStatus[i] == Idle || stalls[i].drain);
     }

     branchPred->drainSanityCheck();
 }

 /**
  * Tell whether the fetch stage has finished draining.
  *
  * @return true if every configured thread is Idle or is Blocked with its
  *         drain stall flag set, and the finish translation event is not
  *         scheduled; false otherwise.
  */
 template <class Impl>
 bool
 DefaultFetch<Impl>::isDrained() const
 {
     /* Make sure that threads are either idle or that the commit stage
      * has signaled that draining has completed by setting the drain
      * stall flag. This effectively forces the pipeline to be disabled
      * until the whole system is drained (simulation may continue to
      * drain other components).
      */
     for (ThreadID tid = 0; tid < numThreads; ++tid) {
         const bool idle = fetchStatus[tid] == Idle;
         const bool drain_blocked =
             fetchStatus[tid] == Blocked && stalls[tid].drain;

         if (!idle && !drain_blocked)
             return false;
     }

     /* A scheduled finish translation event could start the pipeline up
      * again in the middle of the drain cycle, so the stage does not
      * count as drained while one is pending.
      */
     if (finishTranslationEvent.scheduled())
         return false;

     return true;
 }

 /**
  * Reset the stage when this CPU takes over from another one.
  *
  * The instruction port has to be connected by the time this is called.
  */
 template <class Impl>
 void
 DefaultFetch<Impl>::takeOverFrom()
 {
     assert(cpu->getInstPort().isConnected());

     this->resetStage();
 }

 /**
  * Set the drain stall flag of a thread.
  *
  * The CPU has to be draining and the flag must not be set already.
  *
  * @param tid Thread whose drain stall flag is set.
  */
 template <class Impl>
 void
 DefaultFetch<Impl>::drainStall(ThreadID tid)
 {
     assert(cpu->isDraining());
     assert(!stalls[tid].drain);
     DPRINTF(Drain, "%i: Thread drained.\n", tid);
     // checkStall() reports the thread as stalled while this flag is set.
     stalls[tid].drain = true;
 }

 /**
  * Put thread 0 back into the Running state after a quiesce.
  *
  * Only thread 0 is handled here; see the todo below.
  */
 template <class Impl>
 void
 DefaultFetch<Impl>::wakeFromQuiesce()
 {
     DPRINTF(Fetch, "Waking up from quiesce\n");

     // Hopefully this is safe
     // @todo: Allow other threads to wake from quiesce.
     const ThreadID woken_tid = 0;
     fetchStatus[woken_tid] = Running;
 }

 /**
  * Mark the stage Active and tell the CPU about it.
  *
  * Does nothing unless the stage is currently Inactive.
  */
 template <class Impl>
 inline void
 DefaultFetch<Impl>::switchToActive()
 {
     if (_status != Inactive)
         return;

     DPRINTF(Activity, "Activating stage.\n");

     cpu->activateStage(O3CPU::FetchIdx);
     _status = Active;
 }

 /**
  * Mark the stage Inactive and tell the CPU about it.
  *
  * Does nothing unless the stage is currently Active.
  */
 template <class Impl>
 inline void
 DefaultFetch<Impl>::switchToInactive()
 {
     if (_status != Active)
         return;

     DPRINTF(Activity, "Deactivating stage.\n");

     cpu->deactivateStage(O3CPU::FetchIdx);
     _status = Inactive;
 }

 /**
  * Determine the PC following an instruction and record the prediction in
  * the instruction.
  *
  * For a control instruction the branch predictor is consulted and the
  * branch statistics are updated; any other instruction simply advances
  * the PC and is recorded as not taken.
  *
  * @param inst   Instruction whose successor PC is wanted.
  * @param nextPC On entry the PC of @p inst; updated in place to the
  *               predicted next PC.
  * @return true if @p inst is a control instruction that was predicted
  *         taken, false otherwise.
  */
 template <class Impl>
 bool
 DefaultFetch<Impl>::lookupAndUpdateNextPC(
         DynInstPtr &inst, TheISA::PCState &nextPC)
 {
     // Despite its name, nextPC still holds the current PC until one of
     // the two paths below updates it.
     if (inst->isControl()) {
         const ThreadID tid = inst->threadNumber;
         const bool predict_taken =
             branchPred->predict(inst->staticInst, inst->seqNum,
                                 nextPC, tid);

         if (predict_taken) {
             DPRINTF(Fetch, "[tid:%i]: [sn:%i]:  Branch predicted to be taken to %s.\n",
                     tid, inst->seqNum, nextPC);
         } else {
             DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not taken.\n",
                     tid, inst->seqNum);
         }

         DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %s.\n",
                 tid, inst->seqNum, nextPC);

         // Remember the prediction in the instruction itself.
         inst->setPredTarg(nextPC);
         inst->setPredTaken(predict_taken);

         ++fetchedBranches;
         if (predict_taken)
             ++predictedBranches;

         return predict_taken;
     }

     // Not a control instruction: step to the next PC, nothing to predict.
     TheISA::advancePC(nextPC, inst->staticInst);
     inst->setPredTarg(nextPC);
     inst->setPredTaken(false);

     return false;
 }

 /**
  * Start fetching the cache line that contains an address.
  *
  * Builds an instruction fetch request for the block containing @p vaddr,
  * records it in memReq[tid], puts the thread into ItlbWait and hands the
  * request to the instruction TLB for a timing translation.  The outcome
  * of the translation is not handled in this function.
  *
  * @param vaddr Virtual address that has to be covered by the fetched line.
  * @param tid   Thread the line is fetched for.
  * @param pc    PC passed to checkInterrupt() and stored in the request.
  * @return false if no request was issued because the cache is blocked or
  *         an interrupt is being held for; true once the translation has
  *         been started.
  */
 template <class Impl>
 bool
 DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
 {
     assert(!cpu->switchedOut());

     // @todo: not sure if these should block translation.
     //AlphaDep
     if (cacheBlocked) {
         // An earlier access is still waiting to be retried.
         DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
                 tid);
         return false;
     } else if (checkInterrupt(pc) && !delayedCommit[tid]) {
         // Hold off fetch from getting new instructions while
         // checkInterrupt() reports an interrupt for this PC, unless the
         // thread's delayed commit flag is set.
         DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
                 tid);
         return false;
     }

     // Align the fetch address so it's at the start of a cache block.
     Addr block_PC = icacheBlockAlignPC(vaddr);

     DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
             tid, block_PC, vaddr);

     // Setup the memReq to do a read of the first instruction's address.
     // Set the appropriate read size and flags as well.
     // Build request here.
     RequestPtr mem_req =
         new Request(tid, block_PC, cacheBlkSize, Request::INST_FETCH,
                     cpu->instMasterId(), pc, cpu->thread[tid]->contextId(), tid);

     // Remembered so that later completions can be matched against it.
     memReq[tid] = mem_req;

     // Initiate translation of the icache block
     fetchStatus[tid] = ItlbWait;
     FetchTranslation *trans = new FetchTranslation(this);
     cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(),
                               trans, BaseTLB::Execute);
     return true;
 }

 /**
  * Continue a fetch once the translation of its request has finished.
  *
  * A translation that no longer matches what the thread is waiting for is
  * counted as a squashed ITLB access and its request is deleted.  On a
  * successful translation the icache read is issued (or queued for a retry
  * if the port refuses it).  On a faulting translation a nop carrying the
  * fault is built and the thread goes to TrapPending; if numInst has
  * already reached fetchWidth, the handling is deferred by one cycle
  * through finishTranslationEvent instead.
  *
  * @param fault   Result of the translation, NoFault on success.
  * @param mem_req Request that was translated.
  */
 template <class Impl>
 void
 DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req)
 {
     ThreadID tid = mem_req->threadId();
     Addr block_PC = mem_req->getVaddr();

     assert(!cpu->switchedOut());

     // Wake up CPU if it was idle
     cpu->wakeCPU();

     // Drop the translation if the thread is not waiting for it any more
     // (different state, or a different request is outstanding).
     if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
         mem_req->getVaddr() != memReq[tid]->getVaddr()) {
         DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
                 tid);
         ++fetchTlbSquashes;
         delete mem_req;
         return;
     }


     // If translation was successful, attempt to read the icache block.
     if (fault == NoFault) {
         // Check that we're not going off into random memory
         // If we have, just wait around for commit to squash something and put
         // us on the right track
         if (!cpu->system->isMemAddr(mem_req->getPaddr())) {
             warn("Address %#x is outside of physical memory, stopping fetch\n",
                     mem_req->getPaddr());
             fetchStatus[tid] = NoGoodAddr;
             delete mem_req;
             memReq[tid] = NULL;
             return;
         }

         // Build packet here.
         PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);
         data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]);

         // The buffered line is about to be replaced, so it is not valid
         // until the response arrives.
         cacheDataPC[tid] = block_PC;
         cacheDataValid[tid] = false;
         DPRINTF(Fetch, "Fetch: Doing instruction read.\n");

         fetchedCacheLines++;

         // Access the cache.
         if (!cpu->getInstPort().sendTimingReq(data_pkt)) {
             // The port refused the packet: keep it for a retry and block
             // further cache accesses until then.
             assert(retryPkt == NULL);
             assert(retryTid == InvalidThreadID);
             DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);

             fetchStatus[tid] = IcacheWaitRetry;
             retryPkt = data_pkt;
             retryTid = tid;
             cacheBlocked = true;
         } else {
             DPRINTF(Fetch, "[tid:%i]: Doing Icache access.\n", tid);
             DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
                     "response.\n", tid);

             lastIcacheStall[tid] = curTick();
             fetchStatus[tid] = IcacheWaitResponse;
         }
     } else {
         // numInst has reached fetchWidth: hand the fault and the request
         // to finishTranslationEvent and try again one cycle later.
         if (!(numInst < fetchWidth)) {
             assert(!finishTranslationEvent.scheduled());
             finishTranslationEvent.setFault(fault);
             finishTranslationEvent.setReq(mem_req);
             cpu->schedule(finishTranslationEvent,
                           cpu->clockEdge(Cycles(1)));
             return;
         }
         DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n",
                 tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
         // Translation faulted, icache request won't be sent.
         delete mem_req;
         memReq[tid] = NULL;

         // Send the fault to commit.  This thread will not do anything
         // until commit handles the fault.  The only other way it can
         // wake up is if a squash comes along and changes the PC.
         TheISA::PCState fetchPC = pc[tid];

         DPRINTF(Fetch, "[tid:%i]: Translation faulted, building noop.\n", tid);
         // We will use a nop in order to carry the fault.
         DynInstPtr instruction = buildInst(tid,
                 decoder[tid]->decode(TheISA::NoopMachInst, fetchPC.instAddr()),
                 NULL, fetchPC, fetchPC, false);

         instruction->setPredTarg(fetchPC);
         instruction->fault = fault;
         wroteToTimeBuffer = true;

         DPRINTF(Activity, "Activity this cycle.\n");
         cpu->activityThisCycle();

         fetchStatus[tid] = TrapPending;

         DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid);
         DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s.\n",
                 tid, fault->name(), pc[tid]);
     }
     // Only reached when none of the early returns above was taken.
     _status = updateFetchStatus();
 }

 /**
  * Redirect a thread to a new PC and discard its in-flight fetch state.
  *
  * Resets the fetch offset and the decoder, forgets any outstanding icache
  * or ITLB request, frees a pending retry packet that belongs to the
  * thread, and leaves the thread in the Squashing state.  cacheBlocked is
  * not modified here.
  *
  * @param newPC      PC fetch continues from.
  * @param squashInst Instruction that caused the squash, may be NULL; its
  *                   macroop is kept if it sits at the new PC.
  * @param tid        Thread being squashed.
  */
 template <class Impl>
 inline void
 DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
                              const DynInstPtr squashInst, ThreadID tid)
 {
     DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %s.\n",
             tid, newPC);

     pc[tid] = newPC;
     fetchOffset[tid] = 0;
     if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr())
         macroop[tid] = squashInst->macroop;
     else
         macroop[tid] = NULL;
     decoder[tid]->reset();

     // Clear the icache miss if it's outstanding.  The request object is
     // not freed here; processCacheCompletion() and finishTranslation()
     // delete a request that no longer matches memReq[tid].
     if (fetchStatus[tid] == IcacheWaitResponse) {
         DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
                 tid);
         memReq[tid] = NULL;
     } else if (fetchStatus[tid] == ItlbWait) {
         DPRINTF(Fetch, "[tid:%i]: Squashing outstanding ITLB miss.\n",
                 tid);
         memReq[tid] = NULL;
     }

     // Get rid of the retrying packet if it was from this thread.
     if (retryTid == tid) {
         assert(cacheBlocked);
         if (retryPkt) {
             // The request is freed together with the packet, so make sure
             // memReq[tid] does not keep pointing at the deleted object.
             if (memReq[tid] == retryPkt->req)
                 memReq[tid] = NULL;
             delete retryPkt->req;
             delete retryPkt;
         }
         retryPkt = NULL;
         retryTid = InvalidThreadID;
     }

     fetchStatus[tid] = Squashing;

     // microops are being squashed, it is not known whether the
     // youngest non-squashed microop was marked delayed commit
     // or not. Setting the flag to true ensures that the
     // interrupts are not handled when they cannot be, though
     // some opportunities to handle interrupts may be missed.
     delayedCommit[tid] = true;

     ++fetchSquashCycles;
 }

 /**
  * Squash a thread on request of the decode stage.
  *
  * @param newPC      PC fetch continues from.
  * @param squashInst Instruction that caused the squash.
  * @param seq_num    Sequence number handed to the CPU's
  *                   removeInstsUntil().
  * @param tid        Thread being squashed.
  */
 template<class Impl>
 void
 DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC,
                                      const DynInstPtr squashInst,
                                      const InstSeqNum seq_num, ThreadID tid)
 {
     DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n", tid);

     // Fetch's own state first ...
     this->doSquash(newPC, squashInst, tid);

     // ... then have the CPU drop the instructions that are in flight
     // between fetch and decode.
     cpu->removeInstsUntil(seq_num, tid);
 }

 /**
  * Tell whether anything currently stalls a thread.
  *
  * The conditions are tested in a fixed order (context switch, drain,
  * decode, rename, IEW, commit) and only the first one that applies is
  * traced.
  *
  * @param tid Thread to check.
  * @return true if the CPU is context switching or any stall flag of the
  *         thread is set, false otherwise.
  */
 template<class Impl>
 bool
 DefaultFetch<Impl>::checkStall(ThreadID tid) const
 {
     if (cpu->contextSwitch) {
         DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid);
         return true;
     }

     if (stalls[tid].drain) {
         // The drain flag may only be set while the CPU is draining.
         assert(cpu->isDraining());
         DPRINTF(Fetch,"[tid:%i]: Drain stall detected.\n",tid);
         return true;
     }

     if (stalls[tid].decode) {
         DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid);
         return true;
     }

     if (stalls[tid].rename) {
         DPRINTF(Fetch,"[tid:%i]: Stall from Rename stage detected.\n",tid);
         return true;
     }

     if (stalls[tid].iew) {
         DPRINTF(Fetch,"[tid:%i]: Stall from IEW stage detected.\n",tid);
         return true;
     }

     if (stalls[tid].commit) {
         DPRINTF(Fetch,"[tid:%i]: Stall from Commit stage detected.\n",tid);
         return true;
     }

     return false;
 }

 /**
  * Work out the status of the whole stage from the per-thread states.
  *
  * The CPU is notified when the result differs from the current _status;
  * _status itself is not written here, the caller assigns the result.
  *
  * @return Active if at least one active thread is Running, Squashing or
  *         IcacheAccessComplete, Inactive otherwise.
  */
 template<class Impl>
 typename DefaultFetch<Impl>::FetchStatus
 DefaultFetch<Impl>::updateFetchStatus()
 {
     // Look for the first active thread that has work to do.
     for (std::list<ThreadID>::iterator it = activeThreads->begin(),
              last = activeThreads->end(); it != last; ++it) {
         const ThreadID tid = *it;

         const bool has_work = fetchStatus[tid] == Running ||
                               fetchStatus[tid] == Squashing ||
                               fetchStatus[tid] == IcacheAccessComplete;
         if (!has_work)
             continue;

         // Going from inactive to active has to be reported to the CPU.
         if (_status == Inactive) {
             DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);

             if (fetchStatus[tid] == IcacheAccessComplete) {
                 DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
                         "completion\n",tid);
             }

             cpu->activateStage(O3CPU::FetchIdx);
         }

         return Active;
     }

     // No thread has work. If the stage was active so far, it is switching
     // to inactive and the CPU has to be told.
     if (_status == Active) {
         DPRINTF(Activity, "Deactivating stage.\n");

         cpu->deactivateStage(O3CPU::FetchIdx);
     }

     return Inactive;
 }

 /**
  * Squash a thread on request of the commit stage.
  *
  * @param newPC      PC fetch continues from.
  * @param seq_num    Sequence number of the squash; not used in this
  *                   function.
  * @param squashInst Instruction that caused the squash.
  * @param tid        Thread being squashed.
  */
 template <class Impl>
 void
 DefaultFetch<Impl>::squash(const TheISA::PCState &newPC,
                            const InstSeqNum seq_num, DynInstPtr squashInst,
                            ThreadID tid)
 {
     DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid);

     // Fetch's own state first ...
     this->doSquash(newPC, squashInst, tid);

     // ... then have the CPU drop every instruction of the thread that is
     // not in the ROB.
     cpu->removeInstsNotInROB(tid);
 }

 /**
  * Run the fetch stage for one cycle.
  *
  * Processes the signals coming from the later stages for every active
  * thread, fetches for numFetchingThreads threads, samples the number of
  * instructions fetched, refreshes the stage status if a thread changed
  * state, reports activity to the CPU and finally issues the pipelined
  * icache accesses that were requested during the cycle.
  */
 template <class Impl>
 void
 DefaultFetch<Impl>::tick()
 {
     wroteToTimeBuffer = false;

     // No pipelined icache access has been requested yet this cycle.
     for (ThreadID tid = 0; tid < Impl::MaxThreads; ++tid)
         issuePipelinedIfetch[tid] = false;

     // Check the signals for each active thread to determine its proper
     // status, and remember whether any status changed.  The iterator is
     // advanced before the thread is processed.
     bool status_change = false;
     std::list<ThreadID>::iterator cur = activeThreads->begin();
     const std::list<ThreadID>::iterator last = activeThreads->end();
     while (cur != last) {
         const ThreadID tid = *cur++;

         if (checkSignalsAndUpdate(tid))
             status_change = true;
     }

     DPRINTF(Fetch, "Running stage.\n");

     // In full system mode, follow the interrupt state signalled by commit.
     // A clear request wins if both signals are set.
     if (FullSystem) {
         if (fromCommit->commitInfo[0].interruptPending)
             interruptPending = true;

         if (fromCommit->commitInfo[0].clearInterrupt)
             interruptPending = false;
     }

     // Fetch each of the actively fetching threads.
     threadFetched = 0;
     while (threadFetched < numFetchingThreads) {
         fetch(status_change);
         threadFetched++;
     }

     // Record number of instructions fetched this cycle for distribution.
     fetchNisnDist.sample(numInst);

     // Change the fetch stage status if there was a status change.
     if (status_change)
         _status = updateFetchStatus();

     // If there was activity this cycle, inform the CPU of it.
     if (wroteToTimeBuffer || cpu->contextSwitch) {
         DPRINTF(Activity, "Activity this cycle.\n");

         cpu->activityThisCycle();
     }

     // Issue the next I-cache request if possible.
     for (ThreadID tid = 0; tid < Impl::MaxThreads; ++tid) {
         if (issuePipelinedIfetch[tid])
             pipelineIcacheAccesses(tid);
     }

     // Start counting fetched instructions from zero again.
     numInst = 0;
 }

 template <class Impl>
 bool
 DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)
 {
     // Update the per thread stall statuses.
     if (fromDecode->decodeBlock[tid]) {
         stalls[tid].decode = true;
     }

     if (fromDecode->decodeUnblock[tid]) {
         assert(stalls[tid].decode);
         assert(!fromDecode->decodeBlock[tid]);
         stalls[tid].decode = false;
     }

     if (fromRename->renameBlock[tid]) {
         stalls[tid].rename = true;
     }

     if (fromRename->renameUnblock[tid]) {
         assert(stalls[tid].rename);
         assert(!fromRename->renameBlock[tid]);
         stalls[tid].rename = false;
     }

     if (fromIEW->iewBlock[tid]) {
         stalls[tid].iew = true;
     }

     if (fromIEW->iewUnblock[tid]) {
         assert(stalls[tid].iew);
         assert(!fromIEW->iewBlock[tid]);
         stalls[tid].iew = false;
     }

     if (fromCommit->commitBlock[tid]) {
         stalls[tid].commit = true;
     }

     if (fromCommit->commitUnblock[tid]) {
         assert(stalls[tid].commit);
         assert(!fromCommit->commitBlock[tid]);
         stalls[tid].commit = false;
     }

     // Check squash signals from commit.
     if (fromCommit->commitInfo[tid].squash) {

         DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
                 "from commit.\n",tid);
         // In any case, squash.
         squash(fromCommit->commitInfo[tid].pc,
                fromCommit->commitInfo[tid].doneSeqNum,
                fromCommit->commitInfo[tid].squashInst, tid);

         // If it was a branch mispredict on a control instruction, update the
         // branch predictor with that instruction, otherwise just kill the
         // invalid state we generated in after sequence number
         if (fromCommit->commitInfo[tid].mispredictInst &&
             fromCommit->commitInfo[tid].mispredictInst->isControl()) {
             branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
                               fromCommit->commitInfo[tid].pc,
                               fromCommit->commitInfo[tid].branchTaken,
                               tid);
         } else {
             branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
                               tid);
         }

         return true;
     } else if (fromCommit->commitInfo[tid].doneSeqNum) {
         // Update the branch predictor if it wasn't a squashed instruction
         // that was broadcasted.
         branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid);
     }

     // Check ROB squash signals from commit.
     if (fromCommit->commitInfo[tid].robSquashing) {
         DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid);

         // Continue to squash.
         fetchStatus[tid] = Squashing;

         return true;
     }

     // Check squash signals from decode.
     if (fromDecode->decodeInfo[tid].squash) {
         DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
                 "from decode.\n",tid);

         // Update the branch predictor.
         if (fromDecode->decodeInfo[tid].branchMispredict) {
             branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
                               fromDecode->decodeInfo[tid].nextPC,
                               fromDecode->decodeInfo[tid].branchTaken,
                               tid);
         } else {
             branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
                               tid);
         }

         if (fetchStatus[tid] != Squashing) {

             DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
                 fromDecode->decodeInfo[tid].nextPC);
             // Squash unless we're already squashing
             squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
                              fromDecode->decodeInfo[tid].squashInst,
                              fromDecode->decodeInfo[tid].doneSeqNum,
                              tid);

             return true;
         }
     }

     if (checkStall(tid) &&
         fetchStatus[tid] != IcacheWaitResponse &&
         fetchStatus[tid] != IcacheWaitRetry &&
         fetchStatus[tid] != ItlbWait &&
         fetchStatus[tid] != QuiescePending) {
         DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);

         fetchStatus[tid] = Blocked;

         return true;
     }

     if (fetchStatus[tid] == Blocked ||
         fetchStatus[tid] == Squashing) {
         // Switch status to running if fetch isn't being told to block or
         // squash this cycle.
         DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
                 tid);

         fetchStatus[tid] = Running;

         return true;
     }

     // If we've reached this point, we have not gotten any signals that
     // cause fetch to change its status.  Fetch remains the same as before.
     return false;
 }

 /**
  * Wraps a decoded static instruction in a new dynamic instruction, registers
  * it with the CPU and appends it to the bundle being sent to decode.
  *
  * @param tid        Thread the instruction was fetched for.
  * @param staticInst Decoded instruction (or micro-op) to wrap.
  * @param curMacroop Macro-op passed through to the DynInst constructor.
  * @param thisPC     PC state of the instruction.
  * @param nextPC     PC state handed to the DynInst as its successor.
  * @param trace      When tracing is compiled in, request a trace record.
  * @return The newly created dynamic instruction.
  */
 template<class Impl>
 typename Impl::DynInstPtr
 DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
                               StaticInstPtr curMacroop, TheISA::PCState thisPC,
                               TheISA::PCState nextPC, bool trace)
 {
     // Each dynamic instruction is tagged with a fresh sequence number
     // obtained from the CPU.
     InstSeqNum seqNum = cpu->getAndIncrementInstSeq();

     // Build the dynamic instruction and attach the per-thread information.
     DynInstPtr dynInst =
         new DynInst(staticInst, curMacroop, thisPC, nextPC, seqNum, cpu);
     dynInst->setTid(tid);
     dynInst->setASID(tid);
     dynInst->setThreadState(cpu->thread[tid]);

     DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x (%d) created "
             "[sn:%lli].\n", tid, thisPC.instAddr(),
             thisPC.microPC(), seqNum);

     DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid,
             dynInst->staticInst->disassemble(thisPC.instAddr()));

 #if TRACING_ON
     // Only ask the tracer for a record when the caller wants one.
     if (trace) {
         dynInst->traceData =
             cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid),
                     dynInst->staticInst, thisPC, curMacroop);
     }
 #else
     dynInst->traceData = NULL;
 #endif

     // Register the instruction in the CPU's instruction list and remember
     // where it was inserted.
     dynInst->setInstListIt(cpu->addInst(dynInst));

     // Append to the next free slot of the bundle heading to decode; the
     // caller must not have used up the fetch width yet.
     assert(numInst < fetchWidth);
     toDecode->insts[toDecode->size] = dynInst;
     toDecode->size++;

     // Remember whether an interrupt may be taken at this boundary.
     delayedCommit[tid] = dynInst->isDelayedCommit();

     return dynInst;
 }

 /**
  * Performs one cycle of fetch for the thread chosen by the fetch policy.
  *
  * If the required cache block is not buffered, an icache access is started
  * and the function returns.  Otherwise instructions are decoded out of the
  * buffered block (or taken from the current macro-op / microcode ROM) and
  * queued for decode until the fetch width is used up, a branch is predicted,
  * the buffered block runs out, or a quiesce instruction is seen.
  *
  * @param status_change Set to true when this call changes the fetch status
  *                      of the selected thread; it is never cleared here.
  */
 template<class Impl>
 void
 DefaultFetch<Impl>::fetch(bool &status_change)
 {
     //////////////////////////////////////////
     // Start actual fetch
     //////////////////////////////////////////
     ThreadID tid = getFetchingThread(fetchPolicy);

     assert(!cpu->switchedOut());

     if (tid == InvalidThreadID) {
         // Breaks looping condition in tick()
         threadFetched = numFetchingThreads;

         if (numThreads == 1) {  // @todo Per-thread stats
             profileStall(0);
         }

         return;
     }

     DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);

     // The current PC.
     TheISA::PCState thisPC = pc[tid];

     // pcOffset is the number of bytes already consumed past thisPC for the
     // instruction currently being assembled by the decoder.
     Addr pcOffset = fetchOffset[tid];
     Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;

     bool inRom = isRomMicroPC(thisPC.microPC());

     // If returning from the delay of a cache miss, then update the status
     // to running, otherwise do the cache access.  Possibly move this up
     // to tick() function.
     if (fetchStatus[tid] == IcacheAccessComplete) {
         DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid);

         fetchStatus[tid] = Running;
         status_change = true;
     } else if (fetchStatus[tid] == Running) {
         // Align the fetch PC so its at the start of a cache block.
         Addr block_PC = icacheBlockAlignPC(fetchAddr);

         // If buffer is no longer valid or fetchAddr has moved to point
         // to the next cache block, AND we have no remaining ucode
         // from a macro-op, then start fetch from icache.
         if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid])
             && !inRom && !macroop[tid]) {
             DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
                     "instruction, starting at PC %s.\n", tid, thisPC);

             fetchCacheLine(fetchAddr, tid, thisPC.instAddr());

             // Attribute this cycle to whatever the access left us
             // waiting on.
             if (fetchStatus[tid] == IcacheWaitResponse)
                 ++icacheStallCycles;
             else if (fetchStatus[tid] == ItlbWait)
                 ++fetchTlbCycles;
             else
                 ++fetchMiscStallCycles;
             return;
         } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) {
             // Stall CPU if an interrupt is posted and we're not issuing
             // an delayed commit micro-op currently (delayed commit instructions
             // are not interruptable by interrupts, only faults)
             ++fetchMiscStallCycles;
             DPRINTF(Fetch, "[tid:%i]: Fetch is stalled!\n", tid);
             return;
         }
     } else {
         if (fetchStatus[tid] == Idle) {
             ++fetchIdleCycles;
             DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid);
         }

         // Status is neither Running nor IcacheAccessComplete (e.g. Idle),
         // so fetch should do nothing this cycle.
         return;
     }

     ++fetchCycles;

     TheISA::PCState nextPC = thisPC;

     StaticInstPtr staticInst = NULL;
     StaticInstPtr curMacroop = macroop[tid];

     // If the read of the first instruction was successful, then grab the
     // instructions from the rest of the cache line and put them into the
     // queue heading to decode.

     DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
             "decode.\n", tid);

     // Need to keep track of whether or not a predicted branch
     // ended this fetch block.
     bool predictedBranch = false;

     TheISA::MachInst *cacheInsts =
         reinterpret_cast<TheISA::MachInst *>(cacheData[tid]);

     // Both numInsts and blkOffset are measured in units of instSize, not
     // in bytes.
     const unsigned numInsts = cacheBlkSize / instSize;
     unsigned blkOffset = (fetchAddr - cacheDataPC[tid]) / instSize;

     // Loop through instruction memory from the cache.
     // Keep issuing while fetchWidth is available and branch is not
     // predicted taken
     while (numInst < fetchWidth && !predictedBranch) {

         // We need to process more memory if we aren't going to get a
         // StaticInst from the rom, the current macroop, or what's already
         // in the decoder.
         bool needMem = !inRom && !curMacroop &&
             !decoder[tid]->instReady();
         fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
         Addr block_PC = icacheBlockAlignPC(fetchAddr);

         if (needMem) {
             // If buffer is no longer valid or fetchAddr has moved to point
             // to the next cache block then start fetch from icache.
             if (!cacheDataValid[tid] || block_PC != cacheDataPC[tid])
                 break;

             if (blkOffset >= numInsts) {
                 // We need to process more memory, but we've run out of the
                 // current block.
                 break;
             }

             if (ISA_HAS_DELAY_SLOT && pcOffset == 0) {
                 // Walk past any annulled delay slot instructions.
                 Addr pcAddr = thisPC.instAddr() & BaseCPU::PCMask;
                 while (fetchAddr != pcAddr && blkOffset < numInsts) {
                     blkOffset++;
                     fetchAddr += instSize;
                 }
                 if (blkOffset >= numInsts)
                     break;
             }

             // Feed the next chunk of the buffered block to the decoder,
             // converting it from guest to host byte order first.
             MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]);
             decoder[tid]->moreBytes(thisPC, fetchAddr, inst);

             if (decoder[tid]->needMoreBytes()) {
                 blkOffset++;
                 fetchAddr += instSize;
                 pcOffset += instSize;
             }
         }

         // Extract as many instructions and/or microops as we can from
         // the memory we've processed so far.
         do {
             if (!(curMacroop || inRom)) {
                 if (decoder[tid]->instReady()) {
                     staticInst = decoder[tid]->decode(thisPC);

                     // Increment stat of fetched instructions.
                     ++fetchedInsts;

                     if (staticInst->isMacroop()) {
                         curMacroop = staticInst;
                     } else {
                         pcOffset = 0;
                     }
                 } else {
                     // We need more bytes for this instruction so blkOffset and
                     // pcOffset will be updated
                     break;
                 }
             }
             // Whether we're moving to a new macroop because we're at the
             // end of the current one, or the branch predictor incorrectly
             // thinks we are...
             bool newMacro = false;
             if (curMacroop || inRom) {
                 if (inRom) {
                     staticInst = cpu->microcodeRom.fetchMicroop(
                             thisPC.microPC(), curMacroop);
                 } else {
                     staticInst = curMacroop->fetchMicroop(thisPC.microPC());
                 }
                 newMacro |= staticInst->isLastMicroop();
             }

             DynInstPtr instruction =
                 buildInst(tid, staticInst, curMacroop,
                           thisPC, nextPC, true);

             numInst++;

 #if TRACING_ON
             if (DTRACE(O3PipeView)) {
                 instruction->fetchTick = curTick();
             }
 #endif

             nextPC = thisPC;

             // If we're branching after this instruction, quit fetching
             // from the same block then.
             predictedBranch |= thisPC.branching();
             predictedBranch |=
                 lookupAndUpdateNextPC(instruction, nextPC);
             if (predictedBranch) {
                 DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC);
             }

             newMacro |= thisPC.instAddr() != nextPC.instAddr();

             // Move to the next instruction, unless we have a branch.
             thisPC = nextPC;
             inRom = isRomMicroPC(thisPC.microPC());

             if (newMacro) {
                 // Starting a new instruction: recompute the position in
                 // the buffered block and drop any macro-op state.
                 fetchAddr = thisPC.instAddr() & BaseCPU::PCMask;
                 blkOffset = (fetchAddr - cacheDataPC[tid]) / instSize;
                 pcOffset = 0;
                 curMacroop = NULL;
             }

             if (instruction->isQuiesce()) {
                 DPRINTF(Fetch,
                         "Quiesce instruction encountered, halting fetch!\n");
                 fetchStatus[tid] = QuiescePending;
                 status_change = true;
                 break;
             }
         } while ((curMacroop || decoder[tid]->instReady()) &&
                  numInst < fetchWidth);
     }

     if (predictedBranch) {
         DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
                 "instruction encountered.\n", tid);
     } else if (numInst >= fetchWidth) {
         DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
                 "for this cycle.\n", tid);
     } else if (blkOffset >= numInsts) {
         // blkOffset counts instSize-sized units, so it is compared against
         // the number of such units in a block rather than the block's size
         // in bytes.
         DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
                 "block.\n", tid);
     }

     // Save the per-thread state needed to resume on the next call.
     macroop[tid] = curMacroop;
     fetchOffset[tid] = pcOffset;

     if (numInst > 0) {
         wroteToTimeBuffer = true;
     }

     pc[tid] = thisPC;

     // pipeline a fetch if we're crossing a cache boundary and not in
     // a state that would preclude fetching
     fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
     Addr block_PC = icacheBlockAlignPC(fetchAddr);
     issuePipelinedIfetch[tid] = block_PC != cacheDataPC[tid] &&
         fetchStatus[tid] != IcacheWaitResponse &&
         fetchStatus[tid] != ItlbWait &&
         fetchStatus[tid] != IcacheWaitRetry &&
         fetchStatus[tid] != QuiescePending &&
         !curMacroop;
 }

 /**
  * Handles a retry notification for the instruction port: re-sends the
  * pending fetch packet if there is one, otherwise just clears the
  * blocked-cache flag.
  */
 template<class Impl>
 void
 DefaultFetch<Impl>::recvRetry()
 {
     if (retryPkt == NULL) {
         // Access has been squashed since it was sent out.  Just clear
         // the cache being blocked.
         assert(retryTid == InvalidThreadID);
         cacheBlocked = false;
         return;
     }

     // A packet is waiting, so the bookkeeping must describe a thread that
     // is stalled on exactly this retry.
     assert(cacheBlocked);
     assert(retryTid != InvalidThreadID);
     assert(fetchStatus[retryTid] == IcacheWaitRetry);

     // If the port refuses the packet again, keep all retry state intact.
     if (!cpu->getInstPort().sendTimingReq(retryPkt))
         return;

     // The request went out: wait for the response and clear retry state.
     fetchStatus[retryTid] = IcacheWaitResponse;
     retryPkt = NULL;
     retryTid = InvalidThreadID;
     cacheBlocked = false;
 }

 ///////////////////////////////////////
 //                                   //
 //  SMT FETCH POLICY MAINTAINED HERE //
 //                                   //
 ///////////////////////////////////////
 /**
  * Chooses the thread to fetch from this cycle.
  *
  * With a single thread, the first active thread is returned if its status
  * is Running, IcacheAccessComplete or Idle.  With several threads the
  * choice is delegated to the policy selected by fetch_priority.
  *
  * @param fetch_priority SMT fetch policy to apply.
  * @return The chosen thread, or InvalidThreadID if none can fetch.
  */
 template<class Impl>
 ThreadID
 DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)
 {
     if (numThreads <= 1) {
         // Single-threaded: only the first active thread is a candidate.
         if (activeThreads->empty())
             return InvalidThreadID;

         ThreadID tid = activeThreads->front();

         const bool can_fetch = fetchStatus[tid] == Running ||
             fetchStatus[tid] == IcacheAccessComplete ||
             fetchStatus[tid] == Idle;

         if (can_fetch)
             return tid;

         return InvalidThreadID;
     }

     // Multi-threaded: dispatch on the configured policy.
     switch (fetch_priority) {

       case SingleThread:
         return 0;

       case RoundRobin:
         return roundRobin();

       case IQ:
         return iqCount();

       case LSQ:
         return lsqCount();

       case Branch:
         return branchCount();

       default:
         return InvalidThreadID;
     }
 }


 /**
  * Round-robin SMT fetch policy.
  *
  * Walks priorityList from the front and picks the first thread whose fetch
  * status is Running, IcacheAccessComplete or Idle.  The chosen thread is
  * moved to the back of the list so the other threads are considered before
  * it next time.
  *
  * @return The chosen thread, or InvalidThreadID if no thread can fetch.
  */
 template<class Impl>
 ThreadID
 DefaultFetch<Impl>::roundRobin()
 {
     list<ThreadID>::iterator pri_iter = priorityList.begin();
     list<ThreadID>::iterator end      = priorityList.end();

     for (; pri_iter != end; ++pri_iter) {
         ThreadID high_pri = *pri_iter;

         // Thread IDs index the per-thread arrays, so the ID must be
         // strictly below the thread count.
         assert(high_pri < numThreads);

         if (fetchStatus[high_pri] == Running ||
             fetchStatus[high_pri] == IcacheAccessComplete ||
             fetchStatus[high_pri] == Idle) {

             // Rotate the winner to the lowest priority.  The iterator is
             // not used again after the erase.
             priorityList.erase(pri_iter);
             priorityList.push_back(high_pri);

             return high_pri;
         }
     }

     return InvalidThreadID;
 }

 /**
  * IQ-count SMT fetch policy.
  *
  * Among the active threads whose fetch status is Running,
  * IcacheAccessComplete or Idle, picks the one that IEW reports as having
  * the fewest instruction-queue entries.  When several fetchable threads
  * share the lowest count, the one appearing first in activeThreads wins.
  *
  * A direct scan is used instead of a count-keyed map so that threads with
  * equal counts remain distinct candidates.
  *
  * @return The chosen thread, or InvalidThreadID if no thread can fetch.
  */
 template<class Impl>
 ThreadID
 DefaultFetch<Impl>::iqCount()
 {
     ThreadID best_tid = InvalidThreadID;
     unsigned best_count = 0;

     list<ThreadID>::iterator threads = activeThreads->begin();
     list<ThreadID>::iterator end = activeThreads->end();

     for (; threads != end; ++threads) {
         ThreadID tid = *threads;

         // Threads that cannot fetch this cycle are not candidates.
         if (fetchStatus[tid] != Running &&
             fetchStatus[tid] != IcacheAccessComplete &&
             fetchStatus[tid] != Idle) {
             continue;
         }

         unsigned iq_entries = fromIEW->iewInfo[tid].iqCount;

         // Strict comparison keeps the earliest thread on ties.
         if (best_tid == InvalidThreadID || iq_entries < best_count) {
             best_tid = tid;
             best_count = iq_entries;
         }
     }

     return best_tid;
 }

 /**
  * LSQ-count SMT fetch policy.
  *
  * Among the active threads whose fetch status is Running,
  * IcacheAccessComplete or Idle, picks the one that IEW reports as having
  * the fewest load/store-queue entries.  When several fetchable threads
  * share the lowest count, the one appearing first in activeThreads wins.
  *
  * A direct scan is used instead of a count-keyed map so that threads with
  * equal counts remain distinct candidates.
  *
  * @return The chosen thread, or InvalidThreadID if no thread can fetch.
  */
 template<class Impl>
 ThreadID
 DefaultFetch<Impl>::lsqCount()
 {
     ThreadID best_tid = InvalidThreadID;
     unsigned best_count = 0;

     list<ThreadID>::iterator threads = activeThreads->begin();
     list<ThreadID>::iterator end = activeThreads->end();

     for (; threads != end; ++threads) {
         ThreadID tid = *threads;

         // Threads that cannot fetch this cycle are not candidates.
         if (fetchStatus[tid] != Running &&
             fetchStatus[tid] != IcacheAccessComplete &&
             fetchStatus[tid] != Idle) {
             continue;
         }

         unsigned lsq_entries = fromIEW->iewInfo[tid].ldstqCount;

         // Strict comparison keeps the earliest thread on ties.
         if (best_tid == InvalidThreadID || lsq_entries < best_count) {
             best_tid = tid;
             best_count = lsq_entries;
         }
     }

     return best_tid;
 }

 /**
  * Branch-count SMT fetch policy.  Not implemented: calling it reports the
  * problem through panic().
  *
  * @return InvalidThreadID, should control ever continue past panic().
  */
 template<class Impl>
 ThreadID
 DefaultFetch<Impl>::branchCount()
 {
     panic("Branch Count Fetch policy unimplemented\n");

     // Keeps a return statement on every path of this non-void function.
     return InvalidThreadID;
 }

 /**
  * Starts an icache access for the thread's next fetch address ahead of
  * time, provided a pipelined fetch was requested for the thread and the
  * block is not already buffered.
  *
  * @param tid Thread to issue the pipelined access for.
  */
 template<class Impl>
 void
 DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid)
 {
     // Only act when a pipelined fetch has been flagged for this thread.
     if (!issuePipelinedIfetch[tid])
         return;

     // The next PC to access.
     TheISA::PCState next_pc = pc[tid];

     // Nothing is requested from the icache while the micro-PC points into
     // the microcode ROM.
     if (isRomMicroPC(next_pc.microPC()))
         return;

     Addr byte_offset = fetchOffset[tid];
     Addr line_addr = (next_pc.instAddr() + byte_offset) & BaseCPU::PCMask;

     // Start address of the cache block containing the fetch address.
     Addr line_base = icacheBlockAlignPC(line_addr);

     // The buffer already holds this block, so no access is needed.
     bool already_buffered =
         cacheDataValid[tid] && line_base == cacheDataPC[tid];
     if (already_buffered)
         return;

     DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, "
             "starting at PC %s.\n", tid, next_pc);

     fetchCacheLine(line_addr, tid, next_pc.instAddr());
 }

 /**
  * Records why fetch could not pick a thread this cycle by bumping the
  * matching stall counter and emitting a trace message.
  *
  * A pending drain is checked first, then the absence of active threads,
  * and only then the thread's fetch status.
  *
  * @param tid Thread whose stall state is inspected.
  */
 template<class Impl>
 void
 DefaultFetch<Impl>::profileStall(ThreadID tid)
 {
     DPRINTF(Fetch,"There are no more threads available to fetch from.\n");

     // @todo Per-thread stats

     if (stalls[tid].drain) {
         ++fetchPendingDrainCycles;
         DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
         return;
     }

     if (activeThreads->empty()) {
         ++fetchNoActiveThreadStallCycles;
         DPRINTF(Fetch, "Fetch has no active thread!\n");
         return;
     }

     // Otherwise the reason is given by the thread's fetch status.
     switch (fetchStatus[tid]) {
       case Blocked:
         ++fetchBlockedCycles;
         DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid);
         break;

       case Squashing:
         ++fetchSquashCycles;
         DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid);
         break;

       case IcacheWaitResponse:
         ++icacheStallCycles;
         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n",
                 tid);
         break;

       case ItlbWait:
         ++fetchTlbCycles;
         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to "
                 "finish!\n", tid);
         break;

       case TrapPending:
         ++fetchPendingTrapStallCycles;
         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap!\n",
                 tid);
         break;

       case QuiescePending:
         ++fetchPendingQuiesceStallCycles;
         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending quiesce "
                 "instruction!\n", tid);
         break;

       case IcacheWaitRetry:
         ++fetchIcacheWaitRetryStallCycles;
         DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an I-cache retry!\n",
                 tid);
         break;

       case NoGoodAddr:
         // No counter is kept for this case; it is only traced.
         DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n",
                 tid);
         break;

       default:
         DPRINTF(Fetch, "[tid:%i]: Unexpected fetch stall reason (Status: %i).\n",
                 tid, fetchStatus[tid]);
         break;
     }
 }