| /* |
| * Copyright (c) 2010-2014 ARM Limited |
| * Copyright (c) 2012-2013 AMD |
| * All rights reserved. |
| * |
| * The license below extends only to copyright in the software and shall |
| * not be construed as granting a license to any other intellectual |
| * property including but not limited to intellectual property relating |
| * to a hardware implementation of the functionality of the software |
| * licensed hereunder. You may use the software subject to the license |
| * terms below provided that you ensure that this notice is replicated |
| * unmodified and in its entirety in all distributions of the software, |
| * modified or unmodified, in source code or in binary form. |
| * |
| * Copyright (c) 2004-2006 The Regents of The University of Michigan |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer; |
| * redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution; |
| * neither the name of the copyright holders nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #ifndef __CPU_O3_FETCH_IMPL_HH__ |
| #define __CPU_O3_FETCH_IMPL_HH__ |
| |
| #include <algorithm> |
| #include <cstring> |
| #include <list> |
| #include <map> |
| #include <queue> |
| |
| #include "arch/generic/tlb.hh" |
| #include "arch/utility.hh" |
| #include "base/random.hh" |
| #include "base/types.hh" |
| #include "config/the_isa.hh" |
| #include "cpu/base.hh" |
| #include "cpu/o3/cpu.hh" |
| #include "cpu/o3/fetch.hh" |
| #include "cpu/exetrace.hh" |
| #include "debug/Activity.hh" |
| #include "debug/Drain.hh" |
| #include "debug/Fetch.hh" |
| #include "debug/O3CPU.hh" |
| #include "debug/O3PipeView.hh" |
| #include "mem/packet.hh" |
| #include "params/DerivO3CPU.hh" |
| #include "sim/byteswap.hh" |
| #include "sim/core.hh" |
| #include "sim/eventq.hh" |
| #include "sim/full_system.hh" |
| #include "sim/system.hh" |
| #include "cpu/o3/isa_specific.hh" |
| |
| template<class Impl> |
| DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, const DerivO3CPUParams ¶ms) |
| : fetchPolicy(params.smtFetchPolicy), |
| cpu(_cpu), |
| branchPred(nullptr), |
| decodeToFetchDelay(params.decodeToFetchDelay), |
| renameToFetchDelay(params.renameToFetchDelay), |
| iewToFetchDelay(params.iewToFetchDelay), |
| commitToFetchDelay(params.commitToFetchDelay), |
| fetchWidth(params.fetchWidth), |
| decodeWidth(params.decodeWidth), |
| retryPkt(NULL), |
| retryTid(InvalidThreadID), |
| cacheBlkSize(cpu->cacheLineSize()), |
| fetchBufferSize(params.fetchBufferSize), |
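      // Note: this mask assumes fetchBufferSize is a power of two; e.g. a
      // 64-byte buffer gives mask 0x3f, and addr & ~mask aligns addr down
      // to a 64-byte boundary.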
| fetchBufferMask(fetchBufferSize - 1), |
| fetchQueueSize(params.fetchQueueSize), |
| numThreads(params.numThreads), |
| numFetchingThreads(params.smtNumFetchingThreads), |
| icachePort(this, _cpu), |
      finishTranslationEvent(this),
      fetchStats(_cpu, this)
| { |
| if (numThreads > Impl::MaxThreads) |
| fatal("numThreads (%d) is larger than compiled limit (%d),\n" |
| "\tincrease MaxThreads in src/cpu/o3/impl.hh\n", |
| numThreads, static_cast<int>(Impl::MaxThreads)); |
| if (fetchWidth > Impl::MaxWidth) |
| fatal("fetchWidth (%d) is larger than compiled limit (%d),\n" |
| "\tincrease MaxWidth in src/cpu/o3/impl.hh\n", |
| fetchWidth, static_cast<int>(Impl::MaxWidth)); |
| if (fetchBufferSize > cacheBlkSize) |
| fatal("fetch buffer size (%u bytes) is greater than the cache " |
| "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize); |
| if (cacheBlkSize % fetchBufferSize) |
| fatal("cache block (%u bytes) is not a multiple of the " |
| "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize); |
| |
| // Get the size of an instruction. |
| instSize = sizeof(TheISA::MachInst); |
| |
| for (int i = 0; i < Impl::MaxThreads; i++) { |
| fetchStatus[i] = Idle; |
| decoder[i] = nullptr; |
| pc[i] = 0; |
| fetchOffset[i] = 0; |
| macroop[i] = nullptr; |
| delayedCommit[i] = false; |
| memReq[i] = nullptr; |
| stalls[i] = {false, false}; |
| fetchBuffer[i] = NULL; |
| fetchBufferPC[i] = 0; |
| fetchBufferValid[i] = false; |
| lastIcacheStall[i] = 0; |
| issuePipelinedIfetch[i] = false; |
| } |
| |
| branchPred = params.branchPred; |
| |
| for (ThreadID tid = 0; tid < numThreads; tid++) { |
| decoder[tid] = new TheISA::Decoder( |
| dynamic_cast<TheISA::ISA *>(params.isa[tid])); |
        // Create space to buffer the fetched data; the fetch buffer
        // may be smaller than a full cache line.
| fetchBuffer[tid] = new uint8_t[fetchBufferSize]; |
| } |
| } |
| |
| template <class Impl> |
| std::string |
| DefaultFetch<Impl>::name() const |
| { |
| return cpu->name() + ".fetch"; |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::regProbePoints() |
| { |
| ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch"); |
    ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),
                                                       "FetchRequest");
}
| |
| template <class Impl> |
| DefaultFetch<Impl>:: |
| FetchStatGroup::FetchStatGroup(O3CPU *cpu, DefaultFetch *fetch) |
| : Stats::Group(cpu, "fetch"), |
| ADD_STAT(icacheStallCycles, UNIT_CYCLE, |
| "Number of cycles fetch is stalled on an Icache miss"), |
| ADD_STAT(insts, UNIT_COUNT, "Number of instructions fetch has processed"), |
| ADD_STAT(branches, UNIT_COUNT, |
| "Number of branches that fetch encountered"), |
| ADD_STAT(predictedBranches, UNIT_COUNT, |
| "Number of branches that fetch has predicted taken"), |
| ADD_STAT(cycles, UNIT_CYCLE, |
| "Number of cycles fetch has run and was not squashing or " |
| "blocked"), |
| ADD_STAT(squashCycles, UNIT_CYCLE, |
| "Number of cycles fetch has spent squashing"), |
| ADD_STAT(tlbCycles, UNIT_CYCLE, |
| "Number of cycles fetch has spent waiting for tlb"), |
| ADD_STAT(idleCycles, UNIT_CYCLE, "Number of cycles fetch was idle"), |
| ADD_STAT(blockedCycles, UNIT_CYCLE, |
| "Number of cycles fetch has spent blocked"), |
      ADD_STAT(miscStallCycles, UNIT_CYCLE,
               "Number of cycles fetch has spent waiting on interrupts, bad "
               "addresses, or running out of MSHRs"),
| ADD_STAT(pendingDrainCycles, UNIT_CYCLE, |
| "Number of cycles fetch has spent waiting on pipes to drain"), |
| ADD_STAT(noActiveThreadStallCycles, UNIT_CYCLE, |
| "Number of stall cycles due to no active thread to fetch from"), |
| ADD_STAT(pendingTrapStallCycles, UNIT_CYCLE, |
| "Number of stall cycles due to pending traps"), |
| ADD_STAT(pendingQuiesceStallCycles, UNIT_CYCLE, |
| "Number of stall cycles due to pending quiesce instructions"), |
| ADD_STAT(icacheWaitRetryStallCycles, UNIT_CYCLE, |
| "Number of stall cycles due to full MSHR"), |
| ADD_STAT(cacheLines, UNIT_COUNT, "Number of cache lines fetched"), |
| ADD_STAT(icacheSquashes, UNIT_COUNT, |
| "Number of outstanding Icache misses that were squashed"), |
| ADD_STAT(tlbSquashes, UNIT_COUNT, |
| "Number of outstanding ITLB misses that were squashed"), |
| ADD_STAT(nisnDist, UNIT_COUNT, |
| "Number of instructions fetched each cycle (Total)"), |
| ADD_STAT(idleRate, UNIT_RATIO, "Ratio of cycles fetch was idle", |
| idleCycles / cpu->baseStats.numCycles), |
| ADD_STAT(branchRate, UNIT_RATIO, "Number of branch fetches per cycle", |
| branches / cpu->baseStats.numCycles), |
| ADD_STAT(rate, UNIT_RATE(Stats::Units::Count, Stats::Units::Cycle), |
| "Number of inst fetches per cycle", |
| insts / cpu->baseStats.numCycles) |
| { |
| icacheStallCycles |
| .prereq(icacheStallCycles); |
| insts |
| .prereq(insts); |
| branches |
| .prereq(branches); |
| predictedBranches |
| .prereq(predictedBranches); |
| cycles |
| .prereq(cycles); |
| squashCycles |
| .prereq(squashCycles); |
| tlbCycles |
| .prereq(tlbCycles); |
| idleCycles |
| .prereq(idleCycles); |
| blockedCycles |
| .prereq(blockedCycles); |
| cacheLines |
| .prereq(cacheLines); |
| miscStallCycles |
| .prereq(miscStallCycles); |
| pendingDrainCycles |
| .prereq(pendingDrainCycles); |
| noActiveThreadStallCycles |
| .prereq(noActiveThreadStallCycles); |
| pendingTrapStallCycles |
| .prereq(pendingTrapStallCycles); |
| pendingQuiesceStallCycles |
| .prereq(pendingQuiesceStallCycles); |
| icacheWaitRetryStallCycles |
| .prereq(icacheWaitRetryStallCycles); |
| icacheSquashes |
| .prereq(icacheSquashes); |
| tlbSquashes |
| .prereq(tlbSquashes); |
| nisnDist |
| .init(/* base value */ 0, |
| /* last value */ fetch->fetchWidth, |
| /* bucket size */ 1) |
| .flags(Stats::pdf); |
| idleRate |
| .prereq(idleRate); |
| branchRate |
| .flags(Stats::total); |
| rate |
| .flags(Stats::total); |
| } |

template<class Impl>
| void |
| DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer) |
| { |
| timeBuffer = time_buffer; |
| |
| // Create wires to get information from proper places in time buffer. |
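    // A wire created with a negative offset reads the entry that was
    // written that many cycles in the past.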
| fromDecode = timeBuffer->getWire(-decodeToFetchDelay); |
| fromRename = timeBuffer->getWire(-renameToFetchDelay); |
| fromIEW = timeBuffer->getWire(-iewToFetchDelay); |
| fromCommit = timeBuffer->getWire(-commitToFetchDelay); |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr) |
| { |
| activeThreads = at_ptr; |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *ftb_ptr) |
| { |
| // Create wire to write information to proper place in fetch time buf. |
| toDecode = ftb_ptr->getWire(0); |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::startupStage() |
| { |
| assert(priorityList.empty()); |
| resetStage(); |
| |
| // Fetch needs to start fetching instructions at the very beginning, |
| // so it must start up in active state. |
| switchToActive(); |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::clearStates(ThreadID tid) |
| { |
| fetchStatus[tid] = Running; |
| pc[tid] = cpu->pcState(tid); |
| fetchOffset[tid] = 0; |
| macroop[tid] = NULL; |
| delayedCommit[tid] = false; |
| memReq[tid] = NULL; |
| stalls[tid].decode = false; |
| stalls[tid].drain = false; |
| fetchBufferPC[tid] = 0; |
| fetchBufferValid[tid] = false; |
| fetchQueue[tid].clear(); |
| |
| // TODO not sure what to do with priorityList for now |
| // priorityList.push_back(tid); |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::resetStage() |
| { |
| numInst = 0; |
| interruptPending = false; |
| cacheBlocked = false; |
| |
| priorityList.clear(); |
| |
| // Setup PC and nextPC with initial state. |
| for (ThreadID tid = 0; tid < numThreads; ++tid) { |
| fetchStatus[tid] = Running; |
| pc[tid] = cpu->pcState(tid); |
| fetchOffset[tid] = 0; |
| macroop[tid] = NULL; |
| |
| delayedCommit[tid] = false; |
| memReq[tid] = NULL; |
| |
| stalls[tid].decode = false; |
| stalls[tid].drain = false; |
| |
| fetchBufferPC[tid] = 0; |
| fetchBufferValid[tid] = false; |
| |
| fetchQueue[tid].clear(); |
| |
| priorityList.push_back(tid); |
| } |
| |
| wroteToTimeBuffer = false; |
| _status = Inactive; |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt) |
| { |
| ThreadID tid = cpu->contextToThread(pkt->req->contextId()); |
| |
| DPRINTF(Fetch, "[tid:%i] Waking up from cache miss.\n", tid); |
| assert(!cpu->switchedOut()); |
| |
| // Only change the status if it's still waiting on the icache access |
| // to return. |
| if (fetchStatus[tid] != IcacheWaitResponse || |
| pkt->req != memReq[tid]) { |
| ++fetchStats.icacheSquashes; |
| delete pkt; |
| return; |
| } |
| |
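    // Copy the returned data into this thread's fetch buffer.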
| memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize); |
| fetchBufferValid[tid] = true; |
| |
| // Wake up the CPU (if it went to sleep and was waiting on |
| // this completion event). |
| cpu->wakeCPU(); |
| |
| DPRINTF(Activity, "[tid:%i] Activating fetch due to cache completion\n", |
| tid); |
| |
| switchToActive(); |
| |
| // Only switch to IcacheAccessComplete if we're not stalled as well. |
| if (checkStall(tid)) { |
| fetchStatus[tid] = Blocked; |
| } else { |
| fetchStatus[tid] = IcacheAccessComplete; |
| } |
| |
| pkt->req->setAccessLatency(); |
| cpu->ppInstAccessComplete->notify(pkt); |
    // Free the packet and reset the mem req to NULL.
    delete pkt;
    memReq[tid] = NULL;
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::drainResume() |
| { |
| for (ThreadID i = 0; i < numThreads; ++i) { |
| stalls[i].decode = false; |
| stalls[i].drain = false; |
| } |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::drainSanityCheck() const |
| { |
| assert(isDrained()); |
| assert(retryPkt == NULL); |
| assert(retryTid == InvalidThreadID); |
| assert(!cacheBlocked); |
| assert(!interruptPending); |
| |
| for (ThreadID i = 0; i < numThreads; ++i) { |
| assert(!memReq[i]); |
| assert(fetchStatus[i] == Idle || stalls[i].drain); |
| } |
| |
| branchPred->drainSanityCheck(); |
| } |
| |
| template <class Impl> |
| bool |
| DefaultFetch<Impl>::isDrained() const |
| { |
    /* Make sure that threads are either idle or that the commit stage
| * has signaled that draining has completed by setting the drain |
| * stall flag. This effectively forces the pipeline to be disabled |
| * until the whole system is drained (simulation may continue to |
| * drain other components). |
| */ |
| for (ThreadID i = 0; i < numThreads; ++i) { |
| // Verify fetch queues are drained |
| if (!fetchQueue[i].empty()) |
| return false; |
| |
| // Return false if not idle or drain stalled |
| if (fetchStatus[i] != Idle) { |
| if (fetchStatus[i] == Blocked && stalls[i].drain) |
| continue; |
| else |
| return false; |
| } |
| } |
| |
| /* The pipeline might start up again in the middle of the drain |
| * cycle if the finish translation event is scheduled, so make |
| * sure that's not the case. |
| */ |
| return !finishTranslationEvent.scheduled(); |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::takeOverFrom() |
| { |
| assert(cpu->getInstPort().isConnected()); |
    resetStage();
}
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::drainStall(ThreadID tid) |
| { |
| assert(cpu->isDraining()); |
| assert(!stalls[tid].drain); |
| DPRINTF(Drain, "%i: Thread drained.\n", tid); |
| stalls[tid].drain = true; |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::wakeFromQuiesce() |
| { |
| DPRINTF(Fetch, "Waking up from quiesce\n"); |
| // Hopefully this is safe |
| // @todo: Allow other threads to wake from quiesce. |
| fetchStatus[0] = Running; |
| } |
| |
| template <class Impl> |
| inline void |
| DefaultFetch<Impl>::switchToActive() |
| { |
| if (_status == Inactive) { |
| DPRINTF(Activity, "Activating stage.\n"); |
| |
| cpu->activateStage(O3CPU::FetchIdx); |
| |
| _status = Active; |
| } |
| } |
| |
| template <class Impl> |
| inline void |
| DefaultFetch<Impl>::switchToInactive() |
| { |
| if (_status == Active) { |
| DPRINTF(Activity, "Deactivating stage.\n"); |
| |
| cpu->deactivateStage(O3CPU::FetchIdx); |
| |
| _status = Inactive; |
| } |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::deactivateThread(ThreadID tid) |
| { |
| // Update priority list |
| auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid); |
| if (thread_it != priorityList.end()) { |
| priorityList.erase(thread_it); |
| } |
| } |
| |
| template <class Impl> |
| bool |
| DefaultFetch<Impl>::lookupAndUpdateNextPC( |
| const DynInstPtr &inst, TheISA::PCState &nextPC) |
| { |
| // Do branch prediction check here. |
    // A bit of a misnomer: nextPC is actually the current PC until
    // this function updates it.
| bool predict_taken; |
| |
| if (!inst->isControl()) { |
| TheISA::advancePC(nextPC, inst->staticInst); |
| inst->setPredTarg(nextPC); |
| inst->setPredTaken(false); |
| return false; |
| } |
| |
| ThreadID tid = inst->threadNumber; |
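    // predict() returns the taken/not-taken decision and also updates
    // nextPC in place with the predicted next PC state.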
| predict_taken = branchPred->predict(inst->staticInst, inst->seqNum, |
| nextPC, tid); |
| |
| if (predict_taken) { |
| DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x " |
| "predicted to be taken to %s\n", |
| tid, inst->seqNum, inst->pcState().instAddr(), nextPC); |
| } else { |
| DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x " |
| "predicted to be not taken\n", |
| tid, inst->seqNum, inst->pcState().instAddr()); |
| } |
| |
| DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x " |
| "predicted to go to %s\n", |
| tid, inst->seqNum, inst->pcState().instAddr(), nextPC); |
| inst->setPredTarg(nextPC); |
| inst->setPredTaken(predict_taken); |
| |
| ++fetchStats.branches; |
| |
| if (predict_taken) { |
| ++fetchStats.predictedBranches; |
| } |
| |
| return predict_taken; |
| } |
| |
| template <class Impl> |
| bool |
| DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc) |
| { |
| Fault fault = NoFault; |
| |
| assert(!cpu->switchedOut()); |
| |
    // @todo: not sure if these should block translation.
    // AlphaDep
| if (cacheBlocked) { |
| DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n", |
| tid); |
| return false; |
| } else if (checkInterrupt(pc) && !delayedCommit[tid]) { |
        // Hold off fetch from getting new instructions while an
        // interrupt is pending and we are not in the middle of a
        // delayed-commit micro-op sequence, which must complete before
        // the interrupt can be taken.
| DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n", |
| tid); |
| return false; |
| } |
| |
| // Align the fetch address to the start of a fetch buffer segment. |
| Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr); |
| |
| DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n", |
| tid, fetchBufferBlockPC, vaddr); |
| |
| // Setup the memReq to do a read of the first instruction's address. |
| // Set the appropriate read size and flags as well. |
| // Build request here. |
| RequestPtr mem_req = std::make_shared<Request>( |
| fetchBufferBlockPC, fetchBufferSize, |
| Request::INST_FETCH, cpu->instRequestorId(), pc, |
| cpu->thread[tid]->contextId()); |
| |
| mem_req->taskId(cpu->taskId()); |
| |
| memReq[tid] = mem_req; |
| |
| // Initiate translation of the icache block |
| fetchStatus[tid] = ItlbWait; |
| FetchTranslation *trans = new FetchTranslation(this); |
| cpu->mmu->translateTiming(mem_req, cpu->thread[tid]->getTC(), |
| trans, BaseTLB::Execute); |
| return true; |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::finishTranslation(const Fault &fault, |
| const RequestPtr &mem_req) |
| { |
| ThreadID tid = cpu->contextToThread(mem_req->contextId()); |
| Addr fetchBufferBlockPC = mem_req->getVaddr(); |
| |
| assert(!cpu->switchedOut()); |
| |
| // Wake up CPU if it was idle |
| cpu->wakeCPU(); |
| |
| if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] || |
| mem_req->getVaddr() != memReq[tid]->getVaddr()) { |
| DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n", |
| tid); |
| ++fetchStats.tlbSquashes; |
| return; |
| } |
| |
| |
| // If translation was successful, attempt to read the icache block. |
| if (fault == NoFault) { |
| // Check that we're not going off into random memory |
| // If we have, just wait around for commit to squash something and put |
| // us on the right track |
| if (!cpu->system->isMemAddr(mem_req->getPaddr())) { |
| warn("Address %#x is outside of physical memory, stopping fetch\n", |
| mem_req->getPaddr()); |
| fetchStatus[tid] = NoGoodAddr; |
| memReq[tid] = NULL; |
| return; |
| } |
| |
| // Build packet here. |
| PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq); |
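        // dataDynamic() hands ownership of the buffer to the packet, which
        // frees it when the packet is destroyed.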
| data_pkt->dataDynamic(new uint8_t[fetchBufferSize]); |
| |
| fetchBufferPC[tid] = fetchBufferBlockPC; |
| fetchBufferValid[tid] = false; |
| DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); |
| |
| fetchStats.cacheLines++; |
| |
| // Access the cache. |
| if (!icachePort.sendTimingReq(data_pkt)) { |
| assert(retryPkt == NULL); |
| assert(retryTid == InvalidThreadID); |
| DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); |
| |
| fetchStatus[tid] = IcacheWaitRetry; |
| retryPkt = data_pkt; |
| retryTid = tid; |
| cacheBlocked = true; |
| } else { |
| DPRINTF(Fetch, "[tid:%i] Doing Icache access.\n", tid); |
| DPRINTF(Activity, "[tid:%i] Activity: Waiting on I-cache " |
| "response.\n", tid); |
| lastIcacheStall[tid] = curTick(); |
| fetchStatus[tid] = IcacheWaitResponse; |
| // Notify Fetch Request probe when a packet containing a fetch |
| // request is successfully sent |
| ppFetchRequestSent->notify(mem_req); |
| } |
| } else { |
| // Don't send an instruction to decode if we can't handle it. |
        if (!(numInst < fetchWidth) ||
            !(fetchQueue[tid].size() < fetchQueueSize)) {
| assert(!finishTranslationEvent.scheduled()); |
| finishTranslationEvent.setFault(fault); |
| finishTranslationEvent.setReq(mem_req); |
| cpu->schedule(finishTranslationEvent, |
| cpu->clockEdge(Cycles(1))); |
| return; |
| } |
        DPRINTF(Fetch, "[tid:%i] Translation faulted (vaddr: %#x)\n",
                tid, mem_req->getVaddr());
| // Translation faulted, icache request won't be sent. |
| memReq[tid] = NULL; |
| |
| // Send the fault to commit. This thread will not do anything |
| // until commit handles the fault. The only other way it can |
| // wake up is if a squash comes along and changes the PC. |
| TheISA::PCState fetchPC = pc[tid]; |
| |
| DPRINTF(Fetch, "[tid:%i] Translation faulted, building noop.\n", tid); |
        // We will use a nop in order to carry the fault.
| DynInstPtr instruction = buildInst(tid, StaticInst::nopStaticInstPtr, |
| NULL, fetchPC, fetchPC, false); |
| instruction->setNotAnInst(); |
| |
| instruction->setPredTarg(fetchPC); |
| instruction->fault = fault; |
| wroteToTimeBuffer = true; |
| |
| DPRINTF(Activity, "Activity this cycle.\n"); |
| cpu->activityThisCycle(); |
| |
| fetchStatus[tid] = TrapPending; |
| |
| DPRINTF(Fetch, "[tid:%i] Blocked, need to handle the trap.\n", tid); |
| DPRINTF(Fetch, "[tid:%i] fault (%s) detected @ PC %s.\n", |
| tid, fault->name(), pc[tid]); |
| } |
| _status = updateFetchStatus(); |
| } |
| |
| template <class Impl> |
| inline void |
| DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC, |
| const DynInstPtr squashInst, ThreadID tid) |
| { |
| DPRINTF(Fetch, "[tid:%i] Squashing, setting PC to: %s.\n", |
| tid, newPC); |
| |
| pc[tid] = newPC; |
| fetchOffset[tid] = 0; |
| if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr()) |
| macroop[tid] = squashInst->macroop; |
| else |
| macroop[tid] = NULL; |
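    // Discard any partially-decoded bytes the decoder may still hold.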
| decoder[tid]->reset(); |
| |
| // Clear the icache miss if it's outstanding. |
| if (fetchStatus[tid] == IcacheWaitResponse) { |
| DPRINTF(Fetch, "[tid:%i] Squashing outstanding Icache miss.\n", |
| tid); |
| memReq[tid] = NULL; |
| } else if (fetchStatus[tid] == ItlbWait) { |
| DPRINTF(Fetch, "[tid:%i] Squashing outstanding ITLB miss.\n", |
| tid); |
| memReq[tid] = NULL; |
| } |
| |
| // Get rid of the retrying packet if it was from this thread. |
| if (retryTid == tid) { |
| assert(cacheBlocked); |
| if (retryPkt) { |
| delete retryPkt; |
| } |
| retryPkt = NULL; |
| retryTid = InvalidThreadID; |
| } |
| |
| fetchStatus[tid] = Squashing; |
| |
| // Empty fetch queue |
| fetchQueue[tid].clear(); |
| |
    // Since microops are being squashed, it is not known whether the
    // youngest non-squashed microop was marked delayed commit
| // or not. Setting the flag to true ensures that the |
| // interrupts are not handled when they cannot be, though |
| // some opportunities to handle interrupts may be missed. |
| delayedCommit[tid] = true; |
| |
| ++fetchStats.squashCycles; |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC, |
| const DynInstPtr squashInst, |
| const InstSeqNum seq_num, ThreadID tid) |
| { |
| DPRINTF(Fetch, "[tid:%i] Squashing from decode.\n", tid); |
| |
| doSquash(newPC, squashInst, tid); |
| |
| // Tell the CPU to remove any instructions that are in flight between |
| // fetch and decode. |
| cpu->removeInstsUntil(seq_num, tid); |
| } |
| |
| template<class Impl> |
| bool |
| DefaultFetch<Impl>::checkStall(ThreadID tid) const |
| { |
| bool ret_val = false; |
| |
| if (stalls[tid].drain) { |
| assert(cpu->isDraining()); |
        DPRINTF(Fetch, "[tid:%i] Drain stall detected.\n", tid);
| ret_val = true; |
| } |
| |
| return ret_val; |
| } |
| |
| template<class Impl> |
| typename DefaultFetch<Impl>::FetchStatus |
| DefaultFetch<Impl>::updateFetchStatus() |
| { |
| //Check Running |
| std::list<ThreadID>::iterator threads = activeThreads->begin(); |
| std::list<ThreadID>::iterator end = activeThreads->end(); |
| |
| while (threads != end) { |
| ThreadID tid = *threads++; |
| |
| if (fetchStatus[tid] == Running || |
| fetchStatus[tid] == Squashing || |
| fetchStatus[tid] == IcacheAccessComplete) { |
| |
| if (_status == Inactive) { |
| DPRINTF(Activity, "[tid:%i] Activating stage.\n",tid); |
| |
| if (fetchStatus[tid] == IcacheAccessComplete) { |
                    DPRINTF(Activity, "[tid:%i] Activating fetch due to "
                            "cache completion\n", tid);
| } |
| |
| cpu->activateStage(O3CPU::FetchIdx); |
| } |
| |
| return Active; |
| } |
| } |
| |
| // Stage is switching from active to inactive, notify CPU of it. |
| if (_status == Active) { |
| DPRINTF(Activity, "Deactivating stage.\n"); |
| |
| cpu->deactivateStage(O3CPU::FetchIdx); |
| } |
| |
| return Inactive; |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::squash(const TheISA::PCState &newPC, |
| const InstSeqNum seq_num, DynInstPtr squashInst, |
| ThreadID tid) |
| { |
| DPRINTF(Fetch, "[tid:%i] Squash from commit.\n", tid); |
| |
| doSquash(newPC, squashInst, tid); |
| |
| // Tell the CPU to remove any instructions that are not in the ROB. |
| cpu->removeInstsNotInROB(tid); |
| } |
| |
| template <class Impl> |
| void |
| DefaultFetch<Impl>::tick() |
| { |
| std::list<ThreadID>::iterator threads = activeThreads->begin(); |
| std::list<ThreadID>::iterator end = activeThreads->end(); |
| bool status_change = false; |
| |
| wroteToTimeBuffer = false; |
| |
| for (ThreadID i = 0; i < numThreads; ++i) { |
| issuePipelinedIfetch[i] = false; |
| } |
| |
| while (threads != end) { |
| ThreadID tid = *threads++; |
| |
| // Check the signals for each thread to determine the proper status |
| // for each thread. |
| bool updated_status = checkSignalsAndUpdate(tid); |
| status_change = status_change || updated_status; |
| } |
| |
| DPRINTF(Fetch, "Running stage.\n"); |
| |
| if (FullSystem) { |
| if (fromCommit->commitInfo[0].interruptPending) { |
| interruptPending = true; |
| } |
| |
| if (fromCommit->commitInfo[0].clearInterrupt) { |
| interruptPending = false; |
| } |
| } |
| |
| for (threadFetched = 0; threadFetched < numFetchingThreads; |
| threadFetched++) { |
| // Fetch each of the actively fetching threads. |
| fetch(status_change); |
| } |
| |
| // Record number of instructions fetched this cycle for distribution. |
| fetchStats.nisnDist.sample(numInst); |
| |
| if (status_change) { |
| // Change the fetch stage status if there was a status change. |
| _status = updateFetchStatus(); |
| } |
| |
| // Issue the next I-cache request if possible. |
| for (ThreadID i = 0; i < numThreads; ++i) { |
| if (issuePipelinedIfetch[i]) { |
| pipelineIcacheAccesses(i); |
| } |
| } |
| |
| // Send instructions enqueued into the fetch queue to decode. |
| // Limit rate by fetchWidth. Stall if decode is stalled. |
| unsigned insts_to_decode = 0; |
| unsigned available_insts = 0; |
| |
| for (auto tid : *activeThreads) { |
| if (!stalls[tid].decode) { |
| available_insts += fetchQueue[tid].size(); |
| } |
| } |
| |
| // Pick a random thread to start trying to grab instructions from |
| auto tid_itr = activeThreads->begin(); |
    std::advance(tid_itr,
                 random_mt.random<uint8_t>(0, activeThreads->size() - 1));
| |
| while (available_insts != 0 && insts_to_decode < decodeWidth) { |
| ThreadID tid = *tid_itr; |
| if (!stalls[tid].decode && !fetchQueue[tid].empty()) { |
| const auto& inst = fetchQueue[tid].front(); |
| toDecode->insts[toDecode->size++] = inst; |
| DPRINTF(Fetch, "[tid:%i] [sn:%llu] Sending instruction to decode " |
| "from fetch queue. Fetch queue size: %i.\n", |
| tid, inst->seqNum, fetchQueue[tid].size()); |
| |
| wroteToTimeBuffer = true; |
| fetchQueue[tid].pop_front(); |
| insts_to_decode++; |
| available_insts--; |
| } |
| |
| tid_itr++; |
| // Wrap around if at end of active threads list |
| if (tid_itr == activeThreads->end()) |
| tid_itr = activeThreads->begin(); |
| } |
| |
| // If there was activity this cycle, inform the CPU of it. |
| if (wroteToTimeBuffer) { |
| DPRINTF(Activity, "Activity this cycle.\n"); |
| cpu->activityThisCycle(); |
| } |
| |
| // Reset the number of the instruction we've fetched. |
| numInst = 0; |
| } |
| |
| template <class Impl> |
| bool |
| DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid) |
| { |
| // Update the per thread stall statuses. |
| if (fromDecode->decodeBlock[tid]) { |
| stalls[tid].decode = true; |
| } |
| |
| if (fromDecode->decodeUnblock[tid]) { |
| assert(stalls[tid].decode); |
| assert(!fromDecode->decodeBlock[tid]); |
| stalls[tid].decode = false; |
| } |
| |
| // Check squash signals from commit. |
| if (fromCommit->commitInfo[tid].squash) { |
| |
| DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash " |
| "from commit.\n",tid); |
| // In any case, squash. |
| squash(fromCommit->commitInfo[tid].pc, |
| fromCommit->commitInfo[tid].doneSeqNum, |
| fromCommit->commitInfo[tid].squashInst, tid); |
| |
        // If it was a branch mispredict on a control instruction, update the
        // branch predictor with that instruction; otherwise just kill the
        // invalid state we generated after the squashing sequence number.
| if (fromCommit->commitInfo[tid].mispredictInst && |
| fromCommit->commitInfo[tid].mispredictInst->isControl()) { |
| branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum, |
| fromCommit->commitInfo[tid].pc, |
| fromCommit->commitInfo[tid].branchTaken, |
| tid); |
| } else { |
| branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum, |
| tid); |
| } |
| |
| return true; |
| } else if (fromCommit->commitInfo[tid].doneSeqNum) { |
        // Update the branch predictor if it wasn't a squashed instruction
        // that was broadcast.
| branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid); |
| } |
| |
| // Check squash signals from decode. |
| if (fromDecode->decodeInfo[tid].squash) { |
| DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash " |
| "from decode.\n",tid); |
| |
| // Update the branch predictor. |
| if (fromDecode->decodeInfo[tid].branchMispredict) { |
| branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum, |
| fromDecode->decodeInfo[tid].nextPC, |
| fromDecode->decodeInfo[tid].branchTaken, |
| tid); |
| } else { |
| branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum, |
| tid); |
| } |
| |
| if (fetchStatus[tid] != Squashing) { |
| |
| DPRINTF(Fetch, "Squashing from decode with PC = %s\n", |
| fromDecode->decodeInfo[tid].nextPC); |
| // Squash unless we're already squashing |
| squashFromDecode(fromDecode->decodeInfo[tid].nextPC, |
| fromDecode->decodeInfo[tid].squashInst, |
| fromDecode->decodeInfo[tid].doneSeqNum, |
| tid); |
| |
| return true; |
| } |
| } |
| |
| if (checkStall(tid) && |
| fetchStatus[tid] != IcacheWaitResponse && |
| fetchStatus[tid] != IcacheWaitRetry && |
| fetchStatus[tid] != ItlbWait && |
| fetchStatus[tid] != QuiescePending) { |
| DPRINTF(Fetch, "[tid:%i] Setting to blocked\n",tid); |
| |
| fetchStatus[tid] = Blocked; |
| |
| return true; |
| } |
| |
| if (fetchStatus[tid] == Blocked || |
| fetchStatus[tid] == Squashing) { |
| // Switch status to running if fetch isn't being told to block or |
| // squash this cycle. |
| DPRINTF(Fetch, "[tid:%i] Done squashing, switching to running.\n", |
| tid); |
| |
| fetchStatus[tid] = Running; |
| |
| return true; |
| } |
| |
| // If we've reached this point, we have not gotten any signals that |
| // cause fetch to change its status. Fetch remains the same as before. |
| return false; |
| } |
| |
| template<class Impl> |
| typename Impl::DynInstPtr |
| DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst, |
| StaticInstPtr curMacroop, TheISA::PCState thisPC, |
| TheISA::PCState nextPC, bool trace) |
| { |
| // Get a sequence number. |
| InstSeqNum seq = cpu->getAndIncrementInstSeq(); |
| |
| // Create a new DynInst from the instruction fetched. |
| DynInstPtr instruction = |
| new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu); |
| instruction->setTid(tid); |
| |
| instruction->setThreadState(cpu->thread[tid]); |
| |
| DPRINTF(Fetch, "[tid:%i] Instruction PC %#x (%d) created " |
| "[sn:%lli].\n", tid, thisPC.instAddr(), |
| thisPC.microPC(), seq); |
| |
| DPRINTF(Fetch, "[tid:%i] Instruction is: %s\n", tid, |
| instruction->staticInst-> |
| disassemble(thisPC.instAddr())); |
| |
| #if TRACING_ON |
| if (trace) { |
| instruction->traceData = |
| cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid), |
| instruction->staticInst, thisPC, curMacroop); |
| } |
| #else |
| instruction->traceData = NULL; |
| #endif |
| |
| // Add instruction to the CPU's list of instructions. |
| instruction->setInstListIt(cpu->addInst(instruction)); |
| |
| // Write the instruction to the first slot in the queue |
| // that heads to decode. |
| assert(numInst < fetchWidth); |
| fetchQueue[tid].push_back(instruction); |
| assert(fetchQueue[tid].size() <= fetchQueueSize); |
| DPRINTF(Fetch, "[tid:%i] Fetch queue entry created (%i/%i).\n", |
| tid, fetchQueue[tid].size(), fetchQueueSize); |
| |
| // Keep track of if we can take an interrupt at this boundary |
| delayedCommit[tid] = instruction->isDelayedCommit(); |
| |
| return instruction; |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::fetch(bool &status_change) |
| { |
| ////////////////////////////////////////// |
| // Start actual fetch |
| ////////////////////////////////////////// |
| ThreadID tid = getFetchingThread(); |
| |
| assert(!cpu->switchedOut()); |
| |
| if (tid == InvalidThreadID) { |
| // Breaks looping condition in tick() |
| threadFetched = numFetchingThreads; |
| |
| if (numThreads == 1) { // @todo Per-thread stats |
| profileStall(0); |
| } |
| |
| return; |
| } |
| |
| DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); |
| |
| // The current PC. |
| TheISA::PCState thisPC = pc[tid]; |
| |
| Addr pcOffset = fetchOffset[tid]; |
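    // BaseCPU::PCMask clears the low-order address bits so fetchAddr is
    // aligned to a machine-instruction boundary.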
| Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; |
| |
| bool inRom = isRomMicroPC(thisPC.microPC()); |
| |
| // If returning from the delay of a cache miss, then update the status |
| // to running, otherwise do the cache access. Possibly move this up |
| // to tick() function. |
| if (fetchStatus[tid] == IcacheAccessComplete) { |
| DPRINTF(Fetch, "[tid:%i] Icache miss is complete.\n", tid); |
| |
| fetchStatus[tid] = Running; |
| status_change = true; |
| } else if (fetchStatus[tid] == Running) { |
| // Align the fetch PC so its at the start of a fetch buffer segment. |
| Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); |
| |
| // If buffer is no longer valid or fetchAddr has moved to point |
| // to the next cache block, AND we have no remaining ucode |
| // from a macro-op, then start fetch from icache. |
| if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid]) |
| && !inRom && !macroop[tid]) { |
| DPRINTF(Fetch, "[tid:%i] Attempting to translate and read " |
| "instruction, starting at PC %s.\n", tid, thisPC); |
| |
| fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); |
| |
| if (fetchStatus[tid] == IcacheWaitResponse) |
| ++fetchStats.icacheStallCycles; |
| else if (fetchStatus[tid] == ItlbWait) |
| ++fetchStats.tlbCycles; |
| else |
| ++fetchStats.miscStallCycles; |
| return; |
| } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) { |
            // Stall CPU if an interrupt is posted and we're not issuing
            // a delayed-commit micro-op currently (delayed-commit
            // instructions are not interruptible by interrupts, only faults)
| ++fetchStats.miscStallCycles; |
| DPRINTF(Fetch, "[tid:%i] Fetch is stalled!\n", tid); |
| return; |
| } |
| } else { |
| if (fetchStatus[tid] == Idle) { |
| ++fetchStats.idleCycles; |
| DPRINTF(Fetch, "[tid:%i] Fetch is idle!\n", tid); |
| } |
| |
| // Status is Idle, so fetch should do nothing. |
| return; |
| } |
| |
| ++fetchStats.cycles; |
| |
| TheISA::PCState nextPC = thisPC; |
| |
| StaticInstPtr staticInst = NULL; |
| StaticInstPtr curMacroop = macroop[tid]; |
| |
| // If the read of the first instruction was successful, then grab the |
| // instructions from the rest of the cache line and put them into the |
| // queue heading to decode. |
| |
| DPRINTF(Fetch, "[tid:%i] Adding instructions to queue to " |
| "decode.\n", tid); |
| |
| // Need to keep track of whether or not a predicted branch |
| // ended this fetch block. |
| bool predictedBranch = false; |
| |
| // Need to halt fetch if quiesce instruction detected |
| bool quiesce = false; |
| |
| TheISA::MachInst *cacheInsts = |
| reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]); |
| |
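    // numInsts is how many fixed-size machine instructions fit in the
    // fetch buffer; blkOffset indexes into the buffer in those units.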
| const unsigned numInsts = fetchBufferSize / instSize; |
| unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize; |
| |
| // Loop through instruction memory from the cache. |
| // Keep issuing while fetchWidth is available and branch is not |
| // predicted taken |
| while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize |
| && !predictedBranch && !quiesce) { |
| // We need to process more memory if we aren't going to get a |
| // StaticInst from the rom, the current macroop, or what's already |
| // in the decoder. |
| bool needMem = !inRom && !curMacroop && |
| !decoder[tid]->instReady(); |
| fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; |
| Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); |
| |
| if (needMem) { |
| // If buffer is no longer valid or fetchAddr has moved to point |
| // to the next cache block then start fetch from icache. |
| if (!fetchBufferValid[tid] || |
| fetchBufferBlockPC != fetchBufferPC[tid]) |
| break; |
| |
| if (blkOffset >= numInsts) { |
| // We need to process more memory, but we've run out of the |
| // current block. |
| break; |
| } |
| |
| decoder[tid]->moreBytes(thisPC, fetchAddr, cacheInsts[blkOffset]); |
| |
| if (decoder[tid]->needMoreBytes()) { |
| blkOffset++; |
| fetchAddr += instSize; |
| pcOffset += instSize; |
| } |
| } |
| |
| // Extract as many instructions and/or microops as we can from |
| // the memory we've processed so far. |
| do { |
| if (!(curMacroop || inRom)) { |
| if (decoder[tid]->instReady()) { |
| staticInst = decoder[tid]->decode(thisPC); |
| |
| // Increment stat of fetched instructions. |
| ++fetchStats.insts; |
| |
| if (staticInst->isMacroop()) { |
| curMacroop = staticInst; |
| } else { |
| pcOffset = 0; |
| } |
| } else { |
| // We need more bytes for this instruction so blkOffset and |
| // pcOffset will be updated |
| break; |
| } |
| } |
| // Whether we're moving to a new macroop because we're at the |
| // end of the current one, or the branch predictor incorrectly |
| // thinks we are... |
| bool newMacro = false; |
| if (curMacroop || inRom) { |
| if (inRom) { |
| staticInst = decoder[tid]->fetchRomMicroop( |
| thisPC.microPC(), curMacroop); |
| } else { |
| staticInst = curMacroop->fetchMicroop(thisPC.microPC()); |
| } |
| newMacro |= staticInst->isLastMicroop(); |
| } |
| |
| DynInstPtr instruction = |
| buildInst(tid, staticInst, curMacroop, |
| thisPC, nextPC, true); |
| |
| ppFetch->notify(instruction); |
| numInst++; |
| |
| #if TRACING_ON |
| if (DTRACE(O3PipeView)) { |
| instruction->fetchTick = curTick(); |
| } |
| #endif |
| |
| nextPC = thisPC; |
| |
| // If we're branching after this instruction, quit fetching |
| // from the same block. |
| predictedBranch |= thisPC.branching(); |
| predictedBranch |= |
| lookupAndUpdateNextPC(instruction, nextPC); |
| if (predictedBranch) { |
| DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC); |
| } |
| |
| newMacro |= thisPC.instAddr() != nextPC.instAddr(); |
| |
| // Move to the next instruction, unless we have a branch. |
| thisPC = nextPC; |
| inRom = isRomMicroPC(thisPC.microPC()); |
| |
| if (newMacro) { |
| fetchAddr = thisPC.instAddr() & BaseCPU::PCMask; |
| blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize; |
| pcOffset = 0; |
| curMacroop = NULL; |
| } |
| |
| if (instruction->isQuiesce()) { |
| DPRINTF(Fetch, |
| "Quiesce instruction encountered, halting fetch!\n"); |
| fetchStatus[tid] = QuiescePending; |
| status_change = true; |
| quiesce = true; |
| break; |
| } |
| } while ((curMacroop || decoder[tid]->instReady()) && |
| numInst < fetchWidth && |
| fetchQueue[tid].size() < fetchQueueSize); |
| |
| // Re-evaluate whether the next instruction to fetch is in micro-op ROM |
| // or not. |
| inRom = isRomMicroPC(thisPC.microPC()); |
| } |
| |
| if (predictedBranch) { |
| DPRINTF(Fetch, "[tid:%i] Done fetching, predicted branch " |
| "instruction encountered.\n", tid); |
| } else if (numInst >= fetchWidth) { |
| DPRINTF(Fetch, "[tid:%i] Done fetching, reached fetch bandwidth " |
| "for this cycle.\n", tid); |
    } else if (blkOffset >= numInsts) {
        DPRINTF(Fetch, "[tid:%i] Done fetching, reached the end of the "
                "fetch buffer.\n", tid);
| } |
| |
| macroop[tid] = curMacroop; |
| fetchOffset[tid] = pcOffset; |
| |
| if (numInst > 0) { |
| wroteToTimeBuffer = true; |
| } |
| |
| pc[tid] = thisPC; |
| |
| // pipeline a fetch if we're crossing a fetch buffer boundary and not in |
| // a state that would preclude fetching |
| fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; |
| Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); |
| issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] && |
| fetchStatus[tid] != IcacheWaitResponse && |
| fetchStatus[tid] != ItlbWait && |
| fetchStatus[tid] != IcacheWaitRetry && |
| fetchStatus[tid] != QuiescePending && |
| !curMacroop; |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::recvReqRetry() |
| { |
| if (retryPkt != NULL) { |
| assert(cacheBlocked); |
| assert(retryTid != InvalidThreadID); |
| assert(fetchStatus[retryTid] == IcacheWaitRetry); |
| |
| if (icachePort.sendTimingReq(retryPkt)) { |
| fetchStatus[retryTid] = IcacheWaitResponse; |
| // Notify Fetch Request probe when a retryPkt is successfully sent. |
| // Note that notify must be called before retryPkt is set to NULL. |
| ppFetchRequestSent->notify(retryPkt->req); |
| retryPkt = NULL; |
| retryTid = InvalidThreadID; |
| cacheBlocked = false; |
| } |
| } else { |
| assert(retryTid == InvalidThreadID); |
| // Access has been squashed since it was sent out. Just clear |
| // the cache being blocked. |
| cacheBlocked = false; |
| } |
| } |
| |
| /////////////////////////////////////// |
| // // |
| // SMT FETCH POLICY MAINTAINED HERE // |
| // // |
| /////////////////////////////////////// |
| template<class Impl> |
| ThreadID |
| DefaultFetch<Impl>::getFetchingThread() |
| { |
| if (numThreads > 1) { |
| switch (fetchPolicy) { |
| case SMTFetchPolicy::RoundRobin: |
| return roundRobin(); |
| case SMTFetchPolicy::IQCount: |
| return iqCount(); |
| case SMTFetchPolicy::LSQCount: |
| return lsqCount(); |
| case SMTFetchPolicy::Branch: |
| return branchCount(); |
| default: |
| return InvalidThreadID; |
| } |
| } else { |
| std::list<ThreadID>::iterator thread = activeThreads->begin(); |
| if (thread == activeThreads->end()) { |
| return InvalidThreadID; |
| } |
| |
| ThreadID tid = *thread; |
| |
| if (fetchStatus[tid] == Running || |
| fetchStatus[tid] == IcacheAccessComplete || |
| fetchStatus[tid] == Idle) { |
| return tid; |
| } else { |
| return InvalidThreadID; |
| } |
| } |
| } |
| |
| |
| template<class Impl> |
| ThreadID |
| DefaultFetch<Impl>::roundRobin() |
| { |
| std::list<ThreadID>::iterator pri_iter = priorityList.begin(); |
| std::list<ThreadID>::iterator end = priorityList.end(); |
| |
| ThreadID high_pri; |
| |
| while (pri_iter != end) { |
| high_pri = *pri_iter; |
| |
        assert(high_pri < numThreads);
| |
| if (fetchStatus[high_pri] == Running || |
| fetchStatus[high_pri] == IcacheAccessComplete || |
| fetchStatus[high_pri] == Idle) { |
| |
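            // Rotate the chosen thread to the back of the priority list
            // so the other active threads are favored on later calls.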
| priorityList.erase(pri_iter); |
| priorityList.push_back(high_pri); |
| |
| return high_pri; |
| } |
| |
| pri_iter++; |
| } |
| |
| return InvalidThreadID; |
| } |
| |
| template<class Impl> |
| ThreadID |
| DefaultFetch<Impl>::iqCount() |
| { |
| //sorted from lowest->highest |
| std::priority_queue<unsigned, std::vector<unsigned>, |
| std::greater<unsigned> > PQ; |
| std::map<unsigned, ThreadID> threadMap; |
| |
| std::list<ThreadID>::iterator threads = activeThreads->begin(); |
| std::list<ThreadID>::iterator end = activeThreads->end(); |
| |
| while (threads != end) { |
| ThreadID tid = *threads++; |
| unsigned iqCount = fromIEW->iewInfo[tid].iqCount; |
| |
| //we can potentially get tid collisions if two threads |
| //have the same iqCount, but this should be rare. |
| PQ.push(iqCount); |
| threadMap[iqCount] = tid; |
| } |
| |
| while (!PQ.empty()) { |
| ThreadID high_pri = threadMap[PQ.top()]; |
| |
| if (fetchStatus[high_pri] == Running || |
| fetchStatus[high_pri] == IcacheAccessComplete || |
| fetchStatus[high_pri] == Idle) |
| return high_pri; |
| else |
| PQ.pop(); |
| |
| } |
| |
| return InvalidThreadID; |
| } |
| |
| template<class Impl> |
| ThreadID |
| DefaultFetch<Impl>::lsqCount() |
| { |
| //sorted from lowest->highest |
| std::priority_queue<unsigned, std::vector<unsigned>, |
| std::greater<unsigned> > PQ; |
| std::map<unsigned, ThreadID> threadMap; |
| |
| std::list<ThreadID>::iterator threads = activeThreads->begin(); |
| std::list<ThreadID>::iterator end = activeThreads->end(); |
| |
| while (threads != end) { |
| ThreadID tid = *threads++; |
| unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount; |
| |
        //we can potentially get tid collisions if two threads
        //have the same ldstqCount, but this should be rare.
| PQ.push(ldstqCount); |
| threadMap[ldstqCount] = tid; |
| } |
| |
| while (!PQ.empty()) { |
| ThreadID high_pri = threadMap[PQ.top()]; |
| |
| if (fetchStatus[high_pri] == Running || |
| fetchStatus[high_pri] == IcacheAccessComplete || |
| fetchStatus[high_pri] == Idle) |
| return high_pri; |
| else |
| PQ.pop(); |
| } |
| |
| return InvalidThreadID; |
| } |
| |
| template<class Impl> |
| ThreadID |
| DefaultFetch<Impl>::branchCount() |
| { |
| panic("Branch Count Fetch policy unimplemented\n"); |
| return InvalidThreadID; |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid) |
| { |
| if (!issuePipelinedIfetch[tid]) { |
| return; |
| } |
| |
| // The next PC to access. |
| TheISA::PCState thisPC = pc[tid]; |
| |
| if (isRomMicroPC(thisPC.microPC())) { |
| return; |
| } |
| |
| Addr pcOffset = fetchOffset[tid]; |
| Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; |
| |
| // Align the fetch PC so its at the start of a fetch buffer segment. |
| Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); |
| |
| // Unless buffer already got the block, fetch it from icache. |
| if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) { |
| DPRINTF(Fetch, "[tid:%i] Issuing a pipelined I-cache access, " |
| "starting at PC %s.\n", tid, thisPC); |
| |
| fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); |
| } |
| } |
| |
| template<class Impl> |
| void |
DefaultFetch<Impl>::profileStall(ThreadID tid)
{
    DPRINTF(Fetch, "There are no more threads available to fetch from.\n");
| |
| // @todo Per-thread stats |
| |
| if (stalls[tid].drain) { |
| ++fetchStats.pendingDrainCycles; |
| DPRINTF(Fetch, "Fetch is waiting for a drain!\n"); |
| } else if (activeThreads->empty()) { |
| ++fetchStats.noActiveThreadStallCycles; |
| DPRINTF(Fetch, "Fetch has no active thread!\n"); |
| } else if (fetchStatus[tid] == Blocked) { |
| ++fetchStats.blockedCycles; |
| DPRINTF(Fetch, "[tid:%i] Fetch is blocked!\n", tid); |
| } else if (fetchStatus[tid] == Squashing) { |
| ++fetchStats.squashCycles; |
| DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid); |
    } else if (fetchStatus[tid] == IcacheWaitResponse) {
        ++fetchStats.icacheStallCycles;
        DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a cache response!\n",
                tid);
    } else if (fetchStatus[tid] == ItlbWait) {
        ++fetchStats.tlbCycles;
        DPRINTF(Fetch, "[tid:%i] Fetch is waiting for an ITLB walk to "
                "finish!\n", tid);
| } else if (fetchStatus[tid] == TrapPending) { |
| ++fetchStats.pendingTrapStallCycles; |
| DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending trap!\n", |
| tid); |
| } else if (fetchStatus[tid] == QuiescePending) { |
| ++fetchStats.pendingQuiesceStallCycles; |
| DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending quiesce " |
| "instruction!\n", tid); |
| } else if (fetchStatus[tid] == IcacheWaitRetry) { |
| ++fetchStats.icacheWaitRetryStallCycles; |
| DPRINTF(Fetch, "[tid:%i] Fetch is waiting for an I-cache retry!\n", |
| tid); |
| } else if (fetchStatus[tid] == NoGoodAddr) { |
| DPRINTF(Fetch, "[tid:%i] Fetch predicted non-executable address\n", |
| tid); |
| } else { |
| DPRINTF(Fetch, "[tid:%i] Unexpected fetch stall reason " |
| "(Status: %i)\n", |
| tid, fetchStatus[tid]); |
| } |
| } |
| |
| template<class Impl> |
| bool |
| DefaultFetch<Impl>::IcachePort::recvTimingResp(PacketPtr pkt) |
| { |
    DPRINTF(O3CPU, "Fetch unit received timing response\n");
| // We shouldn't ever get a cacheable block in Modified state |
| assert(pkt->req->isUncacheable() || |
| !(pkt->cacheResponding() && !pkt->hasSharers())); |
| fetch->processCacheCompletion(pkt); |
| |
| return true; |
| } |
| |
| template<class Impl> |
| void |
| DefaultFetch<Impl>::IcachePort::recvReqRetry() |
| { |
| fetch->recvReqRetry(); |
| } |
| |
#endif // __CPU_O3_FETCH_IMPL_HH__