blob: 7c54509cd23f2b16d793e87d620e65a7ab221beb [file] [log] [blame]
/*
* Copyright (c) 2010-2014 ARM Limited
* Copyright (c) 2012-2013 AMD
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CPU_O3_FETCH_IMPL_HH__
#define __CPU_O3_FETCH_IMPL_HH__
#include <algorithm>
#include <cstring>
#include <list>
#include <map>
#include <queue>
#include "arch/generic/tlb.hh"
#include "arch/utility.hh"
#include "base/random.hh"
#include "base/types.hh"
#include "config/the_isa.hh"
#include "cpu/base.hh"
#include "cpu/o3/cpu.hh"
#include "cpu/o3/fetch.hh"
#include "cpu/exetrace.hh"
#include "debug/Activity.hh"
#include "debug/Drain.hh"
#include "debug/Fetch.hh"
#include "debug/O3CPU.hh"
#include "debug/O3PipeView.hh"
#include "mem/packet.hh"
#include "params/DerivO3CPU.hh"
#include "sim/byteswap.hh"
#include "sim/core.hh"
#include "sim/eventq.hh"
#include "sim/full_system.hh"
#include "sim/system.hh"
#include "cpu/o3/isa_specific.hh"
template<class Impl>
DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, const DerivO3CPUParams &params)
: fetchPolicy(params.smtFetchPolicy),
cpu(_cpu),
branchPred(nullptr),
decodeToFetchDelay(params.decodeToFetchDelay),
renameToFetchDelay(params.renameToFetchDelay),
iewToFetchDelay(params.iewToFetchDelay),
commitToFetchDelay(params.commitToFetchDelay),
fetchWidth(params.fetchWidth),
decodeWidth(params.decodeWidth),
retryPkt(NULL),
retryTid(InvalidThreadID),
cacheBlkSize(cpu->cacheLineSize()),
fetchBufferSize(params.fetchBufferSize),
fetchBufferMask(fetchBufferSize - 1),
fetchQueueSize(params.fetchQueueSize),
numThreads(params.numThreads),
numFetchingThreads(params.smtNumFetchingThreads),
icachePort(this, _cpu),
finishTranslationEvent(this), fetchStats(_cpu, this)
{
if (numThreads > Impl::MaxThreads)
fatal("numThreads (%d) is larger than compiled limit (%d),\n"
"\tincrease MaxThreads in src/cpu/o3/impl.hh\n",
numThreads, static_cast<int>(Impl::MaxThreads));
if (fetchWidth > Impl::MaxWidth)
fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
"\tincrease MaxWidth in src/cpu/o3/impl.hh\n",
fetchWidth, static_cast<int>(Impl::MaxWidth));
if (fetchBufferSize > cacheBlkSize)
fatal("fetch buffer size (%u bytes) is greater than the cache "
"block size (%u bytes)\n", fetchBufferSize, cacheBlkSize);
if (cacheBlkSize % fetchBufferSize)
fatal("cache block (%u bytes) is not a multiple of the "
"fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize);
// Get the size of an instruction.
instSize = sizeof(TheISA::MachInst);
for (int i = 0; i < Impl::MaxThreads; i++) {
fetchStatus[i] = Idle;
decoder[i] = nullptr;
pc[i] = 0;
fetchOffset[i] = 0;
macroop[i] = nullptr;
delayedCommit[i] = false;
memReq[i] = nullptr;
stalls[i] = {false, false};
fetchBuffer[i] = NULL;
fetchBufferPC[i] = 0;
fetchBufferValid[i] = false;
lastIcacheStall[i] = 0;
issuePipelinedIfetch[i] = false;
}
branchPred = params.branchPred;
for (ThreadID tid = 0; tid < numThreads; tid++) {
decoder[tid] = new TheISA::Decoder(
dynamic_cast<TheISA::ISA *>(params.isa[tid]));
// Create space to buffer the cache line data,
// which may not hold the entire cache line.
fetchBuffer[tid] = new uint8_t[fetchBufferSize];
}
}
template <class Impl>
std::string
DefaultFetch<Impl>::name() const
{
return cpu->name() + ".fetch";
}
template <class Impl>
void
DefaultFetch<Impl>::regProbePoints()
{
ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch");
ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),
"FetchRequest");
}
template <class Impl>
DefaultFetch<Impl>::
FetchStatGroup::FetchStatGroup(O3CPU *cpu, DefaultFetch *fetch)
: Stats::Group(cpu, "fetch"),
ADD_STAT(icacheStallCycles, UNIT_CYCLE,
"Number of cycles fetch is stalled on an Icache miss"),
ADD_STAT(insts, UNIT_COUNT, "Number of instructions fetch has processed"),
ADD_STAT(branches, UNIT_COUNT,
"Number of branches that fetch encountered"),
ADD_STAT(predictedBranches, UNIT_COUNT,
"Number of branches that fetch has predicted taken"),
ADD_STAT(cycles, UNIT_CYCLE,
"Number of cycles fetch has run and was not squashing or "
"blocked"),
ADD_STAT(squashCycles, UNIT_CYCLE,
"Number of cycles fetch has spent squashing"),
ADD_STAT(tlbCycles, UNIT_CYCLE,
"Number of cycles fetch has spent waiting for tlb"),
ADD_STAT(idleCycles, UNIT_CYCLE, "Number of cycles fetch was idle"),
ADD_STAT(blockedCycles, UNIT_CYCLE,
"Number of cycles fetch has spent blocked"),
ADD_STAT(miscStallCycles, UNIT_CYCLE,
"Number of cycles fetch has spent waiting on interrupts, or bad "
"addresses, or out of MSHRs"),
ADD_STAT(pendingDrainCycles, UNIT_CYCLE,
"Number of cycles fetch has spent waiting on pipes to drain"),
ADD_STAT(noActiveThreadStallCycles, UNIT_CYCLE,
"Number of stall cycles due to no active thread to fetch from"),
ADD_STAT(pendingTrapStallCycles, UNIT_CYCLE,
"Number of stall cycles due to pending traps"),
ADD_STAT(pendingQuiesceStallCycles, UNIT_CYCLE,
"Number of stall cycles due to pending quiesce instructions"),
ADD_STAT(icacheWaitRetryStallCycles, UNIT_CYCLE,
"Number of stall cycles due to full MSHR"),
ADD_STAT(cacheLines, UNIT_COUNT, "Number of cache lines fetched"),
ADD_STAT(icacheSquashes, UNIT_COUNT,
"Number of outstanding Icache misses that were squashed"),
ADD_STAT(tlbSquashes, UNIT_COUNT,
"Number of outstanding ITLB misses that were squashed"),
ADD_STAT(nisnDist, UNIT_COUNT,
"Number of instructions fetched each cycle (Total)"),
ADD_STAT(idleRate, UNIT_RATIO, "Ratio of cycles fetch was idle",
idleCycles / cpu->baseStats.numCycles),
ADD_STAT(branchRate, UNIT_RATIO, "Number of branch fetches per cycle",
branches / cpu->baseStats.numCycles),
ADD_STAT(rate, UNIT_RATE(Stats::Units::Count, Stats::Units::Cycle),
"Number of inst fetches per cycle",
insts / cpu->baseStats.numCycles)
{
icacheStallCycles
.prereq(icacheStallCycles);
insts
.prereq(insts);
branches
.prereq(branches);
predictedBranches
.prereq(predictedBranches);
cycles
.prereq(cycles);
squashCycles
.prereq(squashCycles);
tlbCycles
.prereq(tlbCycles);
idleCycles
.prereq(idleCycles);
blockedCycles
.prereq(blockedCycles);
cacheLines
.prereq(cacheLines);
miscStallCycles
.prereq(miscStallCycles);
pendingDrainCycles
.prereq(pendingDrainCycles);
noActiveThreadStallCycles
.prereq(noActiveThreadStallCycles);
pendingTrapStallCycles
.prereq(pendingTrapStallCycles);
pendingQuiesceStallCycles
.prereq(pendingQuiesceStallCycles);
icacheWaitRetryStallCycles
.prereq(icacheWaitRetryStallCycles);
icacheSquashes
.prereq(icacheSquashes);
tlbSquashes
.prereq(tlbSquashes);
nisnDist
.init(/* base value */ 0,
/* last value */ fetch->fetchWidth,
/* bucket size */ 1)
.flags(Stats::pdf);
idleRate
.prereq(idleRate);
branchRate
.flags(Stats::total);
rate
.flags(Stats::total);
}
template<class Impl>
void
DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
{
timeBuffer = time_buffer;
// Create wires to get information from proper places in time buffer.
fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
fromRename = timeBuffer->getWire(-renameToFetchDelay);
fromIEW = timeBuffer->getWire(-iewToFetchDelay);
fromCommit = timeBuffer->getWire(-commitToFetchDelay);
}
template<class Impl>
void
DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
{
activeThreads = at_ptr;
}
template<class Impl>
void
DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *ftb_ptr)
{
// Create wire to write information to proper place in fetch time buf.
toDecode = ftb_ptr->getWire(0);
}
template<class Impl>
void
DefaultFetch<Impl>::startupStage()
{
assert(priorityList.empty());
resetStage();
// Fetch needs to start fetching instructions at the very beginning,
// so it must start up in active state.
switchToActive();
}
template<class Impl>
void
DefaultFetch<Impl>::clearStates(ThreadID tid)
{
fetchStatus[tid] = Running;
pc[tid] = cpu->pcState(tid);
fetchOffset[tid] = 0;
macroop[tid] = NULL;
delayedCommit[tid] = false;
memReq[tid] = NULL;
stalls[tid].decode = false;
stalls[tid].drain = false;
fetchBufferPC[tid] = 0;
fetchBufferValid[tid] = false;
fetchQueue[tid].clear();
// TODO not sure what to do with priorityList for now
// priorityList.push_back(tid);
}
template<class Impl>
void
DefaultFetch<Impl>::resetStage()
{
numInst = 0;
interruptPending = false;
cacheBlocked = false;
priorityList.clear();
// Setup PC and nextPC with initial state.
for (ThreadID tid = 0; tid < numThreads; ++tid) {
fetchStatus[tid] = Running;
pc[tid] = cpu->pcState(tid);
fetchOffset[tid] = 0;
macroop[tid] = NULL;
delayedCommit[tid] = false;
memReq[tid] = NULL;
stalls[tid].decode = false;
stalls[tid].drain = false;
fetchBufferPC[tid] = 0;
fetchBufferValid[tid] = false;
fetchQueue[tid].clear();
priorityList.push_back(tid);
}
wroteToTimeBuffer = false;
_status = Inactive;
}
template<class Impl>
void
DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
{
ThreadID tid = cpu->contextToThread(pkt->req->contextId());
DPRINTF(Fetch, "[tid:%i] Waking up from cache miss.\n", tid);
assert(!cpu->switchedOut());
// Only change the status if it's still waiting on the icache access
// to return.
if (fetchStatus[tid] != IcacheWaitResponse ||
pkt->req != memReq[tid]) {
++fetchStats.icacheSquashes;
delete pkt;
return;
}
memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize);
fetchBufferValid[tid] = true;
// Wake up the CPU (if it went to sleep and was waiting on
// this completion event).
cpu->wakeCPU();
DPRINTF(Activity, "[tid:%i] Activating fetch due to cache completion\n",
tid);
switchToActive();
// Only switch to IcacheAccessComplete if we're not stalled as well.
if (checkStall(tid)) {
fetchStatus[tid] = Blocked;
} else {
fetchStatus[tid] = IcacheAccessComplete;
}
pkt->req->setAccessLatency();
cpu->ppInstAccessComplete->notify(pkt);
// Reset the mem req to NULL.
delete pkt;
memReq[tid] = NULL;
}
template <class Impl>
void
DefaultFetch<Impl>::drainResume()
{
for (ThreadID i = 0; i < numThreads; ++i) {
stalls[i].decode = false;
stalls[i].drain = false;
}
}
template <class Impl>
void
DefaultFetch<Impl>::drainSanityCheck() const
{
assert(isDrained());
assert(retryPkt == NULL);
assert(retryTid == InvalidThreadID);
assert(!cacheBlocked);
assert(!interruptPending);
for (ThreadID i = 0; i < numThreads; ++i) {
assert(!memReq[i]);
assert(fetchStatus[i] == Idle || stalls[i].drain);
}
branchPred->drainSanityCheck();
}
template <class Impl>
bool
DefaultFetch<Impl>::isDrained() const
{
/* Make sure that threads are either idle of that the commit stage
* has signaled that draining has completed by setting the drain
* stall flag. This effectively forces the pipeline to be disabled
* until the whole system is drained (simulation may continue to
* drain other components).
*/
for (ThreadID i = 0; i < numThreads; ++i) {
// Verify fetch queues are drained
if (!fetchQueue[i].empty())
return false;
// Return false if not idle or drain stalled
if (fetchStatus[i] != Idle) {
if (fetchStatus[i] == Blocked && stalls[i].drain)
continue;
else
return false;
}
}
/* The pipeline might start up again in the middle of the drain
* cycle if the finish translation event is scheduled, so make
* sure that's not the case.
*/
return !finishTranslationEvent.scheduled();
}
template <class Impl>
void
DefaultFetch<Impl>::takeOverFrom()
{
assert(cpu->getInstPort().isConnected());
resetStage();
}
template <class Impl>
void
DefaultFetch<Impl>::drainStall(ThreadID tid)
{
assert(cpu->isDraining());
assert(!stalls[tid].drain);
DPRINTF(Drain, "%i: Thread drained.\n", tid);
stalls[tid].drain = true;
}
template <class Impl>
void
DefaultFetch<Impl>::wakeFromQuiesce()
{
DPRINTF(Fetch, "Waking up from quiesce\n");
// Hopefully this is safe
// @todo: Allow other threads to wake from quiesce.
fetchStatus[0] = Running;
}
template <class Impl>
inline void
DefaultFetch<Impl>::switchToActive()
{
if (_status == Inactive) {
DPRINTF(Activity, "Activating stage.\n");
cpu->activateStage(O3CPU::FetchIdx);
_status = Active;
}
}
template <class Impl>
inline void
DefaultFetch<Impl>::switchToInactive()
{
if (_status == Active) {
DPRINTF(Activity, "Deactivating stage.\n");
cpu->deactivateStage(O3CPU::FetchIdx);
_status = Inactive;
}
}
template <class Impl>
void
DefaultFetch<Impl>::deactivateThread(ThreadID tid)
{
// Update priority list
auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);
if (thread_it != priorityList.end()) {
priorityList.erase(thread_it);
}
}
template <class Impl>
bool
DefaultFetch<Impl>::lookupAndUpdateNextPC(
const DynInstPtr &inst, TheISA::PCState &nextPC)
{
// Do branch prediction check here.
// A bit of a misnomer...next_PC is actually the current PC until
// this function updates it.
bool predict_taken;
if (!inst->isControl()) {
TheISA::advancePC(nextPC, inst->staticInst);
inst->setPredTarg(nextPC);
inst->setPredTaken(false);
return false;
}
ThreadID tid = inst->threadNumber;
predict_taken = branchPred->predict(inst->staticInst, inst->seqNum,
nextPC, tid);
if (predict_taken) {
DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
"predicted to be taken to %s\n",
tid, inst->seqNum, inst->pcState().instAddr(), nextPC);
} else {
DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
"predicted to be not taken\n",
tid, inst->seqNum, inst->pcState().instAddr());
}
DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
"predicted to go to %s\n",
tid, inst->seqNum, inst->pcState().instAddr(), nextPC);
inst->setPredTarg(nextPC);
inst->setPredTaken(predict_taken);
++fetchStats.branches;
if (predict_taken) {
++fetchStats.predictedBranches;
}
return predict_taken;
}
template <class Impl>
bool
DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
{
Fault fault = NoFault;
assert(!cpu->switchedOut());
// @todo: not sure if these should block translation.
//AlphaDep
if (cacheBlocked) {
DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
tid);
return false;
} else if (checkInterrupt(pc) && !delayedCommit[tid]) {
// Hold off fetch from getting new instructions when:
// Cache is blocked, or
// while an interrupt is pending and we're not in PAL mode, or
// fetch is switched out.
DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
tid);
return false;
}
// Align the fetch address to the start of a fetch buffer segment.
Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr);
DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
tid, fetchBufferBlockPC, vaddr);
// Setup the memReq to do a read of the first instruction's address.
// Set the appropriate read size and flags as well.
// Build request here.
RequestPtr mem_req = std::make_shared<Request>(
fetchBufferBlockPC, fetchBufferSize,
Request::INST_FETCH, cpu->instRequestorId(), pc,
cpu->thread[tid]->contextId());
mem_req->taskId(cpu->taskId());
memReq[tid] = mem_req;
// Initiate translation of the icache block
fetchStatus[tid] = ItlbWait;
FetchTranslation *trans = new FetchTranslation(this);
cpu->mmu->translateTiming(mem_req, cpu->thread[tid]->getTC(),
trans, BaseTLB::Execute);
return true;
}
template <class Impl>
void
DefaultFetch<Impl>::finishTranslation(const Fault &fault,
const RequestPtr &mem_req)
{
ThreadID tid = cpu->contextToThread(mem_req->contextId());
Addr fetchBufferBlockPC = mem_req->getVaddr();
assert(!cpu->switchedOut());
// Wake up CPU if it was idle
cpu->wakeCPU();
if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
mem_req->getVaddr() != memReq[tid]->getVaddr()) {
DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
tid);
++fetchStats.tlbSquashes;
return;
}
// If translation was successful, attempt to read the icache block.
if (fault == NoFault) {
// Check that we're not going off into random memory
// If we have, just wait around for commit to squash something and put
// us on the right track
if (!cpu->system->isMemAddr(mem_req->getPaddr())) {
warn("Address %#x is outside of physical memory, stopping fetch\n",
mem_req->getPaddr());
fetchStatus[tid] = NoGoodAddr;
memReq[tid] = NULL;
return;
}
// Build packet here.
PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);
data_pkt->dataDynamic(new uint8_t[fetchBufferSize]);
fetchBufferPC[tid] = fetchBufferBlockPC;
fetchBufferValid[tid] = false;
DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
fetchStats.cacheLines++;
// Access the cache.
if (!icachePort.sendTimingReq(data_pkt)) {
assert(retryPkt == NULL);
assert(retryTid == InvalidThreadID);
DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
fetchStatus[tid] = IcacheWaitRetry;
retryPkt = data_pkt;
retryTid = tid;
cacheBlocked = true;
} else {
DPRINTF(Fetch, "[tid:%i] Doing Icache access.\n", tid);
DPRINTF(Activity, "[tid:%i] Activity: Waiting on I-cache "
"response.\n", tid);
lastIcacheStall[tid] = curTick();
fetchStatus[tid] = IcacheWaitResponse;
// Notify Fetch Request probe when a packet containing a fetch
// request is successfully sent
ppFetchRequestSent->notify(mem_req);
}
} else {
// Don't send an instruction to decode if we can't handle it.
if (!(numInst < fetchWidth) || !(fetchQueue[tid].size() < fetchQueueSize)) {
assert(!finishTranslationEvent.scheduled());
finishTranslationEvent.setFault(fault);
finishTranslationEvent.setReq(mem_req);
cpu->schedule(finishTranslationEvent,
cpu->clockEdge(Cycles(1)));
return;
}
DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n",
tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
// Translation faulted, icache request won't be sent.
memReq[tid] = NULL;
// Send the fault to commit. This thread will not do anything
// until commit handles the fault. The only other way it can
// wake up is if a squash comes along and changes the PC.
TheISA::PCState fetchPC = pc[tid];
DPRINTF(Fetch, "[tid:%i] Translation faulted, building noop.\n", tid);
// We will use a nop in ordier to carry the fault.
DynInstPtr instruction = buildInst(tid, StaticInst::nopStaticInstPtr,
NULL, fetchPC, fetchPC, false);
instruction->setNotAnInst();
instruction->setPredTarg(fetchPC);
instruction->fault = fault;
wroteToTimeBuffer = true;
DPRINTF(Activity, "Activity this cycle.\n");
cpu->activityThisCycle();
fetchStatus[tid] = TrapPending;
DPRINTF(Fetch, "[tid:%i] Blocked, need to handle the trap.\n", tid);
DPRINTF(Fetch, "[tid:%i] fault (%s) detected @ PC %s.\n",
tid, fault->name(), pc[tid]);
}
_status = updateFetchStatus();
}
template <class Impl>
inline void
DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
const DynInstPtr squashInst, ThreadID tid)
{
DPRINTF(Fetch, "[tid:%i] Squashing, setting PC to: %s.\n",
tid, newPC);
pc[tid] = newPC;
fetchOffset[tid] = 0;
if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr())
macroop[tid] = squashInst->macroop;
else
macroop[tid] = NULL;
decoder[tid]->reset();
// Clear the icache miss if it's outstanding.
if (fetchStatus[tid] == IcacheWaitResponse) {
DPRINTF(Fetch, "[tid:%i] Squashing outstanding Icache miss.\n",
tid);
memReq[tid] = NULL;
} else if (fetchStatus[tid] == ItlbWait) {
DPRINTF(Fetch, "[tid:%i] Squashing outstanding ITLB miss.\n",
tid);
memReq[tid] = NULL;
}
// Get rid of the retrying packet if it was from this thread.
if (retryTid == tid) {
assert(cacheBlocked);
if (retryPkt) {
delete retryPkt;
}
retryPkt = NULL;
retryTid = InvalidThreadID;
}
fetchStatus[tid] = Squashing;
// Empty fetch queue
fetchQueue[tid].clear();
// microops are being squashed, it is not known wheather the
// youngest non-squashed microop was marked delayed commit
// or not. Setting the flag to true ensures that the
// interrupts are not handled when they cannot be, though
// some opportunities to handle interrupts may be missed.
delayedCommit[tid] = true;
++fetchStats.squashCycles;
}
template<class Impl>
void
DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC,
const DynInstPtr squashInst,
const InstSeqNum seq_num, ThreadID tid)
{
DPRINTF(Fetch, "[tid:%i] Squashing from decode.\n", tid);
doSquash(newPC, squashInst, tid);
// Tell the CPU to remove any instructions that are in flight between
// fetch and decode.
cpu->removeInstsUntil(seq_num, tid);
}
template<class Impl>
bool
DefaultFetch<Impl>::checkStall(ThreadID tid) const
{
bool ret_val = false;
if (stalls[tid].drain) {
assert(cpu->isDraining());
DPRINTF(Fetch,"[tid:%i] Drain stall detected.\n",tid);
ret_val = true;
}
return ret_val;
}
template<class Impl>
typename DefaultFetch<Impl>::FetchStatus
DefaultFetch<Impl>::updateFetchStatus()
{
//Check Running
std::list<ThreadID>::iterator threads = activeThreads->begin();
std::list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
if (fetchStatus[tid] == Running ||
fetchStatus[tid] == Squashing ||
fetchStatus[tid] == IcacheAccessComplete) {
if (_status == Inactive) {
DPRINTF(Activity, "[tid:%i] Activating stage.\n",tid);
if (fetchStatus[tid] == IcacheAccessComplete) {
DPRINTF(Activity, "[tid:%i] Activating fetch due to cache"
"completion\n",tid);
}
cpu->activateStage(O3CPU::FetchIdx);
}
return Active;
}
}
// Stage is switching from active to inactive, notify CPU of it.
if (_status == Active) {
DPRINTF(Activity, "Deactivating stage.\n");
cpu->deactivateStage(O3CPU::FetchIdx);
}
return Inactive;
}
template <class Impl>
void
DefaultFetch<Impl>::squash(const TheISA::PCState &newPC,
const InstSeqNum seq_num, DynInstPtr squashInst,
ThreadID tid)
{
DPRINTF(Fetch, "[tid:%i] Squash from commit.\n", tid);
doSquash(newPC, squashInst, tid);
// Tell the CPU to remove any instructions that are not in the ROB.
cpu->removeInstsNotInROB(tid);
}
template <class Impl>
void
DefaultFetch<Impl>::tick()
{
std::list<ThreadID>::iterator threads = activeThreads->begin();
std::list<ThreadID>::iterator end = activeThreads->end();
bool status_change = false;
wroteToTimeBuffer = false;
for (ThreadID i = 0; i < numThreads; ++i) {
issuePipelinedIfetch[i] = false;
}
while (threads != end) {
ThreadID tid = *threads++;
// Check the signals for each thread to determine the proper status
// for each thread.
bool updated_status = checkSignalsAndUpdate(tid);
status_change = status_change || updated_status;
}
DPRINTF(Fetch, "Running stage.\n");
if (FullSystem) {
if (fromCommit->commitInfo[0].interruptPending) {
interruptPending = true;
}
if (fromCommit->commitInfo[0].clearInterrupt) {
interruptPending = false;
}
}
for (threadFetched = 0; threadFetched < numFetchingThreads;
threadFetched++) {
// Fetch each of the actively fetching threads.
fetch(status_change);
}
// Record number of instructions fetched this cycle for distribution.
fetchStats.nisnDist.sample(numInst);
if (status_change) {
// Change the fetch stage status if there was a status change.
_status = updateFetchStatus();
}
// Issue the next I-cache request if possible.
for (ThreadID i = 0; i < numThreads; ++i) {
if (issuePipelinedIfetch[i]) {
pipelineIcacheAccesses(i);
}
}
// Send instructions enqueued into the fetch queue to decode.
// Limit rate by fetchWidth. Stall if decode is stalled.
unsigned insts_to_decode = 0;
unsigned available_insts = 0;
for (auto tid : *activeThreads) {
if (!stalls[tid].decode) {
available_insts += fetchQueue[tid].size();
}
}
// Pick a random thread to start trying to grab instructions from
auto tid_itr = activeThreads->begin();
std::advance(tid_itr, random_mt.random<uint8_t>(0, activeThreads->size() - 1));
while (available_insts != 0 && insts_to_decode < decodeWidth) {
ThreadID tid = *tid_itr;
if (!stalls[tid].decode && !fetchQueue[tid].empty()) {
const auto& inst = fetchQueue[tid].front();
toDecode->insts[toDecode->size++] = inst;
DPRINTF(Fetch, "[tid:%i] [sn:%llu] Sending instruction to decode "
"from fetch queue. Fetch queue size: %i.\n",
tid, inst->seqNum, fetchQueue[tid].size());
wroteToTimeBuffer = true;
fetchQueue[tid].pop_front();
insts_to_decode++;
available_insts--;
}
tid_itr++;
// Wrap around if at end of active threads list
if (tid_itr == activeThreads->end())
tid_itr = activeThreads->begin();
}
// If there was activity this cycle, inform the CPU of it.
if (wroteToTimeBuffer) {
DPRINTF(Activity, "Activity this cycle.\n");
cpu->activityThisCycle();
}
// Reset the number of the instruction we've fetched.
numInst = 0;
}
template <class Impl>
bool
DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)
{
// Update the per thread stall statuses.
if (fromDecode->decodeBlock[tid]) {
stalls[tid].decode = true;
}
if (fromDecode->decodeUnblock[tid]) {
assert(stalls[tid].decode);
assert(!fromDecode->decodeBlock[tid]);
stalls[tid].decode = false;
}
// Check squash signals from commit.
if (fromCommit->commitInfo[tid].squash) {
DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash "
"from commit.\n",tid);
// In any case, squash.
squash(fromCommit->commitInfo[tid].pc,
fromCommit->commitInfo[tid].doneSeqNum,
fromCommit->commitInfo[tid].squashInst, tid);
// If it was a branch mispredict on a control instruction, update the
// branch predictor with that instruction, otherwise just kill the
// invalid state we generated in after sequence number
if (fromCommit->commitInfo[tid].mispredictInst &&
fromCommit->commitInfo[tid].mispredictInst->isControl()) {
branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
fromCommit->commitInfo[tid].pc,
fromCommit->commitInfo[tid].branchTaken,
tid);
} else {
branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
tid);
}
return true;
} else if (fromCommit->commitInfo[tid].doneSeqNum) {
// Update the branch predictor if it wasn't a squashed instruction
// that was broadcasted.
branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid);
}
// Check squash signals from decode.
if (fromDecode->decodeInfo[tid].squash) {
DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash "
"from decode.\n",tid);
// Update the branch predictor.
if (fromDecode->decodeInfo[tid].branchMispredict) {
branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
fromDecode->decodeInfo[tid].nextPC,
fromDecode->decodeInfo[tid].branchTaken,
tid);
} else {
branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
tid);
}
if (fetchStatus[tid] != Squashing) {
DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
fromDecode->decodeInfo[tid].nextPC);
// Squash unless we're already squashing
squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
fromDecode->decodeInfo[tid].squashInst,
fromDecode->decodeInfo[tid].doneSeqNum,
tid);
return true;
}
}
if (checkStall(tid) &&
fetchStatus[tid] != IcacheWaitResponse &&
fetchStatus[tid] != IcacheWaitRetry &&
fetchStatus[tid] != ItlbWait &&
fetchStatus[tid] != QuiescePending) {
DPRINTF(Fetch, "[tid:%i] Setting to blocked\n",tid);
fetchStatus[tid] = Blocked;
return true;
}
if (fetchStatus[tid] == Blocked ||
fetchStatus[tid] == Squashing) {
// Switch status to running if fetch isn't being told to block or
// squash this cycle.
DPRINTF(Fetch, "[tid:%i] Done squashing, switching to running.\n",
tid);
fetchStatus[tid] = Running;
return true;
}
// If we've reached this point, we have not gotten any signals that
// cause fetch to change its status. Fetch remains the same as before.
return false;
}
template<class Impl>
typename Impl::DynInstPtr
DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
StaticInstPtr curMacroop, TheISA::PCState thisPC,
TheISA::PCState nextPC, bool trace)
{
// Get a sequence number.
InstSeqNum seq = cpu->getAndIncrementInstSeq();
// Create a new DynInst from the instruction fetched.
DynInstPtr instruction =
new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
instruction->setTid(tid);
instruction->setThreadState(cpu->thread[tid]);
DPRINTF(Fetch, "[tid:%i] Instruction PC %#x (%d) created "
"[sn:%lli].\n", tid, thisPC.instAddr(),
thisPC.microPC(), seq);
DPRINTF(Fetch, "[tid:%i] Instruction is: %s\n", tid,
instruction->staticInst->
disassemble(thisPC.instAddr()));
#if TRACING_ON
if (trace) {
instruction->traceData =
cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid),
instruction->staticInst, thisPC, curMacroop);
}
#else
instruction->traceData = NULL;
#endif
// Add instruction to the CPU's list of instructions.
instruction->setInstListIt(cpu->addInst(instruction));
// Write the instruction to the first slot in the queue
// that heads to decode.
assert(numInst < fetchWidth);
fetchQueue[tid].push_back(instruction);
assert(fetchQueue[tid].size() <= fetchQueueSize);
DPRINTF(Fetch, "[tid:%i] Fetch queue entry created (%i/%i).\n",
tid, fetchQueue[tid].size(), fetchQueueSize);
//toDecode->insts[toDecode->size++] = instruction;
// Keep track of if we can take an interrupt at this boundary
delayedCommit[tid] = instruction->isDelayedCommit();
return instruction;
}
template<class Impl>
void
DefaultFetch<Impl>::fetch(bool &status_change)
{
//////////////////////////////////////////
// Start actual fetch
//////////////////////////////////////////
ThreadID tid = getFetchingThread();
assert(!cpu->switchedOut());
if (tid == InvalidThreadID) {
// Breaks looping condition in tick()
threadFetched = numFetchingThreads;
if (numThreads == 1) { // @todo Per-thread stats
profileStall(0);
}
return;
}
DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
// The current PC.
TheISA::PCState thisPC = pc[tid];
Addr pcOffset = fetchOffset[tid];
Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
bool inRom = isRomMicroPC(thisPC.microPC());
// If returning from the delay of a cache miss, then update the status
// to running, otherwise do the cache access. Possibly move this up
// to tick() function.
if (fetchStatus[tid] == IcacheAccessComplete) {
DPRINTF(Fetch, "[tid:%i] Icache miss is complete.\n", tid);
fetchStatus[tid] = Running;
status_change = true;
} else if (fetchStatus[tid] == Running) {
// Align the fetch PC so its at the start of a fetch buffer segment.
Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
// If buffer is no longer valid or fetchAddr has moved to point
// to the next cache block, AND we have no remaining ucode
// from a macro-op, then start fetch from icache.
if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])
&& !inRom && !macroop[tid]) {
DPRINTF(Fetch, "[tid:%i] Attempting to translate and read "
"instruction, starting at PC %s.\n", tid, thisPC);
fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
if (fetchStatus[tid] == IcacheWaitResponse)
++fetchStats.icacheStallCycles;
else if (fetchStatus[tid] == ItlbWait)
++fetchStats.tlbCycles;
else
++fetchStats.miscStallCycles;
return;
} else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) {
// Stall CPU if an interrupt is posted and we're not issuing
// an delayed commit micro-op currently (delayed commit instructions
// are not interruptable by interrupts, only faults)
++fetchStats.miscStallCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is stalled!\n", tid);
return;
}
} else {
if (fetchStatus[tid] == Idle) {
++fetchStats.idleCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is idle!\n", tid);
}
// Status is Idle, so fetch should do nothing.
return;
}
++fetchStats.cycles;
TheISA::PCState nextPC = thisPC;
StaticInstPtr staticInst = NULL;
StaticInstPtr curMacroop = macroop[tid];
// If the read of the first instruction was successful, then grab the
// instructions from the rest of the cache line and put them into the
// queue heading to decode.
DPRINTF(Fetch, "[tid:%i] Adding instructions to queue to "
"decode.\n", tid);
// Need to keep track of whether or not a predicted branch
// ended this fetch block.
bool predictedBranch = false;
// Need to halt fetch if quiesce instruction detected
bool quiesce = false;
TheISA::MachInst *cacheInsts =
reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]);
const unsigned numInsts = fetchBufferSize / instSize;
unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
// Loop through instruction memory from the cache.
// Keep issuing while fetchWidth is available and branch is not
// predicted taken
while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
&& !predictedBranch && !quiesce) {
// We need to process more memory if we aren't going to get a
// StaticInst from the rom, the current macroop, or what's already
// in the decoder.
bool needMem = !inRom && !curMacroop &&
!decoder[tid]->instReady();
fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
if (needMem) {
// If buffer is no longer valid or fetchAddr has moved to point
// to the next cache block then start fetch from icache.
if (!fetchBufferValid[tid] ||
fetchBufferBlockPC != fetchBufferPC[tid])
break;
if (blkOffset >= numInsts) {
// We need to process more memory, but we've run out of the
// current block.
break;
}
decoder[tid]->moreBytes(thisPC, fetchAddr, cacheInsts[blkOffset]);
if (decoder[tid]->needMoreBytes()) {
blkOffset++;
fetchAddr += instSize;
pcOffset += instSize;
}
}
// Extract as many instructions and/or microops as we can from
// the memory we've processed so far.
do {
if (!(curMacroop || inRom)) {
if (decoder[tid]->instReady()) {
staticInst = decoder[tid]->decode(thisPC);
// Increment stat of fetched instructions.
++fetchStats.insts;
if (staticInst->isMacroop()) {
curMacroop = staticInst;
} else {
pcOffset = 0;
}
} else {
// We need more bytes for this instruction so blkOffset and
// pcOffset will be updated
break;
}
}
// Whether we're moving to a new macroop because we're at the
// end of the current one, or the branch predictor incorrectly
// thinks we are...
bool newMacro = false;
if (curMacroop || inRom) {
if (inRom) {
staticInst = decoder[tid]->fetchRomMicroop(
thisPC.microPC(), curMacroop);
} else {
staticInst = curMacroop->fetchMicroop(thisPC.microPC());
}
newMacro |= staticInst->isLastMicroop();
}
DynInstPtr instruction =
buildInst(tid, staticInst, curMacroop,
thisPC, nextPC, true);
ppFetch->notify(instruction);
numInst++;
#if TRACING_ON
if (DTRACE(O3PipeView)) {
instruction->fetchTick = curTick();
}
#endif
nextPC = thisPC;
// If we're branching after this instruction, quit fetching
// from the same block.
predictedBranch |= thisPC.branching();
predictedBranch |=
lookupAndUpdateNextPC(instruction, nextPC);
if (predictedBranch) {
DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC);
}
newMacro |= thisPC.instAddr() != nextPC.instAddr();
// Move to the next instruction, unless we have a branch.
thisPC = nextPC;
inRom = isRomMicroPC(thisPC.microPC());
if (newMacro) {
fetchAddr = thisPC.instAddr() & BaseCPU::PCMask;
blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
pcOffset = 0;
curMacroop = NULL;
}
if (instruction->isQuiesce()) {
DPRINTF(Fetch,
"Quiesce instruction encountered, halting fetch!\n");
fetchStatus[tid] = QuiescePending;
status_change = true;
quiesce = true;
break;
}
} while ((curMacroop || decoder[tid]->instReady()) &&
numInst < fetchWidth &&
fetchQueue[tid].size() < fetchQueueSize);
// Re-evaluate whether the next instruction to fetch is in micro-op ROM
// or not.
inRom = isRomMicroPC(thisPC.microPC());
}
if (predictedBranch) {
DPRINTF(Fetch, "[tid:%i] Done fetching, predicted branch "
"instruction encountered.\n", tid);
} else if (numInst >= fetchWidth) {
DPRINTF(Fetch, "[tid:%i] Done fetching, reached fetch bandwidth "
"for this cycle.\n", tid);
} else if (blkOffset >= fetchBufferSize) {
DPRINTF(Fetch, "[tid:%i] Done fetching, reached the end of the"
"fetch buffer.\n", tid);
}
macroop[tid] = curMacroop;
fetchOffset[tid] = pcOffset;
if (numInst > 0) {
wroteToTimeBuffer = true;
}
pc[tid] = thisPC;
// pipeline a fetch if we're crossing a fetch buffer boundary and not in
// a state that would preclude fetching
fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] &&
fetchStatus[tid] != IcacheWaitResponse &&
fetchStatus[tid] != ItlbWait &&
fetchStatus[tid] != IcacheWaitRetry &&
fetchStatus[tid] != QuiescePending &&
!curMacroop;
}
template<class Impl>
void
DefaultFetch<Impl>::recvReqRetry()
{
if (retryPkt != NULL) {
assert(cacheBlocked);
assert(retryTid != InvalidThreadID);
assert(fetchStatus[retryTid] == IcacheWaitRetry);
if (icachePort.sendTimingReq(retryPkt)) {
fetchStatus[retryTid] = IcacheWaitResponse;
// Notify Fetch Request probe when a retryPkt is successfully sent.
// Note that notify must be called before retryPkt is set to NULL.
ppFetchRequestSent->notify(retryPkt->req);
retryPkt = NULL;
retryTid = InvalidThreadID;
cacheBlocked = false;
}
} else {
assert(retryTid == InvalidThreadID);
// Access has been squashed since it was sent out. Just clear
// the cache being blocked.
cacheBlocked = false;
}
}
///////////////////////////////////////
// //
// SMT FETCH POLICY MAINTAINED HERE //
// //
///////////////////////////////////////
template<class Impl>
ThreadID
DefaultFetch<Impl>::getFetchingThread()
{
if (numThreads > 1) {
switch (fetchPolicy) {
case SMTFetchPolicy::RoundRobin:
return roundRobin();
case SMTFetchPolicy::IQCount:
return iqCount();
case SMTFetchPolicy::LSQCount:
return lsqCount();
case SMTFetchPolicy::Branch:
return branchCount();
default:
return InvalidThreadID;
}
} else {
std::list<ThreadID>::iterator thread = activeThreads->begin();
if (thread == activeThreads->end()) {
return InvalidThreadID;
}
ThreadID tid = *thread;
if (fetchStatus[tid] == Running ||
fetchStatus[tid] == IcacheAccessComplete ||
fetchStatus[tid] == Idle) {
return tid;
} else {
return InvalidThreadID;
}
}
}
template<class Impl>
ThreadID
DefaultFetch<Impl>::roundRobin()
{
std::list<ThreadID>::iterator pri_iter = priorityList.begin();
std::list<ThreadID>::iterator end = priorityList.end();
ThreadID high_pri;
while (pri_iter != end) {
high_pri = *pri_iter;
assert(high_pri <= numThreads);
if (fetchStatus[high_pri] == Running ||
fetchStatus[high_pri] == IcacheAccessComplete ||
fetchStatus[high_pri] == Idle) {
priorityList.erase(pri_iter);
priorityList.push_back(high_pri);
return high_pri;
}
pri_iter++;
}
return InvalidThreadID;
}
template<class Impl>
ThreadID
DefaultFetch<Impl>::iqCount()
{
//sorted from lowest->highest
std::priority_queue<unsigned, std::vector<unsigned>,
std::greater<unsigned> > PQ;
std::map<unsigned, ThreadID> threadMap;
std::list<ThreadID>::iterator threads = activeThreads->begin();
std::list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
unsigned iqCount = fromIEW->iewInfo[tid].iqCount;
//we can potentially get tid collisions if two threads
//have the same iqCount, but this should be rare.
PQ.push(iqCount);
threadMap[iqCount] = tid;
}
while (!PQ.empty()) {
ThreadID high_pri = threadMap[PQ.top()];
if (fetchStatus[high_pri] == Running ||
fetchStatus[high_pri] == IcacheAccessComplete ||
fetchStatus[high_pri] == Idle)
return high_pri;
else
PQ.pop();
}
return InvalidThreadID;
}
template<class Impl>
ThreadID
DefaultFetch<Impl>::lsqCount()
{
//sorted from lowest->highest
std::priority_queue<unsigned, std::vector<unsigned>,
std::greater<unsigned> > PQ;
std::map<unsigned, ThreadID> threadMap;
std::list<ThreadID>::iterator threads = activeThreads->begin();
std::list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount;
//we can potentially get tid collisions if two threads
//have the same iqCount, but this should be rare.
PQ.push(ldstqCount);
threadMap[ldstqCount] = tid;
}
while (!PQ.empty()) {
ThreadID high_pri = threadMap[PQ.top()];
if (fetchStatus[high_pri] == Running ||
fetchStatus[high_pri] == IcacheAccessComplete ||
fetchStatus[high_pri] == Idle)
return high_pri;
else
PQ.pop();
}
return InvalidThreadID;
}
template<class Impl>
ThreadID
DefaultFetch<Impl>::branchCount()
{
panic("Branch Count Fetch policy unimplemented\n");
return InvalidThreadID;
}
template<class Impl>
void
DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid)
{
if (!issuePipelinedIfetch[tid]) {
return;
}
// The next PC to access.
TheISA::PCState thisPC = pc[tid];
if (isRomMicroPC(thisPC.microPC())) {
return;
}
Addr pcOffset = fetchOffset[tid];
Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
// Align the fetch PC so its at the start of a fetch buffer segment.
Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
// Unless buffer already got the block, fetch it from icache.
if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) {
DPRINTF(Fetch, "[tid:%i] Issuing a pipelined I-cache access, "
"starting at PC %s.\n", tid, thisPC);
fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
}
}
template<class Impl>
void
DefaultFetch<Impl>::profileStall(ThreadID tid) {
DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
// @todo Per-thread stats
if (stalls[tid].drain) {
++fetchStats.pendingDrainCycles;
DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
} else if (activeThreads->empty()) {
++fetchStats.noActiveThreadStallCycles;
DPRINTF(Fetch, "Fetch has no active thread!\n");
} else if (fetchStatus[tid] == Blocked) {
++fetchStats.blockedCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is blocked!\n", tid);
} else if (fetchStatus[tid] == Squashing) {
++fetchStats.squashCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid);
} else if (fetchStatus[tid] == IcacheWaitResponse) {
++fetchStats.icacheStallCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n",
tid);
} else if (fetchStatus[tid] == ItlbWait) {
++fetchStats.tlbCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is waiting ITLB walk to "
"finish!\n", tid);
} else if (fetchStatus[tid] == TrapPending) {
++fetchStats.pendingTrapStallCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending trap!\n",
tid);
} else if (fetchStatus[tid] == QuiescePending) {
++fetchStats.pendingQuiesceStallCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending quiesce "
"instruction!\n", tid);
} else if (fetchStatus[tid] == IcacheWaitRetry) {
++fetchStats.icacheWaitRetryStallCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is waiting for an I-cache retry!\n",
tid);
} else if (fetchStatus[tid] == NoGoodAddr) {
DPRINTF(Fetch, "[tid:%i] Fetch predicted non-executable address\n",
tid);
} else {
DPRINTF(Fetch, "[tid:%i] Unexpected fetch stall reason "
"(Status: %i)\n",
tid, fetchStatus[tid]);
}
}
template<class Impl>
bool
DefaultFetch<Impl>::IcachePort::recvTimingResp(PacketPtr pkt)
{
DPRINTF(O3CPU, "Fetch unit received timing\n");
// We shouldn't ever get a cacheable block in Modified state
assert(pkt->req->isUncacheable() ||
!(pkt->cacheResponding() && !pkt->hasSharers()));
fetch->processCacheCompletion(pkt);
return true;
}
template<class Impl>
void
DefaultFetch<Impl>::IcachePort::recvReqRetry()
{
fetch->recvReqRetry();
}
#endif//__CPU_O3_FETCH_IMPL_HH__