blob: 7d344fa33208d58d60eeeb14d007f5ec1e8bb425 [file] [log] [blame]
/*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
* Korey Sewell
*/
#include <algorithm>
#include <cstring>
#include "config/use_checker.hh"
#include "arch/isa_traits.hh"
#include "arch/utility.hh"
#include "cpu/checker/cpu.hh"
#include "cpu/exetrace.hh"
#include "cpu/o3/fetch.hh"
#include "mem/packet.hh"
#include "mem/request.hh"
#include "sim/byteswap.hh"
#include "sim/host.hh"
#include "sim/core.hh"
#if FULL_SYSTEM
#include "arch/tlb.hh"
#include "arch/vtophys.hh"
#include "sim/system.hh"
#endif // FULL_SYSTEM
template<class Impl>
void
DefaultFetch<Impl>::IcachePort::setPeer(Port *port)
{
Port::setPeer(port);
fetch->setIcache();
}
template<class Impl>
Tick
DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
{
panic("DefaultFetch doesn't expect recvAtomic callback!");
return curTick;
}
template<class Impl>
void
DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
{
DPRINTF(Fetch, "DefaultFetch doesn't update its state from a "
"functional call.");
}
template<class Impl>
void
DefaultFetch<Impl>::IcachePort::recvStatusChange(Status status)
{
if (status == RangeChange) {
if (!snoopRangeSent) {
snoopRangeSent = true;
sendStatusChange(Port::RangeChange);
}
return;
}
panic("DefaultFetch doesn't expect recvStatusChange callback!");
}
template<class Impl>
bool
DefaultFetch<Impl>::IcachePort::recvTiming(PacketPtr pkt)
{
DPRINTF(Fetch, "Received timing\n");
if (pkt->isResponse()) {
fetch->processCacheCompletion(pkt);
}
//else Snooped a coherence request, just return
return true;
}
template<class Impl>
void
DefaultFetch<Impl>::IcachePort::recvRetry()
{
fetch->recvRetry();
}
template<class Impl>
DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, Params *params)
: cpu(_cpu),
branchPred(params),
predecoder(NULL),
decodeToFetchDelay(params->decodeToFetchDelay),
renameToFetchDelay(params->renameToFetchDelay),
iewToFetchDelay(params->iewToFetchDelay),
commitToFetchDelay(params->commitToFetchDelay),
fetchWidth(params->fetchWidth),
cacheBlocked(false),
retryPkt(NULL),
retryTid(-1),
numThreads(params->numberOfThreads),
numFetchingThreads(params->smtNumFetchingThreads),
interruptPending(false),
drainPending(false),
switchedOut(false)
{
if (numThreads > Impl::MaxThreads)
fatal("numThreads is not a valid value\n");
// Set fetch stage's status to inactive.
_status = Inactive;
std::string policy = params->smtFetchPolicy;
// Convert string to lowercase
std::transform(policy.begin(), policy.end(), policy.begin(),
(int(*)(int)) tolower);
// Figure out fetch policy
if (policy == "singlethread") {
fetchPolicy = SingleThread;
if (numThreads > 1)
panic("Invalid Fetch Policy for a SMT workload.");
} else if (policy == "roundrobin") {
fetchPolicy = RoundRobin;
DPRINTF(Fetch, "Fetch policy set to Round Robin\n");
} else if (policy == "branch") {
fetchPolicy = Branch;
DPRINTF(Fetch, "Fetch policy set to Branch Count\n");
} else if (policy == "iqcount") {
fetchPolicy = IQ;
DPRINTF(Fetch, "Fetch policy set to IQ count\n");
} else if (policy == "lsqcount") {
fetchPolicy = LSQ;
DPRINTF(Fetch, "Fetch policy set to LSQ count\n");
} else {
fatal("Invalid Fetch Policy. Options Are: {SingleThread,"
" RoundRobin,LSQcount,IQcount}\n");
}
// Get the size of an instruction.
instSize = sizeof(TheISA::MachInst);
// Name is finally available, so create the port.
icachePort = new IcachePort(this);
icachePort->snoopRangeSent = false;
#if USE_CHECKER
if (cpu->checker) {
cpu->checker->setIcachePort(icachePort);
}
#endif
}
template <class Impl>
std::string
DefaultFetch<Impl>::name() const
{
return cpu->name() + ".fetch";
}
template <class Impl>
void
DefaultFetch<Impl>::regStats()
{
icacheStallCycles
.name(name() + ".icacheStallCycles")
.desc("Number of cycles fetch is stalled on an Icache miss")
.prereq(icacheStallCycles);
fetchedInsts
.name(name() + ".Insts")
.desc("Number of instructions fetch has processed")
.prereq(fetchedInsts);
fetchedBranches
.name(name() + ".Branches")
.desc("Number of branches that fetch encountered")
.prereq(fetchedBranches);
predictedBranches
.name(name() + ".predictedBranches")
.desc("Number of branches that fetch has predicted taken")
.prereq(predictedBranches);
fetchCycles
.name(name() + ".Cycles")
.desc("Number of cycles fetch has run and was not squashing or"
" blocked")
.prereq(fetchCycles);
fetchSquashCycles
.name(name() + ".SquashCycles")
.desc("Number of cycles fetch has spent squashing")
.prereq(fetchSquashCycles);
fetchIdleCycles
.name(name() + ".IdleCycles")
.desc("Number of cycles fetch was idle")
.prereq(fetchIdleCycles);
fetchBlockedCycles
.name(name() + ".BlockedCycles")
.desc("Number of cycles fetch has spent blocked")
.prereq(fetchBlockedCycles);
fetchedCacheLines
.name(name() + ".CacheLines")
.desc("Number of cache lines fetched")
.prereq(fetchedCacheLines);
fetchMiscStallCycles
.name(name() + ".MiscStallCycles")
.desc("Number of cycles fetch has spent waiting on interrupts, or "
"bad addresses, or out of MSHRs")
.prereq(fetchMiscStallCycles);
fetchIcacheSquashes
.name(name() + ".IcacheSquashes")
.desc("Number of outstanding Icache misses that were squashed")
.prereq(fetchIcacheSquashes);
fetchNisnDist
.init(/* base value */ 0,
/* last value */ fetchWidth,
/* bucket size */ 1)
.name(name() + ".rateDist")
.desc("Number of instructions fetched each cycle (Total)")
.flags(Stats::pdf);
idleRate
.name(name() + ".idleRate")
.desc("Percent of cycles fetch was idle")
.prereq(idleRate);
idleRate = fetchIdleCycles * 100 / cpu->numCycles;
branchRate
.name(name() + ".branchRate")
.desc("Number of branch fetches per cycle")
.flags(Stats::total);
branchRate = fetchedBranches / cpu->numCycles;
fetchRate
.name(name() + ".rate")
.desc("Number of inst fetches per cycle")
.flags(Stats::total);
fetchRate = fetchedInsts / cpu->numCycles;
branchPred.regStats();
}
template<class Impl>
void
DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
{
timeBuffer = time_buffer;
// Create wires to get information from proper places in time buffer.
fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
fromRename = timeBuffer->getWire(-renameToFetchDelay);
fromIEW = timeBuffer->getWire(-iewToFetchDelay);
fromCommit = timeBuffer->getWire(-commitToFetchDelay);
}
template<class Impl>
void
DefaultFetch<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
{
activeThreads = at_ptr;
}
template<class Impl>
void
DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
fetchQueue = fq_ptr;
// Create wire to write information to proper place in fetch queue.
toDecode = fetchQueue->getWire(0);
}
template<class Impl>
void
DefaultFetch<Impl>::initStage()
{
// Setup PC and nextPC with initial state.
for (int tid = 0; tid < numThreads; tid++) {
PC[tid] = cpu->readPC(tid);
nextPC[tid] = cpu->readNextPC(tid);
microPC[tid] = cpu->readMicroPC(tid);
}
for (int tid=0; tid < numThreads; tid++) {
fetchStatus[tid] = Running;
priorityList.push_back(tid);
memReq[tid] = NULL;
stalls[tid].decode = false;
stalls[tid].rename = false;
stalls[tid].iew = false;
stalls[tid].commit = false;
}
// Schedule fetch to get the correct PC from the CPU
// scheduleFetchStartupEvent(1);
// Fetch needs to start fetching instructions at the very beginning,
// so it must start up in active state.
switchToActive();
}
template<class Impl>
void
DefaultFetch<Impl>::setIcache()
{
// Size of cache block.
cacheBlkSize = icachePort->peerBlockSize();
// Create mask to get rid of offset bits.
cacheBlkMask = (cacheBlkSize - 1);
for (int tid=0; tid < numThreads; tid++) {
// Create space to store a cache line.
cacheData[tid] = new uint8_t[cacheBlkSize];
cacheDataPC[tid] = 0;
cacheDataValid[tid] = false;
}
}
template<class Impl>
void
DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
{
unsigned tid = pkt->req->getThreadNum();
DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n",tid);
assert(!pkt->wasNacked());
// Only change the status if it's still waiting on the icache access
// to return.
if (fetchStatus[tid] != IcacheWaitResponse ||
pkt->req != memReq[tid] ||
isSwitchedOut()) {
++fetchIcacheSquashes;
delete pkt->req;
delete pkt;
return;
}
memcpy(cacheData[tid], pkt->getPtr<uint8_t>(), cacheBlkSize);
cacheDataValid[tid] = true;
if (!drainPending) {
// Wake up the CPU (if it went to sleep and was waiting on
// this completion event).
cpu->wakeCPU();
DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
tid);
switchToActive();
}
// Only switch to IcacheAccessComplete if we're not stalled as well.
if (checkStall(tid)) {
fetchStatus[tid] = Blocked;
} else {
fetchStatus[tid] = IcacheAccessComplete;
}
// Reset the mem req to NULL.
delete pkt->req;
delete pkt;
memReq[tid] = NULL;
}
template <class Impl>
bool
DefaultFetch<Impl>::drain()
{
// Fetch is ready to drain at any time.
cpu->signalDrained();
drainPending = true;
return true;
}
template <class Impl>
void
DefaultFetch<Impl>::resume()
{
drainPending = false;
}
template <class Impl>
void
DefaultFetch<Impl>::switchOut()
{
switchedOut = true;
// Branch predictor needs to have its state cleared.
branchPred.switchOut();
}
template <class Impl>
void
DefaultFetch<Impl>::takeOverFrom()
{
// Reset all state
for (int i = 0; i < Impl::MaxThreads; ++i) {
stalls[i].decode = 0;
stalls[i].rename = 0;
stalls[i].iew = 0;
stalls[i].commit = 0;
PC[i] = cpu->readPC(i);
nextPC[i] = cpu->readNextPC(i);
microPC[i] = cpu->readMicroPC(i);
fetchStatus[i] = Running;
}
numInst = 0;
wroteToTimeBuffer = false;
_status = Inactive;
switchedOut = false;
interruptPending = false;
branchPred.takeOverFrom();
}
template <class Impl>
void
DefaultFetch<Impl>::wakeFromQuiesce()
{
DPRINTF(Fetch, "Waking up from quiesce\n");
// Hopefully this is safe
// @todo: Allow other threads to wake from quiesce.
fetchStatus[0] = Running;
}
template <class Impl>
inline void
DefaultFetch<Impl>::switchToActive()
{
if (_status == Inactive) {
DPRINTF(Activity, "Activating stage.\n");
cpu->activateStage(O3CPU::FetchIdx);
_status = Active;
}
}
template <class Impl>
inline void
DefaultFetch<Impl>::switchToInactive()
{
if (_status == Active) {
DPRINTF(Activity, "Deactivating stage.\n");
cpu->deactivateStage(O3CPU::FetchIdx);
_status = Inactive;
}
}
template <class Impl>
bool
DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
Addr &next_NPC, Addr &next_MicroPC)
{
// Do branch prediction check here.
// A bit of a misnomer...next_PC is actually the current PC until
// this function updates it.
bool predict_taken;
if (!inst->isControl()) {
if (inst->isMicroop() && !inst->isLastMicroop()) {
next_MicroPC++;
} else {
next_PC = next_NPC;
next_NPC = next_NPC + instSize;
next_MicroPC = 0;
}
inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
inst->setPredTaken(false);
return false;
}
//Assume for now that all control flow is to a different macroop which
//would reset the micro pc to 0.
next_MicroPC = 0;
int tid = inst->threadNumber;
Addr pred_PC = next_PC;
predict_taken = branchPred.predict(inst, pred_PC, tid);
/* if (predict_taken) {
DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken to %#x.\n",
tid, pred_PC);
} else {
DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid);
}*/
#if ISA_HAS_DELAY_SLOT
next_PC = next_NPC;
if (predict_taken)
next_NPC = pred_PC;
else
next_NPC += instSize;
#else
if (predict_taken)
next_PC = pred_PC;
else
next_PC += instSize;
next_NPC = next_PC + instSize;
#endif
/* DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n",
tid, next_PC, next_NPC);*/
inst->setPredTarg(next_PC, next_NPC, next_MicroPC);
inst->setPredTaken(predict_taken);
++fetchedBranches;
if (predict_taken) {
++predictedBranches;
}
return predict_taken;
}
template <class Impl>
bool
DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid)
{
Fault fault = NoFault;
//AlphaDep
if (cacheBlocked) {
DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
tid);
return false;
} else if (isSwitchedOut()) {
DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, switched out\n",
tid);
return false;
} else if (interruptPending && !(fetch_PC & 0x3)) {
// Hold off fetch from getting new instructions when:
// Cache is blocked, or
// while an interrupt is pending and we're not in PAL mode, or
// fetch is switched out.
DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
tid);
return false;
}
// Align the fetch PC so it's at the start of a cache block.
Addr block_PC = icacheBlockAlignPC(fetch_PC);
// If we've already got the block, no need to try to fetch it again.
if (cacheDataValid[tid] && block_PC == cacheDataPC[tid]) {
return true;
}
// Setup the memReq to do a read of the first instruction's address.
// Set the appropriate read size and flags as well.
// Build request here.
RequestPtr mem_req = new Request(tid, block_PC, cacheBlkSize, 0,
fetch_PC, cpu->readCpuId(), tid);
memReq[tid] = mem_req;
// Translate the instruction request.
fault = cpu->translateInstReq(mem_req, cpu->thread[tid]);
// In the case of faults, the fetch stage may need to stall and wait
// for the ITB miss to be handled.
// If translation was successful, attempt to read the first
// instruction.
if (fault == NoFault) {
#if 0
if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) ||
memReq[tid]->isUncacheable()) {
DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a "
"misspeculating path)!",
memReq[tid]->paddr);
ret_fault = TheISA::genMachineCheckFault();
return false;
}
#endif
// Build packet here.
PacketPtr data_pkt = new Packet(mem_req,
MemCmd::ReadReq, Packet::Broadcast);
data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]);
cacheDataPC[tid] = block_PC;
cacheDataValid[tid] = false;
DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
fetchedCacheLines++;
// Now do the timing access to see whether or not the instruction
// exists within the cache.
if (!icachePort->sendTiming(data_pkt)) {
assert(retryPkt == NULL);
assert(retryTid == -1);
DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
fetchStatus[tid] = IcacheWaitRetry;
retryPkt = data_pkt;
retryTid = tid;
cacheBlocked = true;
return false;
}
DPRINTF(Fetch, "[tid:%i]: Doing cache access.\n", tid);
lastIcacheStall[tid] = curTick;
DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
"response.\n", tid);
fetchStatus[tid] = IcacheWaitResponse;
} else {
delete mem_req;
memReq[tid] = NULL;
}
ret_fault = fault;
return true;
}
template <class Impl>
inline void
DefaultFetch<Impl>::doSquash(const Addr &new_PC,
const Addr &new_NPC, const Addr &new_microPC, unsigned tid)
{
DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n",
tid, new_PC, new_NPC);
PC[tid] = new_PC;
nextPC[tid] = new_NPC;
microPC[tid] = new_microPC;
// Clear the icache miss if it's outstanding.
if (fetchStatus[tid] == IcacheWaitResponse) {
DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
tid);
memReq[tid] = NULL;
}
// Get rid of the retrying packet if it was from this thread.
if (retryTid == tid) {
assert(cacheBlocked);
if (retryPkt) {
delete retryPkt->req;
delete retryPkt;
}
retryPkt = NULL;
retryTid = -1;
}
fetchStatus[tid] = Squashing;
++fetchSquashCycles;
}
template<class Impl>
void
DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC,
const Addr &new_MicroPC,
const InstSeqNum &seq_num, unsigned tid)
{
DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
doSquash(new_PC, new_NPC, new_MicroPC, tid);
// Tell the CPU to remove any instructions that are in flight between
// fetch and decode.
cpu->removeInstsUntil(seq_num, tid);
}
template<class Impl>
bool
DefaultFetch<Impl>::checkStall(unsigned tid) const
{
bool ret_val = false;
if (cpu->contextSwitch) {
DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid);
ret_val = true;
} else if (stalls[tid].decode) {
DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid);
ret_val = true;
} else if (stalls[tid].rename) {
DPRINTF(Fetch,"[tid:%i]: Stall from Rename stage detected.\n",tid);
ret_val = true;
} else if (stalls[tid].iew) {
DPRINTF(Fetch,"[tid:%i]: Stall from IEW stage detected.\n",tid);
ret_val = true;
} else if (stalls[tid].commit) {
DPRINTF(Fetch,"[tid:%i]: Stall from Commit stage detected.\n",tid);
ret_val = true;
}
return ret_val;
}
template<class Impl>
typename DefaultFetch<Impl>::FetchStatus
DefaultFetch<Impl>::updateFetchStatus()
{
//Check Running
std::list<unsigned>::iterator threads = activeThreads->begin();
std::list<unsigned>::iterator end = activeThreads->end();
while (threads != end) {
unsigned tid = *threads++;
if (fetchStatus[tid] == Running ||
fetchStatus[tid] == Squashing ||
fetchStatus[tid] == IcacheAccessComplete) {
if (_status == Inactive) {
DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
if (fetchStatus[tid] == IcacheAccessComplete) {
DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
"completion\n",tid);
}
cpu->activateStage(O3CPU::FetchIdx);
}
return Active;
}
}
// Stage is switching from active to inactive, notify CPU of it.
if (_status == Active) {
DPRINTF(Activity, "Deactivating stage.\n");
cpu->deactivateStage(O3CPU::FetchIdx);
}
return Inactive;
}
template <class Impl>
void
DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC,
const Addr &new_MicroPC,
const InstSeqNum &seq_num, unsigned tid)
{
DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
doSquash(new_PC, new_NPC, new_MicroPC, tid);
// Tell the CPU to remove any instructions that are not in the ROB.
cpu->removeInstsNotInROB(tid);
}
template <class Impl>
void
DefaultFetch<Impl>::tick()
{
std::list<unsigned>::iterator threads = activeThreads->begin();
std::list<unsigned>::iterator end = activeThreads->end();
bool status_change = false;
wroteToTimeBuffer = false;
while (threads != end) {
unsigned tid = *threads++;
// Check the signals for each thread to determine the proper status
// for each thread.
bool updated_status = checkSignalsAndUpdate(tid);
status_change = status_change || updated_status;
}
DPRINTF(Fetch, "Running stage.\n");
// Reset the number of the instruction we're fetching.
numInst = 0;
#if FULL_SYSTEM
if (fromCommit->commitInfo[0].interruptPending) {
interruptPending = true;
}
if (fromCommit->commitInfo[0].clearInterrupt) {
interruptPending = false;
}
#endif
for (threadFetched = 0; threadFetched < numFetchingThreads;
threadFetched++) {
// Fetch each of the actively fetching threads.
fetch(status_change);
}
// Record number of instructions fetched this cycle for distribution.
fetchNisnDist.sample(numInst);
if (status_change) {
// Change the fetch stage status if there was a status change.
_status = updateFetchStatus();
}
// If there was activity this cycle, inform the CPU of it.
if (wroteToTimeBuffer || cpu->contextSwitch) {
DPRINTF(Activity, "Activity this cycle.\n");
cpu->activityThisCycle();
}
}
template <class Impl>
bool
DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
{
// Update the per thread stall statuses.
if (fromDecode->decodeBlock[tid]) {
stalls[tid].decode = true;
}
if (fromDecode->decodeUnblock[tid]) {
assert(stalls[tid].decode);
assert(!fromDecode->decodeBlock[tid]);
stalls[tid].decode = false;
}
if (fromRename->renameBlock[tid]) {
stalls[tid].rename = true;
}
if (fromRename->renameUnblock[tid]) {
assert(stalls[tid].rename);
assert(!fromRename->renameBlock[tid]);
stalls[tid].rename = false;
}
if (fromIEW->iewBlock[tid]) {
stalls[tid].iew = true;
}
if (fromIEW->iewUnblock[tid]) {
assert(stalls[tid].iew);
assert(!fromIEW->iewBlock[tid]);
stalls[tid].iew = false;
}
if (fromCommit->commitBlock[tid]) {
stalls[tid].commit = true;
}
if (fromCommit->commitUnblock[tid]) {
assert(stalls[tid].commit);
assert(!fromCommit->commitBlock[tid]);
stalls[tid].commit = false;
}
// Check squash signals from commit.
if (fromCommit->commitInfo[tid].squash) {
DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
"from commit.\n",tid);
// In any case, squash.
squash(fromCommit->commitInfo[tid].nextPC,
fromCommit->commitInfo[tid].nextNPC,
fromCommit->commitInfo[tid].nextMicroPC,
fromCommit->commitInfo[tid].doneSeqNum,
tid);
// Also check if there's a mispredict that happened.
if (fromCommit->commitInfo[tid].branchMispredict) {
branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
fromCommit->commitInfo[tid].nextPC,
fromCommit->commitInfo[tid].branchTaken,
tid);
} else {
branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
tid);
}
return true;
} else if (fromCommit->commitInfo[tid].doneSeqNum) {
// Update the branch predictor if it wasn't a squashed instruction
// that was broadcasted.
branchPred.update(fromCommit->commitInfo[tid].doneSeqNum, tid);
}
// Check ROB squash signals from commit.
if (fromCommit->commitInfo[tid].robSquashing) {
DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid);
// Continue to squash.
fetchStatus[tid] = Squashing;
return true;
}
// Check squash signals from decode.
if (fromDecode->decodeInfo[tid].squash) {
DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
"from decode.\n",tid);
// Update the branch predictor.
if (fromDecode->decodeInfo[tid].branchMispredict) {
branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
fromDecode->decodeInfo[tid].nextPC,
fromDecode->decodeInfo[tid].branchTaken,
tid);
} else {
branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
tid);
}
if (fetchStatus[tid] != Squashing) {
DPRINTF(Fetch, "Squashing from decode with PC = %#x, NPC = %#x\n",
fromDecode->decodeInfo[tid].nextPC,
fromDecode->decodeInfo[tid].nextNPC);
// Squash unless we're already squashing
squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
fromDecode->decodeInfo[tid].nextNPC,
fromDecode->decodeInfo[tid].nextMicroPC,
fromDecode->decodeInfo[tid].doneSeqNum,
tid);
return true;
}
}
if (checkStall(tid) &&
fetchStatus[tid] != IcacheWaitResponse &&
fetchStatus[tid] != IcacheWaitRetry) {
DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
fetchStatus[tid] = Blocked;
return true;
}
if (fetchStatus[tid] == Blocked ||
fetchStatus[tid] == Squashing) {
// Switch status to running if fetch isn't being told to block or
// squash this cycle.
DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
tid);
fetchStatus[tid] = Running;
return true;
}
// If we've reached this point, we have not gotten any signals that
// cause fetch to change its status. Fetch remains the same as before.
return false;
}
template<class Impl>
void
DefaultFetch<Impl>::fetch(bool &status_change)
{
//////////////////////////////////////////
// Start actual fetch
//////////////////////////////////////////
int tid = getFetchingThread(fetchPolicy);
if (tid == -1 || drainPending) {
DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
// Breaks looping condition in tick()
threadFetched = numFetchingThreads;
return;
}
DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
// The current PC.
Addr fetch_PC = PC[tid];
Addr fetch_NPC = nextPC[tid];
Addr fetch_MicroPC = microPC[tid];
// Fault code for memory access.
Fault fault = NoFault;
// If returning from the delay of a cache miss, then update the status
// to running, otherwise do the cache access. Possibly move this up
// to tick() function.
if (fetchStatus[tid] == IcacheAccessComplete) {
DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n",
tid);
fetchStatus[tid] = Running;
status_change = true;
} else if (fetchStatus[tid] == Running) {
DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
"instruction, starting at PC %08p.\n",
tid, fetch_PC);
bool fetch_success = fetchCacheLine(fetch_PC, fault, tid);
if (!fetch_success) {
if (cacheBlocked) {
++icacheStallCycles;
} else {
++fetchMiscStallCycles;
}
return;
}
} else {
if (fetchStatus[tid] == Idle) {
++fetchIdleCycles;
DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid);
} else if (fetchStatus[tid] == Blocked) {
++fetchBlockedCycles;
DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid);
} else if (fetchStatus[tid] == Squashing) {
++fetchSquashCycles;
DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid);
} else if (fetchStatus[tid] == IcacheWaitResponse) {
++icacheStallCycles;
DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n", tid);
}
// Status is Idle, Squashing, Blocked, or IcacheWaitResponse, so
// fetch should do nothing.
return;
}
++fetchCycles;
// If we had a stall due to an icache miss, then return.
if (fetchStatus[tid] == IcacheWaitResponse) {
++icacheStallCycles;
status_change = true;
return;
}
Addr next_PC = fetch_PC;
Addr next_NPC = fetch_NPC;
Addr next_MicroPC = fetch_MicroPC;
InstSeqNum inst_seq;
MachInst inst;
ExtMachInst ext_inst;
// @todo: Fix this hack.
unsigned offset = (fetch_PC & cacheBlkMask) & ~3;
StaticInstPtr staticInst = NULL;
StaticInstPtr macroop = NULL;
if (fault == NoFault) {
// If the read of the first instruction was successful, then grab the
// instructions from the rest of the cache line and put them into the
// queue heading to decode.
DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
"decode.\n",tid);
// Need to keep track of whether or not a predicted branch
// ended this fetch block.
bool predicted_branch = false;
while (offset < cacheBlkSize &&
numInst < fetchWidth &&
!predicted_branch) {
// If we're branching after this instruction, quite fetching
// from the same block then.
predicted_branch =
(fetch_PC + sizeof(TheISA::MachInst) != fetch_NPC);
if (predicted_branch) {
DPRINTF(Fetch, "Branch detected with PC = %#x, NPC = %#x\n",
fetch_PC, fetch_NPC);
}
// Make sure this is a valid index.
assert(offset <= cacheBlkSize - instSize);
if (!macroop) {
// Get the instruction from the array of the cache line.
inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
(&cacheData[tid][offset]));
predecoder.setTC(cpu->thread[tid]->getTC());
predecoder.moreBytes(fetch_PC, fetch_PC, inst);
ext_inst = predecoder.getExtMachInst();
staticInst = StaticInstPtr(ext_inst, fetch_PC);
if (staticInst->isMacroop())
macroop = staticInst;
}
do {
if (macroop) {
staticInst = macroop->fetchMicroop(fetch_MicroPC);
if (staticInst->isLastMicroop())
macroop = NULL;
}
// Get a sequence number.
inst_seq = cpu->getAndIncrementInstSeq();
// Create a new DynInst from the instruction fetched.
DynInstPtr instruction = new DynInst(staticInst,
fetch_PC, fetch_NPC, fetch_MicroPC,
next_PC, next_NPC, next_MicroPC,
inst_seq, cpu);
instruction->setTid(tid);
instruction->setASID(tid);
instruction->setThreadState(cpu->thread[tid]);
DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created "
"[sn:%lli]\n",
tid, instruction->readPC(), inst_seq);
//DPRINTF(Fetch, "[tid:%i]: MachInst is %#x\n", tid, ext_inst);
DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
tid, instruction->staticInst->disassemble(fetch_PC));
#if TRACING_ON
instruction->traceData =
cpu->getTracer()->getInstRecord(curTick, cpu->tcBase(tid),
instruction->staticInst, instruction->readPC());
#else
instruction->traceData = NULL;
#endif
///FIXME This needs to be more robust in dealing with delay slots
predicted_branch |=
lookupAndUpdateNextPC(instruction, next_PC, next_NPC, next_MicroPC);
// Add instruction to the CPU's list of instructions.
instruction->setInstListIt(cpu->addInst(instruction));
// Write the instruction to the first slot in the queue
// that heads to decode.
toDecode->insts[numInst] = instruction;
toDecode->size++;
// Increment stat of fetched instructions.
++fetchedInsts;
// Move to the next instruction, unless we have a branch.
fetch_PC = next_PC;
fetch_NPC = next_NPC;
fetch_MicroPC = next_MicroPC;
if (instruction->isQuiesce()) {
DPRINTF(Fetch, "Quiesce instruction encountered, halting fetch!",
curTick);
fetchStatus[tid] = QuiescePending;
++numInst;
status_change = true;
break;
}
++numInst;
} while (staticInst->isMicroop() &&
!staticInst->isLastMicroop() &&
numInst < fetchWidth);
offset += instSize;
}
if (predicted_branch) {
DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
"instruction encountered.\n", tid);
} else if (numInst >= fetchWidth) {
DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
"for this cycle.\n", tid);
} else if (offset >= cacheBlkSize) {
DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
"block.\n", tid);
}
}
if (numInst > 0) {
wroteToTimeBuffer = true;
}
// Now that fetching is completed, update the PC to signify what the next
// cycle will be.
if (fault == NoFault) {
PC[tid] = next_PC;
nextPC[tid] = next_NPC;
microPC[tid] = next_MicroPC;
DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC);
} else {
// We shouldn't be in an icache miss and also have a fault (an ITB
// miss)
if (fetchStatus[tid] == IcacheWaitResponse) {
panic("Fetch should have exited prior to this!");
}
// Send the fault to commit. This thread will not do anything
// until commit handles the fault. The only other way it can
// wake up is if a squash comes along and changes the PC.
assert(numInst < fetchWidth);
// Get a sequence number.
inst_seq = cpu->getAndIncrementInstSeq();
// We will use a nop in order to carry the fault.
ext_inst = TheISA::NoopMachInst;
// Create a new DynInst from the dummy nop.
DynInstPtr instruction = new DynInst(ext_inst,
fetch_PC, fetch_NPC, fetch_MicroPC,
next_PC, next_NPC, next_MicroPC,
inst_seq, cpu);
instruction->setPredTarg(next_NPC, next_NPC + instSize, 0);
instruction->setTid(tid);
instruction->setASID(tid);
instruction->setThreadState(cpu->thread[tid]);
instruction->traceData = NULL;
instruction->setInstListIt(cpu->addInst(instruction));
instruction->fault = fault;
toDecode->insts[numInst] = instruction;
toDecode->size++;
DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n",tid);
fetchStatus[tid] = TrapPending;
status_change = true;
DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %08p",
tid, fault->name(), PC[tid]);
}
}
template<class Impl>
void
DefaultFetch<Impl>::recvRetry()
{
if (retryPkt != NULL) {
assert(cacheBlocked);
assert(retryTid != -1);
assert(fetchStatus[retryTid] == IcacheWaitRetry);
if (icachePort->sendTiming(retryPkt)) {
fetchStatus[retryTid] = IcacheWaitResponse;
retryPkt = NULL;
retryTid = -1;
cacheBlocked = false;
}
} else {
assert(retryTid == -1);
// Access has been squashed since it was sent out. Just clear
// the cache being blocked.
cacheBlocked = false;
}
}
///////////////////////////////////////
// //
// SMT FETCH POLICY MAINTAINED HERE //
// //
///////////////////////////////////////
template<class Impl>
int
DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)
{
if (numThreads > 1) {
switch (fetch_priority) {
case SingleThread:
return 0;
case RoundRobin:
return roundRobin();
case IQ:
return iqCount();
case LSQ:
return lsqCount();
case Branch:
return branchCount();
default:
return -1;
}
} else {
std::list<unsigned>::iterator thread = activeThreads->begin();
assert(thread != activeThreads->end());
int tid = *thread;
if (fetchStatus[tid] == Running ||
fetchStatus[tid] == IcacheAccessComplete ||
fetchStatus[tid] == Idle) {
return tid;
} else {
return -1;
}
}
}
template<class Impl>
int
DefaultFetch<Impl>::roundRobin()
{
std::list<unsigned>::iterator pri_iter = priorityList.begin();
std::list<unsigned>::iterator end = priorityList.end();
int high_pri;
while (pri_iter != end) {
high_pri = *pri_iter;
assert(high_pri <= numThreads);
if (fetchStatus[high_pri] == Running ||
fetchStatus[high_pri] == IcacheAccessComplete ||
fetchStatus[high_pri] == Idle) {
priorityList.erase(pri_iter);
priorityList.push_back(high_pri);
return high_pri;
}
pri_iter++;
}
return -1;
}
template<class Impl>
int
DefaultFetch<Impl>::iqCount()
{
std::priority_queue<unsigned> PQ;
std::list<unsigned>::iterator threads = activeThreads->begin();
std::list<unsigned>::iterator end = activeThreads->end();
while (threads != end) {
unsigned tid = *threads++;
PQ.push(fromIEW->iewInfo[tid].iqCount);
}
while (!PQ.empty()) {
unsigned high_pri = PQ.top();
if (fetchStatus[high_pri] == Running ||
fetchStatus[high_pri] == IcacheAccessComplete ||
fetchStatus[high_pri] == Idle)
return high_pri;
else
PQ.pop();
}
return -1;
}
template<class Impl>
int
DefaultFetch<Impl>::lsqCount()
{
std::priority_queue<unsigned> PQ;
std::list<unsigned>::iterator threads = activeThreads->begin();
std::list<unsigned>::iterator end = activeThreads->end();
while (threads != end) {
unsigned tid = *threads++;
PQ.push(fromIEW->iewInfo[tid].ldstqCount);
}
while (!PQ.empty()) {
unsigned high_pri = PQ.top();
if (fetchStatus[high_pri] == Running ||
fetchStatus[high_pri] == IcacheAccessComplete ||
fetchStatus[high_pri] == Idle)
return high_pri;
else
PQ.pop();
}
return -1;
}
template<class Impl>
int
DefaultFetch<Impl>::branchCount()
{
std::list<unsigned>::iterator thread = activeThreads->begin();
assert(thread != activeThreads->end());
unsigned tid = *thread;
panic("Branch Count Fetch policy unimplemented\n");
return 0 * tid;
}