blob: 24640f6a5519372198e98bbbf2c476d18fba6c38 [file] [log] [blame]
/*
* Copyright (c) 2012, 2014 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CPU_O3_DECODE_IMPL_HH__
#define __CPU_O3_DECODE_IMPL_HH__
#include "arch/types.hh"
#include "base/trace.hh"
#include "config/the_isa.hh"
#include "cpu/o3/decode.hh"
#include "cpu/inst_seq.hh"
#include "debug/Activity.hh"
#include "debug/Decode.hh"
#include "debug/O3PipeView.hh"
#include "params/DerivO3CPU.hh"
#include "sim/full_system.hh"
// clang complains about std::set being overloaded with Packet::set if
// we open up the entire namespace std
using std::list;
template<class Impl>
DefaultDecode<Impl>::DefaultDecode(O3CPU *_cpu, DerivO3CPUParams *params)
: cpu(_cpu),
renameToDecodeDelay(params->renameToDecodeDelay),
iewToDecodeDelay(params->iewToDecodeDelay),
commitToDecodeDelay(params->commitToDecodeDelay),
fetchToDecodeDelay(params->fetchToDecodeDelay),
decodeWidth(params->decodeWidth),
numThreads(params->numThreads),
stats(_cpu)
{
if (decodeWidth > Impl::MaxWidth)
fatal("decodeWidth (%d) is larger than compiled limit (%d),\n"
"\tincrease MaxWidth in src/cpu/o3/impl.hh\n",
decodeWidth, static_cast<int>(Impl::MaxWidth));
// @todo: Make into a parameter
skidBufferMax = (fetchToDecodeDelay + 1) * params->fetchWidth;
for (int tid = 0; tid < Impl::MaxThreads; tid++) {
stalls[tid] = {false};
decodeStatus[tid] = Idle;
bdelayDoneSeqNum[tid] = 0;
squashInst[tid] = nullptr;
squashAfterDelaySlot[tid] = 0;
}
}
template<class Impl>
void
DefaultDecode<Impl>::startupStage()
{
resetStage();
}
template<class Impl>
void
DefaultDecode<Impl>::clearStates(ThreadID tid)
{
decodeStatus[tid] = Idle;
stalls[tid].rename = false;
}
template<class Impl>
void
DefaultDecode<Impl>::resetStage()
{
_status = Inactive;
// Setup status, make sure stall signals are clear.
for (ThreadID tid = 0; tid < numThreads; ++tid) {
decodeStatus[tid] = Idle;
stalls[tid].rename = false;
}
}
template <class Impl>
std::string
DefaultDecode<Impl>::name() const
{
return cpu->name() + ".decode";
}
template <class Impl>
DefaultDecode<Impl>::DecodeStats::DecodeStats(O3CPU *cpu)
: Stats::Group(cpu, "decode"),
ADD_STAT(idleCycles, "Number of cycles decode is idle"),
ADD_STAT(blockedCycles, "Number of cycles decode is blocked"),
ADD_STAT(runCycles, "Number of cycles decode is running"),
ADD_STAT(unblockCycles, "Number of cycles decode is unblocking"),
ADD_STAT(squashCycles, "Number of cycles decode is squashing"),
ADD_STAT(branchResolved, "Number of times decode resolved a "
" branch"),
ADD_STAT(branchMispred, "Number of times decode detected a branch"
" misprediction"),
ADD_STAT(controlMispred,"Number of times decode detected an"
" instruction incorrectly predicted as a control"),
ADD_STAT(decodedInsts, "Number of instructions handled by decode"),
ADD_STAT(squashedInsts, "Number of squashed instructions handled"
" by decode")
{
idleCycles.prereq(idleCycles);
blockedCycles.prereq(blockedCycles);
runCycles.prereq(runCycles);
unblockCycles.prereq(unblockCycles);
squashCycles.prereq(squashCycles);
branchResolved.prereq(branchResolved);
branchMispred.prereq(branchMispred);
controlMispred.prereq(controlMispred);
decodedInsts.prereq(decodedInsts);
squashedInsts.prereq(squashedInsts);
}
template<class Impl>
void
DefaultDecode<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
timeBuffer = tb_ptr;
// Setup wire to write information back to fetch.
toFetch = timeBuffer->getWire(0);
// Create wires to get information from proper places in time buffer.
fromRename = timeBuffer->getWire(-renameToDecodeDelay);
fromIEW = timeBuffer->getWire(-iewToDecodeDelay);
fromCommit = timeBuffer->getWire(-commitToDecodeDelay);
}
template<class Impl>
void
DefaultDecode<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
{
decodeQueue = dq_ptr;
// Setup wire to write information to proper place in decode queue.
toRename = decodeQueue->getWire(0);
}
template<class Impl>
void
DefaultDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
fetchQueue = fq_ptr;
// Setup wire to read information from fetch queue.
fromFetch = fetchQueue->getWire(-fetchToDecodeDelay);
}
template<class Impl>
void
DefaultDecode<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
{
activeThreads = at_ptr;
}
template <class Impl>
void
DefaultDecode<Impl>::drainSanityCheck() const
{
for (ThreadID tid = 0; tid < numThreads; ++tid) {
assert(insts[tid].empty());
assert(skidBuffer[tid].empty());
}
}
template <class Impl>
bool
DefaultDecode<Impl>::isDrained() const
{
for (ThreadID tid = 0; tid < numThreads; ++tid) {
if (!insts[tid].empty() || !skidBuffer[tid].empty() ||
(decodeStatus[tid] != Running && decodeStatus[tid] != Idle))
return false;
}
return true;
}
template<class Impl>
bool
DefaultDecode<Impl>::checkStall(ThreadID tid) const
{
bool ret_val = false;
if (stalls[tid].rename) {
DPRINTF(Decode,"[tid:%i] Stall fom Rename stage detected.\n", tid);
ret_val = true;
}
return ret_val;
}
template<class Impl>
inline bool
DefaultDecode<Impl>::fetchInstsValid()
{
return fromFetch->size > 0;
}
template<class Impl>
bool
DefaultDecode<Impl>::block(ThreadID tid)
{
DPRINTF(Decode, "[tid:%i] Blocking.\n", tid);
// Add the current inputs to the skid buffer so they can be
// reprocessed when this stage unblocks.
skidInsert(tid);
// If the decode status is blocked or unblocking then decode has not yet
// signalled fetch to unblock. In that case, there is no need to tell
// fetch to block.
if (decodeStatus[tid] != Blocked) {
// Set the status to Blocked.
decodeStatus[tid] = Blocked;
if (toFetch->decodeUnblock[tid]) {
toFetch->decodeUnblock[tid] = false;
} else {
toFetch->decodeBlock[tid] = true;
wroteToTimeBuffer = true;
}
return true;
}
return false;
}
template<class Impl>
bool
DefaultDecode<Impl>::unblock(ThreadID tid)
{
// Decode is done unblocking only if the skid buffer is empty.
if (skidBuffer[tid].empty()) {
DPRINTF(Decode, "[tid:%i] Done unblocking.\n", tid);
toFetch->decodeUnblock[tid] = true;
wroteToTimeBuffer = true;
decodeStatus[tid] = Running;
return true;
}
DPRINTF(Decode, "[tid:%i] Currently unblocking.\n", tid);
return false;
}
template<class Impl>
void
DefaultDecode<Impl>::squash(const DynInstPtr &inst, ThreadID tid)
{
DPRINTF(Decode, "[tid:%i] [sn:%llu] Squashing due to incorrect branch "
"prediction detected at decode.\n", tid, inst->seqNum);
// Send back mispredict information.
toFetch->decodeInfo[tid].branchMispredict = true;
toFetch->decodeInfo[tid].predIncorrect = true;
toFetch->decodeInfo[tid].mispredictInst = inst;
toFetch->decodeInfo[tid].squash = true;
toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
toFetch->decodeInfo[tid].branchTaken = inst->pcState().branching();
toFetch->decodeInfo[tid].squashInst = inst;
if (toFetch->decodeInfo[tid].mispredictInst->isUncondCtrl()) {
toFetch->decodeInfo[tid].branchTaken = true;
}
InstSeqNum squash_seq_num = inst->seqNum;
// Might have to tell fetch to unblock.
if (decodeStatus[tid] == Blocked ||
decodeStatus[tid] == Unblocking) {
toFetch->decodeUnblock[tid] = 1;
}
// Set status to squashing.
decodeStatus[tid] = Squashing;
for (int i=0; i<fromFetch->size; i++) {
if (fromFetch->insts[i]->threadNumber == tid &&
fromFetch->insts[i]->seqNum > squash_seq_num) {
fromFetch->insts[i]->setSquashed();
}
}
// Clear the instruction list and skid buffer in case they have any
// insts in them.
while (!insts[tid].empty()) {
insts[tid].pop();
}
while (!skidBuffer[tid].empty()) {
skidBuffer[tid].pop();
}
// Squash instructions up until this one
cpu->removeInstsUntil(squash_seq_num, tid);
}
template<class Impl>
unsigned
DefaultDecode<Impl>::squash(ThreadID tid)
{
DPRINTF(Decode, "[tid:%i] Squashing.\n",tid);
if (decodeStatus[tid] == Blocked ||
decodeStatus[tid] == Unblocking) {
if (FullSystem) {
toFetch->decodeUnblock[tid] = 1;
} else {
// In syscall emulation, we can have both a block and a squash due
// to a syscall in the same cycle. This would cause both signals
// to be high. This shouldn't happen in full system.
// @todo: Determine if this still happens.
if (toFetch->decodeBlock[tid])
toFetch->decodeBlock[tid] = 0;
else
toFetch->decodeUnblock[tid] = 1;
}
}
// Set status to squashing.
decodeStatus[tid] = Squashing;
// Go through incoming instructions from fetch and squash them.
unsigned squash_count = 0;
for (int i=0; i<fromFetch->size; i++) {
if (fromFetch->insts[i]->threadNumber == tid) {
fromFetch->insts[i]->setSquashed();
squash_count++;
}
}
// Clear the instruction list and skid buffer in case they have any
// insts in them.
while (!insts[tid].empty()) {
insts[tid].pop();
}
while (!skidBuffer[tid].empty()) {
skidBuffer[tid].pop();
}
return squash_count;
}
template<class Impl>
void
DefaultDecode<Impl>::skidInsert(ThreadID tid)
{
DynInstPtr inst = NULL;
while (!insts[tid].empty()) {
inst = insts[tid].front();
insts[tid].pop();
assert(tid == inst->threadNumber);
skidBuffer[tid].push(inst);
DPRINTF(Decode,"Inserting [tid:%d][sn:%lli] PC: %s into decode skidBuffer %i\n",
inst->threadNumber, inst->seqNum, inst->pcState(), skidBuffer[tid].size());
}
// @todo: Eventually need to enforce this by not letting a thread
// fetch past its skidbuffer
assert(skidBuffer[tid].size() <= skidBufferMax);
}
template<class Impl>
bool
DefaultDecode<Impl>::skidsEmpty()
{
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
if (!skidBuffer[tid].empty())
return false;
}
return true;
}
template<class Impl>
void
DefaultDecode<Impl>::updateStatus()
{
bool any_unblocking = false;
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
if (decodeStatus[tid] == Unblocking) {
any_unblocking = true;
break;
}
}
// Decode will have activity if it's unblocking.
if (any_unblocking) {
if (_status == Inactive) {
_status = Active;
DPRINTF(Activity, "Activating stage.\n");
cpu->activateStage(O3CPU::DecodeIdx);
}
} else {
// If it's not unblocking, then decode will not have any internal
// activity. Switch it to inactive.
if (_status == Active) {
_status = Inactive;
DPRINTF(Activity, "Deactivating stage.\n");
cpu->deactivateStage(O3CPU::DecodeIdx);
}
}
}
template <class Impl>
void
DefaultDecode<Impl>::sortInsts()
{
int insts_from_fetch = fromFetch->size;
for (int i = 0; i < insts_from_fetch; ++i) {
insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]);
}
}
template<class Impl>
void
DefaultDecode<Impl>::readStallSignals(ThreadID tid)
{
if (fromRename->renameBlock[tid]) {
stalls[tid].rename = true;
}
if (fromRename->renameUnblock[tid]) {
assert(stalls[tid].rename);
stalls[tid].rename = false;
}
}
template <class Impl>
bool
DefaultDecode<Impl>::checkSignalsAndUpdate(ThreadID tid)
{
// Check if there's a squash signal, squash if there is.
// Check stall signals, block if necessary.
// If status was blocked
// Check if stall conditions have passed
// if so then go to unblocking
// If status was Squashing
// check if squashing is not high. Switch to running this cycle.
// Update the per thread stall statuses.
readStallSignals(tid);
// Check squash signals from commit.
if (fromCommit->commitInfo[tid].squash) {
DPRINTF(Decode, "[tid:%i] Squashing instructions due to squash "
"from commit.\n", tid);
squash(tid);
return true;
}
if (checkStall(tid)) {
return block(tid);
}
if (decodeStatus[tid] == Blocked) {
DPRINTF(Decode, "[tid:%i] Done blocking, switching to unblocking.\n",
tid);
decodeStatus[tid] = Unblocking;
unblock(tid);
return true;
}
if (decodeStatus[tid] == Squashing) {
// Switch status to running if decode isn't being told to block or
// squash this cycle.
DPRINTF(Decode, "[tid:%i] Done squashing, switching to running.\n",
tid);
decodeStatus[tid] = Running;
return false;
}
// If we've reached this point, we have not gotten any signals that
// cause decode to change its status. Decode remains the same as before.
return false;
}
template<class Impl>
void
DefaultDecode<Impl>::tick()
{
wroteToTimeBuffer = false;
bool status_change = false;
toRenameIndex = 0;
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
sortInsts();
//Check stall and squash signals.
while (threads != end) {
ThreadID tid = *threads++;
DPRINTF(Decode,"Processing [tid:%i]\n",tid);
status_change = checkSignalsAndUpdate(tid) || status_change;
decode(status_change, tid);
}
if (status_change) {
updateStatus();
}
if (wroteToTimeBuffer) {
DPRINTF(Activity, "Activity this cycle.\n");
cpu->activityThisCycle();
}
}
template<class Impl>
void
DefaultDecode<Impl>::decode(bool &status_change, ThreadID tid)
{
// If status is Running or idle,
// call decodeInsts()
// If status is Unblocking,
// buffer any instructions coming from fetch
// continue trying to empty skid buffer
// check if stall conditions have passed
if (decodeStatus[tid] == Blocked) {
++stats.blockedCycles;
} else if (decodeStatus[tid] == Squashing) {
++stats.squashCycles;
}
// Decode should try to decode as many instructions as its bandwidth
// will allow, as long as it is not currently blocked.
if (decodeStatus[tid] == Running ||
decodeStatus[tid] == Idle) {
DPRINTF(Decode, "[tid:%i] Not blocked, so attempting to run "
"stage.\n",tid);
decodeInsts(tid);
} else if (decodeStatus[tid] == Unblocking) {
// Make sure that the skid buffer has something in it if the
// status is unblocking.
assert(!skidsEmpty());
// If the status was unblocking, then instructions from the skid
// buffer were used. Remove those instructions and handle
// the rest of unblocking.
decodeInsts(tid);
if (fetchInstsValid()) {
// Add the current inputs to the skid buffer so they can be
// reprocessed when this stage unblocks.
skidInsert(tid);
}
status_change = unblock(tid) || status_change;
}
}
template <class Impl>
void
DefaultDecode<Impl>::decodeInsts(ThreadID tid)
{
// Instructions can come either from the skid buffer or the list of
// instructions coming from fetch, depending on decode's status.
int insts_available = decodeStatus[tid] == Unblocking ?
skidBuffer[tid].size() : insts[tid].size();
if (insts_available == 0) {
DPRINTF(Decode, "[tid:%i] Nothing to do, breaking out"
" early.\n",tid);
// Should I change the status to idle?
++stats.idleCycles;
return;
} else if (decodeStatus[tid] == Unblocking) {
DPRINTF(Decode, "[tid:%i] Unblocking, removing insts from skid "
"buffer.\n",tid);
++stats.unblockCycles;
} else if (decodeStatus[tid] == Running) {
++stats.runCycles;
}
std::queue<DynInstPtr>
&insts_to_decode = decodeStatus[tid] == Unblocking ?
skidBuffer[tid] : insts[tid];
DPRINTF(Decode, "[tid:%i] Sending instruction to rename.\n",tid);
while (insts_available > 0 && toRenameIndex < decodeWidth) {
assert(!insts_to_decode.empty());
DynInstPtr inst = std::move(insts_to_decode.front());
insts_to_decode.pop();
DPRINTF(Decode, "[tid:%i] Processing instruction [sn:%lli] with "
"PC %s\n", tid, inst->seqNum, inst->pcState());
if (inst->isSquashed()) {
DPRINTF(Decode, "[tid:%i] Instruction %i with PC %s is "
"squashed, skipping.\n",
tid, inst->seqNum, inst->pcState());
++stats.squashedInsts;
--insts_available;
continue;
}
// Also check if instructions have no source registers. Mark
// them as ready to issue at any time. Not sure if this check
// should exist here or at a later stage; however it doesn't matter
// too much for function correctness.
if (inst->numSrcRegs() == 0) {
inst->setCanIssue();
}
// This current instruction is valid, so add it into the decode
// queue. The next instruction may not be valid, so check to
// see if branches were predicted correctly.
toRename->insts[toRenameIndex] = inst;
++(toRename->size);
++toRenameIndex;
++stats.decodedInsts;
--insts_available;
#if TRACING_ON
if (DTRACE(O3PipeView)) {
inst->decodeTick = curTick() - inst->fetchTick;
}
#endif
// Ensure that if it was predicted as a branch, it really is a
// branch.
if (inst->readPredTaken() && !inst->isControl()) {
panic("Instruction predicted as a branch!");
++stats.controlMispred;
// Might want to set some sort of boolean and just do
// a check at the end
squash(inst, inst->threadNumber);
break;
}
// Go ahead and compute any PC-relative branches.
// This includes direct unconditional control and
// direct conditional control that is predicted taken.
if (inst->isDirectCtrl() &&
(inst->isUncondCtrl() || inst->readPredTaken()))
{
++stats.branchResolved;
if (!(inst->branchTarget() == inst->readPredTarg())) {
++stats.branchMispred;
// Might want to set some sort of boolean and just do
// a check at the end
squash(inst, inst->threadNumber);
TheISA::PCState target = inst->branchTarget();
DPRINTF(Decode,
"[tid:%i] [sn:%llu] "
"Updating predictions: Wrong predicted target: %s \
PredPC: %s\n",
tid, inst->seqNum, inst->readPredTarg(), target);
//The micro pc after an instruction level branch should be 0
inst->setPredTarg(target);
break;
}
}
}
// If we didn't process all instructions, then we will need to block
// and put all those instructions into the skid buffer.
if (!insts_to_decode.empty()) {
block(tid);
}
// Record that decode has written to the time buffer for activity
// tracking.
if (toRenameIndex) {
wroteToTimeBuffer = true;
}
}
#endif//__CPU_O3_DECODE_IMPL_HH__