blob: ac0c7ac5b75f30c5a3cd78fcdaf703cd76a87801 [file] [log] [blame]
/*
* Copyright (c) 2011-2012,2016-2017, 2019 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2002-2005 The Regents of The University of Michigan
* Copyright (c) 2011 Regents of the University of California
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* Copyright (c) 2013 Mark D. Hill and David A. Wood
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Steve Reinhardt
* Nathan Binkert
* Rick Strong
*/
#include "cpu/base.hh"
#include <iostream>
#include <sstream>
#include <string>
#include "arch/generic/tlb.hh"
#include "base/cprintf.hh"
#include "base/loader/symtab.hh"
#include "base/logging.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/checker/cpu.hh"
#include "cpu/cpuevent.hh"
#include "cpu/profile.hh"
#include "cpu/thread_context.hh"
#include "debug/Mwait.hh"
#include "debug/SyscallVerbose.hh"
#include "debug/Thread.hh"
#include "mem/page_table.hh"
#include "params/BaseCPU.hh"
#include "sim/clocked_object.hh"
#include "sim/full_system.hh"
#include "sim/process.hh"
#include "sim/sim_events.hh"
#include "sim/sim_exit.hh"
#include "sim/system.hh"
// Hack
#include "sim/stat_control.hh"
using namespace std;
vector<BaseCPU *> BaseCPU::cpuList;
// This variable reflects the max number of threads in any CPU. Be
// careful to only use it once all the CPUs that you care about have
// been initialized
int maxThreadsPerCPU = 1;
CPUProgressEvent::CPUProgressEvent(BaseCPU *_cpu, Tick ival)
: Event(Event::Progress_Event_Pri), _interval(ival), lastNumInst(0),
cpu(_cpu), _repeatEvent(true)
{
if (_interval)
cpu->schedule(this, curTick() + _interval);
}
void
CPUProgressEvent::process()
{
Counter temp = cpu->totalOps();
if (_repeatEvent)
cpu->schedule(this, curTick() + _interval);
if (cpu->switchedOut()) {
return;
}
#ifndef NDEBUG
double ipc = double(temp - lastNumInst) / (_interval / cpu->clockPeriod());
DPRINTFN("%s progress event, total committed:%i, progress insts committed: "
"%lli, IPC: %0.8d\n", cpu->name(), temp, temp - lastNumInst,
ipc);
ipc = 0.0;
#else
cprintf("%lli: %s progress event, total committed:%i, progress insts "
"committed: %lli\n", curTick(), cpu->name(), temp,
temp - lastNumInst);
#endif
lastNumInst = temp;
}
const char *
CPUProgressEvent::description() const
{
return "CPU Progress";
}
BaseCPU::BaseCPU(Params *p, bool is_checker)
: ClockedObject(p), instCnt(0), _cpuId(p->cpu_id), _socketId(p->socket_id),
_instMasterId(p->system->getMasterId(this, "inst")),
_dataMasterId(p->system->getMasterId(this, "data")),
_taskId(ContextSwitchTaskId::Unknown), _pid(invldPid),
_switchedOut(p->switched_out), _cacheLineSize(p->system->cacheLineSize()),
interrupts(p->interrupts), profileEvent(NULL),
numThreads(p->numThreads), system(p->system),
previousCycle(0), previousState(CPU_STATE_SLEEP),
functionTraceStream(nullptr), currentFunctionStart(0),
currentFunctionEnd(0), functionEntryTick(0),
addressMonitor(p->numThreads),
syscallRetryLatency(p->syscallRetryLatency),
pwrGatingLatency(p->pwr_gating_latency),
powerGatingOnIdle(p->power_gating_on_idle),
enterPwrGatingEvent([this]{ enterPwrGating(); }, name())
{
// if Python did not provide a valid ID, do it here
if (_cpuId == -1 ) {
_cpuId = cpuList.size();
}
// add self to global list of CPUs
cpuList.push_back(this);
DPRINTF(SyscallVerbose, "Constructing CPU with id %d, socket id %d\n",
_cpuId, _socketId);
if (numThreads > maxThreadsPerCPU)
maxThreadsPerCPU = numThreads;
functionTracingEnabled = false;
if (p->function_trace) {
const string fname = csprintf("ftrace.%s", name());
functionTraceStream = simout.findOrCreate(fname)->stream();
currentFunctionStart = currentFunctionEnd = 0;
functionEntryTick = p->function_trace_start;
if (p->function_trace_start == 0) {
functionTracingEnabled = true;
} else {
Event *event = new EventFunctionWrapper(
[this]{ enableFunctionTrace(); }, name(), true);
schedule(event, p->function_trace_start);
}
}
// The interrupts should always be present unless this CPU is
// switched in later or in case it is a checker CPU
if (!params()->switched_out && !is_checker) {
fatal_if(interrupts.size() != numThreads,
"CPU %s has %i interrupt controllers, but is expecting one "
"per thread (%i)\n",
name(), interrupts.size(), numThreads);
for (ThreadID tid = 0; tid < numThreads; tid++)
interrupts[tid]->setCPU(this);
}
if (FullSystem) {
if (params()->profile)
profileEvent = new EventFunctionWrapper(
[this]{ processProfileEvent(); },
name());
}
tracer = params()->tracer;
if (params()->isa.size() != numThreads) {
fatal("Number of ISAs (%i) assigned to the CPU does not equal number "
"of threads (%i).\n", params()->isa.size(), numThreads);
}
}
void
BaseCPU::enableFunctionTrace()
{
functionTracingEnabled = true;
}
BaseCPU::~BaseCPU()
{
delete profileEvent;
}
void
BaseCPU::armMonitor(ThreadID tid, Addr address)
{
assert(tid < numThreads);
AddressMonitor &monitor = addressMonitor[tid];
monitor.armed = true;
monitor.vAddr = address;
monitor.pAddr = 0x0;
DPRINTF(Mwait,"[tid:%d] Armed monitor (vAddr=0x%lx)\n", tid, address);
}
bool
BaseCPU::mwait(ThreadID tid, PacketPtr pkt)
{
assert(tid < numThreads);
AddressMonitor &monitor = addressMonitor[tid];
if (!monitor.gotWakeup) {
int block_size = cacheLineSize();
uint64_t mask = ~((uint64_t)(block_size - 1));
assert(pkt->req->hasPaddr());
monitor.pAddr = pkt->getAddr() & mask;
monitor.waiting = true;
DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, "
"line's paddr=0x%lx)\n", tid, monitor.vAddr, monitor.pAddr);
return true;
} else {
monitor.gotWakeup = false;
return false;
}
}
void
BaseCPU::mwaitAtomic(ThreadID tid, ThreadContext *tc, BaseTLB *dtb)
{
assert(tid < numThreads);
AddressMonitor &monitor = addressMonitor[tid];
RequestPtr req = std::make_shared<Request>();
Addr addr = monitor.vAddr;
int block_size = cacheLineSize();
uint64_t mask = ~((uint64_t)(block_size - 1));
int size = block_size;
//The address of the next line if it crosses a cache line boundary.
Addr secondAddr = roundDown(addr + size - 1, block_size);
if (secondAddr > addr)
size = secondAddr - addr;
req->setVirt(0, addr, size, 0x0, dataMasterId(), tc->instAddr());
// translate to physical address
Fault fault = dtb->translateAtomic(req, tc, BaseTLB::Read);
assert(fault == NoFault);
monitor.pAddr = req->getPaddr() & mask;
monitor.waiting = true;
DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
tid, monitor.vAddr, monitor.pAddr);
}
void
BaseCPU::init()
{
// Set up instruction-count-based termination events, if any. This needs
// to happen after threadContexts has been constructed.
if (params()->max_insts_any_thread != 0) {
const char *cause = "a thread reached the max instruction count";
for (ThreadID tid = 0; tid < numThreads; ++tid)
scheduleInstStop(tid, params()->max_insts_any_thread, cause);
}
// Set up instruction-count-based termination events for SimPoints
// Typically, there are more than one action points.
// Simulation.py is responsible to take the necessary actions upon
// exitting the simulation loop.
if (!params()->simpoint_start_insts.empty()) {
const char *cause = "simpoint starting point found";
for (size_t i = 0; i < params()->simpoint_start_insts.size(); ++i)
scheduleInstStop(0, params()->simpoint_start_insts[i], cause);
}
if (params()->max_insts_all_threads != 0) {
const char *cause = "all threads reached the max instruction count";
// allocate & initialize shared downcounter: each event will
// decrement this when triggered; simulation will terminate
// when counter reaches 0
int *counter = new int;
*counter = numThreads;
for (ThreadID tid = 0; tid < numThreads; ++tid) {
Event *event = new CountedExitEvent(cause, *counter);
threadContexts[tid]->scheduleInstCountEvent(
event, params()->max_insts_all_threads);
}
}
if (!params()->switched_out) {
registerThreadContexts();
verifyMemoryMode();
}
}
void
BaseCPU::startup()
{
if (FullSystem) {
if (!params()->switched_out && profileEvent)
schedule(profileEvent, curTick());
}
if (params()->progress_interval) {
new CPUProgressEvent(this, params()->progress_interval);
}
if (_switchedOut)
ClockedObject::pwrState(Enums::PwrState::OFF);
// Assumption CPU start to operate instantaneously without any latency
if (ClockedObject::pwrState() == Enums::PwrState::UNDEFINED)
ClockedObject::pwrState(Enums::PwrState::ON);
}
ProbePoints::PMUUPtr
BaseCPU::pmuProbePoint(const char *name)
{
ProbePoints::PMUUPtr ptr;
ptr.reset(new ProbePoints::PMU(getProbeManager(), name));
return ptr;
}
void
BaseCPU::regProbePoints()
{
ppAllCycles = pmuProbePoint("Cycles");
ppActiveCycles = pmuProbePoint("ActiveCycles");
ppRetiredInsts = pmuProbePoint("RetiredInsts");
ppRetiredInstsPC = pmuProbePoint("RetiredInstsPC");
ppRetiredLoads = pmuProbePoint("RetiredLoads");
ppRetiredStores = pmuProbePoint("RetiredStores");
ppRetiredBranches = pmuProbePoint("RetiredBranches");
ppSleeping = new ProbePointArg<bool>(this->getProbeManager(),
"Sleeping");
}
void
BaseCPU::probeInstCommit(const StaticInstPtr &inst, Addr pc)
{
if (!inst->isMicroop() || inst->isLastMicroop()) {
ppRetiredInsts->notify(1);
ppRetiredInstsPC->notify(pc);
}
if (inst->isLoad())
ppRetiredLoads->notify(1);
if (inst->isStore() || inst->isAtomic())
ppRetiredStores->notify(1);
if (inst->isControl())
ppRetiredBranches->notify(1);
}
void
BaseCPU::regStats()
{
ClockedObject::regStats();
using namespace Stats;
numCycles
.name(name() + ".numCycles")
.desc("number of cpu cycles simulated")
;
numWorkItemsStarted
.name(name() + ".numWorkItemsStarted")
.desc("number of work items this cpu started")
;
numWorkItemsCompleted
.name(name() + ".numWorkItemsCompleted")
.desc("number of work items this cpu completed")
;
int size = threadContexts.size();
if (size > 1) {
for (int i = 0; i < size; ++i) {
stringstream namestr;
ccprintf(namestr, "%s.ctx%d", name(), i);
threadContexts[i]->regStats(namestr.str());
}
} else if (size == 1)
threadContexts[0]->regStats(name());
}
Port &
BaseCPU::getPort(const string &if_name, PortID idx)
{
// Get the right port based on name. This applies to all the
// subclasses of the base CPU and relies on their implementation
// of getDataPort and getInstPort.
if (if_name == "dcache_port")
return getDataPort();
else if (if_name == "icache_port")
return getInstPort();
else
return ClockedObject::getPort(if_name, idx);
}
void
BaseCPU::registerThreadContexts()
{
assert(system->multiThread || numThreads == 1);
ThreadID size = threadContexts.size();
for (ThreadID tid = 0; tid < size; ++tid) {
ThreadContext *tc = threadContexts[tid];
if (system->multiThread) {
tc->setContextId(system->registerThreadContext(tc));
} else {
tc->setContextId(system->registerThreadContext(tc, _cpuId));
}
if (!FullSystem)
tc->getProcessPtr()->assignThreadContext(tc->contextId());
}
}
void
BaseCPU::deschedulePowerGatingEvent()
{
if (enterPwrGatingEvent.scheduled()){
deschedule(enterPwrGatingEvent);
}
}
void
BaseCPU::schedulePowerGatingEvent()
{
for (auto tc : threadContexts) {
if (tc->status() == ThreadContext::Active)
return;
}
if (ClockedObject::pwrState() == Enums::PwrState::CLK_GATED &&
powerGatingOnIdle) {
assert(!enterPwrGatingEvent.scheduled());
// Schedule a power gating event when clock gated for the specified
// amount of time
schedule(enterPwrGatingEvent, clockEdge(pwrGatingLatency));
}
}
int
BaseCPU::findContext(ThreadContext *tc)
{
ThreadID size = threadContexts.size();
for (ThreadID tid = 0; tid < size; ++tid) {
if (tc == threadContexts[tid])
return tid;
}
return 0;
}
void
BaseCPU::activateContext(ThreadID thread_num)
{
DPRINTF(Thread, "activate contextId %d\n",
threadContexts[thread_num]->contextId());
// Squash enter power gating event while cpu gets activated
if (enterPwrGatingEvent.scheduled())
deschedule(enterPwrGatingEvent);
// For any active thread running, update CPU power state to active (ON)
ClockedObject::pwrState(Enums::PwrState::ON);
updateCycleCounters(CPU_STATE_WAKEUP);
}
void
BaseCPU::suspendContext(ThreadID thread_num)
{
DPRINTF(Thread, "suspend contextId %d\n",
threadContexts[thread_num]->contextId());
// Check if all threads are suspended
for (auto t : threadContexts) {
if (t->status() != ThreadContext::Suspended) {
return;
}
}
// All CPU thread are suspended, update cycle count
updateCycleCounters(CPU_STATE_SLEEP);
// All CPU threads suspended, enter lower power state for the CPU
ClockedObject::pwrState(Enums::PwrState::CLK_GATED);
// If pwrGatingLatency is set to 0 then this mechanism is disabled
if (powerGatingOnIdle) {
// Schedule power gating event when clock gated for pwrGatingLatency
// cycles
schedule(enterPwrGatingEvent, clockEdge(pwrGatingLatency));
}
}
void
BaseCPU::haltContext(ThreadID thread_num)
{
updateCycleCounters(BaseCPU::CPU_STATE_SLEEP);
}
void
BaseCPU::enterPwrGating(void)
{
ClockedObject::pwrState(Enums::PwrState::OFF);
}
void
BaseCPU::switchOut()
{
assert(!_switchedOut);
_switchedOut = true;
if (profileEvent && profileEvent->scheduled())
deschedule(profileEvent);
// Flush all TLBs in the CPU to avoid having stale translations if
// it gets switched in later.
flushTLBs();
// Go to the power gating state
ClockedObject::pwrState(Enums::PwrState::OFF);
}
void
BaseCPU::takeOverFrom(BaseCPU *oldCPU)
{
assert(threadContexts.size() == oldCPU->threadContexts.size());
assert(_cpuId == oldCPU->cpuId());
assert(_switchedOut);
assert(oldCPU != this);
_pid = oldCPU->getPid();
_taskId = oldCPU->taskId();
// Take over the power state of the switchedOut CPU
ClockedObject::pwrState(oldCPU->pwrState());
previousState = oldCPU->previousState;
previousCycle = oldCPU->previousCycle;
_switchedOut = false;
ThreadID size = threadContexts.size();
for (ThreadID i = 0; i < size; ++i) {
ThreadContext *newTC = threadContexts[i];
ThreadContext *oldTC = oldCPU->threadContexts[i];
newTC->takeOverFrom(oldTC);
CpuEvent::replaceThreadContext(oldTC, newTC);
assert(newTC->contextId() == oldTC->contextId());
assert(newTC->threadId() == oldTC->threadId());
system->replaceThreadContext(newTC, newTC->contextId());
/* This code no longer works since the zero register (e.g.,
* r31 on Alpha) doesn't necessarily contain zero at this
* point.
if (DTRACE(Context))
ThreadContext::compare(oldTC, newTC);
*/
Port *old_itb_port = oldTC->getITBPtr()->getTableWalkerPort();
Port *old_dtb_port = oldTC->getDTBPtr()->getTableWalkerPort();
Port *new_itb_port = newTC->getITBPtr()->getTableWalkerPort();
Port *new_dtb_port = newTC->getDTBPtr()->getTableWalkerPort();
// Move over any table walker ports if they exist
if (new_itb_port)
new_itb_port->takeOverFrom(old_itb_port);
if (new_dtb_port)
new_dtb_port->takeOverFrom(old_dtb_port);
newTC->getITBPtr()->takeOverFrom(oldTC->getITBPtr());
newTC->getDTBPtr()->takeOverFrom(oldTC->getDTBPtr());
// Checker whether or not we have to transfer CheckerCPU
// objects over in the switch
CheckerCPU *oldChecker = oldTC->getCheckerCpuPtr();
CheckerCPU *newChecker = newTC->getCheckerCpuPtr();
if (oldChecker && newChecker) {
Port *old_checker_itb_port =
oldChecker->getITBPtr()->getTableWalkerPort();
Port *old_checker_dtb_port =
oldChecker->getDTBPtr()->getTableWalkerPort();
Port *new_checker_itb_port =
newChecker->getITBPtr()->getTableWalkerPort();
Port *new_checker_dtb_port =
newChecker->getDTBPtr()->getTableWalkerPort();
newChecker->getITBPtr()->takeOverFrom(oldChecker->getITBPtr());
newChecker->getDTBPtr()->takeOverFrom(oldChecker->getDTBPtr());
// Move over any table walker ports if they exist for checker
if (new_checker_itb_port)
new_checker_itb_port->takeOverFrom(old_checker_itb_port);
if (new_checker_dtb_port)
new_checker_dtb_port->takeOverFrom(old_checker_dtb_port);
}
}
interrupts = oldCPU->interrupts;
for (ThreadID tid = 0; tid < numThreads; tid++) {
interrupts[tid]->setCPU(this);
}
oldCPU->interrupts.clear();
if (FullSystem) {
for (ThreadID i = 0; i < size; ++i)
threadContexts[i]->profileClear();
if (profileEvent)
schedule(profileEvent, curTick());
}
// All CPUs have an instruction and a data port, and the new CPU's
// ports are dangling while the old CPU has its ports connected
// already. Unbind the old CPU and then bind the ports of the one
// we are switching to.
getInstPort().takeOverFrom(&oldCPU->getInstPort());
getDataPort().takeOverFrom(&oldCPU->getDataPort());
}
void
BaseCPU::flushTLBs()
{
for (ThreadID i = 0; i < threadContexts.size(); ++i) {
ThreadContext &tc(*threadContexts[i]);
CheckerCPU *checker(tc.getCheckerCpuPtr());
tc.getITBPtr()->flushAll();
tc.getDTBPtr()->flushAll();
if (checker) {
checker->getITBPtr()->flushAll();
checker->getDTBPtr()->flushAll();
}
}
}
void
BaseCPU::processProfileEvent()
{
ThreadID size = threadContexts.size();
for (ThreadID i = 0; i < size; ++i)
threadContexts[i]->profileSample();
schedule(profileEvent, curTick() + params()->profile);
}
void
BaseCPU::serialize(CheckpointOut &cp) const
{
SERIALIZE_SCALAR(instCnt);
if (!_switchedOut) {
/* Unlike _pid, _taskId is not serialized, as they are dynamically
* assigned unique ids that are only meaningful for the duration of
* a specific run. We will need to serialize the entire taskMap in
* system. */
SERIALIZE_SCALAR(_pid);
// Serialize the threads, this is done by the CPU implementation.
for (ThreadID i = 0; i < numThreads; ++i) {
ScopedCheckpointSection sec(cp, csprintf("xc.%i", i));
interrupts[i]->serialize(cp);
serializeThread(cp, i);
}
}
}
void
BaseCPU::unserialize(CheckpointIn &cp)
{
UNSERIALIZE_SCALAR(instCnt);
if (!_switchedOut) {
UNSERIALIZE_SCALAR(_pid);
// Unserialize the threads, this is done by the CPU implementation.
for (ThreadID i = 0; i < numThreads; ++i) {
ScopedCheckpointSection sec(cp, csprintf("xc.%i", i));
interrupts[i]->unserialize(cp);
unserializeThread(cp, i);
}
}
}
void
BaseCPU::scheduleInstStop(ThreadID tid, Counter insts, const char *cause)
{
const Tick now(getCurrentInstCount(tid));
Event *event(new LocalSimLoopExitEvent(cause, 0));
threadContexts[tid]->scheduleInstCountEvent(event, now + insts);
}
Tick
BaseCPU::getCurrentInstCount(ThreadID tid)
{
return threadContexts[tid]->getCurrentInstCount();
}
AddressMonitor::AddressMonitor() {
armed = false;
waiting = false;
gotWakeup = false;
}
bool AddressMonitor::doMonitor(PacketPtr pkt) {
assert(pkt->req->hasPaddr());
if (armed && waiting) {
if (pAddr == pkt->getAddr()) {
DPRINTF(Mwait,"pAddr=0x%lx invalidated: waking up core\n",
pkt->getAddr());
waiting = false;
return true;
}
}
return false;
}
void
BaseCPU::traceFunctionsInternal(Addr pc)
{
if (!debugSymbolTable)
return;
// if pc enters different function, print new function symbol and
// update saved range. Otherwise do nothing.
if (pc < currentFunctionStart || pc >= currentFunctionEnd) {
string sym_str;
bool found = debugSymbolTable->findNearestSymbol(pc, sym_str,
currentFunctionStart,
currentFunctionEnd);
if (!found) {
// no symbol found: use addr as label
sym_str = csprintf("0x%x", pc);
currentFunctionStart = pc;
currentFunctionEnd = pc + 1;
}
ccprintf(*functionTraceStream, " (%d)\n%d: %s",
curTick() - functionEntryTick, curTick(), sym_str);
functionEntryTick = curTick();
}
}
bool
BaseCPU::waitForRemoteGDB() const
{
return params()->wait_for_remote_gdb;
}