| /* |
| * Copyright 2014 Google, Inc. |
| * Copyright (c) 2010-2013,2015,2017-2018, 2020-2021 ARM Limited |
| * All rights reserved |
| * |
| * The license below extends only to copyright in the software and shall |
| * not be construed as granting a license to any other intellectual |
| * property including but not limited to intellectual property relating |
| * to a hardware implementation of the functionality of the software |
| * licensed hereunder. You may use the software subject to the license |
| * terms below provided that you ensure that this notice is replicated |
| * unmodified and in its entirety in all distributions of the software, |
| * modified or unmodified, in source code or in binary form. |
| * |
| * Copyright (c) 2002-2005 The Regents of The University of Michigan |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer; |
| * redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution; |
| * neither the name of the copyright holders nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "cpu/simple/timing.hh" |
| |
| #include "arch/generic/decoder.hh" |
| #include "base/compiler.hh" |
| #include "config/the_isa.hh" |
| #include "cpu/exetrace.hh" |
| #include "debug/Config.hh" |
| #include "debug/Drain.hh" |
| #include "debug/ExecFaulting.hh" |
| #include "debug/HtmCpu.hh" |
| #include "debug/Mwait.hh" |
| #include "debug/SimpleCPU.hh" |
| #include "mem/packet.hh" |
| #include "mem/packet_access.hh" |
| #include "params/BaseTimingSimpleCPU.hh" |
| #include "sim/faults.hh" |
| #include "sim/full_system.hh" |
| #include "sim/system.hh" |
| |
| namespace gem5 |
| { |
| |
| void |
| TimingSimpleCPU::init() |
| { |
| BaseSimpleCPU::init(); |
| } |
| |
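// Stash the packet and schedule this event on the owning CPU's event
// queue; when the event fires, the packet is handed back to the CPU.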
| void |
| TimingSimpleCPU::TimingCPUPort::TickEvent::schedule(PacketPtr _pkt, Tick t) |
| { |
| pkt = _pkt; |
| cpu->schedule(this, t); |
| } |
| |
| TimingSimpleCPU::TimingSimpleCPU(const BaseTimingSimpleCPUParams &p) |
| : BaseSimpleCPU(p), fetchTranslation(this), icachePort(this), |
| dcachePort(this), ifetch_pkt(NULL), dcache_pkt(NULL), previousCycle(0), |
| fetchEvent([this]{ fetch(); }, name()) |
| { |
| _status = Idle; |
| } |
| |
| |
| |
| TimingSimpleCPU::~TimingSimpleCPU() |
| { |
| } |
| |
| DrainState |
| TimingSimpleCPU::drain() |
| { |
    // Deschedule the power gating event, if any.
| deschedulePowerGatingEvent(); |
| |
| if (switchedOut()) |
| return DrainState::Drained; |
| |
| if (_status == Idle || |
| (_status == BaseSimpleCPU::Running && isCpuDrained())) { |
| DPRINTF(Drain, "No need to drain.\n"); |
| activeThreads.clear(); |
| return DrainState::Drained; |
| } else { |
| DPRINTF(Drain, "Requesting drain.\n"); |
| |
| // The fetch event can become descheduled if a drain didn't |
| // succeed on the first attempt. We need to reschedule it if |
| // the CPU is waiting for a microcode routine to complete. |
| if (_status == BaseSimpleCPU::Running && !fetchEvent.scheduled()) |
| schedule(fetchEvent, clockEdge()); |
| |
| return DrainState::Draining; |
| } |
| } |
| |
| void |
| TimingSimpleCPU::drainResume() |
| { |
| assert(!fetchEvent.scheduled()); |
| if (switchedOut()) |
| return; |
| |
| DPRINTF(SimpleCPU, "Resume\n"); |
| verifyMemoryMode(); |
| |
| assert(!threadContexts.empty()); |
| |
| _status = BaseSimpleCPU::Idle; |
| |
| for (ThreadID tid = 0; tid < numThreads; tid++) { |
| if (threadInfo[tid]->thread->status() == ThreadContext::Active) { |
| threadInfo[tid]->execContextStats.notIdleFraction = 1; |
| |
| activeThreads.push_back(tid); |
| |
| _status = BaseSimpleCPU::Running; |
| |
            // Fetch if any threads are active.
| if (!fetchEvent.scheduled()) { |
| schedule(fetchEvent, nextCycle()); |
| } |
| } else { |
| threadInfo[tid]->execContextStats.notIdleFraction = 0; |
| } |
| } |
| |
    // Reschedule the power gating event, if any.
| schedulePowerGatingEvent(); |
| } |
| |
| bool |
| TimingSimpleCPU::tryCompleteDrain() |
| { |
| if (drainState() != DrainState::Draining) |
| return false; |
| |
| DPRINTF(Drain, "tryCompleteDrain.\n"); |
| if (!isCpuDrained()) |
| return false; |
| |
| DPRINTF(Drain, "CPU done draining, processing drain event\n"); |
| signalDrainDone(); |
| |
| return true; |
| } |
| |
| void |
| TimingSimpleCPU::switchOut() |
| { |
| SimpleExecContext& t_info = *threadInfo[curThread]; |
| [[maybe_unused]] SimpleThread* thread = t_info.thread; |
| |
| // hardware transactional memory |
| // Cannot switch out the CPU in the middle of a transaction |
| assert(!t_info.inHtmTransactionalState()); |
| |
| BaseSimpleCPU::switchOut(); |
| |
| assert(!fetchEvent.scheduled()); |
| assert(_status == BaseSimpleCPU::Running || _status == Idle); |
| assert(!t_info.stayAtPC); |
| assert(thread->pcState().microPC() == 0); |
| |
| updateCycleCounts(); |
| updateCycleCounters(BaseCPU::CPU_STATE_ON); |
| } |
| |
| |
| void |
| TimingSimpleCPU::takeOverFrom(BaseCPU *oldCPU) |
| { |
| BaseSimpleCPU::takeOverFrom(oldCPU); |
| |
| previousCycle = curCycle(); |
| } |
| |
| void |
| TimingSimpleCPU::verifyMemoryMode() const |
| { |
| if (!system->isTimingMode()) { |
| fatal("The timing CPU requires the memory system to be in " |
| "'timing' mode.\n"); |
| } |
| } |
| |
| void |
| TimingSimpleCPU::activateContext(ThreadID thread_num) |
| { |
| DPRINTF(SimpleCPU, "ActivateContext %d\n", thread_num); |
| |
| assert(thread_num < numThreads); |
| |
| threadInfo[thread_num]->execContextStats.notIdleFraction = 1; |
| if (_status == BaseSimpleCPU::Idle) |
| _status = BaseSimpleCPU::Running; |
| |
| // kick things off by initiating the fetch of the next instruction |
| if (!fetchEvent.scheduled()) |
| schedule(fetchEvent, clockEdge(Cycles(0))); |
| |
| if (std::find(activeThreads.begin(), activeThreads.end(), thread_num) |
| == activeThreads.end()) { |
| activeThreads.push_back(thread_num); |
| } |
| |
| BaseCPU::activateContext(thread_num); |
| } |
| |
| |
| void |
| TimingSimpleCPU::suspendContext(ThreadID thread_num) |
| { |
| DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num); |
| |
| assert(thread_num < numThreads); |
| activeThreads.remove(thread_num); |
| |
| // hardware transactional memory |
| // Cannot suspend context in the middle of a transaction. |
| assert(!threadInfo[curThread]->inHtmTransactionalState()); |
| |
| if (_status == Idle) |
| return; |
| |
| assert(_status == BaseSimpleCPU::Running); |
| |
| threadInfo[thread_num]->execContextStats.notIdleFraction = 0; |
| |
| if (activeThreads.empty()) { |
| _status = Idle; |
| |
| if (fetchEvent.scheduled()) { |
| deschedule(fetchEvent); |
| } |
| } |
| |
| BaseCPU::suspendContext(thread_num); |
| } |
| |
| bool |
| TimingSimpleCPU::handleReadPacket(PacketPtr pkt) |
| { |
| SimpleExecContext &t_info = *threadInfo[curThread]; |
| SimpleThread* thread = t_info.thread; |
| |
| const RequestPtr &req = pkt->req; |
| |
| // hardware transactional memory |
| // sanity check |
| if (req->isHTMCmd()) { |
| assert(!req->isLocalAccess()); |
| } |
| |
    // We're about to issue a locked load, so tell the monitor
    // to start caring about this address.
| if (pkt->isRead() && pkt->req->isLLSC()) { |
| thread->getIsaPtr()->handleLockedRead(pkt->req); |
| } |
| if (req->isLocalAccess()) { |
| Cycles delay = req->localAccessor(thread->getTC(), pkt); |
| new IprEvent(pkt, this, clockEdge(delay)); |
| _status = DcacheWaitResponse; |
| dcache_pkt = NULL; |
| } else if (!dcachePort.sendTimingReq(pkt)) { |
| _status = DcacheRetry; |
| dcache_pkt = pkt; |
| } else { |
| _status = DcacheWaitResponse; |
| // memory system takes ownership of packet |
| dcache_pkt = NULL; |
| } |
| return dcache_pkt == NULL; |
| } |
| |
| void |
| TimingSimpleCPU::sendData(const RequestPtr &req, uint8_t *data, uint64_t *res, |
| bool read) |
| { |
| SimpleExecContext &t_info = *threadInfo[curThread]; |
| SimpleThread* thread = t_info.thread; |
| |
| PacketPtr pkt = buildPacket(req, read); |
| pkt->dataDynamic<uint8_t>(data); |
| |
    // hardware transactional memory
    // If the core is in transactional mode, or if the request is an HTM
    // command to abort a transaction, the packet should reflect that it is
    // transactional and also carry an HtmUid for debugging.
| const bool is_htm_speculative = t_info.inHtmTransactionalState(); |
| if (is_htm_speculative || req->isHTMAbort()) { |
| pkt->setHtmTransactional(t_info.getHtmTransactionUid()); |
| } |
| if (req->isHTMAbort()) |
| DPRINTF(HtmCpu, "htmabort htmUid=%u\n", t_info.getHtmTransactionUid()); |
| |
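    // Requests flagged NO_ACCESS never reach the memory system: fake a
    // response and complete the access immediately.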
| if (req->getFlags().isSet(Request::NO_ACCESS)) { |
| assert(!dcache_pkt); |
| pkt->makeResponse(); |
| completeDataAccess(pkt); |
| } else if (read) { |
| handleReadPacket(pkt); |
| } else { |
        bool do_access = true;  // set to false to suppress the cache access
| |
| if (req->isLLSC()) { |
| do_access = thread->getIsaPtr()->handleLockedWrite( |
| req, dcachePort.cacheBlockMask); |
| } else if (req->isCondSwap()) { |
| assert(res); |
| req->setExtraData(*res); |
| } |
| |
| if (do_access) { |
| dcache_pkt = pkt; |
| handleWritePacket(); |
| threadSnoop(pkt, curThread); |
| } else { |
| _status = DcacheWaitResponse; |
| completeDataAccess(pkt); |
| } |
| } |
| } |
| |
| void |
| TimingSimpleCPU::sendSplitData(const RequestPtr &req1, const RequestPtr &req2, |
| const RequestPtr &req, uint8_t *data, bool read) |
| { |
| SimpleExecContext &t_info = *threadInfo[curThread]; |
| PacketPtr pkt1, pkt2; |
| buildSplitPacket(pkt1, pkt2, req1, req2, req, data, read); |
| |
| // hardware transactional memory |
| // HTM commands should never use SplitData |
| assert(!req1->isHTMCmd() && !req2->isHTMCmd()); |
| |
| // If the thread is executing transactionally, |
| // reflect this in the packets. |
| if (t_info.inHtmTransactionalState()) { |
| pkt1->setHtmTransactional(t_info.getHtmTransactionUid()); |
| pkt2->setHtmTransactional(t_info.getHtmTransactionUid()); |
| } |
| |
| if (req->getFlags().isSet(Request::NO_ACCESS)) { |
| assert(!dcache_pkt); |
| pkt1->makeResponse(); |
| completeDataAccess(pkt1); |
| } else if (read) { |
| SplitFragmentSenderState * send_state = |
| dynamic_cast<SplitFragmentSenderState *>(pkt1->senderState); |
| if (handleReadPacket(pkt1)) { |
| send_state->clearFromParent(); |
| send_state = dynamic_cast<SplitFragmentSenderState *>( |
| pkt2->senderState); |
| if (handleReadPacket(pkt2)) { |
| send_state->clearFromParent(); |
| } |
| } |
| } else { |
| dcache_pkt = pkt1; |
| SplitFragmentSenderState * send_state = |
| dynamic_cast<SplitFragmentSenderState *>(pkt1->senderState); |
| if (handleWritePacket()) { |
| send_state->clearFromParent(); |
| dcache_pkt = pkt2; |
| send_state = dynamic_cast<SplitFragmentSenderState *>( |
| pkt2->senderState); |
| if (handleWritePacket()) { |
| send_state->clearFromParent(); |
| } |
| } |
| } |
| } |
| |
| void |
| TimingSimpleCPU::translationFault(const Fault &fault) |
| { |
    // The fault may be NoFault in cases where a fault is suppressed,
    // for instance for prefetches.
| updateCycleCounts(); |
| updateCycleCounters(BaseCPU::CPU_STATE_ON); |
| |
| if ((fault != NoFault) && traceData) { |
| traceFault(); |
| } |
| |
| postExecute(); |
| |
| advanceInst(fault); |
| } |
| |
| PacketPtr |
| TimingSimpleCPU::buildPacket(const RequestPtr &req, bool read) |
| { |
| return read ? Packet::createRead(req) : Packet::createWrite(req); |
| } |
| |
| void |
| TimingSimpleCPU::buildSplitPacket(PacketPtr &pkt1, PacketPtr &pkt2, |
| const RequestPtr &req1, const RequestPtr &req2, const RequestPtr &req, |
| uint8_t *data, bool read) |
| { |
| pkt1 = pkt2 = NULL; |
| |
| assert(!req1->isLocalAccess() && !req2->isLocalAccess()); |
| |
| if (req->getFlags().isSet(Request::NO_ACCESS)) { |
| pkt1 = buildPacket(req, read); |
| return; |
| } |
| |
| pkt1 = buildPacket(req1, read); |
| pkt2 = buildPacket(req2, read); |
| |
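    // The main packet owns the dynamic data buffer and carries the overall
    // response; the two fragments point statically into that same buffer.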
| PacketPtr pkt = new Packet(req, pkt1->cmd.responseCommand()); |
| |
| pkt->dataDynamic<uint8_t>(data); |
| pkt1->dataStatic<uint8_t>(data); |
| pkt2->dataStatic<uint8_t>(data + req1->getSize()); |
| |
| SplitMainSenderState * main_send_state = new SplitMainSenderState; |
| pkt->senderState = main_send_state; |
| main_send_state->fragments[0] = pkt1; |
| main_send_state->fragments[1] = pkt2; |
| main_send_state->outstanding = 2; |
| pkt1->senderState = new SplitFragmentSenderState(pkt, 0); |
| pkt2->senderState = new SplitFragmentSenderState(pkt, 1); |
| } |
| |
| Fault |
| TimingSimpleCPU::initiateMemRead(Addr addr, unsigned size, |
| Request::Flags flags, |
| const std::vector<bool>& byte_enable) |
| { |
| SimpleExecContext &t_info = *threadInfo[curThread]; |
| SimpleThread* thread = t_info.thread; |
| |
| Fault fault; |
| const Addr pc = thread->pcState().instAddr(); |
| unsigned block_size = cacheLineSize(); |
| BaseMMU::Mode mode = BaseMMU::Read; |
| |
| if (traceData) |
| traceData->setMem(addr, size, flags); |
| |
| RequestPtr req = std::make_shared<Request>( |
| addr, size, flags, dataRequestorId(), pc, thread->contextId()); |
| req->setByteEnable(byte_enable); |
| |
| req->taskId(taskId()); |
| |
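    // If the access crosses a cache-line boundary, split it into two
    // line-aligned requests and translate each half separately.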
| Addr split_addr = roundDown(addr + size - 1, block_size); |
| assert(split_addr <= addr || split_addr - addr < block_size); |
| |
| _status = DTBWaitResponse; |
| if (split_addr > addr) { |
| RequestPtr req1, req2; |
| assert(!req->isLLSC() && !req->isSwap()); |
| req->splitOnVaddr(split_addr, req1, req2); |
| |
| WholeTranslationState *state = |
| new WholeTranslationState(req, req1, req2, new uint8_t[size], |
| NULL, mode); |
| DataTranslation<TimingSimpleCPU *> *trans1 = |
| new DataTranslation<TimingSimpleCPU *>(this, state, 0); |
| DataTranslation<TimingSimpleCPU *> *trans2 = |
| new DataTranslation<TimingSimpleCPU *>(this, state, 1); |
| |
| thread->mmu->translateTiming(req1, thread->getTC(), trans1, mode); |
| thread->mmu->translateTiming(req2, thread->getTC(), trans2, mode); |
| } else { |
| WholeTranslationState *state = |
| new WholeTranslationState(req, new uint8_t[size], NULL, mode); |
| DataTranslation<TimingSimpleCPU *> *translation |
| = new DataTranslation<TimingSimpleCPU *>(this, state); |
| thread->mmu->translateTiming(req, thread->getTC(), translation, mode); |
| } |
| |
| return NoFault; |
| } |
| |
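// Try to send the pending dcache_pkt. Returns true if the packet left the
// CPU (accepted by the port or handled locally), false if a retry is needed.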
| bool |
| TimingSimpleCPU::handleWritePacket() |
| { |
| SimpleExecContext &t_info = *threadInfo[curThread]; |
| SimpleThread* thread = t_info.thread; |
| |
| const RequestPtr &req = dcache_pkt->req; |
| if (req->isLocalAccess()) { |
| Cycles delay = req->localAccessor(thread->getTC(), dcache_pkt); |
| new IprEvent(dcache_pkt, this, clockEdge(delay)); |
| _status = DcacheWaitResponse; |
| dcache_pkt = NULL; |
| } else if (!dcachePort.sendTimingReq(dcache_pkt)) { |
| _status = DcacheRetry; |
| } else { |
| _status = DcacheWaitResponse; |
| // memory system takes ownership of packet |
| dcache_pkt = NULL; |
| } |
| return dcache_pkt == NULL; |
| } |
| |
| Fault |
| TimingSimpleCPU::writeMem(uint8_t *data, unsigned size, |
| Addr addr, Request::Flags flags, uint64_t *res, |
| const std::vector<bool>& byte_enable) |
| { |
| SimpleExecContext &t_info = *threadInfo[curThread]; |
| SimpleThread* thread = t_info.thread; |
| |
| uint8_t *newData = new uint8_t[size]; |
| const Addr pc = thread->pcState().instAddr(); |
| unsigned block_size = cacheLineSize(); |
| BaseMMU::Mode mode = BaseMMU::Write; |
| |
| if (data == NULL) { |
| assert(flags & Request::STORE_NO_DATA); |
| // This must be a cache block cleaning request |
| memset(newData, 0, size); |
| } else { |
| memcpy(newData, data, size); |
| } |
| |
| if (traceData) |
| traceData->setMem(addr, size, flags); |
| |
| RequestPtr req = std::make_shared<Request>( |
| addr, size, flags, dataRequestorId(), pc, thread->contextId()); |
| req->setByteEnable(byte_enable); |
| |
| req->taskId(taskId()); |
| |
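    // As in initiateMemRead(), split accesses that cross a cache-line
    // boundary into two line-aligned requests.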
| Addr split_addr = roundDown(addr + size - 1, block_size); |
| assert(split_addr <= addr || split_addr - addr < block_size); |
| |
| _status = DTBWaitResponse; |
| |
    // TODO: TimingSimpleCPU doesn't support arbitrarily long multi-line
    // memory accesses yet
| |
| if (split_addr > addr) { |
| RequestPtr req1, req2; |
| assert(!req->isLLSC() && !req->isSwap()); |
| req->splitOnVaddr(split_addr, req1, req2); |
| |
| WholeTranslationState *state = |
| new WholeTranslationState(req, req1, req2, newData, res, mode); |
| DataTranslation<TimingSimpleCPU *> *trans1 = |
| new DataTranslation<TimingSimpleCPU *>(this, state, 0); |
| DataTranslation<TimingSimpleCPU *> *trans2 = |
| new DataTranslation<TimingSimpleCPU *>(this, state, 1); |
| |
| thread->mmu->translateTiming(req1, thread->getTC(), trans1, mode); |
| thread->mmu->translateTiming(req2, thread->getTC(), trans2, mode); |
| } else { |
| WholeTranslationState *state = |
| new WholeTranslationState(req, newData, res, mode); |
| DataTranslation<TimingSimpleCPU *> *translation = |
| new DataTranslation<TimingSimpleCPU *>(this, state); |
| thread->mmu->translateTiming(req, thread->getTC(), translation, mode); |
| } |
| |
| // Translation faults will be returned via finishTranslation() |
| return NoFault; |
| } |
| |
| Fault |
| TimingSimpleCPU::initiateMemAMO(Addr addr, unsigned size, |
| Request::Flags flags, |
| AtomicOpFunctorPtr amo_op) |
| { |
| SimpleExecContext &t_info = *threadInfo[curThread]; |
| SimpleThread* thread = t_info.thread; |
| |
| Fault fault; |
| const Addr pc = thread->pcState().instAddr(); |
| unsigned block_size = cacheLineSize(); |
| BaseMMU::Mode mode = BaseMMU::Write; |
| |
| if (traceData) |
| traceData->setMem(addr, size, flags); |
| |
| RequestPtr req = std::make_shared<Request>(addr, size, flags, |
| dataRequestorId(), pc, thread->contextId(), |
| std::move(amo_op)); |
| |
| assert(req->hasAtomicOpFunctor()); |
| |
| req->taskId(taskId()); |
| |
| Addr split_addr = roundDown(addr + size - 1, block_size); |
| |
    // AMO requests that access across a cache line boundary are not
    // allowed, since the cache cannot guarantee that an AMO op spanning
    // two cache lines executes atomically.
    // For ISAs such as x86 that require AMO operations to work on
    // accesses that cross cache-line boundaries, the cache would need to
    // be modified to lock both cache lines to guarantee atomicity.
| if (split_addr > addr) { |
| panic("AMO requests should not access across a cache line boundary\n"); |
| } |
| |
| _status = DTBWaitResponse; |
| |
| WholeTranslationState *state = |
| new WholeTranslationState(req, new uint8_t[size], NULL, mode); |
| DataTranslation<TimingSimpleCPU *> *translation |
| = new DataTranslation<TimingSimpleCPU *>(this, state); |
| thread->mmu->translateTiming(req, thread->getTC(), translation, mode); |
| |
| return NoFault; |
| } |
| |
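// Forward a locally generated packet to the other hardware threads so that
// their address monitors and LL/SC state observe it like an external snoop.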
| void |
| TimingSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender) |
| { |
| for (ThreadID tid = 0; tid < numThreads; tid++) { |
| if (tid != sender) { |
| if (getCpuAddrMonitor(tid)->doMonitor(pkt)) { |
| wakeup(tid); |
| } |
| threadInfo[tid]->thread->getIsaPtr()->handleLockedSnoop(pkt, |
| dcachePort.cacheBlockMask); |
| } |
| } |
| } |
| |
| void |
| TimingSimpleCPU::finishTranslation(WholeTranslationState *state) |
| { |
| _status = BaseSimpleCPU::Running; |
| |
| if (state->getFault() != NoFault) { |
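        // Faulting prefetches are squashed here; translationFault() will
        // then see NoFault and simply advance past the instruction.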
| if (state->isPrefetch()) { |
| state->setNoFault(); |
| } |
| delete [] state->data; |
| state->deleteReqs(); |
| translationFault(state->getFault()); |
| } else { |
| if (!state->isSplit) { |
| sendData(state->mainReq, state->data, state->res, |
| state->mode == BaseMMU::Read); |
| } else { |
| sendSplitData(state->sreqLow, state->sreqHigh, state->mainReq, |
| state->data, state->mode == BaseMMU::Read); |
| } |
| } |
| |
| delete state; |
| } |
| |
| |
| void |
| TimingSimpleCPU::fetch() |
| { |
| // Change thread if multi-threaded |
| swapActiveThread(); |
| |
| SimpleExecContext &t_info = *threadInfo[curThread]; |
| SimpleThread* thread = t_info.thread; |
| |
| DPRINTF(SimpleCPU, "Fetch\n"); |
| |
| if (!curStaticInst || !curStaticInst->isDelayedCommit()) { |
| checkForInterrupts(); |
| checkPcEventQueue(); |
| } |
| |
    // We must have just been suspended by a PC event.
| if (_status == Idle) |
| return; |
| |
| MicroPC upc = thread->pcState().microPC(); |
| bool needToFetch = !isRomMicroPC(upc) && !curMacroStaticInst; |
| |
| if (needToFetch) { |
| _status = BaseSimpleCPU::Running; |
| RequestPtr ifetch_req = std::make_shared<Request>(); |
| ifetch_req->taskId(taskId()); |
| ifetch_req->setContext(thread->contextId()); |
| setupFetchRequest(ifetch_req); |
| DPRINTF(SimpleCPU, "Translating address %#x\n", ifetch_req->getVaddr()); |
| thread->mmu->translateTiming(ifetch_req, thread->getTC(), |
| &fetchTranslation, BaseMMU::Execute); |
| } else { |
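        // Executing from the microcode ROM or within a macro-op: no memory
        // fetch is needed, so go straight to execute.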
| _status = IcacheWaitResponse; |
| completeIfetch(NULL); |
| |
| updateCycleCounts(); |
| updateCycleCounters(BaseCPU::CPU_STATE_ON); |
| } |
| } |
| |
| |
| void |
| TimingSimpleCPU::sendFetch(const Fault &fault, const RequestPtr &req, |
| ThreadContext *tc) |
| { |
| auto &decoder = threadInfo[curThread]->thread->decoder; |
| |
| if (fault == NoFault) { |
| DPRINTF(SimpleCPU, "Sending fetch for addr %#x(pa: %#x)\n", |
| req->getVaddr(), req->getPaddr()); |
| ifetch_pkt = new Packet(req, MemCmd::ReadReq); |
| ifetch_pkt->dataStatic(decoder->moreBytesPtr()); |
| DPRINTF(SimpleCPU, " -- pkt addr: %#x\n", ifetch_pkt->getAddr()); |
| |
| if (!icachePort.sendTimingReq(ifetch_pkt)) { |
| // Need to wait for retry |
| _status = IcacheRetry; |
| } else { |
| // Need to wait for cache to respond |
| _status = IcacheWaitResponse; |
| // ownership of packet transferred to memory system |
| ifetch_pkt = NULL; |
| } |
| } else { |
| DPRINTF(SimpleCPU, "Translation of addr %#x faulted\n", req->getVaddr()); |
| // fetch fault: advance directly to next instruction (fault handler) |
| _status = BaseSimpleCPU::Running; |
| advanceInst(fault); |
| } |
| |
| updateCycleCounts(); |
| updateCycleCounters(BaseCPU::CPU_STATE_ON); |
| } |
| |
| |
| void |
| TimingSimpleCPU::advanceInst(const Fault &fault) |
| { |
| SimpleExecContext &t_info = *threadInfo[curThread]; |
| |
| if (_status == Faulting) |
| return; |
| |
| if (fault != NoFault) { |
| // hardware transactional memory |
| // If a fault occurred within a transaction |
| // ensure that the transaction aborts |
| if (t_info.inHtmTransactionalState() && |
| !std::dynamic_pointer_cast<GenericHtmFailureFault>(fault)) { |
| DPRINTF(HtmCpu, "fault (%s) occurred - " |
| "replacing with HTM abort fault htmUid=%u\n", |
| fault->name(), t_info.getHtmTransactionUid()); |
| |
| Fault tmfault = std::make_shared<GenericHtmFailureFault>( |
| t_info.getHtmTransactionUid(), |
| HtmFailureFaultCause::EXCEPTION); |
| |
| advancePC(tmfault); |
| reschedule(fetchEvent, clockEdge(), true); |
| _status = Faulting; |
| return; |
| } |
| |
| DPRINTF(SimpleCPU, "Fault occured. Handling the fault\n"); |
| |
| advancePC(fault); |
| |
        // A syscall fault could suspend this CPU (e.g., futex_wait).
        // If _status is not Idle, schedule an event to fetch the next
        // instruction after 'stall' ticks.
        // If the CPU has been suspended (i.e., _status == Idle), another
        // CPU will wake this one up later.
| if (_status != Idle) { |
| DPRINTF(SimpleCPU, "Scheduling fetch event after the Fault\n"); |
| |
| Tick stall = std::dynamic_pointer_cast<SyscallRetryFault>(fault) ? |
| clockEdge(syscallRetryLatency) : clockEdge(); |
| reschedule(fetchEvent, stall, true); |
| _status = Faulting; |
| } |
| |
| return; |
| } |
| |
| if (!t_info.stayAtPC) |
| advancePC(fault); |
| |
| if (tryCompleteDrain()) |
| return; |
| |
| serviceInstCountEvents(); |
| |
| if (_status == BaseSimpleCPU::Running) { |
| // kick off fetch of next instruction... callback from icache |
| // response will cause that instruction to be executed, |
| // keeping the CPU running. |
| fetch(); |
| } |
| } |
| |
| |
| void |
| TimingSimpleCPU::completeIfetch(PacketPtr pkt) |
| { |
| SimpleExecContext& t_info = *threadInfo[curThread]; |
| |
| DPRINTF(SimpleCPU, "Complete ICache Fetch for addr %#x\n", pkt ? |
| pkt->getAddr() : 0); |
| |
| // received a response from the icache: execute the received |
| // instruction |
| panic_if(pkt && pkt->isError(), "Instruction fetch (%s) failed: %s", |
| pkt->getAddrRange().to_string(), pkt->print()); |
| assert(_status == IcacheWaitResponse); |
| |
| _status = BaseSimpleCPU::Running; |
| |
| updateCycleCounts(); |
| updateCycleCounters(BaseCPU::CPU_STATE_ON); |
| |
| if (pkt) |
| pkt->req->setAccessLatency(); |
| |
| |
| preExecute(); |
| |
| // hardware transactional memory |
| if (curStaticInst && curStaticInst->isHtmStart()) { |
| // if this HtmStart is not within a transaction, |
| // then assign it a new htmTransactionUid |
| if (!t_info.inHtmTransactionalState()) |
| t_info.newHtmTransactionUid(); |
| SimpleThread* thread = t_info.thread; |
| thread->htmTransactionStarts++; |
| DPRINTF(HtmCpu, "htmTransactionStarts++=%u\n", |
| thread->htmTransactionStarts); |
| } |
| |
| if (curStaticInst && curStaticInst->isMemRef()) { |
| // load or store: just send to dcache |
| Fault fault = curStaticInst->initiateAcc(&t_info, traceData); |
| |
        // If we're not Running now, either the instruction will complete
        // in a dcache response callback, or it faulted and has already
        // started an ifetch.
| if (_status == BaseSimpleCPU::Running) { |
| if (fault != NoFault && traceData) { |
| traceFault(); |
| } |
| |
| postExecute(); |
| // @todo remove me after debugging with legion done |
| if (curStaticInst && (!curStaticInst->isMicroop() || |
| curStaticInst->isFirstMicroop())) |
| instCnt++; |
| advanceInst(fault); |
| } |
| } else if (curStaticInst) { |
| // non-memory instruction: execute completely now |
| Fault fault = curStaticInst->execute(&t_info, traceData); |
| |
| // keep an instruction count |
| if (fault == NoFault) |
| countInst(); |
| else if (traceData) { |
| traceFault(); |
| } |
| |
| postExecute(); |
| // @todo remove me after debugging with legion done |
| if (curStaticInst && (!curStaticInst->isMicroop() || |
| curStaticInst->isFirstMicroop())) |
| instCnt++; |
| advanceInst(fault); |
| } else { |
| advanceInst(NoFault); |
| } |
| |
| if (pkt) { |
| delete pkt; |
| } |
| } |
| |
| void |
| TimingSimpleCPU::IcachePort::ITickEvent::process() |
| { |
| cpu->completeIfetch(pkt); |
| } |
| |
| bool |
| TimingSimpleCPU::IcachePort::recvTimingResp(PacketPtr pkt) |
| { |
| DPRINTF(SimpleCPU, "Received fetch response %#x\n", pkt->getAddr()); |
| |
| // hardware transactional memory |
    // Currently, there is no support for tracking instruction fetches
    // in a transaction's read set.
| if (pkt->htmTransactionFailedInCache()) { |
| panic("HTM transactional support for" |
| " instruction stream not yet supported\n"); |
| } |
| |
| // we should only ever see one response per cycle since we only |
| // issue a new request once this response is sunk |
| assert(!tickEvent.scheduled()); |
| // delay processing of returned data until next CPU clock edge |
| tickEvent.schedule(pkt, cpu->clockEdge()); |
| |
| return true; |
| } |
| |
| void |
| TimingSimpleCPU::IcachePort::recvReqRetry() |
| { |
| // we shouldn't get a retry unless we have a packet that we're |
| // waiting to transmit |
| assert(cpu->ifetch_pkt != NULL); |
| assert(cpu->_status == IcacheRetry); |
| PacketPtr tmp = cpu->ifetch_pkt; |
| if (sendTimingReq(tmp)) { |
| cpu->_status = IcacheWaitResponse; |
| cpu->ifetch_pkt = NULL; |
| } |
| } |
| |
| void |
| TimingSimpleCPU::completeDataAccess(PacketPtr pkt) |
| { |
| // hardware transactional memory |
| |
| SimpleExecContext *t_info = threadInfo[curThread]; |
| [[maybe_unused]] const bool is_htm_speculative = |
| t_info->inHtmTransactionalState(); |
| |
| // received a response from the dcache: complete the load or store |
| // instruction |
| panic_if(pkt->isError(), "Data access (%s) failed: %s", |
| pkt->getAddrRange().to_string(), pkt->print()); |
| assert(_status == DcacheWaitResponse || _status == DTBWaitResponse || |
| pkt->req->getFlags().isSet(Request::NO_ACCESS)); |
| |
| pkt->req->setAccessLatency(); |
| |
| updateCycleCounts(); |
| updateCycleCounters(BaseCPU::CPU_STATE_ON); |
| |
| if (pkt->senderState) { |
| // hardware transactional memory |
| // There shouldn't be HtmCmds occurring in multipacket requests |
| if (pkt->req->isHTMCmd()) { |
| panic("unexpected HTM case"); |
| } |
| |
| SplitFragmentSenderState * send_state = |
| dynamic_cast<SplitFragmentSenderState *>(pkt->senderState); |
| assert(send_state); |
| PacketPtr big_pkt = send_state->bigPkt; |
| delete send_state; |
| |
| if (pkt->isHtmTransactional()) { |
| assert(is_htm_speculative); |
| |
| big_pkt->setHtmTransactional( |
| pkt->getHtmTransactionUid() |
| ); |
| } |
| |
| if (pkt->htmTransactionFailedInCache()) { |
| assert(is_htm_speculative); |
| big_pkt->setHtmTransactionFailedInCache( |
| pkt->getHtmTransactionFailedInCacheRC() |
| ); |
| } |
| |
| delete pkt; |
| |
| SplitMainSenderState * main_send_state = |
| dynamic_cast<SplitMainSenderState *>(big_pkt->senderState); |
| assert(main_send_state); |
| // Record the fact that this packet is no longer outstanding. |
| assert(main_send_state->outstanding != 0); |
| main_send_state->outstanding--; |
| |
| if (main_send_state->outstanding) { |
| return; |
| } else { |
| delete main_send_state; |
| big_pkt->senderState = NULL; |
| pkt = big_pkt; |
| } |
| } |
| |
| _status = BaseSimpleCPU::Running; |
| |
| Fault fault; |
| |
| // hardware transactional memory |
| // sanity checks |
| // ensure htmTransactionUids are equivalent |
| if (pkt->isHtmTransactional()) |
| assert (pkt->getHtmTransactionUid() == |
| t_info->getHtmTransactionUid()); |
| |
| // can't have a packet that fails a transaction while not in a transaction |
| if (pkt->htmTransactionFailedInCache()) |
| assert(is_htm_speculative); |
| |
    // Failures are not reported through stores, since that would be
    // inconsistent with O3, which cannot fault after a store has been
    // sent to memory.
| if (pkt->htmTransactionFailedInCache() && !pkt->isWrite()) { |
| const HtmCacheFailure htm_rc = |
| pkt->getHtmTransactionFailedInCacheRC(); |
| DPRINTF(HtmCpu, "HTM abortion in cache (rc=%s) detected htmUid=%u\n", |
| htmFailureToStr(htm_rc), pkt->getHtmTransactionUid()); |
| |
        // Currently there are only two reasons why a transaction would
        // fail in the memory subsystem:
        // (1) A transactional line was evicted from the cache for
        //     space (or replacement policy) reasons.
        // (2) Another core/device requested a cache line in this
        //     transaction's read/write set in a way that is incompatible
        //     with the HTM's semantics, e.g. another core requesting
        //     exclusive access to a line in this core's read set.
| if (htm_rc == HtmCacheFailure::FAIL_SELF) { |
| fault = std::make_shared<GenericHtmFailureFault>( |
| t_info->getHtmTransactionUid(), |
| HtmFailureFaultCause::SIZE); |
| } else if (htm_rc == HtmCacheFailure::FAIL_REMOTE) { |
| fault = std::make_shared<GenericHtmFailureFault>( |
| t_info->getHtmTransactionUid(), |
| HtmFailureFaultCause::MEMORY); |
| } else { |
| panic("HTM - unhandled rc %s", htmFailureToStr(htm_rc)); |
| } |
| } else { |
| fault = curStaticInst->completeAcc(pkt, t_info, |
| traceData); |
| } |
| |
| // hardware transactional memory |
| // Track HtmStop instructions, |
| // e.g. instructions which commit a transaction. |
| if (curStaticInst && curStaticInst->isHtmStop()) { |
| t_info->thread->htmTransactionStops++; |
| DPRINTF(HtmCpu, "htmTransactionStops++=%u\n", |
| t_info->thread->htmTransactionStops); |
| } |
| |
| // keep an instruction count |
| if (fault == NoFault) |
| countInst(); |
| else if (traceData) { |
| traceFault(); |
| } |
| |
| delete pkt; |
| |
| postExecute(); |
| |
| advanceInst(fault); |
| } |
| |
| void |
| TimingSimpleCPU::updateCycleCounts() |
| { |
| const Cycles delta(curCycle() - previousCycle); |
| |
| baseStats.numCycles += delta; |
| |
| previousCycle = curCycle(); |
| } |
| |
| void |
| TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt) |
| { |
| for (ThreadID tid = 0; tid < cpu->numThreads; tid++) { |
| if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) { |
| cpu->wakeup(tid); |
| } |
| } |
| |
    // Making it uniform across all CPUs:
    // The CPUs need to be woken up only on an invalidation packet
    // (when using caches) or on an incoming write packet (when not
    // using caches). It is not necessary to wake up the processor on
    // all incoming packets.
| if (pkt->isInvalidate() || pkt->isWrite()) { |
| for (auto &t_info : cpu->threadInfo) { |
| t_info->thread->getIsaPtr()->handleLockedSnoop(pkt, |
| cacheBlockMask); |
| } |
| } else if (pkt->req && pkt->req->isTlbiExtSync()) { |
| // We received a TLBI_EXT_SYNC request. |
| // In a detailed sim we would wait for memory ops to complete, |
| // but in our simple case we just respond immediately |
| auto reply_req = Request::createMemManagement( |
| Request::TLBI_EXT_SYNC_COMP, |
| cpu->dataRequestorId()); |
| |
| // Extra Data = the transaction ID of the Sync we're completing |
| reply_req->setExtraData(pkt->req->getExtraData()); |
| PacketPtr reply_pkt = Packet::createRead(reply_req); |
| |
| // TODO - reserve some credit for these responses? |
| if (!sendTimingReq(reply_pkt)) { |
| panic("Couldn't send TLBI_EXT_SYNC_COMP message"); |
| } |
| } |
| } |
| |
| void |
| TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt) |
| { |
| for (ThreadID tid = 0; tid < cpu->numThreads; tid++) { |
| if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) { |
| cpu->wakeup(tid); |
| } |
| } |
| } |
| |
| bool |
| TimingSimpleCPU::DcachePort::recvTimingResp(PacketPtr pkt) |
| { |
| DPRINTF(SimpleCPU, "Received load/store response %#x\n", pkt->getAddr()); |
| |
    // The timing CPU is not really ticked; instead, it relies on the
    // memory system (fetch and load/store) to set the pace.
| if (!tickEvent.scheduled()) { |
| // Delay processing of returned data until next CPU clock edge |
| tickEvent.schedule(pkt, cpu->clockEdge()); |
| return true; |
| } else { |
        // In the case of a split transaction and a cache that is
        // faster than the CPU, we could get two responses in the
        // same tick; delay the second one.
| if (!retryRespEvent.scheduled()) |
| cpu->schedule(retryRespEvent, cpu->clockEdge(Cycles(1))); |
| return false; |
| } |
| } |
| |
| void |
| TimingSimpleCPU::DcachePort::DTickEvent::process() |
| { |
| cpu->completeDataAccess(pkt); |
| } |
| |
| void |
| TimingSimpleCPU::DcachePort::recvReqRetry() |
| { |
| // we shouldn't get a retry unless we have a packet that we're |
| // waiting to transmit |
| assert(cpu->dcache_pkt != NULL); |
| assert(cpu->_status == DcacheRetry); |
| PacketPtr tmp = cpu->dcache_pkt; |
| if (tmp->senderState) { |
| // This is a packet from a split access. |
| SplitFragmentSenderState * send_state = |
| dynamic_cast<SplitFragmentSenderState *>(tmp->senderState); |
| assert(send_state); |
| PacketPtr big_pkt = send_state->bigPkt; |
| |
| SplitMainSenderState * main_send_state = |
| dynamic_cast<SplitMainSenderState *>(big_pkt->senderState); |
| assert(main_send_state); |
| |
| if (sendTimingReq(tmp)) { |
| // If we were able to send without retrying, record that fact |
| // and try sending the other fragment. |
| send_state->clearFromParent(); |
| int other_index = main_send_state->getPendingFragment(); |
| if (other_index > 0) { |
| tmp = main_send_state->fragments[other_index]; |
| cpu->dcache_pkt = tmp; |
| if ((big_pkt->isRead() && cpu->handleReadPacket(tmp)) || |
| (big_pkt->isWrite() && cpu->handleWritePacket())) { |
| main_send_state->fragments[other_index] = NULL; |
| } |
| } else { |
| cpu->_status = DcacheWaitResponse; |
| // memory system takes ownership of packet |
| cpu->dcache_pkt = NULL; |
| } |
| } |
| } else if (sendTimingReq(tmp)) { |
| cpu->_status = DcacheWaitResponse; |
| // memory system takes ownership of packet |
| cpu->dcache_pkt = NULL; |
| } |
| } |
| |
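// IprEvent models the latency of a local access (IPR: internal processor
// register); when it fires, the access completes like a d-cache response.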
| TimingSimpleCPU::IprEvent::IprEvent(Packet *_pkt, TimingSimpleCPU *_cpu, |
| Tick t) |
| : pkt(_pkt), cpu(_cpu) |
| { |
| cpu->schedule(this, t); |
| } |
| |
| void |
| TimingSimpleCPU::IprEvent::process() |
| { |
| cpu->completeDataAccess(pkt); |
| } |
| |
| const char * |
| TimingSimpleCPU::IprEvent::description() const |
| { |
| return "Timing Simple CPU Delay IPR event"; |
| } |
| |
| |
| void |
| TimingSimpleCPU::printAddr(Addr a) |
| { |
| dcachePort.printAddr(a); |
| } |
| |
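// Issue a memory-management command (an HTM start/commit/cancel or a TLBI
// command) as an 8-byte, read-style request down the d-cache port.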
| Fault |
| TimingSimpleCPU::initiateMemMgmtCmd(Request::Flags flags) |
| { |
| SimpleExecContext &t_info = *threadInfo[curThread]; |
| SimpleThread* thread = t_info.thread; |
| |
| const Addr addr = 0x0ul; |
| const Addr pc = thread->pcState().instAddr(); |
| const int size = 8; |
| |
| if (traceData) |
| traceData->setMem(addr, size, flags); |
| |
| RequestPtr req = std::make_shared<Request>( |
| addr, size, flags, dataRequestorId()); |
| |
| req->setPC(pc); |
| req->setContext(thread->contextId()); |
| req->taskId(taskId()); |
| req->setInstCount(t_info.numInst); |
| |
| assert(req->isHTMCmd() || req->isTlbiCmd()); |
| |
    // Use the payload as a sanity check;
    // the memory subsystem will clear the allocated data.
| uint8_t *data = new uint8_t[size]; |
| assert(data); |
| uint64_t rc = 0xdeadbeeflu; |
    memcpy(data, &rc, size);
| |
| // debugging output |
| if (req->isHTMCmd()) { |
| if (req->isHTMStart()) |
| DPRINTF(HtmCpu, "HTMstart htmUid=%u\n", |
| t_info.getHtmTransactionUid()); |
| else if (req->isHTMCommit()) |
| DPRINTF(HtmCpu, "HTMcommit htmUid=%u\n", |
| t_info.getHtmTransactionUid()); |
| else if (req->isHTMCancel()) |
| DPRINTF(HtmCpu, "HTMcancel htmUid=%u\n", |
| t_info.getHtmTransactionUid()); |
| else |
| panic("initiateMemMgmtCmd: unknown HTM CMD"); |
| } |
| |
| sendData(req, data, nullptr, true); |
| |
| return NoFault; |
| } |
| |
| void |
| TimingSimpleCPU::htmSendAbortSignal(ThreadID tid, uint64_t htm_uid, |
| HtmFailureFaultCause cause) |
| { |
| SimpleExecContext& t_info = *threadInfo[tid]; |
| SimpleThread* thread = t_info.thread; |
| |
| const Addr addr = 0x0ul; |
| const Addr pc = thread->pcState().instAddr(); |
| const int size = 8; |
| const Request::Flags flags = |
| Request::PHYSICAL|Request::STRICT_ORDER|Request::HTM_ABORT; |
| |
| if (traceData) |
| traceData->setMem(addr, size, flags); |
| |
    // Notify the L1 d-cache (Ruby) that the core has aborted the
    // transaction.
| |
| RequestPtr req = std::make_shared<Request>( |
| addr, size, flags, dataRequestorId()); |
| |
| req->setPC(pc); |
| req->setContext(thread->contextId()); |
| req->taskId(taskId()); |
| req->setInstCount(t_info.numInst); |
| req->setHtmAbortCause(cause); |
| |
| assert(req->isHTMAbort()); |
| |
| uint8_t *data = new uint8_t[size]; |
| assert(data); |
| uint64_t rc = 0lu; |
    memcpy(data, &rc, size);
| |
| sendData(req, data, nullptr, true); |
| } |
| |
| } // namespace gem5 |