| /* |
| * Copyright (c) 2002-2005 The Regents of The University of Michigan |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer; |
| * redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution; |
| * neither the name of the copyright holders nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| * |
| * Authors: Steve Reinhardt |
| */ |
| |
| #include "arch/locked_mem.hh" |
| #include "arch/mmaped_ipr.hh" |
| #include "arch/utility.hh" |
| #include "base/bigint.hh" |
| #include "config/the_isa.hh" |
| #include "cpu/exetrace.hh" |
| #include "cpu/simple/atomic.hh" |
| #include "mem/packet.hh" |
| #include "mem/packet_access.hh" |
| #include "params/AtomicSimpleCPU.hh" |
| #include "sim/system.hh" |
| |
| using namespace std; |
| using namespace TheISA; |
| |
| AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c) |
| : Event(CPU_Tick_Pri), cpu(c) |
| { |
| } |
| |
| |
| void |
| AtomicSimpleCPU::TickEvent::process() |
| { |
| cpu->tick(); |
| } |
| |
| const char * |
| AtomicSimpleCPU::TickEvent::description() const |
| { |
| return "AtomicSimpleCPU tick"; |
| } |
| |
| Port * |
| AtomicSimpleCPU::getPort(const string &if_name, int idx) |
| { |
| if (if_name == "dcache_port") |
| return &dcachePort; |
| else if (if_name == "icache_port") |
| return &icachePort; |
| else if (if_name == "physmem_port") { |
| hasPhysMemPort = true; |
| return &physmemPort; |
| } |
| else |
| panic("No Such Port\n"); |
| } |
| |
| void |
| AtomicSimpleCPU::init() |
| { |
| BaseCPU::init(); |
| #if FULL_SYSTEM |
| ThreadID size = threadContexts.size(); |
| for (ThreadID i = 0; i < size; ++i) { |
| ThreadContext *tc = threadContexts[i]; |
| |
| // initialize CPU, including PC |
| TheISA::initCPU(tc, tc->contextId()); |
| } |
| #endif |
| if (hasPhysMemPort) { |
| bool snoop = false; |
| AddrRangeList pmAddrList; |
| physmemPort.getPeerAddressRanges(pmAddrList, snoop); |
| physMemAddr = *pmAddrList.begin(); |
| } |
| // Atomic doesn't do MT right now, so contextId == threadId |
| ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT |
| data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too |
| data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too |
| } |
| |
| bool |
| AtomicSimpleCPU::CpuPort::recvTiming(PacketPtr pkt) |
| { |
| panic("AtomicSimpleCPU doesn't expect recvTiming callback!"); |
| return true; |
| } |
| |
| Tick |
| AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt) |
| { |
| //Snooping a coherence request, just return |
| return 0; |
| } |
| |
| void |
| AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt) |
| { |
| //No internal storage to update, just return |
| return; |
| } |
| |
| void |
| AtomicSimpleCPU::CpuPort::recvStatusChange(Status status) |
| { |
| if (status == RangeChange) { |
| if (!snoopRangeSent) { |
| snoopRangeSent = true; |
| sendStatusChange(Port::RangeChange); |
| } |
| return; |
| } |
| |
| panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!"); |
| } |
| |
| void |
| AtomicSimpleCPU::CpuPort::recvRetry() |
| { |
| panic("AtomicSimpleCPU doesn't expect recvRetry callback!"); |
| } |
| |
| void |
| AtomicSimpleCPU::DcachePort::setPeer(Port *port) |
| { |
| Port::setPeer(port); |
| |
| #if FULL_SYSTEM |
| // Update the ThreadContext's memory ports (Functional/Virtual |
| // Ports) |
| cpu->tcBase()->connectMemPorts(cpu->tcBase()); |
| #endif |
| } |
| |
| AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p) |
| : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false), |
| simulate_data_stalls(p->simulate_data_stalls), |
| simulate_inst_stalls(p->simulate_inst_stalls), |
| icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this), |
| physmemPort(name() + "-iport", this), hasPhysMemPort(false) |
| { |
| _status = Idle; |
| |
| icachePort.snoopRangeSent = false; |
| dcachePort.snoopRangeSent = false; |
| |
| } |
| |
| |
| AtomicSimpleCPU::~AtomicSimpleCPU() |
| { |
| if (tickEvent.scheduled()) { |
| deschedule(tickEvent); |
| } |
| } |
| |
| void |
| AtomicSimpleCPU::serialize(ostream &os) |
| { |
| SimObject::State so_state = SimObject::getState(); |
| SERIALIZE_ENUM(so_state); |
| SERIALIZE_SCALAR(locked); |
| BaseSimpleCPU::serialize(os); |
| nameOut(os, csprintf("%s.tickEvent", name())); |
| tickEvent.serialize(os); |
| } |
| |
| void |
| AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) |
| { |
| SimObject::State so_state; |
| UNSERIALIZE_ENUM(so_state); |
| UNSERIALIZE_SCALAR(locked); |
| BaseSimpleCPU::unserialize(cp, section); |
| tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); |
| } |
| |
| void |
| AtomicSimpleCPU::resume() |
| { |
| if (_status == Idle || _status == SwitchedOut) |
| return; |
| |
| DPRINTF(SimpleCPU, "Resume\n"); |
| assert(system->getMemoryMode() == Enums::atomic); |
| |
| changeState(SimObject::Running); |
| if (thread->status() == ThreadContext::Active) { |
| if (!tickEvent.scheduled()) |
| schedule(tickEvent, nextCycle()); |
| } |
| } |
| |
| void |
| AtomicSimpleCPU::switchOut() |
| { |
| assert(_status == Running || _status == Idle); |
| _status = SwitchedOut; |
| |
| tickEvent.squash(); |
| } |
| |
| |
| void |
| AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU) |
| { |
| BaseCPU::takeOverFrom(oldCPU, &icachePort, &dcachePort); |
| |
| assert(!tickEvent.scheduled()); |
| |
| // if any of this CPU's ThreadContexts are active, mark the CPU as |
| // running and schedule its tick event. |
| ThreadID size = threadContexts.size(); |
| for (ThreadID i = 0; i < size; ++i) { |
| ThreadContext *tc = threadContexts[i]; |
| if (tc->status() == ThreadContext::Active && _status != Running) { |
| _status = Running; |
| schedule(tickEvent, nextCycle()); |
| break; |
| } |
| } |
| if (_status != Running) { |
| _status = Idle; |
| } |
| assert(threadContexts.size() == 1); |
| ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT |
| data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too |
| data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too |
| } |
| |
| |
| void |
| AtomicSimpleCPU::activateContext(int thread_num, int delay) |
| { |
| DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay); |
| |
| assert(thread_num == 0); |
| assert(thread); |
| |
| assert(_status == Idle); |
| assert(!tickEvent.scheduled()); |
| |
| notIdleFraction++; |
| numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend); |
| |
| //Make sure ticks are still on multiples of cycles |
| schedule(tickEvent, nextCycle(curTick + ticks(delay))); |
| _status = Running; |
| } |
| |
| |
| void |
| AtomicSimpleCPU::suspendContext(int thread_num) |
| { |
| DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num); |
| |
| assert(thread_num == 0); |
| assert(thread); |
| |
| if (_status == Idle) |
| return; |
| |
| assert(_status == Running); |
| |
| // tick event may not be scheduled if this gets called from inside |
| // an instruction's execution, e.g. "quiesce" |
| if (tickEvent.scheduled()) |
| deschedule(tickEvent); |
| |
| notIdleFraction--; |
| _status = Idle; |
| } |
| |
| |
| template <class T> |
| Fault |
| AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) |
| { |
| // use the CPU's statically allocated read request and packet objects |
| Request *req = &data_read_req; |
| |
| if (traceData) { |
| traceData->setAddr(addr); |
| } |
| |
| //The block size of our peer. |
| unsigned blockSize = dcachePort.peerBlockSize(); |
| //The size of the data we're trying to read. |
| int dataSize = sizeof(T); |
| |
| uint8_t * dataPtr = (uint8_t *)&data; |
| |
| //The address of the second part of this access if it needs to be split |
| //across a cache line boundary. |
| Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); |
| |
| if(secondAddr > addr) |
| dataSize = secondAddr - addr; |
| |
| dcache_latency = 0; |
| |
| while(1) { |
| req->setVirt(0, addr, dataSize, flags, thread->readPC()); |
| |
| // translate to physical address |
| Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read); |
| |
| // Now do the access. |
| if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) { |
| Packet pkt = Packet(req, |
| req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq, |
| Packet::Broadcast); |
| pkt.dataStatic(dataPtr); |
| |
| if (req->isMmapedIpr()) |
| dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt); |
| else { |
| if (hasPhysMemPort && pkt.getAddr() == physMemAddr) |
| dcache_latency += physmemPort.sendAtomic(&pkt); |
| else |
| dcache_latency += dcachePort.sendAtomic(&pkt); |
| } |
| dcache_access = true; |
| |
| assert(!pkt.isError()); |
| |
| if (req->isLLSC()) { |
| TheISA::handleLockedRead(thread, req); |
| } |
| } |
| |
| // This will need a new way to tell if it has a dcache attached. |
| if (req->isUncacheable()) |
| recordEvent("Uncached Read"); |
| |
| //If there's a fault, return it |
| if (fault != NoFault) { |
| if (req->isPrefetch()) { |
| return NoFault; |
| } else { |
| return fault; |
| } |
| } |
| |
| //If we don't need to access a second cache line, stop now. |
| if (secondAddr <= addr) |
| { |
| data = gtoh(data); |
| if (traceData) { |
| traceData->setData(data); |
| } |
| if (req->isLocked() && fault == NoFault) { |
| assert(!locked); |
| locked = true; |
| } |
| return fault; |
| } |
| |
| /* |
| * Set up for accessing the second cache line. |
| */ |
| |
| //Move the pointer we're reading into to the correct location. |
| dataPtr += dataSize; |
| //Adjust the size to get the remaining bytes. |
| dataSize = addr + sizeof(T) - secondAddr; |
| //And access the right address. |
| addr = secondAddr; |
| } |
| } |
| |
| #ifndef DOXYGEN_SHOULD_SKIP_THIS |
| |
| template |
| Fault |
| AtomicSimpleCPU::read(Addr addr, Twin32_t &data, unsigned flags); |
| |
| template |
| Fault |
| AtomicSimpleCPU::read(Addr addr, Twin64_t &data, unsigned flags); |
| |
| template |
| Fault |
| AtomicSimpleCPU::read(Addr addr, uint64_t &data, unsigned flags); |
| |
| template |
| Fault |
| AtomicSimpleCPU::read(Addr addr, uint32_t &data, unsigned flags); |
| |
| template |
| Fault |
| AtomicSimpleCPU::read(Addr addr, uint16_t &data, unsigned flags); |
| |
| template |
| Fault |
| AtomicSimpleCPU::read(Addr addr, uint8_t &data, unsigned flags); |
| |
| #endif //DOXYGEN_SHOULD_SKIP_THIS |
| |
| template<> |
| Fault |
| AtomicSimpleCPU::read(Addr addr, double &data, unsigned flags) |
| { |
| return read(addr, *(uint64_t*)&data, flags); |
| } |
| |
| template<> |
| Fault |
| AtomicSimpleCPU::read(Addr addr, float &data, unsigned flags) |
| { |
| return read(addr, *(uint32_t*)&data, flags); |
| } |
| |
| |
| template<> |
| Fault |
| AtomicSimpleCPU::read(Addr addr, int32_t &data, unsigned flags) |
| { |
| return read(addr, (uint32_t&)data, flags); |
| } |
| |
| |
| template <class T> |
| Fault |
| AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) |
| { |
| // use the CPU's statically allocated write request and packet objects |
| Request *req = &data_write_req; |
| |
| if (traceData) { |
| traceData->setAddr(addr); |
| } |
| |
| //The block size of our peer. |
| unsigned blockSize = dcachePort.peerBlockSize(); |
| //The size of the data we're trying to read. |
| int dataSize = sizeof(T); |
| |
| uint8_t * dataPtr = (uint8_t *)&data; |
| |
| //The address of the second part of this access if it needs to be split |
| //across a cache line boundary. |
| Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); |
| |
| if(secondAddr > addr) |
| dataSize = secondAddr - addr; |
| |
| dcache_latency = 0; |
| |
| while(1) { |
| req->setVirt(0, addr, dataSize, flags, thread->readPC()); |
| |
| // translate to physical address |
| Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write); |
| |
| // Now do the access. |
| if (fault == NoFault) { |
| MemCmd cmd = MemCmd::WriteReq; // default |
| bool do_access = true; // flag to suppress cache access |
| |
| if (req->isLLSC()) { |
| cmd = MemCmd::StoreCondReq; |
| do_access = TheISA::handleLockedWrite(thread, req); |
| } else if (req->isSwap()) { |
| cmd = MemCmd::SwapReq; |
| if (req->isCondSwap()) { |
| assert(res); |
| req->setExtraData(*res); |
| } |
| } |
| |
| if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) { |
| Packet pkt = Packet(req, cmd, Packet::Broadcast); |
| pkt.dataStatic(dataPtr); |
| |
| if (req->isMmapedIpr()) { |
| dcache_latency += |
| TheISA::handleIprWrite(thread->getTC(), &pkt); |
| } else { |
| //XXX This needs to be outside of the loop in order to |
| //work properly for cache line boundary crossing |
| //accesses in transendian simulations. |
| data = htog(data); |
| if (hasPhysMemPort && pkt.getAddr() == physMemAddr) |
| dcache_latency += physmemPort.sendAtomic(&pkt); |
| else |
| dcache_latency += dcachePort.sendAtomic(&pkt); |
| } |
| dcache_access = true; |
| assert(!pkt.isError()); |
| |
| if (req->isSwap()) { |
| assert(res); |
| *res = pkt.get<T>(); |
| } |
| } |
| |
| if (res && !req->isSwap()) { |
| *res = req->getExtraData(); |
| } |
| } |
| |
| // This will need a new way to tell if it's hooked up to a cache or not. |
| if (req->isUncacheable()) |
| recordEvent("Uncached Write"); |
| |
| //If there's a fault or we don't need to access a second cache line, |
| //stop now. |
| if (fault != NoFault || secondAddr <= addr) |
| { |
| // If the write needs to have a fault on the access, consider |
| // calling changeStatus() and changing it to "bad addr write" |
| // or something. |
| if (traceData) { |
| traceData->setData(gtoh(data)); |
| } |
| if (req->isLocked() && fault == NoFault) { |
| assert(locked); |
| locked = false; |
| } |
| if (fault != NoFault && req->isPrefetch()) { |
| return NoFault; |
| } else { |
| return fault; |
| } |
| } |
| |
| /* |
| * Set up for accessing the second cache line. |
| */ |
| |
| //Move the pointer we're reading into to the correct location. |
| dataPtr += dataSize; |
| //Adjust the size to get the remaining bytes. |
| dataSize = addr + sizeof(T) - secondAddr; |
| //And access the right address. |
| addr = secondAddr; |
| } |
| } |
| |
| |
| #ifndef DOXYGEN_SHOULD_SKIP_THIS |
| |
| template |
| Fault |
| AtomicSimpleCPU::write(Twin32_t data, Addr addr, |
| unsigned flags, uint64_t *res); |
| |
| template |
| Fault |
| AtomicSimpleCPU::write(Twin64_t data, Addr addr, |
| unsigned flags, uint64_t *res); |
| |
| template |
| Fault |
| AtomicSimpleCPU::write(uint64_t data, Addr addr, |
| unsigned flags, uint64_t *res); |
| |
| template |
| Fault |
| AtomicSimpleCPU::write(uint32_t data, Addr addr, |
| unsigned flags, uint64_t *res); |
| |
| template |
| Fault |
| AtomicSimpleCPU::write(uint16_t data, Addr addr, |
| unsigned flags, uint64_t *res); |
| |
| template |
| Fault |
| AtomicSimpleCPU::write(uint8_t data, Addr addr, |
| unsigned flags, uint64_t *res); |
| |
| #endif //DOXYGEN_SHOULD_SKIP_THIS |
| |
| template<> |
| Fault |
| AtomicSimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res) |
| { |
| return write(*(uint64_t*)&data, addr, flags, res); |
| } |
| |
| template<> |
| Fault |
| AtomicSimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res) |
| { |
| return write(*(uint32_t*)&data, addr, flags, res); |
| } |
| |
| |
| template<> |
| Fault |
| AtomicSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) |
| { |
| return write((uint32_t)data, addr, flags, res); |
| } |
| |
| |
| void |
| AtomicSimpleCPU::tick() |
| { |
| DPRINTF(SimpleCPU, "Tick\n"); |
| |
| Tick latency = 0; |
| |
| for (int i = 0; i < width || locked; ++i) { |
| numCycles++; |
| |
| if (!curStaticInst || !curStaticInst->isDelayedCommit()) |
| checkForInterrupts(); |
| |
| checkPcEventQueue(); |
| |
| Fault fault = NoFault; |
| |
| bool fromRom = isRomMicroPC(thread->readMicroPC()); |
| if (!fromRom && !curMacroStaticInst) { |
| setupFetchRequest(&ifetch_req); |
| fault = thread->itb->translateAtomic(&ifetch_req, tc, |
| BaseTLB::Execute); |
| } |
| |
| if (fault == NoFault) { |
| Tick icache_latency = 0; |
| bool icache_access = false; |
| dcache_access = false; // assume no dcache access |
| |
| if (!fromRom && !curMacroStaticInst) { |
| // This is commented out because the predecoder would act like |
| // a tiny cache otherwise. It wouldn't be flushed when needed |
| // like the I cache. It should be flushed, and when that works |
| // this code should be uncommented. |
| //Fetch more instruction memory if necessary |
| //if(predecoder.needMoreBytes()) |
| //{ |
| icache_access = true; |
| Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq, |
| Packet::Broadcast); |
| ifetch_pkt.dataStatic(&inst); |
| |
| if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr) |
| icache_latency = physmemPort.sendAtomic(&ifetch_pkt); |
| else |
| icache_latency = icachePort.sendAtomic(&ifetch_pkt); |
| |
| assert(!ifetch_pkt.isError()); |
| |
| // ifetch_req is initialized to read the instruction directly |
| // into the CPU object's inst field. |
| //} |
| } |
| |
| preExecute(); |
| |
| if (curStaticInst) { |
| fault = curStaticInst->execute(this, traceData); |
| |
| // keep an instruction count |
| if (fault == NoFault) |
| countInst(); |
| else if (traceData) { |
| // If there was a fault, we should trace this instruction. |
| delete traceData; |
| traceData = NULL; |
| } |
| |
| postExecute(); |
| } |
| |
| // @todo remove me after debugging with legion done |
| if (curStaticInst && (!curStaticInst->isMicroop() || |
| curStaticInst->isFirstMicroop())) |
| instCnt++; |
| |
| Tick stall_ticks = 0; |
| if (simulate_inst_stalls && icache_access) |
| stall_ticks += icache_latency; |
| |
| if (simulate_data_stalls && dcache_access) |
| stall_ticks += dcache_latency; |
| |
| if (stall_ticks) { |
| Tick stall_cycles = stall_ticks / ticks(1); |
| Tick aligned_stall_ticks = ticks(stall_cycles); |
| |
| if (aligned_stall_ticks < stall_ticks) |
| aligned_stall_ticks += 1; |
| |
| latency += aligned_stall_ticks; |
| } |
| |
| } |
| if(fault != NoFault || !stayAtPC) |
| advancePC(fault); |
| } |
| |
| // instruction takes at least one cycle |
| if (latency < ticks(1)) |
| latency = ticks(1); |
| |
| if (_status != Idle) |
| schedule(tickEvent, curTick + latency); |
| } |
| |
| |
| void |
| AtomicSimpleCPU::printAddr(Addr a) |
| { |
| dcachePort.printAddr(a); |
| } |
| |
| |
| //////////////////////////////////////////////////////////////////////// |
| // |
| // AtomicSimpleCPU Simulation Object |
| // |
| AtomicSimpleCPU * |
| AtomicSimpleCPUParams::create() |
| { |
| numThreads = 1; |
| #if !FULL_SYSTEM |
| if (workload.size() != 1) |
| panic("only one workload allowed"); |
| #endif |
| return new AtomicSimpleCPU(this); |
| } |