| /* |
| * Copyright (c) 2005 The Regents of The University of Michigan |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer; |
| * redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution; |
| * neither the name of the copyright holders nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #ifndef __CPU_OOO_CPU_OOO_CPU_HH__ |
| #define __CPU_OOO_CPU_OOO_CPU_HH__ |
| |
| #include "base/statistics.hh" |
| #include "config/full_system.hh" |
| #include "cpu/base.hh" |
| #include "cpu/exec_context.hh" |
| #include "encumbered/cpu/full/fu_pool.hh" |
| #include "cpu/ooo_cpu/ea_list.hh" |
| #include "cpu/pc_event.hh" |
| #include "cpu/static_inst.hh" |
| #include "mem/mem_interface.hh" |
| #include "sim/eventq.hh" |
| |
// Forward declarations.  Where these types are used in this header they
// appear only behind pointers, so the complete definitions are not
// required here.
class Checkpoint;
class MemInterface;

#if FULL_SYSTEM

// Types that are only declared when FULL_SYSTEM is enabled.
class AlphaDTB;
class AlphaITB;
class PhysicalMemory;
class Processor;

class GDBListener;
class RemoteGDB;

#else

// Type that is only declared when FULL_SYSTEM is disabled.
class Process;

#endif // FULL_SYSTEM

namespace Trace {
    class InstRecord;
}
| |
| /** |
| * Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with |
| * simple out-of-order capabilities added to it. It is still a 1 CPI machine |
| * (?), but is capable of handling cache misses. Basically it models having |
| * a ROB/IQ by only allowing a certain amount of instructions to execute while |
| * the cache miss is outstanding. |
| */ |
| |
| template <class Impl> |
| class OoOCPU : public BaseCPU |
| { |
| private: |
| typedef typename Impl::DynInst DynInst; |
| typedef typename Impl::DynInstPtr DynInstPtr; |
| |
| public: |
| // main simulation loop (one cycle) |
| void tick(); |
| |
| private: |
| struct TickEvent : public Event |
| { |
| OoOCPU *cpu; |
| int width; |
| |
| TickEvent(OoOCPU *c, int w); |
| void process(); |
| const char *description(); |
| }; |
| |
| TickEvent tickEvent; |
| |
| /// Schedule tick event, regardless of its current state. |
| void scheduleTickEvent(int delay) |
| { |
| if (tickEvent.squashed()) |
| tickEvent.reschedule(curTick + delay); |
| else if (!tickEvent.scheduled()) |
| tickEvent.schedule(curTick + delay); |
| } |
| |
| /// Unschedule tick event, regardless of its current state. |
| void unscheduleTickEvent() |
| { |
| if (tickEvent.scheduled()) |
| tickEvent.squash(); |
| } |
| |
| private: |
| Trace::InstRecord *traceData; |
| |
| template<typename T> |
| void trace_data(T data); |
| |
| public: |
| // |
| enum Status { |
| Running, |
| Idle, |
| IcacheMiss, |
| IcacheMissComplete, |
| DcacheMissStall, |
| SwitchedOut |
| }; |
| |
| private: |
| Status _status; |
| |
| public: |
| void post_interrupt(int int_num, int index); |
| |
| void zero_fill_64(Addr addr) { |
| static int warned = 0; |
| if (!warned) { |
| warn ("WH64 is not implemented"); |
| warned = 1; |
| } |
| }; |
| |
| struct Params : public BaseCPU::Params |
| { |
| MemInterface *icache_interface; |
| MemInterface *dcache_interface; |
| int width; |
| #if FULL_SYSTEM |
| AlphaITB *itb; |
| AlphaDTB *dtb; |
| FunctionalMemory *mem; |
| #else |
| Process *process; |
| #endif |
| int issueWidth; |
| }; |
| |
| OoOCPU(Params *params); |
| |
| virtual ~OoOCPU(); |
| |
| void init(); |
| |
| private: |
| void copyFromXC(); |
| |
| public: |
| // execution context |
| ExecContext *xc; |
| |
| void switchOut(); |
| void takeOverFrom(BaseCPU *oldCPU); |
| |
| #if FULL_SYSTEM |
| Addr dbg_vtophys(Addr addr); |
| |
| bool interval_stats; |
| #endif |
| |
| // L1 instruction cache |
| MemInterface *icacheInterface; |
| |
| // L1 data cache |
| MemInterface *dcacheInterface; |
| |
| FuncUnitPool *fuPool; |
| |
| // Refcounted pointer to the one memory request. |
| MemReqPtr cacheMemReq; |
| |
| class ICacheCompletionEvent : public Event |
| { |
| private: |
| OoOCPU *cpu; |
| |
| public: |
| ICacheCompletionEvent(OoOCPU *_cpu); |
| |
| virtual void process(); |
| virtual const char *description(); |
| }; |
| |
| // Will need to create a cache completion event upon any memory miss. |
| ICacheCompletionEvent iCacheCompletionEvent; |
| |
| class DCacheCompletionEvent; |
| |
| typedef typename |
| std::list<DCacheCompletionEvent>::iterator DCacheCompEventIt; |
| |
| class DCacheCompletionEvent : public Event |
| { |
| private: |
| OoOCPU *cpu; |
| DynInstPtr inst; |
| DCacheCompEventIt dcceIt; |
| |
| public: |
| DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst, |
| DCacheCompEventIt &_dcceIt); |
| |
| virtual void process(); |
| virtual const char *description(); |
| }; |
| |
| friend class DCacheCompletionEvent; |
| |
| protected: |
| std::list<DCacheCompletionEvent> dCacheCompList; |
| DCacheCompEventIt dcceIt; |
| |
| private: |
| Status status() const { return _status; } |
| |
| virtual void activateContext(int thread_num, int delay); |
| virtual void suspendContext(int thread_num); |
| virtual void deallocateContext(int thread_num); |
| virtual void haltContext(int thread_num); |
| |
| // statistics |
| virtual void regStats(); |
| virtual void resetStats(); |
| |
| // number of simulated instructions |
| Counter numInst; |
| Counter startNumInst; |
| Stats::Scalar<> numInsts; |
| |
| virtual Counter totalInstructions() const |
| { |
| return numInst - startNumInst; |
| } |
| |
| // number of simulated memory references |
| Stats::Scalar<> numMemRefs; |
| |
| // number of simulated loads |
| Counter numLoad; |
| Counter startNumLoad; |
| |
| // number of idle cycles |
| Stats::Average<> notIdleFraction; |
| Stats::Formula idleFraction; |
| |
| // number of cycles stalled for I-cache misses |
| Stats::Scalar<> icacheStallCycles; |
| Counter lastIcacheStall; |
| |
| // number of cycles stalled for D-cache misses |
| Stats::Scalar<> dcacheStallCycles; |
| Counter lastDcacheStall; |
| |
| void processICacheCompletion(); |
| |
| public: |
| |
| virtual void serialize(std::ostream &os); |
| virtual void unserialize(Checkpoint *cp, const std::string §ion); |
| |
| #if FULL_SYSTEM |
| bool validInstAddr(Addr addr) { return true; } |
| bool validDataAddr(Addr addr) { return true; } |
| int getInstAsid() { return xc->regs.instAsid(); } |
| int getDataAsid() { return xc->regs.dataAsid(); } |
| |
| Fault translateInstReq(MemReqPtr &req) |
| { |
| return itb->translate(req); |
| } |
| |
| Fault translateDataReadReq(MemReqPtr &req) |
| { |
| return dtb->translate(req, false); |
| } |
| |
| Fault translateDataWriteReq(MemReqPtr &req) |
| { |
| return dtb->translate(req, true); |
| } |
| |
| #else |
| bool validInstAddr(Addr addr) |
| { return xc->validInstAddr(addr); } |
| |
| bool validDataAddr(Addr addr) |
| { return xc->validDataAddr(addr); } |
| |
| int getInstAsid() { return xc->asid; } |
| int getDataAsid() { return xc->asid; } |
| |
| Fault dummyTranslation(MemReqPtr &req) |
| { |
| #if 0 |
| assert((req->vaddr >> 48 & 0xffff) == 0); |
| #endif |
| |
| // put the asid in the upper 16 bits of the paddr |
| req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); |
| req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; |
| return NoFault; |
| } |
| Fault translateInstReq(MemReqPtr &req) |
| { |
| return dummyTranslation(req); |
| } |
| Fault translateDataReadReq(MemReqPtr &req) |
| { |
| return dummyTranslation(req); |
| } |
| Fault translateDataWriteReq(MemReqPtr &req) |
| { |
| return dummyTranslation(req); |
| } |
| |
| #endif |
| |
| template <class T> |
| Fault read(Addr addr, T &data, unsigned flags, DynInstPtr inst); |
| |
| template <class T> |
| Fault write(T data, Addr addr, unsigned flags, |
| uint64_t *res, DynInstPtr inst); |
| |
| void prefetch(Addr addr, unsigned flags) |
| { |
| // need to do this... |
| } |
| |
| void writeHint(Addr addr, int size, unsigned flags) |
| { |
| // need to do this... |
| } |
| |
| Fault copySrcTranslate(Addr src); |
| |
| Fault copy(Addr dest); |
| |
| private: |
| bool executeInst(DynInstPtr &inst); |
| |
| void renameInst(DynInstPtr &inst); |
| |
| void addInst(DynInstPtr &inst); |
| |
| void commitHeadInst(); |
| |
| bool getOneInst(); |
| |
| Fault fetchCacheLine(); |
| |
| InstSeqNum getAndIncrementInstSeq(); |
| |
| bool ambigMemAddr; |
| |
| private: |
| InstSeqNum globalSeqNum; |
| |
| DynInstPtr renameTable[TheISA::TotalNumRegs]; |
| DynInstPtr commitTable[TheISA::TotalNumRegs]; |
| |
| // Might need a table of the shadow registers as well. |
| #if FULL_SYSTEM |
| DynInstPtr palShadowTable[TheISA::NumIntRegs]; |
| #endif |
| |
| public: |
| // The register accessor methods provide the index of the |
| // instruction's operand (e.g., 0 or 1), not the architectural |
| // register index, to simplify the implementation of register |
| // renaming. We find the architectural register index by indexing |
| // into the instruction's own operand index table. Note that a |
| // raw pointer to the StaticInst is provided instead of a |
| // ref-counted StaticInstPtr to redice overhead. This is fine as |
| // long as these methods don't copy the pointer into any long-term |
| // storage (which is pretty hard to imagine they would have reason |
| // to do). |
| |
| // In the OoO case these shouldn't read from the XC but rather from the |
| // rename table of DynInsts. Also these likely shouldn't be called very |
| // often, other than when adding things into the xc during say a syscall. |
| |
| uint64_t readIntReg(StaticInst *si, int idx) |
| { |
| return xc->readIntReg(si->srcRegIdx(idx)); |
| } |
| |
| FloatReg readFloatReg(StaticInst *si, int idx, width) |
| { |
| int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; |
| return xc->readFloatReg(reg_idx, width); |
| } |
| |
| FloatReg readFloatReg(StaticInst *si, int idx) |
| { |
| int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; |
| return xc->readFloatReg(reg_idx); |
| } |
| |
| FloatRegBits readFloatRegBits(StaticInst *si, int idx, int width) |
| { |
| int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; |
| return xc->readFloatRegBits(reg_idx, width); |
| } |
| |
| FloatRegBits readFloatRegBits(StaticInst *si, int idx) |
| { |
| int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; |
| return xc->readFloatRegBits(reg_idx); |
| } |
| |
| void setIntReg(StaticInst *si, int idx, uint64_t val) |
| { |
| xc->setIntReg(si->destRegIdx(idx), val); |
| } |
| |
| void setFloatReg(StaticInst *si, int idx, FloatReg val, int width) |
| { |
| int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; |
| xc->setFloatReg(reg_idx, val, width); |
| } |
| |
| void setFloatReg(StaticInst *si, int idx, FloatReg val) |
| { |
| int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; |
| xc->setFloatReg(reg_idx, val); |
| } |
| |
| void setFloatRegBits(StaticInst *si, int idx, FloatRegBits val, int width) |
| { |
| int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; |
| xc->setFloatRegBits(reg_idx, val, width); |
| } |
| |
| void setFloatRegBits(StaticInst *si, int idx, FloatRegBits val) |
| { |
| int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; |
| xc->setFloatRegBits(reg_idx, val); |
| } |
| |
| uint64_t readPC() { return PC; } |
| void setNextPC(Addr val) { nextPC = val; } |
| |
| private: |
| Addr PC; |
| Addr nextPC; |
| |
| unsigned issueWidth; |
| |
| bool fetchRedirExcp; |
| bool fetchRedirBranch; |
| |
| /** Mask to get a cache block's address. */ |
| Addr cacheBlkMask; |
| |
| unsigned cacheBlkSize; |
| |
| Addr cacheBlkPC; |
| |
| /** The cache line being fetched. */ |
| uint8_t *cacheData; |
| |
| protected: |
| bool cacheBlkValid; |
| |
| private: |
| |
| // Align an address (typically a PC) to the start of an I-cache block. |
| // We fold in the PISA 64- to 32-bit conversion here as well. |
| Addr icacheBlockAlignPC(Addr addr) |
| { |
| addr = TheISA::realPCToFetchPC(addr); |
| return (addr & ~(cacheBlkMask)); |
| } |
| |
| unsigned instSize; |
| |
| // ROB tracking stuff. |
| DynInstPtr robHeadPtr; |
| DynInstPtr robTailPtr; |
| unsigned robSize; |
| unsigned robInsts; |
| |
| // List of outstanding EA instructions. |
| protected: |
| EAList eaList; |
| |
| public: |
| void branchToTarget(Addr val) |
| { |
| if (!fetchRedirExcp) { |
| fetchRedirBranch = true; |
| PC = val; |
| } |
| } |
| |
| // ISA stuff: |
| uint64_t readUniq() { return xc->readUniq(); } |
| void setUniq(uint64_t val) { xc->setUniq(val); } |
| |
| uint64_t readFpcr() { return xc->readFpcr(); } |
| void setFpcr(uint64_t val) { xc->setFpcr(val); } |
| |
| #if FULL_SYSTEM |
| uint64_t readIpr(int idx, Fault &fault) { return xc->readIpr(idx, fault); } |
| Fault setIpr(int idx, uint64_t val) { return xc->setIpr(idx, val); } |
| Fault hwrei() { return xc->hwrei(); } |
| int readIntrFlag() { return xc->readIntrFlag(); } |
| void setIntrFlag(int val) { xc->setIntrFlag(val); } |
| bool inPalMode() { return xc->inPalMode(); } |
| void trap(Fault fault) { fault->invoke(xc); } |
| bool simPalCheck(int palFunc) { return xc->simPalCheck(palFunc); } |
| #else |
| void syscall() { xc->syscall(); } |
| #endif |
| |
| ExecContext *xcBase() { return xc; } |
| }; |
| |
| |
| // precise architected memory state accessor macros |
| template <class Impl> |
| template <class T> |
| Fault |
| OoOCPU<Impl>::read(Addr addr, T &data, unsigned flags, DynInstPtr inst) |
| { |
| MemReqPtr readReq = new MemReq(); |
| readReq->xc = xc; |
| readReq->asid = 0; |
| readReq->data = new uint8_t[64]; |
| |
| readReq->reset(addr, sizeof(T), flags); |
| |
| // translate to physical address - This might be an ISA impl call |
| Fault fault = translateDataReadReq(readReq); |
| |
| // do functional access |
| if (fault == NoFault) |
| fault = xc->mem->read(readReq, data); |
| #if 0 |
| if (traceData) { |
| traceData->setAddr(addr); |
| if (fault == NoFault) |
| traceData->setData(data); |
| } |
| #endif |
| |
| // if we have a cache, do cache access too |
| if (fault == NoFault && dcacheInterface) { |
| readReq->cmd = Read; |
| readReq->completionEvent = NULL; |
| readReq->time = curTick; |
| /*MemAccessResult result = */dcacheInterface->access(readReq); |
| |
| if (dcacheInterface->doEvents()) { |
| readReq->completionEvent = new DCacheCompletionEvent(this, inst, |
| dcceIt); |
| } |
| } |
| |
| if (!dcacheInterface && (readReq->flags & UNCACHEABLE)) |
| recordEvent("Uncached Read"); |
| |
| return fault; |
| } |
| |
| template <class Impl> |
| template <class T> |
| Fault |
| OoOCPU<Impl>::write(T data, Addr addr, unsigned flags, |
| uint64_t *res, DynInstPtr inst) |
| { |
| MemReqPtr writeReq = new MemReq(); |
| writeReq->xc = xc; |
| writeReq->asid = 0; |
| writeReq->data = new uint8_t[64]; |
| |
| #if 0 |
| if (traceData) { |
| traceData->setAddr(addr); |
| traceData->setData(data); |
| } |
| #endif |
| |
| writeReq->reset(addr, sizeof(T), flags); |
| |
| // translate to physical address |
| Fault fault = translateDataWriteReq(writeReq); |
| |
| // do functional access |
| if (fault == NoFault) |
| fault = xc->write(writeReq, data); |
| |
| if (fault == NoFault && dcacheInterface) { |
| writeReq->cmd = Write; |
| memcpy(writeReq->data,(uint8_t *)&data,writeReq->size); |
| writeReq->completionEvent = NULL; |
| writeReq->time = curTick; |
| /*MemAccessResult result = */dcacheInterface->access(writeReq); |
| |
| if (dcacheInterface->doEvents()) { |
| writeReq->completionEvent = new DCacheCompletionEvent(this, inst, |
| dcceIt); |
| } |
| } |
| |
| if (res && (fault == NoFault)) |
| *res = writeReq->result; |
| |
| if (!dcacheInterface && (writeReq->flags & UNCACHEABLE)) |
| recordEvent("Uncached Write"); |
| |
| return fault; |
| } |
| |
| |
| #endif // __CPU_OOO_CPU_OOO_CPU_HH__ |