| /* |
| * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. |
| * All rights reserved. |
| * |
| * For use for simulation and test purposes only |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * 3. Neither the name of the copyright holder nor the names of its contributors |
| * may be used to endorse or promote products derived from this software |
| * without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| * POSSIBILITY OF SUCH DAMAGE. |
| * |
| * Author: Lisa Hsu |
| */ |
| |
| #include "gpu-compute/gpu_tlb.hh" |
| |
| #include <cmath> |
| #include <cstring> |
| |
| #include "arch/x86/faults.hh" |
| #include "arch/x86/insts/microldstop.hh" |
| #include "arch/x86/page_size.hh" |
| #include "arch/x86/pagetable.hh" |
| #include "arch/x86/pagetable_walker.hh" |
| #include "arch/x86/regs/misc.hh" |
| #include "arch/x86/regs/msr.hh" |
| #include "arch/x86/x86_traits.hh" |
| #include "base/bitfield.hh" |
| #include "base/logging.hh" |
| #include "base/output.hh" |
| #include "base/trace.hh" |
| #include "cpu/base.hh" |
| #include "cpu/thread_context.hh" |
| #include "debug/GPUPrefetch.hh" |
| #include "debug/GPUTLB.hh" |
| #include "mem/packet_access.hh" |
| #include "mem/page_table.hh" |
| #include "mem/request.hh" |
| #include "sim/process.hh" |
| #include "sim/pseudo_inst.hh" |
| |
| namespace gem5 |
| { |
| namespace X86ISA |
| { |
| |
| GpuTLB::GpuTLB(const Params &p) |
| : ClockedObject(p), configAddress(0), size(p.size), |
| cleanupEvent([this]{ cleanup(); }, name(), false, |
| Event::Maximum_Pri), |
| exitEvent([this]{ exitCallback(); }, name()), stats(this) |
| { |
| assoc = p.assoc; |
| assert(assoc <= size); |
| numSets = size/assoc; |
| allocationPolicy = p.allocationPolicy; |
| hasMemSidePort = false; |
| accessDistance = p.accessDistance; |
| |
| tlb.assign(size, TlbEntry()); |
| |
| freeList.resize(numSets); |
| entryList.resize(numSets); |
| |
| for (int set = 0; set < numSets; ++set) { |
| for (int way = 0; way < assoc; ++way) { |
| int x = set * assoc + way; |
| freeList[set].push_back(&tlb.at(x)); |
| } |
| } |
| |
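    // When size == assoc the TLB has a single set and is fully
    // associative (FA); the set index is then always zero.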
| FA = (size == assoc); |
| |
| /** |
| * @warning: the set-associative version assumes you have a |
| * fixed page size of 4KB. |
| * If the page size is greather than 4KB (as defined in the |
| * X86ISA::PageBytes), then there are various issues w/ the current |
| * implementation (you'd have the same 8KB page being replicated in |
| * different sets etc) |
| */ |
| setMask = numSets - 1; |
| |
| maxCoalescedReqs = p.maxOutstandingReqs; |
| |
| // Do not allow maxCoalescedReqs to be more than the TLB associativity |
| if (maxCoalescedReqs > assoc) { |
| maxCoalescedReqs = assoc; |
| cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc); |
| } |
| |
| outstandingReqs = 0; |
| hitLatency = p.hitLatency; |
| missLatency1 = p.missLatency1; |
| missLatency2 = p.missLatency2; |
| |
| // create the response ports based on the number of connected ports |
| for (size_t i = 0; i < p.port_cpu_side_ports_connection_count; ++i) { |
| cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d", |
| name(), i), this, i)); |
| } |
| |
| // create the request ports based on the number of connected ports |
| for (size_t i = 0; i < p.port_mem_side_ports_connection_count; ++i) { |
| memSidePort.push_back(new MemSidePort(csprintf("%s-port%d", |
| name(), i), this, i)); |
| } |
| } |
| |
| // fixme: this is never called? |
| GpuTLB::~GpuTLB() |
| { |
| // make sure all the hash-maps are empty |
| assert(translationReturnEvent.empty()); |
| } |
| |
| Port & |
| GpuTLB::getPort(const std::string &if_name, PortID idx) |
| { |
| if (if_name == "cpu_side_ports") { |
| if (idx >= static_cast<PortID>(cpuSidePort.size())) { |
| panic("TLBCoalescer::getPort: unknown index %d\n", idx); |
| } |
| |
| return *cpuSidePort[idx]; |
| } else if (if_name == "mem_side_ports") { |
| if (idx >= static_cast<PortID>(memSidePort.size())) { |
| panic("TLBCoalescer::getPort: unknown index %d\n", idx); |
| } |
| |
| hasMemSidePort = true; |
| |
| return *memSidePort[idx]; |
| } else { |
| panic("TLBCoalescer::getPort: unknown port %s\n", if_name); |
| } |
| } |
| |
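/**
 * Insert an entry mapping virtual page vpn. A free entry from the
 * set's freeList is used if available; otherwise the LRU entry (the
 * back of the MRU-ordered entryList) is evicted. The new entry is
 * placed at the MRU (front) position.
 */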
| TlbEntry* |
| GpuTLB::insert(Addr vpn, TlbEntry &entry) |
| { |
| TlbEntry *newEntry = nullptr; |
| |
| /** |
| * vpn holds the virtual page address |
| * The least significant bits are simply masked |
| */ |
| int set = (vpn >> PageShift) & setMask; |
| |
| if (!freeList[set].empty()) { |
| newEntry = freeList[set].front(); |
| freeList[set].pop_front(); |
| } else { |
| newEntry = entryList[set].back(); |
| entryList[set].pop_back(); |
| } |
| |
| *newEntry = entry; |
| newEntry->vaddr = vpn; |
| entryList[set].push_front(newEntry); |
| |
| return newEntry; |
| } |
| |
| GpuTLB::EntryList::iterator |
| GpuTLB::lookupIt(Addr va, bool update_lru) |
| { |
| int set = (va >> PageShift) & setMask; |
| |
| if (FA) { |
| assert(!set); |
| } |
| |
| auto entry = entryList[set].begin(); |
| for (; entry != entryList[set].end(); ++entry) { |
| int page_size = (*entry)->size(); |
| |
| if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) { |
| DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x " |
| "with size %#x.\n", va, (*entry)->vaddr, page_size); |
| |
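            // Maintain LRU order by splicing the matched entry to the
            // front of the list; the back of the list is thus always
            // the eviction candidate.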
| if (update_lru) { |
| entryList[set].push_front(*entry); |
| entryList[set].erase(entry); |
| entry = entryList[set].begin(); |
| } |
| |
| break; |
| } |
| } |
| |
| return entry; |
| } |
| |
| TlbEntry* |
| GpuTLB::lookup(Addr va, bool update_lru) |
| { |
| int set = (va >> PageShift) & setMask; |
| |
| auto entry = lookupIt(va, update_lru); |
| |
| if (entry == entryList[set].end()) |
| return nullptr; |
| else |
| return *entry; |
| } |
| |
| void |
| GpuTLB::invalidateAll() |
| { |
| DPRINTF(GPUTLB, "Invalidating all entries.\n"); |
| |
| for (int i = 0; i < numSets; ++i) { |
| while (!entryList[i].empty()) { |
| TlbEntry *entry = entryList[i].front(); |
| entryList[i].pop_front(); |
| freeList[i].push_back(entry); |
| } |
| } |
| } |
| |
| void |
| GpuTLB::setConfigAddress(uint32_t addr) |
| { |
| configAddress = addr; |
| } |
| |
| void |
| GpuTLB::invalidateNonGlobal() |
| { |
| DPRINTF(GPUTLB, "Invalidating all non global entries.\n"); |
| |
| for (int i = 0; i < numSets; ++i) { |
| for (auto entryIt = entryList[i].begin(); |
| entryIt != entryList[i].end();) { |
| if (!(*entryIt)->global) { |
| freeList[i].push_back(*entryIt); |
| entryList[i].erase(entryIt++); |
| } else { |
| ++entryIt; |
| } |
| } |
| } |
| } |
| |
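/**
 * Invalidate the entry (if any) that maps virtual address va. The
 * address space number (asn) is currently ignored.
 */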
| void |
| GpuTLB::demapPage(Addr va, uint64_t asn) |
| { |
| |
| int set = (va >> PageShift) & setMask; |
| auto entry = lookupIt(va, false); |
| |
| if (entry != entryList[set].end()) { |
| freeList[set].push_back(*entry); |
| entryList[set].erase(entry); |
| } |
| } |
| |
| |
| |
| namespace |
| { |
| |
| Cycles |
| localMiscRegAccess(bool read, MiscRegIndex regNum, |
| ThreadContext *tc, PacketPtr pkt) |
| { |
    if (read) {
        RegVal data = htole(tc->readMiscReg(regNum));
        // Make sure we don't trot off the end of data.
        assert(pkt->getSize() <= sizeof(RegVal));
        pkt->setData((uint8_t *)&data);
    } else {
        // On a write, the new value comes from the packet, not from
        // the current register contents.
        RegVal data = 0;
        assert(pkt->getSize() <= sizeof(RegVal));
        pkt->writeData((uint8_t *)&data);
        tc->setMiscReg(regNum, letoh(data));
    }
| return Cycles(1); |
| } |
| |
| } // anonymous namespace |
| |
| Fault |
| GpuTLB::translateInt(bool read, const RequestPtr &req, ThreadContext *tc) |
| { |
| DPRINTF(GPUTLB, "Addresses references internal memory.\n"); |
| Addr vaddr = req->getVaddr(); |
| Addr prefix = (vaddr >> 3) & IntAddrPrefixMask; |
| |
| if (prefix == IntAddrPrefixCPUID) { |
| panic("CPUID memory space not yet implemented!\n"); |
| } else if (prefix == IntAddrPrefixMSR) { |
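        // The MSR number is encoded in the virtual address; strip the
        // prefix to recover it for msrAddrToIndex().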
| vaddr = (vaddr >> 3) & ~IntAddrPrefixMask; |
| |
| MiscRegIndex regNum; |
| if (!msrAddrToIndex(regNum, vaddr)) |
| return std::make_shared<GeneralProtection>(0); |
| |
| req->setLocalAccessor( |
| [read,regNum](ThreadContext *tc, PacketPtr pkt) |
| { |
| return localMiscRegAccess(read, regNum, tc, pkt); |
| } |
| ); |
| |
| return NoFault; |
| } else if (prefix == IntAddrPrefixIO) { |
| // TODO If CPL > IOPL or in virtual mode, check the I/O permission |
| // bitmap in the TSS. |
| |
| Addr IOPort = vaddr & ~IntAddrPrefixMask; |
| // Make sure the address fits in the expected 16 bit IO address |
| // space. |
| assert(!(IOPort & ~0xFFFF)); |
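        // Ports 0xCF8 (config address) and 0xCFC-0xCFF (config data)
        // implement the legacy PCI configuration mechanism #1.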
| if (IOPort == 0xCF8 && req->getSize() == 4) { |
| req->setLocalAccessor( |
| [read](ThreadContext *tc, PacketPtr pkt) |
| { |
| return localMiscRegAccess( |
| read, MISCREG_PCI_CONFIG_ADDRESS, tc, pkt); |
| } |
| ); |
| } else if ((IOPort & ~mask(2)) == 0xCFC) { |
| req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER); |
| Addr configAddress = |
| tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS); |
| if (bits(configAddress, 31, 31)) { |
| req->setPaddr(PhysAddrPrefixPciConfig | |
| mbits(configAddress, 30, 2) | |
| (IOPort & mask(2))); |
| } else { |
| req->setPaddr(PhysAddrPrefixIO | IOPort); |
| } |
| } else { |
| req->setFlags(Request::UNCACHEABLE | Request::STRICT_ORDER); |
| req->setPaddr(PhysAddrPrefixIO | IOPort); |
| } |
| return NoFault; |
| } else { |
| panic("Access to unrecognized internal address space %#x.\n", |
| prefix); |
| } |
| } |
| |
| /** |
| * TLB_lookup will only perform a TLB lookup returning true on a TLB hit |
| * and false on a TLB miss. |
| * Many of the checks about different modes have been converted to |
| * assertions, since these parts of the code are not really used. |
| * On a hit it will update the LRU stack. |
| */ |
| bool |
| GpuTLB::tlbLookup(const RequestPtr &req, |
| ThreadContext *tc, bool update_stats) |
| { |
| bool tlb_hit = false; |
| #ifndef NDEBUG |
| uint32_t flags = req->getFlags(); |
| int seg = flags & SegmentFlagMask; |
| #endif |
| |
| assert(seg != SEGMENT_REG_MS); |
| Addr vaddr = req->getVaddr(); |
| DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr); |
| HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); |
| |
| if (m5Reg.prot) { |
| DPRINTF(GPUTLB, "In protected mode.\n"); |
| // make sure we are in 64-bit mode |
| assert(m5Reg.mode == LongMode); |
| |
| // If paging is enabled, do the translation. |
| if (m5Reg.paging) { |
| DPRINTF(GPUTLB, "Paging enabled.\n"); |
| //update LRU stack on a hit |
| TlbEntry *entry = lookup(vaddr, true); |
| |
| if (entry) |
| tlb_hit = true; |
| |
| if (!update_stats) { |
| // functional tlb access for memory initialization |
| // i.e., memory seeding or instr. seeding -> don't update |
| // TLB and stats |
| return tlb_hit; |
| } |
| |
| stats.localNumTLBAccesses++; |
| |
| if (!entry) { |
| stats.localNumTLBMisses++; |
| } else { |
| stats.localNumTLBHits++; |
| } |
| } |
| } |
| |
| return tlb_hit; |
| } |
| |
| Fault |
| GpuTLB::translate(const RequestPtr &req, ThreadContext *tc, |
| Translation *translation, Mode mode, |
| bool &delayedResponse, bool timing, int &latency) |
| { |
| uint32_t flags = req->getFlags(); |
| int seg = flags & SegmentFlagMask; |
| bool storeCheck = flags & (StoreCheck << FlagShift); |
| |
| // If this is true, we're dealing with a request |
| // to a non-memory address space. |
| if (seg == SEGMENT_REG_MS) { |
| return translateInt(mode == Mode::Read, req, tc); |
| } |
| |
| delayedResponse = false; |
| Addr vaddr = req->getVaddr(); |
| DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr); |
| |
| HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); |
| |
| // If protected mode has been enabled... |
| if (m5Reg.prot) { |
| DPRINTF(GPUTLB, "In protected mode.\n"); |
| // If we're not in 64-bit mode, do protection/limit checks |
| if (m5Reg.mode != LongMode) { |
| DPRINTF(GPUTLB, "Not in long mode. Checking segment " |
| "protection.\n"); |
| |
| // Check for a null segment selector. |
| if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR || |
| seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS) |
| && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) { |
| return std::make_shared<GeneralProtection>(0); |
| } |
| |
| bool expandDown = false; |
| SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg)); |
| |
| if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) { |
| if (!attr.writable && (mode == BaseMMU::Write || |
| storeCheck)) |
| return std::make_shared<GeneralProtection>(0); |
| |
| if (!attr.readable && mode == BaseMMU::Read) |
| return std::make_shared<GeneralProtection>(0); |
| |
| expandDown = attr.expandDown; |
| |
| } |
| |
| Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg)); |
| Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg)); |
| // This assumes we're not in 64 bit mode. If we were, the |
| // default address size is 64 bits, overridable to 32. |
| int size = 32; |
| bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift)); |
| SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR); |
| |
| if ((csAttr.defaultSize && sizeOverride) || |
| (!csAttr.defaultSize && !sizeOverride)) { |
| size = 16; |
| } |
| |
| Addr offset = bits(vaddr - base, size - 1, 0); |
| Addr endOffset = offset + req->getSize() - 1; |
| |
| if (expandDown) { |
| DPRINTF(GPUTLB, "Checking an expand down segment.\n"); |
| warn_once("Expand down segments are untested.\n"); |
| |
| if (offset <= limit || endOffset <= limit) |
| return std::make_shared<GeneralProtection>(0); |
| } else { |
| if (offset > limit || endOffset > limit) |
| return std::make_shared<GeneralProtection>(0); |
| } |
| } |
| |
| // If paging is enabled, do the translation. |
| if (m5Reg.paging) { |
| DPRINTF(GPUTLB, "Paging enabled.\n"); |
| // The vaddr already has the segment base applied. |
| TlbEntry *entry = lookup(vaddr); |
| stats.localNumTLBAccesses++; |
| |
| if (!entry) { |
| stats.localNumTLBMisses++; |
| if (timing) { |
| latency = missLatency1; |
| } |
| |
| if (FullSystem) { |
| fatal("GpuTLB doesn't support full-system mode\n"); |
| } else { |
| DPRINTF(GPUTLB, "Handling a TLB miss for address %#x " |
| "at pc %#x.\n", vaddr, tc->instAddr()); |
| |
| Process *p = tc->getProcessPtr(); |
| const EmulationPageTable::Entry *pte = |
| p->pTable->lookup(vaddr); |
| |
| if (!pte && mode != BaseMMU::Execute) { |
| // penalize a "page fault" more |
| if (timing) |
| latency += missLatency2; |
| |
| if (p->fixupFault(vaddr)) |
| pte = p->pTable->lookup(vaddr); |
| } |
| |
| if (!pte) { |
| return std::make_shared<PageFault>(vaddr, true, |
| mode, true, |
| false); |
| } else { |
| Addr alignedVaddr = p->pTable->pageAlign(vaddr); |
| |
| DPRINTF(GPUTLB, "Mapping %#x to %#x\n", |
| alignedVaddr, pte->paddr); |
| |
| TlbEntry gpuEntry(p->pid(), alignedVaddr, |
| pte->paddr, false, false); |
| entry = insert(alignedVaddr, gpuEntry); |
| } |
| |
| DPRINTF(GPUTLB, "Miss was serviced.\n"); |
| } |
| } else { |
| stats.localNumTLBHits++; |
| |
| if (timing) { |
| latency = hitLatency; |
| } |
| } |
| |
| // Do paging protection checks. |
| bool inUser = (m5Reg.cpl == 3 && |
| !(flags & (CPL0FlagBit << FlagShift))); |
| |
| CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0); |
| bool badWrite = (!entry->writable && (inUser || cr0.wp)); |
| |
| if ((inUser && !entry->user) || (mode == BaseMMU::Write && |
| badWrite)) { |
| // The page must have been present to get into the TLB in |
| // the first place. We'll assume the reserved bits are |
| // fine even though we're not checking them. |
| return std::make_shared<PageFault>(vaddr, true, mode, |
| inUser, false); |
| } |
| |
| if (storeCheck && badWrite) { |
| // This would fault if this were a write, so return a page |
| // fault that reflects that happening. |
| return std::make_shared<PageFault>(vaddr, true, |
| BaseMMU::Write, |
| inUser, false); |
| } |
| |
| |
| DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection " |
| "checks.\n", entry->paddr); |
| |
| int page_size = entry->size(); |
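            // Combine the physical frame address with the in-page
            // offset; for a 4KB page this keeps the low 12 bits of
            // the vaddr.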
| Addr paddr = entry->paddr | (vaddr & (page_size - 1)); |
| DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr); |
| req->setPaddr(paddr); |
| |
| if (entry->uncacheable) |
| req->setFlags(Request::UNCACHEABLE); |
| } else { |
            // Use the address, which already has segmentation applied.
| DPRINTF(GPUTLB, "Paging disabled.\n"); |
| DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr); |
| req->setPaddr(vaddr); |
| } |
| } else { |
| // Real mode |
| DPRINTF(GPUTLB, "In real mode.\n"); |
| DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr); |
| req->setPaddr(vaddr); |
| } |
| |
| // Check for an access to the local APIC |
| if (FullSystem) { |
| LocalApicBase localApicBase = |
| tc->readMiscRegNoEffect(MISCREG_APIC_BASE); |
| |
| Addr baseAddr = localApicBase.base * PageBytes; |
| Addr paddr = req->getPaddr(); |
| |
| if (baseAddr <= paddr && baseAddr + PageBytes > paddr) { |
| // Force the access to be uncacheable. |
| req->setFlags(Request::UNCACHEABLE); |
| req->setPaddr(x86LocalAPICAddress(tc->contextId(), |
| paddr - baseAddr)); |
| } |
| } |
| |
| return NoFault; |
| }; |
| |
| Fault |
| GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc, |
| Mode mode, int &latency) |
| { |
| bool delayedResponse; |
| |
| return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, |
| false, latency); |
| } |
| |
| void |
| GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc, |
| Translation *translation, Mode mode, int &latency) |
| { |
| bool delayedResponse; |
| assert(translation); |
| |
| Fault fault = GpuTLB::translate(req, tc, translation, mode, |
| delayedResponse, true, latency); |
| |
| if (!delayedResponse) |
| translation->finish(fault, req, tc, mode); |
| } |
| |
| Walker* |
| GpuTLB::getWalker() |
| { |
| return walker; |
| } |
| |
| |
| void |
| GpuTLB::serialize(CheckpointOut &cp) const |
| { |
| } |
| |
| void |
| GpuTLB::unserialize(CheckpointIn &cp) |
| { |
| } |
| |
| /** |
| * Do the TLB lookup for this coalesced request and schedule |
| * another event <TLB access latency> cycles later. |
| */ |
| |
| void |
| GpuTLB::issueTLBLookup(PacketPtr pkt) |
| { |
| assert(pkt); |
| assert(pkt->senderState); |
| |
| Addr virt_page_addr = roundDown(pkt->req->getVaddr(), |
| X86ISA::PageBytes); |
| |
| TranslationState *sender_state = |
| safe_cast<TranslationState*>(pkt->senderState); |
| |
| bool update_stats = !sender_state->isPrefetch; |
| ThreadContext * tmp_tc = sender_state->tc; |
| |
| DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n", |
| virt_page_addr); |
| |
| int req_cnt = sender_state->reqCnt.back(); |
| |
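    // Cycle stats follow a subtract-now/add-on-completion scheme: the
    // issue tick is subtracted here and the completion tick is added
    // back in translationReturn(), so each stat accumulates the
    // elapsed latency (in ticks, despite the name).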
| if (update_stats) { |
| stats.accessCycles -= (curTick() * req_cnt); |
| stats.localCycles -= curTick(); |
| updatePageFootprint(virt_page_addr); |
| stats.globalNumTLBAccesses += req_cnt; |
| } |
| |
| tlbOutcome lookup_outcome = TLB_MISS; |
| const RequestPtr &tmp_req = pkt->req; |
| |
| // Access the TLB and figure out if it's a hit or a miss. |
| bool success = tlbLookup(tmp_req, tmp_tc, update_stats); |
| |
| if (success) { |
| lookup_outcome = TLB_HIT; |
| // Put the entry in SenderState |
| TlbEntry *entry = lookup(tmp_req->getVaddr(), false); |
| assert(entry); |
| |
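        // Copy the entry into the senderState rather than pointing
        // into the TLB array, so the data remains valid even if the
        // TLB entry is evicted before the response is consumed.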
| auto p = sender_state->tc->getProcessPtr(); |
| sender_state->tlbEntry = |
| new TlbEntry(p->pid(), entry->vaddr, entry->paddr, |
| false, false); |
| |
| if (update_stats) { |
| // the reqCnt has an entry per level, so its size tells us |
| // which level we are in |
| sender_state->hitLevel = sender_state->reqCnt.size(); |
| stats.globalNumTLBHits += req_cnt; |
| } |
| } else { |
| if (update_stats) |
| stats.globalNumTLBMisses += req_cnt; |
| } |
| |
| /* |
| * We now know the TLB lookup outcome (if it's a hit or a miss), as |
| * well as the TLB access latency. |
| * |
| * We create and schedule a new TLBEvent which will help us take the |
| * appropriate actions (e.g., update TLB on a hit, send request to |
| * lower level TLB on a miss, or start a page walk if this was the |
| * last-level TLB) |
| */ |
| TLBEvent *tlb_event = |
| new TLBEvent(this, virt_page_addr, lookup_outcome, pkt); |
| |
| if (translationReturnEvent.count(virt_page_addr)) { |
| panic("Virtual Page Address %#x already has a return event\n", |
| virt_page_addr); |
| } |
| |
| translationReturnEvent[virt_page_addr] = tlb_event; |
| assert(tlb_event); |
| |
| DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n", |
| curTick() + cyclesToTicks(Cycles(hitLatency))); |
| |
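    // The lookup latency (hitLatency) is charged here regardless of
    // the outcome; on a miss, additional latency accrues at the lower
    // TLB level or during the page walk.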
| schedule(tlb_event, curTick() + cyclesToTicks(Cycles(hitLatency))); |
| } |
| |
| GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, |
| tlbOutcome tlb_outcome, PacketPtr _pkt) |
| : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr), |
| outcome(tlb_outcome), pkt(_pkt) |
| { |
| } |
| |
| /** |
| * Do Paging protection checks. If we encounter a page fault, then |
| * an assertion is fired. |
| */ |
| void |
| GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, |
| TlbEntry * tlb_entry, Mode mode) |
| { |
| HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); |
| uint32_t flags = pkt->req->getFlags(); |
| bool storeCheck = flags & (StoreCheck << FlagShift); |
| |
| // Do paging protection checks. |
| bool inUser |
| = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift))); |
| CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0); |
| |
| bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp)); |
| |
| if ((inUser && !tlb_entry->user) || |
| (mode == BaseMMU::Write && badWrite)) { |
| // The page must have been present to get into the TLB in |
| // the first place. We'll assume the reserved bits are |
| // fine even though we're not checking them. |
| panic("Page fault detected"); |
| } |
| |
| if (storeCheck && badWrite) { |
| // This would fault if this were a write, so return a page |
| // fault that reflects that happening. |
| panic("Page fault detected"); |
| } |
| } |
| |
| /** |
| * handleTranslationReturn is called on a TLB hit, |
| * when a TLB miss returns or when a page fault returns. |
| * The latter calls handelHit with TLB miss as tlbOutcome. |
| */ |
| void |
| GpuTLB::handleTranslationReturn(Addr virt_page_addr, |
| tlbOutcome tlb_outcome, PacketPtr pkt) |
| { |
| assert(pkt); |
| Addr vaddr = pkt->req->getVaddr(); |
| |
| TranslationState *sender_state = |
| safe_cast<TranslationState*>(pkt->senderState); |
| |
| ThreadContext *tc = sender_state->tc; |
| Mode mode = sender_state->tlbMode; |
| |
| TlbEntry *local_entry, *new_entry; |
| |
| if (tlb_outcome == TLB_HIT) { |
| DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", |
| vaddr); |
| local_entry = sender_state->tlbEntry; |
| } else { |
| DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n", |
| vaddr); |
| |
| /** |
| * We are returning either from a page walk or from a hit at a |
| * lower TLB level. The senderState should be "carrying" a pointer |
| * to the correct TLBEntry. |
| */ |
| new_entry = sender_state->tlbEntry; |
| assert(new_entry); |
| local_entry = new_entry; |
| |
| if (allocationPolicy) { |
| DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n", |
| virt_page_addr); |
| |
| local_entry = insert(virt_page_addr, *new_entry); |
| } |
| |
| assert(local_entry); |
| } |
| |
| /** |
| * At this point the packet carries an up-to-date tlbEntry pointer |
| * in its senderState. |
| * Next step is to do the paging protection checks. |
| */ |
| DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks " |
| "while paddr was %#x.\n", local_entry->vaddr, |
| local_entry->paddr); |
| |
| pagingProtectionChecks(tc, pkt, local_entry, mode); |
| int page_size = local_entry->size(); |
| Addr paddr = local_entry->paddr | (vaddr & (page_size - 1)); |
| DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr); |
| |
| // Since this packet will be sent through the cpu side port, |
| // it must be converted to a response pkt if it is not one already |
| if (pkt->isRequest()) { |
| pkt->makeTimingResponse(); |
| } |
| |
| pkt->req->setPaddr(paddr); |
| |
| if (local_entry->uncacheable) { |
| pkt->req->setFlags(Request::UNCACHEABLE); |
| } |
| |
| //send packet back to coalescer |
| cpuSidePort[0]->sendTimingResp(pkt); |
| //schedule cleanup event |
| cleanupQueue.push(virt_page_addr); |
| |
    // Schedule the cleanup event only once per cycle, since multiple
    // translations may return in the same cycle.
    // This is a maximum-priority event and must run in the same cycle
    // as the cleanup event in TLBCoalescer to avoid a race with
    // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry.
| if (!cleanupEvent.scheduled()) |
| schedule(cleanupEvent, curTick()); |
| } |
| |
| /** |
| * Here we take the appropriate actions based on the result of the |
| * TLB lookup. |
| */ |
| void |
| GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome, |
| PacketPtr pkt) |
| { |
| DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr); |
| |
| assert(translationReturnEvent[virtPageAddr]); |
| assert(pkt); |
| |
| TranslationState *tmp_sender_state = |
| safe_cast<TranslationState*>(pkt->senderState); |
| |
| int req_cnt = tmp_sender_state->reqCnt.back(); |
| bool update_stats = !tmp_sender_state->isPrefetch; |
| |
| |
| if (outcome == TLB_HIT) { |
| handleTranslationReturn(virtPageAddr, TLB_HIT, pkt); |
| |
| if (update_stats) { |
| stats.accessCycles += (req_cnt * curTick()); |
| stats.localCycles += curTick(); |
| } |
| |
| } else if (outcome == TLB_MISS) { |
| |
| DPRINTF(GPUTLB, "This is a TLB miss\n"); |
| if (update_stats) { |
| stats.accessCycles += (req_cnt*curTick()); |
| stats.localCycles += curTick(); |
| } |
| |
| if (hasMemSidePort) { |
            // The one cycle added here represents the delay from when
            // we get the reply back to when we propagate it to the
            // coalescer above.
| if (update_stats) { |
| stats.accessCycles += (req_cnt * 1); |
| stats.localCycles += 1; |
| } |
| |
| /** |
| * There is a TLB below. Send the coalesced request. |
| * We actually send the very first packet of all the |
| * pending packets for this virtual page address. |
| */ |
| if (!memSidePort[0]->sendTimingReq(pkt)) { |
| DPRINTF(GPUTLB, "Failed sending translation request to " |
| "lower level TLB for addr %#x\n", virtPageAddr); |
| |
| memSidePort[0]->retries.push_back(pkt); |
| } else { |
| DPRINTF(GPUTLB, "Sent translation request to lower level " |
| "TLB for addr %#x\n", virtPageAddr); |
| } |
| } else { |
| //this is the last level TLB. Start a page walk |
| DPRINTF(GPUTLB, "Last level TLB - start a page walk for " |
| "addr %#x\n", virtPageAddr); |
| |
| if (update_stats) |
| stats.pageTableCycles -= (req_cnt*curTick()); |
| |
| TLBEvent *tlb_event = translationReturnEvent[virtPageAddr]; |
| assert(tlb_event); |
| tlb_event->updateOutcome(PAGE_WALK); |
| schedule(tlb_event, |
| curTick() + cyclesToTicks(Cycles(missLatency2))); |
| } |
| } else if (outcome == PAGE_WALK) { |
| if (update_stats) |
| stats.pageTableCycles += (req_cnt*curTick()); |
| |
| // Need to access the page table and update the TLB |
| DPRINTF(GPUTLB, "Doing a page walk for address %#x\n", |
| virtPageAddr); |
| |
| TranslationState *sender_state = |
| safe_cast<TranslationState*>(pkt->senderState); |
| |
| Process *p = sender_state->tc->getProcessPtr(); |
| Addr vaddr = pkt->req->getVaddr(); |
| |
| Addr alignedVaddr = p->pTable->pageAlign(vaddr); |
| assert(alignedVaddr == virtPageAddr); |
| |
| const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr); |
| if (!pte && sender_state->tlbMode != BaseMMU::Execute && |
| p->fixupFault(vaddr)) { |
| pte = p->pTable->lookup(vaddr); |
| } |
| |
| if (pte) { |
| DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr, |
| pte->paddr); |
| |
| sender_state->tlbEntry = |
| new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false, |
| false); |
| } else { |
| sender_state->tlbEntry = nullptr; |
| } |
| |
| handleTranslationReturn(virtPageAddr, TLB_MISS, pkt); |
| } else if (outcome == MISS_RETURN) { |
        /**
         * We add an extra cycle in the return path of translation
         * requests between the various TLB levels.
         */
| handleTranslationReturn(virtPageAddr, TLB_MISS, pkt); |
| } else { |
| panic("Unexpected TLB outcome %d", outcome); |
| } |
| } |
| |
| void |
| GpuTLB::TLBEvent::process() |
| { |
| tlb->translationReturn(virtPageAddr, outcome, pkt); |
| } |
| |
| const char* |
| GpuTLB::TLBEvent::description() const |
| { |
| return "trigger translationDoneEvent"; |
| } |
| |
| void |
| GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome) |
| { |
| outcome = _outcome; |
| } |
| |
| Addr |
| GpuTLB::TLBEvent::getTLBEventVaddr() |
| { |
| return virtPageAddr; |
| } |
| |
| /** |
| * recvTiming receives a coalesced timing request from a TLBCoalescer |
| * and it calls issueTLBLookup() |
| * It only rejects the packet if we have exceeded the max |
| * outstanding number of requests for the TLB |
| */ |
| bool |
| GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt) |
| { |
| if (tlb->outstandingReqs < tlb->maxCoalescedReqs) { |
| tlb->issueTLBLookup(pkt); |
| // update number of outstanding translation requests |
| tlb->outstandingReqs++; |
| return true; |
| } else { |
| DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n", |
| tlb->outstandingReqs); |
| return false; |
| } |
| } |
| |
| /** |
| * handleFuncTranslationReturn is called on a TLB hit, |
| * when a TLB miss returns or when a page fault returns. |
| * It updates LRU, inserts the TLB entry on a miss |
| * depending on the allocation policy and does the required |
| * protection checks. It does NOT create a new packet to |
| * update the packet's addr; this is done in hsail-gpu code. |
| */ |
| void |
| GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome) |
| { |
| TranslationState *sender_state = |
| safe_cast<TranslationState*>(pkt->senderState); |
| |
| ThreadContext *tc = sender_state->tc; |
| Mode mode = sender_state->tlbMode; |
| Addr vaddr = pkt->req->getVaddr(); |
| |
| TlbEntry *local_entry, *new_entry; |
| |
| if (tlb_outcome == TLB_HIT) { |
| DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr " |
| "%#x\n", vaddr); |
| |
| local_entry = sender_state->tlbEntry; |
| } else { |
| DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr " |
| "%#x\n", vaddr); |
| |
| /** |
| * We are returning either from a page walk or from a hit at a |
| * lower TLB level. The senderState should be "carrying" a pointer |
| * to the correct TLBEntry. |
| */ |
| new_entry = sender_state->tlbEntry; |
| assert(new_entry); |
| local_entry = new_entry; |
| |
| if (allocationPolicy) { |
| Addr virt_page_addr = roundDown(vaddr, X86ISA::PageBytes); |
| |
| DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n", |
| virt_page_addr); |
| |
| local_entry = insert(virt_page_addr, *new_entry); |
| } |
| |
| assert(local_entry); |
| } |
| |
| DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks " |
| "while paddr was %#x.\n", local_entry->vaddr, |
| local_entry->paddr); |
| |
| /** |
| * Do paging checks if it's a normal functional access. If it's for a |
| * prefetch, then sometimes you can try to prefetch something that |
| * won't pass protection. We don't actually want to fault becuase there |
| * is no demand access to deem this a violation. Just put it in the |
| * TLB and it will fault if indeed a future demand access touches it in |
| * violation. |
| * |
| * This feature could be used to explore security issues around |
| * speculative memory accesses. |
| */ |
| if (!sender_state->isPrefetch && sender_state->tlbEntry) |
| pagingProtectionChecks(tc, pkt, local_entry, mode); |
| |
| int page_size = local_entry->size(); |
| Addr paddr = local_entry->paddr | (vaddr & (page_size - 1)); |
| DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr); |
| |
| pkt->req->setPaddr(paddr); |
| |
| if (local_entry->uncacheable) |
| pkt->req->setFlags(Request::UNCACHEABLE); |
| } |
| |
| // This is used for atomic translations. Need to |
| // make it all happen during the same cycle. |
| void |
| GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt) |
| { |
| TranslationState *sender_state = |
| safe_cast<TranslationState*>(pkt->senderState); |
| |
| ThreadContext *tc = sender_state->tc; |
| bool update_stats = !sender_state->isPrefetch; |
| |
| Addr virt_page_addr = roundDown(pkt->req->getVaddr(), |
| X86ISA::PageBytes); |
| |
| if (update_stats) |
| tlb->updatePageFootprint(virt_page_addr); |
| |
    // do the TLB lookup; stats are updated only for non-prefetch
    // accesses
| bool success = tlb->tlbLookup(pkt->req, tc, update_stats); |
| tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS; |
| |
| // functional mode means no coalescing |
| // global metrics are the same as the local metrics |
| if (update_stats) { |
| tlb->stats.globalNumTLBAccesses++; |
| |
| if (success) { |
| sender_state->hitLevel = sender_state->reqCnt.size(); |
| tlb->stats.globalNumTLBHits++; |
| } |
| } |
| |
| if (!success) { |
| if (update_stats) |
| tlb->stats.globalNumTLBMisses++; |
| if (tlb->hasMemSidePort) { |
| // there is a TLB below -> propagate down the TLB hierarchy |
| tlb->memSidePort[0]->sendFunctional(pkt); |
| // If no valid translation from a prefetch, then just return |
| if (sender_state->isPrefetch && !pkt->req->hasPaddr()) |
| return; |
| } else { |
| // Need to access the page table and update the TLB |
| DPRINTF(GPUTLB, "Doing a page walk for address %#x\n", |
| virt_page_addr); |
| |
| Process *p = tc->getProcessPtr(); |
| |
| Addr vaddr = pkt->req->getVaddr(); |
| |
| Addr alignedVaddr = p->pTable->pageAlign(vaddr); |
| assert(alignedVaddr == virt_page_addr); |
| |
| const EmulationPageTable::Entry *pte = |
| p->pTable->lookup(vaddr); |
| if (!pte && sender_state->tlbMode != BaseMMU::Execute && |
| p->fixupFault(vaddr)) { |
| pte = p->pTable->lookup(vaddr); |
| } |
| |
| if (!sender_state->isPrefetch) { |
| // no PageFaults are permitted after |
| // the second page table lookup |
| assert(pte); |
| |
| DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr, |
| pte->paddr); |
| |
| sender_state->tlbEntry = |
| new TlbEntry(p->pid(), virt_page_addr, |
| pte->paddr, false, false); |
| } else { |
| // If this was a prefetch, then do the normal thing if it |
| // was a successful translation. Otherwise, send an empty |
| // TLB entry back so that it can be figured out as empty |
| // and handled accordingly. |
| if (pte) { |
| DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr, |
| pte->paddr); |
| |
| sender_state->tlbEntry = |
| new TlbEntry(p->pid(), virt_page_addr, |
| pte->paddr, false, false); |
| } else { |
| DPRINTF(GPUPrefetch, "Prefetch failed %#x\n", |
| alignedVaddr); |
| |
| sender_state->tlbEntry = nullptr; |
| |
| return; |
| } |
| } |
| } |
    } else {
        DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                pkt->req->getVaddr());

        TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
                                      update_stats);
| |
| assert(entry); |
| |
| auto p = sender_state->tc->getProcessPtr(); |
| sender_state->tlbEntry = |
| new TlbEntry(p->pid(), entry->vaddr, entry->paddr, |
| false, false); |
| } |
    // This is the function that would populate pkt->req with the
    // paddr of the translation. But if no translation happens (i.e.,
    // a prefetch fails), the early returns in the code above will
    // keep this function from executing.
| tlb->handleFuncTranslationReturn(pkt, tlb_outcome); |
| } |
| |
| void |
| GpuTLB::CpuSidePort::recvReqRetry() |
| { |
| // The CPUSidePort never sends anything but replies. No retries |
| // expected. |
| panic("recvReqRetry called"); |
| } |
| |
| AddrRangeList |
| GpuTLB::CpuSidePort::getAddrRanges() const |
| { |
| // currently not checked by the requestor |
| AddrRangeList ranges; |
| |
| return ranges; |
| } |
| |
| /** |
| * MemSidePort receives the packet back. |
| * We need to call the handleTranslationReturn |
| * and propagate up the hierarchy. |
| */ |
| bool |
| GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt) |
| { |
| Addr virt_page_addr = roundDown(pkt->req->getVaddr(), |
| X86ISA::PageBytes); |
| |
| DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n", |
| virt_page_addr); |
| |
| TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr]; |
| assert(tlb_event); |
| assert(virt_page_addr == tlb_event->getTLBEventVaddr()); |
| |
| tlb_event->updateOutcome(MISS_RETURN); |
| tlb->schedule(tlb_event, curTick()+tlb->clockPeriod()); |
| |
| return true; |
| } |
| |
| void |
| GpuTLB::MemSidePort::recvReqRetry() |
| { |
| // No retries should reach the TLB. The retries |
| // should only reach the TLBCoalescer. |
| panic("recvReqRetry called"); |
| } |
| |
| void |
| GpuTLB::cleanup() |
| { |
| while (!cleanupQueue.empty()) { |
| Addr cleanup_addr = cleanupQueue.front(); |
| cleanupQueue.pop(); |
| |
| // delete TLBEvent |
| TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr]; |
| delete old_tlb_event; |
| translationReturnEvent.erase(cleanup_addr); |
| |
| // update number of outstanding requests |
| outstandingReqs--; |
| } |
| |
    /** The higher-level coalescer should retry if it has
     * any pending requests.
     */
| for (int i = 0; i < cpuSidePort.size(); ++i) { |
| cpuSidePort[i]->sendRetryReq(); |
| } |
| } |
| |
| void |
| GpuTLB::updatePageFootprint(Addr virt_page_addr) |
| { |
| |
| std::pair<AccessPatternTable::iterator, bool> ret; |
| |
| AccessInfo tmp_access_info; |
| tmp_access_info.lastTimeAccessed = 0; |
| tmp_access_info.accessesPerPage = 0; |
| tmp_access_info.totalReuseDistance = 0; |
| tmp_access_info.sumDistance = 0; |
| tmp_access_info.meanDistance = 0; |
| |
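    // Try to insert a fresh record for this page; if the page is
    // already tracked, the insert() call keeps the existing record
    // and sets ret.second to false.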
| ret = TLBFootprint.insert( |
| AccessPatternTable::value_type(virt_page_addr, tmp_access_info)); |
| |
| bool first_page_access = ret.second; |
| |
| if (first_page_access) { |
| stats.numUniquePages++; |
| } else { |
| int accessed_before; |
| accessed_before = curTick() - ret.first->second.lastTimeAccessed; |
| ret.first->second.totalReuseDistance += accessed_before; |
| } |
| |
| ret.first->second.accessesPerPage++; |
| ret.first->second.lastTimeAccessed = curTick(); |
| |
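    // When access-distance profiling is enabled, record the current
    // value of the local access counter; exitCallback() later turns
    // these samples into per-access distances.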
| if (accessDistance) { |
| ret.first->second.localTLBAccesses |
| .push_back(stats.localNumTLBAccesses.value()); |
| } |
| } |
| |
| void |
| GpuTLB::exitCallback() |
| { |
| std::ostream *page_stat_file = nullptr; |
| |
| if (accessDistance) { |
| |
        // print per-page statistics to a separate file (.csv format)
        // simout is the gem5 output directory (default is m5out, or
        // the one specified with -d)
        page_stat_file = simout.create(name().c_str())->stream();

        // print header
        *page_stat_file
            << "page,max_access_distance,mean_access_distance,"
            << "stddev_distance" << std::endl;
| } |
| |
| // update avg. reuse distance footprint |
| unsigned int sum_avg_reuse_distance_per_page = 0; |
| |
| // iterate through all pages seen by this TLB |
| for (auto &iter : TLBFootprint) { |
| sum_avg_reuse_distance_per_page += iter.second.totalReuseDistance / |
| iter.second.accessesPerPage; |
| |
| if (accessDistance) { |
| unsigned int tmp = iter.second.localTLBAccesses[0]; |
| unsigned int prev = tmp; |
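
            // localTLBAccesses[i] holds the value of the local access
            // counter at the i-th access to this page; rewriting each
            // sample as the difference from the previous sample, minus
            // one, gives the number of intervening TLB accesses.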
| |
| for (int i = 0; i < iter.second.localTLBAccesses.size(); ++i) { |
| if (i) { |
| tmp = prev + 1; |
| } |
| |
| prev = iter.second.localTLBAccesses[i]; |
                // update the localTLBAccesses value
                // with the actual difference
| iter.second.localTLBAccesses[i] -= tmp; |
| // compute the sum of AccessDistance per page |
| // used later for mean |
| iter.second.sumDistance += |
| iter.second.localTLBAccesses[i]; |
| } |
| |
| iter.second.meanDistance = |
| iter.second.sumDistance / iter.second.accessesPerPage; |
| |
            // compute std_dev and max (we need a second round because
            // we need to know the mean value)
| unsigned int max_distance = 0; |
| unsigned int stddev_distance = 0; |
| |
| for (int i = 0; i < iter.second.localTLBAccesses.size(); ++i) { |
| unsigned int tmp_access_distance = |
| iter.second.localTLBAccesses[i]; |
| |
| if (tmp_access_distance > max_distance) { |
| max_distance = tmp_access_distance; |
| } |
| |
                // use a signed difference to avoid unsigned underflow
                // when the sample is below the mean
                int diff = static_cast<int>(tmp_access_distance) -
                    static_cast<int>(iter.second.meanDistance);
                stddev_distance += diff * diff;
| |
| } |
| |
| stddev_distance = |
| sqrt(stddev_distance/iter.second.accessesPerPage); |
| |
| if (page_stat_file) { |
| *page_stat_file << std::hex << iter.first << ","; |
| *page_stat_file << std::dec << max_distance << ","; |
| *page_stat_file << std::dec << iter.second.meanDistance |
| << ","; |
| *page_stat_file << std::dec << stddev_distance; |
| *page_stat_file << std::endl; |
| } |
| |
| // erase the localTLBAccesses array |
| iter.second.localTLBAccesses.clear(); |
| } |
| } |
| |
| if (!TLBFootprint.empty()) { |
| stats.avgReuseDistance = |
| sum_avg_reuse_distance_per_page / TLBFootprint.size(); |
| } |
| |
| //clear the TLBFootprint map |
| TLBFootprint.clear(); |
| } |
| |
| GpuTLB::GpuTLBStats::GpuTLBStats(statistics::Group *parent) |
| : statistics::Group(parent), |
| ADD_STAT(localNumTLBAccesses, "Number of TLB accesses"), |
| ADD_STAT(localNumTLBHits, "Number of TLB hits"), |
| ADD_STAT(localNumTLBMisses, "Number of TLB misses"), |
| ADD_STAT(localTLBMissRate, "TLB miss rate"), |
| ADD_STAT(globalNumTLBAccesses, "Number of TLB accesses"), |
| ADD_STAT(globalNumTLBHits, "Number of TLB hits"), |
| ADD_STAT(globalNumTLBMisses, "Number of TLB misses"), |
| ADD_STAT(globalTLBMissRate, "TLB miss rate"), |
| ADD_STAT(accessCycles, "Cycles spent accessing this TLB level"), |
| ADD_STAT(pageTableCycles, "Cycles spent accessing the page table"), |
| ADD_STAT(numUniquePages, "Number of unique pages touched"), |
| ADD_STAT(localCycles, "Number of cycles spent in queue for all " |
| "incoming reqs"), |
| ADD_STAT(localLatency, "Avg. latency over incoming coalesced reqs"), |
| ADD_STAT(avgReuseDistance, "avg. reuse distance over all pages (in " |
| "ticks)") |
| { |
| localLatency = localCycles / localNumTLBAccesses; |
| |
| localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses; |
| globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses; |
| } |
| } // namespace X86ISA |
| } // namespace gem5 |