| /* |
| * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. |
| * All rights reserved. |
| * |
| * For use for simulation and test purposes only |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * 3. Neither the name of the copyright holder nor the names of its contributors |
| * may be used to endorse or promote products derived from this software |
| * without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| * POSSIBILITY OF SUCH DAMAGE. |
| * |
| * Author: Lisa Hsu |
| */ |
| |
| #include "gpu-compute/gpu_tlb.hh" |
| |
| #include <cmath> |
| #include <cstring> |
| |
| #include "arch/x86/faults.hh" |
| #include "arch/x86/insts/microldstop.hh" |
| #include "arch/x86/pagetable.hh" |
| #include "arch/x86/pagetable_walker.hh" |
| #include "arch/x86/regs/misc.hh" |
| #include "arch/x86/x86_traits.hh" |
| #include "base/bitfield.hh" |
| #include "base/output.hh" |
| #include "base/trace.hh" |
| #include "cpu/base.hh" |
| #include "cpu/thread_context.hh" |
| #include "debug/GPUPrefetch.hh" |
| #include "debug/GPUTLB.hh" |
| #include "mem/packet_access.hh" |
| #include "mem/page_table.hh" |
| #include "mem/request.hh" |
| #include "sim/process.hh" |
| |
| namespace X86ISA |
| { |
| |
| GpuTLB::GpuTLB(const Params *p) |
| : MemObject(p), configAddress(0), size(p->size), |
| cleanupEvent([this]{ cleanup(); }, name(), false, |
| Event::Maximum_Pri), |
| exitEvent([this]{ exitCallback(); }, name()) |
| { |
| assoc = p->assoc; |
| assert(assoc <= size); |
| numSets = size/assoc; |
| allocationPolicy = p->allocationPolicy; |
| hasMemSidePort = false; |
| accessDistance = p->accessDistance; |
| clock = p->clk_domain->clockPeriod(); |
| |
| tlb.assign(size, GpuTlbEntry()); |
| |
| freeList.resize(numSets); |
| entryList.resize(numSets); |
| |
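| // carve the flat tlb vector into sets; every way of every set |
| // starts out on the free list |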
| for (int set = 0; set < numSets; ++set) { |
| for (int way = 0; way < assoc; ++way) { |
| int x = set * assoc + way; |
| freeList[set].push_back(&tlb.at(x)); |
| } |
| } |
| |
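| // the TLB is fully associative when there is only one set |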
| FA = (size == assoc); |
| |
| /** |
| * @warning: the set-associative version assumes you have a |
| * fixed page size of 4KB. |
| * If the page size is greater than 4KB (as defined by |
| * TheISA::PageBytes), then there are various issues with the current |
| * implementation (e.g., you'd have the same 8KB page replicated in |
| * different sets, etc.) |
| */ |
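| // assumes numSets is a power of two |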
| setMask = numSets - 1; |
| |
| #if 0 |
| // GpuTLB doesn't yet support full system |
| walker = p->walker; |
| walker->setTLB(this); |
| #endif |
| |
| maxCoalescedReqs = p->maxOutstandingReqs; |
| |
| // Do not allow maxCoalescedReqs to be more than the TLB associativity |
| if (maxCoalescedReqs > assoc) { |
| maxCoalescedReqs = assoc; |
| cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc); |
| } |
| |
| outstandingReqs = 0; |
| hitLatency = p->hitLatency; |
| missLatency1 = p->missLatency1; |
| missLatency2 = p->missLatency2; |
| |
| // create the slave ports based on the number of connected ports |
| for (size_t i = 0; i < p->port_slave_connection_count; ++i) { |
| cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d", |
| name(), i), this, i)); |
| } |
| |
| // create the master ports based on the number of connected ports |
| for (size_t i = 0; i < p->port_master_connection_count; ++i) { |
| memSidePort.push_back(new MemSidePort(csprintf("%s-port%d", |
| name(), i), this, i)); |
| } |
| } |
| |
| // fixme: this is never called? |
| GpuTLB::~GpuTLB() |
| { |
| // make sure all the hash-maps are empty |
| assert(translationReturnEvent.empty()); |
| } |
| |
| BaseSlavePort& |
| GpuTLB::getSlavePort(const std::string &if_name, PortID idx) |
| { |
| if (if_name == "slave") { |
| if (idx >= static_cast<PortID>(cpuSidePort.size())) { |
| panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx); |
| } |
| |
| return *cpuSidePort[idx]; |
| } else { |
| panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name); |
| } |
| } |
| |
| BaseMasterPort& |
| GpuTLB::getMasterPort(const std::string &if_name, PortID idx) |
| { |
| if (if_name == "master") { |
| if (idx >= static_cast<PortID>(memSidePort.size())) { |
| panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx); |
| } |
| |
| hasMemSidePort = true; |
| |
| return *memSidePort[idx]; |
| } else { |
| panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name); |
| } |
| } |
| |
| GpuTlbEntry* |
| GpuTLB::insert(Addr vpn, GpuTlbEntry &entry) |
| { |
| GpuTlbEntry *newEntry = nullptr; |
| |
| /** |
| * vpn holds the virtual page address |
| * The least significant bits are simply masked |
| */ |
| int set = (vpn >> TheISA::PageShift) & setMask; |
| |
| if (!freeList[set].empty()) { |
| newEntry = freeList[set].front(); |
| freeList[set].pop_front(); |
| } else { |
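| // no free entry in this set: evict the LRU entry, which |
| // lookupIt maintains at the back of entryList |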
| newEntry = entryList[set].back(); |
| entryList[set].pop_back(); |
| } |
| |
| *newEntry = entry; |
| newEntry->vaddr = vpn; |
| entryList[set].push_front(newEntry); |
| |
| return newEntry; |
| } |
| |
| GpuTLB::EntryList::iterator |
| GpuTLB::lookupIt(Addr va, bool update_lru) |
| { |
| int set = (va >> TheISA::PageShift) & setMask; |
| |
| if (FA) { |
| assert(!set); |
| } |
| |
| auto entry = entryList[set].begin(); |
| for (; entry != entryList[set].end(); ++entry) { |
| int page_size = (*entry)->size(); |
| |
| if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) { |
| DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x " |
| "with size %#x.\n", va, (*entry)->vaddr, page_size); |
| |
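| // on a hit, promote the matched entry to the MRU (front) position |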
| if (update_lru) { |
| entryList[set].push_front(*entry); |
| entryList[set].erase(entry); |
| entry = entryList[set].begin(); |
| } |
| |
| break; |
| } |
| } |
| |
| return entry; |
| } |
| |
| GpuTlbEntry* |
| GpuTLB::lookup(Addr va, bool update_lru) |
| { |
| int set = (va >> TheISA::PageShift) & setMask; |
| |
| auto entry = lookupIt(va, update_lru); |
| |
| if (entry == entryList[set].end()) |
| return nullptr; |
| else |
| return *entry; |
| } |
| |
| void |
| GpuTLB::invalidateAll() |
| { |
| DPRINTF(GPUTLB, "Invalidating all entries.\n"); |
| |
| for (int i = 0; i < numSets; ++i) { |
| while (!entryList[i].empty()) { |
| GpuTlbEntry *entry = entryList[i].front(); |
| entryList[i].pop_front(); |
| freeList[i].push_back(entry); |
| } |
| } |
| } |
| |
| void |
| GpuTLB::setConfigAddress(uint32_t addr) |
| { |
| configAddress = addr; |
| } |
| |
| void |
| GpuTLB::invalidateNonGlobal() |
| { |
| DPRINTF(GPUTLB, "Invalidating all non global entries.\n"); |
| |
| for (int i = 0; i < numSets; ++i) { |
| for (auto entryIt = entryList[i].begin(); |
| entryIt != entryList[i].end();) { |
| if (!(*entryIt)->global) { |
| freeList[i].push_back(*entryIt); |
| entryList[i].erase(entryIt++); |
| } else { |
| ++entryIt; |
| } |
| } |
| } |
| } |
| |
| void |
| GpuTLB::demapPage(Addr va, uint64_t asn) |
| { |
| int set = (va >> TheISA::PageShift) & setMask; |
| auto entry = lookupIt(va, false); |
| |
| if (entry != entryList[set].end()) { |
| freeList[set].push_back(*entry); |
| entryList[set].erase(entry); |
| } |
| } |
| |
| Fault |
| GpuTLB::translateInt(RequestPtr req, ThreadContext *tc) |
| { |
| DPRINTF(GPUTLB, "Addresses references internal memory.\n"); |
| Addr vaddr = req->getVaddr(); |
| Addr prefix = (vaddr >> 3) & IntAddrPrefixMask; |
| |
| if (prefix == IntAddrPrefixCPUID) { |
| panic("CPUID memory space not yet implemented!\n"); |
| } else if (prefix == IntAddrPrefixMSR) { |
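| // MSR pseudo-addresses are the MSR number shifted left by 3, |
| // so undo the scaling before decoding the register number |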
| vaddr = vaddr >> 3; |
| req->setFlags(Request::MMAPPED_IPR); |
| Addr regNum = 0; |
| |
| switch (vaddr & ~IntAddrPrefixMask) { |
| case 0x10: |
| regNum = MISCREG_TSC; |
| break; |
| case 0x1B: |
| regNum = MISCREG_APIC_BASE; |
| break; |
| case 0xFE: |
| regNum = MISCREG_MTRRCAP; |
| break; |
| case 0x174: |
| regNum = MISCREG_SYSENTER_CS; |
| break; |
| case 0x175: |
| regNum = MISCREG_SYSENTER_ESP; |
| break; |
| case 0x176: |
| regNum = MISCREG_SYSENTER_EIP; |
| break; |
| case 0x179: |
| regNum = MISCREG_MCG_CAP; |
| break; |
| case 0x17A: |
| regNum = MISCREG_MCG_STATUS; |
| break; |
| case 0x17B: |
| regNum = MISCREG_MCG_CTL; |
| break; |
| case 0x1D9: |
| regNum = MISCREG_DEBUG_CTL_MSR; |
| break; |
| case 0x1DB: |
| regNum = MISCREG_LAST_BRANCH_FROM_IP; |
| break; |
| case 0x1DC: |
| regNum = MISCREG_LAST_BRANCH_TO_IP; |
| break; |
| case 0x1DD: |
| regNum = MISCREG_LAST_EXCEPTION_FROM_IP; |
| break; |
| case 0x1DE: |
| regNum = MISCREG_LAST_EXCEPTION_TO_IP; |
| break; |
| case 0x200: |
| regNum = MISCREG_MTRR_PHYS_BASE_0; |
| break; |
| case 0x201: |
| regNum = MISCREG_MTRR_PHYS_MASK_0; |
| break; |
| case 0x202: |
| regNum = MISCREG_MTRR_PHYS_BASE_1; |
| break; |
| case 0x203: |
| regNum = MISCREG_MTRR_PHYS_MASK_1; |
| break; |
| case 0x204: |
| regNum = MISCREG_MTRR_PHYS_BASE_2; |
| break; |
| case 0x205: |
| regNum = MISCREG_MTRR_PHYS_MASK_2; |
| break; |
| case 0x206: |
| regNum = MISCREG_MTRR_PHYS_BASE_3; |
| break; |
| case 0x207: |
| regNum = MISCREG_MTRR_PHYS_MASK_3; |
| break; |
| case 0x208: |
| regNum = MISCREG_MTRR_PHYS_BASE_4; |
| break; |
| case 0x209: |
| regNum = MISCREG_MTRR_PHYS_MASK_4; |
| break; |
| case 0x20A: |
| regNum = MISCREG_MTRR_PHYS_BASE_5; |
| break; |
| case 0x20B: |
| regNum = MISCREG_MTRR_PHYS_MASK_5; |
| break; |
| case 0x20C: |
| regNum = MISCREG_MTRR_PHYS_BASE_6; |
| break; |
| case 0x20D: |
| regNum = MISCREG_MTRR_PHYS_MASK_6; |
| break; |
| case 0x20E: |
| regNum = MISCREG_MTRR_PHYS_BASE_7; |
| break; |
| case 0x20F: |
| regNum = MISCREG_MTRR_PHYS_MASK_7; |
| break; |
| case 0x250: |
| regNum = MISCREG_MTRR_FIX_64K_00000; |
| break; |
| case 0x258: |
| regNum = MISCREG_MTRR_FIX_16K_80000; |
| break; |
| case 0x259: |
| regNum = MISCREG_MTRR_FIX_16K_A0000; |
| break; |
| case 0x268: |
| regNum = MISCREG_MTRR_FIX_4K_C0000; |
| break; |
| case 0x269: |
| regNum = MISCREG_MTRR_FIX_4K_C8000; |
| break; |
| case 0x26A: |
| regNum = MISCREG_MTRR_FIX_4K_D0000; |
| break; |
| case 0x26B: |
| regNum = MISCREG_MTRR_FIX_4K_D8000; |
| break; |
| case 0x26C: |
| regNum = MISCREG_MTRR_FIX_4K_E0000; |
| break; |
| case 0x26D: |
| regNum = MISCREG_MTRR_FIX_4K_E8000; |
| break; |
| case 0x26E: |
| regNum = MISCREG_MTRR_FIX_4K_F0000; |
| break; |
| case 0x26F: |
| regNum = MISCREG_MTRR_FIX_4K_F8000; |
| break; |
| case 0x277: |
| regNum = MISCREG_PAT; |
| break; |
| case 0x2FF: |
| regNum = MISCREG_DEF_TYPE; |
| break; |
| case 0x400: |
| regNum = MISCREG_MC0_CTL; |
| break; |
| case 0x404: |
| regNum = MISCREG_MC1_CTL; |
| break; |
| case 0x408: |
| regNum = MISCREG_MC2_CTL; |
| break; |
| case 0x40C: |
| regNum = MISCREG_MC3_CTL; |
| break; |
| case 0x410: |
| regNum = MISCREG_MC4_CTL; |
| break; |
| case 0x414: |
| regNum = MISCREG_MC5_CTL; |
| break; |
| case 0x418: |
| regNum = MISCREG_MC6_CTL; |
| break; |
| case 0x41C: |
| regNum = MISCREG_MC7_CTL; |
| break; |
| case 0x401: |
| regNum = MISCREG_MC0_STATUS; |
| break; |
| case 0x405: |
| regNum = MISCREG_MC1_STATUS; |
| break; |
| case 0x409: |
| regNum = MISCREG_MC2_STATUS; |
| break; |
| case 0x40D: |
| regNum = MISCREG_MC3_STATUS; |
| break; |
| case 0x411: |
| regNum = MISCREG_MC4_STATUS; |
| break; |
| case 0x415: |
| regNum = MISCREG_MC5_STATUS; |
| break; |
| case 0x419: |
| regNum = MISCREG_MC6_STATUS; |
| break; |
| case 0x41D: |
| regNum = MISCREG_MC7_STATUS; |
| break; |
| case 0x402: |
| regNum = MISCREG_MC0_ADDR; |
| break; |
| case 0x406: |
| regNum = MISCREG_MC1_ADDR; |
| break; |
| case 0x40A: |
| regNum = MISCREG_MC2_ADDR; |
| break; |
| case 0x40E: |
| regNum = MISCREG_MC3_ADDR; |
| break; |
| case 0x412: |
| regNum = MISCREG_MC4_ADDR; |
| break; |
| case 0x416: |
| regNum = MISCREG_MC5_ADDR; |
| break; |
| case 0x41A: |
| regNum = MISCREG_MC6_ADDR; |
| break; |
| case 0x41E: |
| regNum = MISCREG_MC7_ADDR; |
| break; |
| case 0x403: |
| regNum = MISCREG_MC0_MISC; |
| break; |
| case 0x407: |
| regNum = MISCREG_MC1_MISC; |
| break; |
| case 0x40B: |
| regNum = MISCREG_MC2_MISC; |
| break; |
| case 0x40F: |
| regNum = MISCREG_MC3_MISC; |
| break; |
| case 0x413: |
| regNum = MISCREG_MC4_MISC; |
| break; |
| case 0x417: |
| regNum = MISCREG_MC5_MISC; |
| break; |
| case 0x41B: |
| regNum = MISCREG_MC6_MISC; |
| break; |
| case 0x41F: |
| regNum = MISCREG_MC7_MISC; |
| break; |
| case 0xC0000080: |
| regNum = MISCREG_EFER; |
| break; |
| case 0xC0000081: |
| regNum = MISCREG_STAR; |
| break; |
| case 0xC0000082: |
| regNum = MISCREG_LSTAR; |
| break; |
| case 0xC0000083: |
| regNum = MISCREG_CSTAR; |
| break; |
| case 0xC0000084: |
| regNum = MISCREG_SF_MASK; |
| break; |
| case 0xC0000100: |
| regNum = MISCREG_FS_BASE; |
| break; |
| case 0xC0000101: |
| regNum = MISCREG_GS_BASE; |
| break; |
| case 0xC0000102: |
| regNum = MISCREG_KERNEL_GS_BASE; |
| break; |
| case 0xC0000103: |
| regNum = MISCREG_TSC_AUX; |
| break; |
| case 0xC0010000: |
| regNum = MISCREG_PERF_EVT_SEL0; |
| break; |
| case 0xC0010001: |
| regNum = MISCREG_PERF_EVT_SEL1; |
| break; |
| case 0xC0010002: |
| regNum = MISCREG_PERF_EVT_SEL2; |
| break; |
| case 0xC0010003: |
| regNum = MISCREG_PERF_EVT_SEL3; |
| break; |
| case 0xC0010004: |
| regNum = MISCREG_PERF_EVT_CTR0; |
| break; |
| case 0xC0010005: |
| regNum = MISCREG_PERF_EVT_CTR1; |
| break; |
| case 0xC0010006: |
| regNum = MISCREG_PERF_EVT_CTR2; |
| break; |
| case 0xC0010007: |
| regNum = MISCREG_PERF_EVT_CTR3; |
| break; |
| case 0xC0010010: |
| regNum = MISCREG_SYSCFG; |
| break; |
| case 0xC0010016: |
| regNum = MISCREG_IORR_BASE0; |
| break; |
| case 0xC0010017: |
| regNum = MISCREG_IORR_BASE1; |
| break; |
| case 0xC0010018: |
| regNum = MISCREG_IORR_MASK0; |
| break; |
| case 0xC0010019: |
| regNum = MISCREG_IORR_MASK1; |
| break; |
| case 0xC001001A: |
| regNum = MISCREG_TOP_MEM; |
| break; |
| case 0xC001001D: |
| regNum = MISCREG_TOP_MEM2; |
| break; |
| case 0xC0010114: |
| regNum = MISCREG_VM_CR; |
| break; |
| case 0xC0010115: |
| regNum = MISCREG_IGNNE; |
| break; |
| case 0xC0010116: |
| regNum = MISCREG_SMM_CTL; |
| break; |
| case 0xC0010117: |
| regNum = MISCREG_VM_HSAVE_PA; |
| break; |
| default: |
| return std::make_shared<GeneralProtection>(0); |
| } |
| //The index is multiplied by the size of a MiscReg so that |
| //any memory dependence calculations will not see these as |
| //overlapping. |
| req->setPaddr(regNum * sizeof(MiscReg)); |
| return NoFault; |
| } else if (prefix == IntAddrPrefixIO) { |
| // TODO If CPL > IOPL or in virtual mode, check the I/O permission |
| // bitmap in the TSS. |
| |
| Addr IOPort = vaddr & ~IntAddrPrefixMask; |
| // Make sure the address fits in the expected 16 bit IO address |
| // space. |
| assert(!(IOPort & ~0xFFFF)); |
| |
| if (IOPort == 0xCF8 && req->getSize() == 4) { |
| req->setFlags(Request::MMAPPED_IPR); |
| req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg)); |
| } else if ((IOPort & ~mask(2)) == 0xCFC) { |
| req->setFlags(Request::UNCACHEABLE); |
| |
| Addr configAddress = |
| tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS); |
| |
| if (bits(configAddress, 31, 31)) { |
| req->setPaddr(PhysAddrPrefixPciConfig | |
| mbits(configAddress, 30, 2) | |
| (IOPort & mask(2))); |
| } else { |
| req->setPaddr(PhysAddrPrefixIO | IOPort); |
| } |
| } else { |
| req->setFlags(Request::UNCACHEABLE); |
| req->setPaddr(PhysAddrPrefixIO | IOPort); |
| } |
| return NoFault; |
| } else { |
| panic("Access to unrecognized internal address space %#x.\n", |
| prefix); |
| } |
| } |
| |
| /** |
| * tlbLookup only performs a TLB lookup, returning true on a TLB hit |
| * and false on a TLB miss. |
| * Many of the checks about different modes have been converted to |
| * assertions, since those parts of the code are not really used. |
| * On a hit it updates the LRU stack. |
| */ |
| bool |
| GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats) |
| { |
| bool tlb_hit = false; |
| #ifndef NDEBUG |
| uint32_t flags = req->getFlags(); |
| int seg = flags & SegmentFlagMask; |
| #endif |
| |
| assert(seg != SEGMENT_REG_MS); |
| Addr vaddr = req->getVaddr(); |
| DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr); |
| HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); |
| |
| if (m5Reg.prot) { |
| DPRINTF(GPUTLB, "In protected mode.\n"); |
| // make sure we are in 64-bit mode |
| assert(m5Reg.mode == LongMode); |
| |
| // If paging is enabled, do the translation. |
| if (m5Reg.paging) { |
| DPRINTF(GPUTLB, "Paging enabled.\n"); |
| //update LRU stack on a hit |
| GpuTlbEntry *entry = lookup(vaddr, true); |
| |
| if (entry) |
| tlb_hit = true; |
| |
| if (!update_stats) { |
| // functional tlb access for memory initialization |
| // i.e., memory seeding or instr. seeding -> don't update |
| // TLB and stats |
| return tlb_hit; |
| } |
| |
| localNumTLBAccesses++; |
| |
| if (!entry) { |
| localNumTLBMisses++; |
| } else { |
| localNumTLBHits++; |
| } |
| } |
| } |
| |
| return tlb_hit; |
| } |
| |
| Fault |
| GpuTLB::translate(RequestPtr req, ThreadContext *tc, |
| Translation *translation, Mode mode, |
| bool &delayedResponse, bool timing, int &latency) |
| { |
| uint32_t flags = req->getFlags(); |
| int seg = flags & SegmentFlagMask; |
| bool storeCheck = flags & (StoreCheck << FlagShift); |
| |
| // If this is true, we're dealing with a request |
| // to a non-memory address space. |
| if (seg == SEGMENT_REG_MS) { |
| return translateInt(req, tc); |
| } |
| |
| delayedResponse = false; |
| Addr vaddr = req->getVaddr(); |
| DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr); |
| |
| HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); |
| |
| // If protected mode has been enabled... |
| if (m5Reg.prot) { |
| DPRINTF(GPUTLB, "In protected mode.\n"); |
| // If we're not in 64-bit mode, do protection/limit checks |
| if (m5Reg.mode != LongMode) { |
| DPRINTF(GPUTLB, "Not in long mode. Checking segment " |
| "protection.\n"); |
| |
| // Check for a null segment selector. |
| if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR || |
| seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS) |
| && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) { |
| return std::make_shared<GeneralProtection>(0); |
| } |
| |
| bool expandDown = false; |
| SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg)); |
| |
| if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) { |
| if (!attr.writable && (mode == BaseTLB::Write || |
| storeCheck)) |
| return std::make_shared<GeneralProtection>(0); |
| |
| if (!attr.readable && mode == BaseTLB::Read) |
| return std::make_shared<GeneralProtection>(0); |
| |
| expandDown = attr.expandDown; |
| |
| } |
| |
| Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg)); |
| Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg)); |
| // This assumes we're not in 64 bit mode. If we were, the |
| // default address size is 64 bits, overridable to 32. |
| int size = 32; |
| bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift)); |
| SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR); |
| |
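| // an address-size override prefix toggles between 16- and 32-bit |
| // addressing relative to the default set by CS.D |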
| if ((csAttr.defaultSize && sizeOverride) || |
| (!csAttr.defaultSize && !sizeOverride)) { |
| size = 16; |
| } |
| |
| Addr offset = bits(vaddr - base, size - 1, 0); |
| Addr endOffset = offset + req->getSize() - 1; |
| |
| if (expandDown) { |
| DPRINTF(GPUTLB, "Checking an expand down segment.\n"); |
| warn_once("Expand down segments are untested.\n"); |
| |
| if (offset <= limit || endOffset <= limit) |
| return std::make_shared<GeneralProtection>(0); |
| } else { |
| if (offset > limit || endOffset > limit) |
| return std::make_shared<GeneralProtection>(0); |
| } |
| } |
| |
| // If paging is enabled, do the translation. |
| if (m5Reg.paging) { |
| DPRINTF(GPUTLB, "Paging enabled.\n"); |
| // The vaddr already has the segment base applied. |
| GpuTlbEntry *entry = lookup(vaddr); |
| localNumTLBAccesses++; |
| |
| if (!entry) { |
| localNumTLBMisses++; |
| if (timing) { |
| latency = missLatency1; |
| } |
| |
| if (FullSystem) { |
| fatal("GpuTLB doesn't support full-system mode\n"); |
| } else { |
| DPRINTF(GPUTLB, "Handling a TLB miss for address %#x " |
| "at pc %#x.\n", vaddr, tc->instAddr()); |
| |
| Process *p = tc->getProcessPtr(); |
| const EmulationPageTable::Entry *pte = |
| p->pTable->lookup(vaddr); |
| |
| if (!pte && mode != BaseTLB::Execute) { |
| // penalize a "page fault" more |
| if (timing) |
| latency += missLatency2; |
| |
| if (p->fixupStackFault(vaddr)) |
| pte = p->pTable->lookup(vaddr); |
| } |
| |
| if (!pte) { |
| return std::make_shared<PageFault>(vaddr, true, |
| mode, true, |
| false); |
| } else { |
| Addr alignedVaddr = p->pTable->pageAlign(vaddr); |
| |
| DPRINTF(GPUTLB, "Mapping %#x to %#x\n", |
| alignedVaddr, pte->paddr); |
| |
| GpuTlbEntry gpuEntry( |
| p->pTable->pid(), alignedVaddr, |
| pte->paddr, true); |
| entry = insert(alignedVaddr, gpuEntry); |
| } |
| |
| DPRINTF(GPUTLB, "Miss was serviced.\n"); |
| } |
| } else { |
| localNumTLBHits++; |
| |
| if (timing) { |
| latency = hitLatency; |
| } |
| } |
| |
| // Do paging protection checks. |
| bool inUser = (m5Reg.cpl == 3 && |
| !(flags & (CPL0FlagBit << FlagShift))); |
| |
| CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0); |
| bool badWrite = (!entry->writable && (inUser || cr0.wp)); |
| |
| if ((inUser && !entry->user) || (mode == BaseTLB::Write && |
| badWrite)) { |
| // The page must have been present to get into the TLB in |
| // the first place. We'll assume the reserved bits are |
| // fine even though we're not checking them. |
| return std::make_shared<PageFault>(vaddr, true, mode, |
| inUser, false); |
| } |
| |
| if (storeCheck && badWrite) { |
| // This would fault if this were a write, so return a page |
| // fault that reflects that happening. |
| return std::make_shared<PageFault>(vaddr, true, |
| BaseTLB::Write, |
| inUser, false); |
| } |
| |
| DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection " |
| "checks.\n", entry->paddr); |
| |
| int page_size = entry->size(); |
| Addr paddr = entry->paddr | (vaddr & (page_size - 1)); |
| DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr); |
| req->setPaddr(paddr); |
| |
| if (entry->uncacheable) |
| req->setFlags(Request::UNCACHEABLE); |
| } else { |
| //Use the address which already has segmentation applied. |
| DPRINTF(GPUTLB, "Paging disabled.\n"); |
| DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr); |
| req->setPaddr(vaddr); |
| } |
| } else { |
| // Real mode |
| DPRINTF(GPUTLB, "In real mode.\n"); |
| DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr); |
| req->setPaddr(vaddr); |
| } |
| |
| // Check for an access to the local APIC |
| if (FullSystem) { |
| LocalApicBase localApicBase = |
| tc->readMiscRegNoEffect(MISCREG_APIC_BASE); |
| |
| Addr baseAddr = localApicBase.base * PageBytes; |
| Addr paddr = req->getPaddr(); |
| |
| if (baseAddr <= paddr && baseAddr + PageBytes > paddr) { |
| // Force the access to be uncacheable. |
| req->setFlags(Request::UNCACHEABLE); |
| req->setPaddr(x86LocalAPICAddress(tc->contextId(), |
| paddr - baseAddr)); |
| } |
| } |
| |
| return NoFault; |
| } |
| |
| Fault |
| GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode, |
| int &latency) |
| { |
| bool delayedResponse; |
| |
| return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false, |
| latency); |
| } |
| |
| void |
| GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc, |
| Translation *translation, Mode mode, int &latency) |
| { |
| bool delayedResponse; |
| assert(translation); |
| |
| Fault fault = GpuTLB::translate(req, tc, translation, mode, |
| delayedResponse, true, latency); |
| |
| if (!delayedResponse) |
| translation->finish(fault, req, tc, mode); |
| } |
| |
| Walker* |
| GpuTLB::getWalker() |
| { |
| return walker; |
| } |
| |
| void |
| GpuTLB::serialize(CheckpointOut &cp) const |
| { |
| } |
| |
| void |
| GpuTLB::unserialize(CheckpointIn &cp) |
| { |
| } |
| |
| void |
| GpuTLB::regStats() |
| { |
| MemObject::regStats(); |
| |
| localNumTLBAccesses |
| .name(name() + ".local_TLB_accesses") |
| .desc("Number of TLB accesses") |
| ; |
| |
| localNumTLBHits |
| .name(name() + ".local_TLB_hits") |
| .desc("Number of TLB hits") |
| ; |
| |
| localNumTLBMisses |
| .name(name() + ".local_TLB_misses") |
| .desc("Number of TLB misses") |
| ; |
| |
| localTLBMissRate |
| .name(name() + ".local_TLB_miss_rate") |
| .desc("TLB miss rate") |
| ; |
| |
| accessCycles |
| .name(name() + ".access_cycles") |
| .desc("Cycles spent accessing this TLB level") |
| ; |
| |
| pageTableCycles |
| .name(name() + ".page_table_cycles") |
| .desc("Cycles spent accessing the page table") |
| ; |
| |
| localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses; |
| |
| numUniquePages |
| .name(name() + ".unique_pages") |
| .desc("Number of unique pages touched") |
| ; |
| |
| localCycles |
| .name(name() + ".local_cycles") |
| .desc("Number of cycles spent in queue for all incoming reqs") |
| ; |
| |
| localLatency |
| .name(name() + ".local_latency") |
| .desc("Avg. latency over incoming coalesced reqs") |
| ; |
| |
| localLatency = localCycles / localNumTLBAccesses; |
| |
| globalNumTLBAccesses |
| .name(name() + ".global_TLB_accesses") |
| .desc("Number of TLB accesses") |
| ; |
| |
| globalNumTLBHits |
| .name(name() + ".global_TLB_hits") |
| .desc("Number of TLB hits") |
| ; |
| |
| globalNumTLBMisses |
| .name(name() + ".global_TLB_misses") |
| .desc("Number of TLB misses") |
| ; |
| |
| globalTLBMissRate |
| .name(name() + ".global_TLB_miss_rate") |
| .desc("TLB miss rate") |
| ; |
| |
| globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses; |
| |
| avgReuseDistance |
| .name(name() + ".avg_reuse_distance") |
| .desc("avg. reuse distance over all pages (in ticks)") |
| ; |
| } |
| |
| /** |
| * Do the TLB lookup for this coalesced request and schedule |
| * another event <TLB access latency> cycles later. |
| */ |
| void |
| GpuTLB::issueTLBLookup(PacketPtr pkt) |
| { |
| assert(pkt); |
| assert(pkt->senderState); |
| |
| Addr virt_page_addr = roundDown(pkt->req->getVaddr(), |
| TheISA::PageBytes); |
| |
| TranslationState *sender_state = |
| safe_cast<TranslationState*>(pkt->senderState); |
| |
| bool update_stats = !sender_state->prefetch; |
| ThreadContext * tmp_tc = sender_state->tc; |
| |
| DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n", |
| virt_page_addr); |
| |
| int req_cnt = sender_state->reqCnt.back(); |
| |
| if (update_stats) { |
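| // stats are charged negatively here and positively when the |
| // translation returns, so the net contribution is the elapsed |
| // ticks spent on this access |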
| accessCycles -= (curTick() * req_cnt); |
| localCycles -= curTick(); |
| updatePageFootprint(virt_page_addr); |
| globalNumTLBAccesses += req_cnt; |
| } |
| |
| tlbOutcome lookup_outcome = TLB_MISS; |
| RequestPtr tmp_req = pkt->req; |
| |
| // Access the TLB and figure out if it's a hit or a miss. |
| bool success = tlbLookup(tmp_req, tmp_tc, update_stats); |
| |
| if (success) { |
| lookup_outcome = TLB_HIT; |
| // Put the entry in SenderState |
| GpuTlbEntry *entry = lookup(tmp_req->getVaddr(), false); |
| assert(entry); |
| |
| sender_state->tlbEntry = |
| new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid); |
| |
| if (update_stats) { |
| // the reqCnt has an entry per level, so its size tells us |
| // which level we are in |
| sender_state->hitLevel = sender_state->reqCnt.size(); |
| globalNumTLBHits += req_cnt; |
| } |
| } else { |
| if (update_stats) |
| globalNumTLBMisses += req_cnt; |
| } |
| |
| /* |
| * We now know the TLB lookup outcome (whether it's a hit or a miss), |
| * as well as the TLB access latency. |
| * |
| * We create and schedule a new TLBEvent which will help us take the |
| * appropriate actions (e.g., update TLB on a hit, send request to lower |
| * level TLB on a miss, or start a page walk if this was the last-level |
| * TLB) |
| */ |
| TLBEvent *tlb_event = |
| new TLBEvent(this, virt_page_addr, lookup_outcome, pkt); |
| |
| if (translationReturnEvent.count(virt_page_addr)) { |
| panic("Virtual Page Address %#x already has a return event\n", |
| virt_page_addr); |
| } |
| |
| translationReturnEvent[virt_page_addr] = tlb_event; |
| assert(tlb_event); |
| |
| DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n", |
| curTick() + this->ticks(hitLatency)); |
| |
| schedule(tlb_event, curTick() + this->ticks(hitLatency)); |
| } |
| |
| GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome, |
| PacketPtr _pkt) |
| : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr), |
| outcome(tlb_outcome), pkt(_pkt) |
| { |
| } |
| |
| /** |
| * Do the paging protection checks. If we encounter a page fault, |
| * an assertion is fired. |
| */ |
| void |
| GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, |
| GpuTlbEntry * tlb_entry, Mode mode) |
| { |
| HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); |
| uint32_t flags = pkt->req->getFlags(); |
| bool storeCheck = flags & (StoreCheck << FlagShift); |
| |
| // Do paging protection checks. |
| bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift))); |
| CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0); |
| |
| bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp)); |
| |
| if ((inUser && !tlb_entry->user) || |
| (mode == BaseTLB::Write && badWrite)) { |
| // The page must have been present to get into the TLB in |
| // the first place. We'll assume the reserved bits are |
| // fine even though we're not checking them. |
| assert(false); |
| } |
| |
| if (storeCheck && badWrite) { |
| // This would fault if this were a write; page faults are not |
| // expected at this point, so fail. |
| assert(false); |
| } |
| } |
| |
| /** |
| * handleTranslationReturn is called on a TLB hit, |
| * when a TLB miss returns, or when a page fault returns. |
| * In the latter case it is called with TLB_MISS as the tlbOutcome. |
| */ |
| void |
| GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome, |
| PacketPtr pkt) |
| { |
| assert(pkt); |
| Addr vaddr = pkt->req->getVaddr(); |
| |
| TranslationState *sender_state = |
| safe_cast<TranslationState*>(pkt->senderState); |
| |
| ThreadContext *tc = sender_state->tc; |
| Mode mode = sender_state->tlbMode; |
| |
| GpuTlbEntry *local_entry, *new_entry; |
| |
| if (tlb_outcome == TLB_HIT) { |
| DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr); |
| local_entry = sender_state->tlbEntry; |
| } else { |
| DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n", |
| vaddr); |
| |
| // We are returning either from a page walk or from a hit at a lower |
| // TLB level. The senderState should be "carrying" a pointer to the |
| // correct TLBEntry. |
| new_entry = sender_state->tlbEntry; |
| assert(new_entry); |
| local_entry = new_entry; |
| |
| if (allocationPolicy) { |
| DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n", |
| virt_page_addr); |
| |
| local_entry = insert(virt_page_addr, *new_entry); |
| } |
| |
| assert(local_entry); |
| } |
| |
| /** |
| * At this point the packet carries an up-to-date tlbEntry pointer |
| * in its senderState. |
| * Next step is to do the paging protection checks. |
| */ |
| DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks " |
| "while paddr was %#x.\n", local_entry->vaddr, |
| local_entry->paddr); |
| |
| pagingProtectionChecks(tc, pkt, local_entry, mode); |
| int page_size = local_entry->size(); |
| Addr paddr = local_entry->paddr | (vaddr & (page_size - 1)); |
| DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr); |
| |
| // Since this packet will be sent through the cpu side slave port, |
| // it must be converted to a response pkt if it is not one already |
| if (pkt->isRequest()) { |
| pkt->makeTimingResponse(); |
| } |
| |
| pkt->req->setPaddr(paddr); |
| |
| if (local_entry->uncacheable) { |
| pkt->req->setFlags(Request::UNCACHEABLE); |
| } |
| |
| //send packet back to coalescer |
| cpuSidePort[0]->sendTimingResp(pkt); |
| //schedule cleanup event |
| cleanupQueue.push(virt_page_addr); |
| |
| // schedule this only once per cycle. |
| // The check is required because we might have multiple translations |
| // returning the same cycle |
| // this is a maximum priority event and must be on the same cycle |
| // as the cleanup event in TLBCoalescer to avoid a race with |
| // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry |
| if (!cleanupEvent.scheduled()) |
| schedule(cleanupEvent, curTick()); |
| } |
| |
| /** |
| * Here we take the appropriate actions based on the result of the |
| * TLB lookup. |
| */ |
| void |
| GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome, |
| PacketPtr pkt) |
| { |
| DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr); |
| |
| assert(translationReturnEvent[virtPageAddr]); |
| assert(pkt); |
| |
| TranslationState *tmp_sender_state = |
| safe_cast<TranslationState*>(pkt->senderState); |
| |
| int req_cnt = tmp_sender_state->reqCnt.back(); |
| bool update_stats = !tmp_sender_state->prefetch; |
| |
| if (outcome == TLB_HIT) { |
| handleTranslationReturn(virtPageAddr, TLB_HIT, pkt); |
| |
| if (update_stats) { |
| accessCycles += (req_cnt * curTick()); |
| localCycles += curTick(); |
| } |
| |
| } else if (outcome == TLB_MISS) { |
| |
| DPRINTF(GPUTLB, "This is a TLB miss\n"); |
| if (update_stats) { |
| accessCycles += (req_cnt*curTick()); |
| localCycles += curTick(); |
| } |
| |
| if (hasMemSidePort) { |
| // the one cycle added here represents the delay from when we |
| // get the reply back until we propagate it to the coalescer |
| // above. |
| if (update_stats) { |
| accessCycles += (req_cnt * 1); |
| localCycles += 1; |
| } |
| |
| /** |
| * There is a TLB below. Send the coalesced request. |
| * We actually send the very first packet of all the |
| * pending packets for this virtual page address. |
| */ |
| if (!memSidePort[0]->sendTimingReq(pkt)) { |
| DPRINTF(GPUTLB, "Failed sending translation request to " |
| "lower level TLB for addr %#x\n", virtPageAddr); |
| |
| memSidePort[0]->retries.push_back(pkt); |
| } else { |
| DPRINTF(GPUTLB, "Sent translation request to lower level " |
| "TLB for addr %#x\n", virtPageAddr); |
| } |
| } else { |
| //this is the last level TLB. Start a page walk |
| DPRINTF(GPUTLB, "Last level TLB - start a page walk for " |
| "addr %#x\n", virtPageAddr); |
| |
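| // charge the walk negatively now; the matching positive charge |
| // when the walk completes accumulates the page-walk latency |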
| if (update_stats) |
| pageTableCycles -= (req_cnt*curTick()); |
| |
| TLBEvent *tlb_event = translationReturnEvent[virtPageAddr]; |
| assert(tlb_event); |
| tlb_event->updateOutcome(PAGE_WALK); |
| schedule(tlb_event, curTick() + ticks(missLatency2)); |
| } |
| } else if (outcome == PAGE_WALK) { |
| if (update_stats) |
| pageTableCycles += (req_cnt*curTick()); |
| |
| // Need to access the page table and update the TLB |
| DPRINTF(GPUTLB, "Doing a page walk for address %#x\n", |
| virtPageAddr); |
| |
| TranslationState *sender_state = |
| safe_cast<TranslationState*>(pkt->senderState); |
| |
| Process *p = sender_state->tc->getProcessPtr(); |
| Addr vaddr = pkt->req->getVaddr(); |
| #ifndef NDEBUG |
| Addr alignedVaddr = p->pTable->pageAlign(vaddr); |
| assert(alignedVaddr == virtPageAddr); |
| #endif |
| const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr); |
| if (!pte && sender_state->tlbMode != BaseTLB::Execute && |
| p->fixupStackFault(vaddr)) { |
| pte = p->pTable->lookup(vaddr); |
| } |
| |
| if (pte) { |
| DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr, |
| pte->paddr); |
| |
| sender_state->tlbEntry = |
| new GpuTlbEntry(0, virtPageAddr, pte->paddr, true); |
| } else { |
| sender_state->tlbEntry = |
| new GpuTlbEntry(0, 0, 0, false); |
| } |
| |
| handleTranslationReturn(virtPageAddr, TLB_MISS, pkt); |
| } else if (outcome == MISS_RETURN) { |
| /** We add an extra cycle in the return path of translation |
| * requests between the various TLB levels. |
| */ |
| handleTranslationReturn(virtPageAddr, TLB_MISS, pkt); |
| } else { |
| assert(false); |
| } |
| } |
| |
| void |
| GpuTLB::TLBEvent::process() |
| { |
| tlb->translationReturn(virtPageAddr, outcome, pkt); |
| } |
| |
| const char* |
| GpuTLB::TLBEvent::description() const |
| { |
| return "trigger translationDoneEvent"; |
| } |
| |
| void |
| GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome) |
| { |
| outcome = _outcome; |
| } |
| |
| Addr |
| GpuTLB::TLBEvent::getTLBEventVaddr() |
| { |
| return virtPageAddr; |
| } |
| |
| /* |
| * recvTimingReq receives a coalesced timing request from a TLBCoalescer |
| * and calls issueTLBLookup(). |
| * It rejects the packet only if we have exceeded the maximum number |
| * of outstanding requests for the TLB. |
| */ |
| bool |
| GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt) |
| { |
| if (tlb->outstandingReqs < tlb->maxCoalescedReqs) { |
| tlb->issueTLBLookup(pkt); |
| // update number of outstanding translation requests |
| tlb->outstandingReqs++; |
| return true; |
| } else { |
| DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n", |
| tlb->outstandingReqs); |
| return false; |
| } |
| } |
| |
| /** |
| * handleFuncTranslationReturn is called on a TLB hit, |
| * when a TLB miss returns, or when a page fault returns. |
| * It updates the LRU state, inserts the TLB entry on a miss |
| * (depending on the allocation policy), and does the required |
| * protection checks. It does NOT create a new packet to |
| * update the packet's addr; this is done in hsail-gpu code. |
| */ |
| void |
| GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome) |
| { |
| TranslationState *sender_state = |
| safe_cast<TranslationState*>(pkt->senderState); |
| |
| ThreadContext *tc = sender_state->tc; |
| Mode mode = sender_state->tlbMode; |
| Addr vaddr = pkt->req->getVaddr(); |
| |
| GpuTlbEntry *local_entry, *new_entry; |
| |
| if (tlb_outcome == TLB_HIT) { |
| DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr " |
| "%#x\n", vaddr); |
| |
| local_entry = sender_state->tlbEntry; |
| } else { |
| DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr " |
| "%#x\n", vaddr); |
| |
| // We are returning either from a page walk or from a hit at a lower |
| // TLB level. The senderState should be "carrying" a pointer to the |
| // correct TLBEntry. |
| new_entry = sender_state->tlbEntry; |
| assert(new_entry); |
| local_entry = new_entry; |
| |
| if (allocationPolicy) { |
| Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes); |
| |
| DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n", |
| virt_page_addr); |
| |
| local_entry = insert(virt_page_addr, *new_entry); |
| } |
| |
| assert(local_entry); |
| } |
| |
| DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks " |
| "while paddr was %#x.\n", local_entry->vaddr, |
| local_entry->paddr); |
| |
| // Do paging checks if it's a normal functional access. If it's for a |
| // prefetch, then sometimes you can try to prefetch something that won't |
| // pass protection. We don't actually want to fault because there is no |
| // demand access to deem this a violation. Just put it in the TLB and |
| // it will fault if indeed a future demand access touches it in |
| // violation. |
| if (!sender_state->prefetch && sender_state->tlbEntry->valid) |
| pagingProtectionChecks(tc, pkt, local_entry, mode); |
| |
| int page_size = local_entry->size(); |
| Addr paddr = local_entry->paddr | (vaddr & (page_size - 1)); |
| DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr); |
| |
| pkt->req->setPaddr(paddr); |
| |
| if (local_entry->uncacheable) |
| pkt->req->setFlags(Request::UNCACHEABLE); |
| } |
| |
| // This is used for atomic translations. Need to |
| // make it all happen during the same cycle. |
| void |
| GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt) |
| { |
| TranslationState *sender_state = |
| safe_cast<TranslationState*>(pkt->senderState); |
| |
| ThreadContext *tc = sender_state->tc; |
| bool update_stats = !sender_state->prefetch; |
| |
| Addr virt_page_addr = roundDown(pkt->req->getVaddr(), |
| TheISA::PageBytes); |
| |
| if (update_stats) |
| tlb->updatePageFootprint(virt_page_addr); |
| |
| // do the TLB lookup; update stats only if this is not a prefetch |
| bool success = tlb->tlbLookup(pkt->req, tc, update_stats); |
| tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS; |
| |
| // functional mode means no coalescing |
| // global metrics are the same as the local metrics |
| if (update_stats) { |
| tlb->globalNumTLBAccesses++; |
| |
| if (success) { |
| sender_state->hitLevel = sender_state->reqCnt.size(); |
| tlb->globalNumTLBHits++; |
| } |
| } |
| |
| if (!success) { |
| if (update_stats) |
| tlb->globalNumTLBMisses++; |
| if (tlb->hasMemSidePort) { |
| // there is a TLB below -> propagate down the TLB hierarchy |
| tlb->memSidePort[0]->sendFunctional(pkt); |
| // If no valid translation from a prefetch, then just return |
| if (sender_state->prefetch && !pkt->req->hasPaddr()) |
| return; |
| } else { |
| // Need to access the page table and update the TLB |
| DPRINTF(GPUTLB, "Doing a page walk for address %#x\n", |
| virt_page_addr); |
| |
| Process *p = tc->getProcessPtr(); |
| |
| Addr vaddr = pkt->req->getVaddr(); |
| #ifndef NDEBUG |
| Addr alignedVaddr = p->pTable->pageAlign(vaddr); |
| assert(alignedVaddr == virt_page_addr); |
| #endif |
| |
| const EmulationPageTable::Entry *pte = |
| p->pTable->lookup(vaddr); |
| if (!pte && sender_state->tlbMode != BaseTLB::Execute && |
| p->fixupStackFault(vaddr)) { |
| pte = p->pTable->lookup(vaddr); |
| } |
| |
| if (!sender_state->prefetch) { |
| // no PageFaults are permitted after |
| // the second page table lookup |
| assert(pte); |
| |
| DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr, |
| pte->paddr); |
| |
| sender_state->tlbEntry = |
| new GpuTlbEntry(0, virt_page_addr, |
| pte->paddr, true); |
| } else { |
| // If this was a prefetch, then do the normal thing if it |
| // was a successful translation. Otherwise, send an empty |
| // TLB entry back so that it can be figured out as empty and |
| // handled accordingly. |
| if (pte) { |
| DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr, |
| pte->paddr); |
| |
| sender_state->tlbEntry = |
| new GpuTlbEntry(0, virt_page_addr, |
| pte->paddr, true); |
| } else { |
| DPRINTF(GPUPrefetch, "Prefetch failed %#x\n", |
| alignedVaddr); |
| |
| sender_state->tlbEntry = new GpuTlbEntry(); |
| |
| return; |
| } |
| } |
| } |
| } else { |
| DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n", |
| tlb->lookup(pkt->req->getVaddr())); |
| |
| GpuTlbEntry *entry = tlb->lookup(pkt->req->getVaddr(), |
| update_stats); |
| |
| assert(entry); |
| |
| sender_state->tlbEntry = |
| new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid); |
| } |
| // This is the function that populates pkt->req with the paddr of |
| // the translation. But if no translation happens (i.e., the prefetch |
| // fails), then the early returns in the code above will keep this |
| // function from executing. |
| tlb->handleFuncTranslationReturn(pkt, tlb_outcome); |
| } |
| |
| void |
| GpuTLB::CpuSidePort::recvReqRetry() |
| { |
| // The CPUSidePort never sends anything but replies. No retries |
| // expected. |
| assert(false); |
| } |
| |
| AddrRangeList |
| GpuTLB::CpuSidePort::getAddrRanges() const |
| { |
| // currently not checked by the master |
| AddrRangeList ranges; |
| |
| return ranges; |
| } |
| |
| /** |
| * MemSidePort receives the packet back. |
| * We need to call handleTranslationReturn |
| * and propagate up the hierarchy. |
| */ |
| bool |
| GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt) |
| { |
| Addr virt_page_addr = roundDown(pkt->req->getVaddr(), |
| TheISA::PageBytes); |
| |
| DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n", |
| virt_page_addr); |
| |
| TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr]; |
| assert(tlb_event); |
| assert(virt_page_addr == tlb_event->getTLBEventVaddr()); |
| |
| tlb_event->updateOutcome(MISS_RETURN); |
| tlb->schedule(tlb_event, curTick()+tlb->ticks(1)); |
| |
| return true; |
| } |
| |
| void |
| GpuTLB::MemSidePort::recvReqRetry() |
| { |
| // No retries should reach the TLB. The retries |
| // should only reach the TLBCoalescer. |
| assert(false); |
| } |
| |
| void |
| GpuTLB::cleanup() |
| { |
| while (!cleanupQueue.empty()) { |
| Addr cleanup_addr = cleanupQueue.front(); |
| cleanupQueue.pop(); |
| |
| // delete TLBEvent |
| TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr]; |
| delete old_tlb_event; |
| translationReturnEvent.erase(cleanup_addr); |
| |
| // update number of outstanding requests |
| outstandingReqs--; |
| } |
| |
| /** The higher level coalescer should retry if it has |
| * any pending requests. |
| */ |
| for (int i = 0; i < cpuSidePort.size(); ++i) { |
| cpuSidePort[i]->sendRetryReq(); |
| } |
| } |
| |
| void |
| GpuTLB::updatePageFootprint(Addr virt_page_addr) |
| { |
| std::pair<AccessPatternTable::iterator, bool> ret; |
| |
| AccessInfo tmp_access_info; |
| tmp_access_info.lastTimeAccessed = 0; |
| tmp_access_info.accessesPerPage = 0; |
| tmp_access_info.totalReuseDistance = 0; |
| tmp_access_info.sumDistance = 0; |
| tmp_access_info.meanDistance = 0; |
| |
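| // try to insert a footprint entry for this page; if the page is |
| // already tracked, the existing entry is left in place and |
| // ret.second comes back false |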
| ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr, |
| tmp_access_info)); |
| |
| bool first_page_access = ret.second; |
| |
| if (first_page_access) { |
| numUniquePages++; |
| } else { |
| int accessed_before; |
| accessed_before = curTick() - ret.first->second.lastTimeAccessed; |
| ret.first->second.totalReuseDistance += accessed_before; |
| } |
| |
| ret.first->second.accessesPerPage++; |
| ret.first->second.lastTimeAccessed = curTick(); |
| |
| if (accessDistance) { |
| ret.first->second.localTLBAccesses |
| .push_back(localNumTLBAccesses.value()); |
| } |
| } |
| |
| void |
| GpuTLB::exitCallback() |
| { |
| std::ostream *page_stat_file = nullptr; |
| |
| if (accessDistance) { |
| // print per page statistics to a separate file (.csv format) |
| // simout is the gem5 output directory (default is m5out or the one |
| // specified with -d) |
| page_stat_file = simout.create(name().c_str())->stream(); |
| |
| // print header |
| *page_stat_file << "page,max_access_distance,mean_access_distance, " |
| << "stddev_distance" << std::endl; |
| } |
| |
| // update avg. reuse distance footprint |
| AccessPatternTable::iterator iter, iter_begin, iter_end; |
| unsigned int sum_avg_reuse_distance_per_page = 0; |
| |
| // iterate through all pages seen by this TLB |
| for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) { |
| sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance / |
| iter->second.accessesPerPage; |
| |
| if (accessDistance) { |
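| // convert the recorded absolute access counts into access |
| // distances, i.e., the number of TLB accesses between |
| // successive touches of this page |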
| unsigned int tmp = iter->second.localTLBAccesses[0]; |
| unsigned int prev = tmp; |
| |
| for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) { |
| if (i) { |
| tmp = prev + 1; |
| } |
| |
| prev = iter->second.localTLBAccesses[i]; |
| // update the localTLBAccesses value |
| // with the actual difference |
| iter->second.localTLBAccesses[i] -= tmp; |
| // compute the sum of AccessDistance per page |
| // used later for mean |
| iter->second.sumDistance += |
| iter->second.localTLBAccesses[i]; |
| } |
| |
| iter->second.meanDistance = |
| iter->second.sumDistance / iter->second.accessesPerPage; |
| |
| // compute std_dev and max (we need a second pass because we |
| // need to know the mean value) |
| unsigned int max_distance = 0; |
| unsigned int stddev_distance = 0; |
| |
| for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) { |
| unsigned int tmp_access_distance = |
| iter->second.localTLBAccesses[i]; |
| |
| if (tmp_access_distance > max_distance) { |
| max_distance = tmp_access_distance; |
| } |
| |
| unsigned int diff = |
| tmp_access_distance - iter->second.meanDistance; |
| stddev_distance += pow(diff, 2); |
| } |
| |
| stddev_distance = |
| sqrt(stddev_distance/iter->second.accessesPerPage); |
| |
| if (page_stat_file) { |
| *page_stat_file << std::hex << iter->first << ","; |
| *page_stat_file << std::dec << max_distance << ","; |
| *page_stat_file << std::dec << iter->second.meanDistance |
| << ","; |
| *page_stat_file << std::dec << stddev_distance; |
| *page_stat_file << std::endl; |
| } |
| |
| // erase the localTLBAccesses array |
| iter->second.localTLBAccesses.clear(); |
| } |
| } |
| |
| if (!TLBFootprint.empty()) { |
| avgReuseDistance = |
| sum_avg_reuse_distance_per_page / TLBFootprint.size(); |
| } |
| |
| //clear the TLBFootprint map |
| TLBFootprint.clear(); |
| } |
| } // namespace X86ISA |
| |
| X86ISA::GpuTLB* |
| X86GPUTLBParams::create() |
| { |
| return new X86ISA::GpuTLB(this); |
| } |
| |