/*
* Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Lisa Hsu
*/
#include "gpu-compute/gpu_tlb.hh"
#include <cmath>
#include <cstring>
#include "arch/x86/faults.hh"
#include "arch/x86/insts/microldstop.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/pagetable_walker.hh"
#include "arch/x86/regs/misc.hh"
#include "arch/x86/x86_traits.hh"
#include "base/bitfield.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/base.hh"
#include "cpu/thread_context.hh"
#include "debug/GPUPrefetch.hh"
#include "debug/GPUTLB.hh"
#include "mem/packet_access.hh"
#include "mem/page_table.hh"
#include "mem/request.hh"
#include "sim/process.hh"
namespace X86ISA
{
GpuTLB::GpuTLB(const Params *p)
: MemObject(p), configAddress(0), size(p->size),
cleanupEvent([this]{ cleanup(); }, name(), false,
Event::Maximum_Pri),
exitEvent([this]{ exitCallback(); }, name())
{
assoc = p->assoc;
assert(assoc <= size);
numSets = size/assoc;
allocationPolicy = p->allocationPolicy;
hasMemSidePort = false;
accessDistance = p->accessDistance;
clock = p->clk_domain->clockPeriod();
tlb.assign(size, GpuTlbEntry());
freeList.resize(numSets);
entryList.resize(numSets);
for (int set = 0; set < numSets; ++set) {
for (int way = 0; way < assoc; ++way) {
int x = set * assoc + way;
freeList[set].push_back(&tlb.at(x));
}
}
FA = (size == assoc);
/**
* @warning: the set-associative version assumes you have a
* fixed page size of 4KB.
* If the page size is greater than 4KB (as defined by
* TheISA::PageBytes), then there are various issues w/ the current
* implementation (you'd have the same 8KB page being replicated in
* different sets etc)
*/
setMask = numSets - 1;
#if 0
// GpuTLB doesn't yet support full system
walker = p->walker;
walker->setTLB(this);
#endif
maxCoalescedReqs = p->maxOutstandingReqs;
// Do not allow maxCoalescedReqs to be more than the TLB associativity
if (maxCoalescedReqs > assoc) {
maxCoalescedReqs = assoc;
cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
}
outstandingReqs = 0;
hitLatency = p->hitLatency;
missLatency1 = p->missLatency1;
missLatency2 = p->missLatency2;
// create the slave ports based on the number of connected ports
for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
name(), i), this, i));
}
// create the master ports based on the number of connected ports
for (size_t i = 0; i < p->port_master_connection_count; ++i) {
memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
name(), i), this, i));
}
}
// fixme: this is never called?
GpuTLB::~GpuTLB()
{
// make sure all the hash-maps are empty
assert(translationReturnEvent.empty());
}
BaseSlavePort&
GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
{
if (if_name == "slave") {
if (idx >= static_cast<PortID>(cpuSidePort.size())) {
panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
}
return *cpuSidePort[idx];
} else {
panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
}
}
BaseMasterPort&
GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
{
if (if_name == "master") {
if (idx >= static_cast<PortID>(memSidePort.size())) {
panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
}
hasMemSidePort = true;
return *memSidePort[idx];
} else {
panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
}
}
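/**
 * Insert an entry into the set indexed by vpn. A free way is used if
 * one is available; otherwise the LRU entry (the back of the set's
 * entryList) is evicted and overwritten. The new entry is placed at
 * the MRU position (front of the list).
 */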
GpuTlbEntry*
GpuTLB::insert(Addr vpn, GpuTlbEntry &entry)
{
GpuTlbEntry *newEntry = nullptr;
/**
* vpn holds the virtual page address
* The least significant bits are simply masked
*/
int set = (vpn >> TheISA::PageShift) & setMask;
if (!freeList[set].empty()) {
newEntry = freeList[set].front();
freeList[set].pop_front();
} else {
newEntry = entryList[set].back();
entryList[set].pop_back();
}
*newEntry = entry;
newEntry->vaddr = vpn;
entryList[set].push_front(newEntry);
return newEntry;
}
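/**
 * Search the set indexed by va for an entry whose page contains va.
 * Returns an iterator to the matching entry, or entryList[set].end()
 * on a miss. If update_lru is set, the matching entry is moved to the
 * MRU position of its set.
 */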
GpuTLB::EntryList::iterator
GpuTLB::lookupIt(Addr va, bool update_lru)
{
int set = (va >> TheISA::PageShift) & setMask;
if (FA) {
assert(!set);
}
auto entry = entryList[set].begin();
for (; entry != entryList[set].end(); ++entry) {
int page_size = (*entry)->size();
if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
"with size %#x.\n", va, (*entry)->vaddr, page_size);
if (update_lru) {
entryList[set].push_front(*entry);
entryList[set].erase(entry);
entry = entryList[set].begin();
}
break;
}
}
return entry;
}
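/**
 * Wrapper around lookupIt that returns a pointer to the matching
 * entry, or nullptr on a miss.
 */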
GpuTlbEntry*
GpuTLB::lookup(Addr va, bool update_lru)
{
int set = (va >> TheISA::PageShift) & setMask;
auto entry = lookupIt(va, update_lru);
if (entry == entryList[set].end())
return nullptr;
else
return *entry;
}
void
GpuTLB::invalidateAll()
{
DPRINTF(GPUTLB, "Invalidating all entries.\n");
for (int i = 0; i < numSets; ++i) {
while (!entryList[i].empty()) {
GpuTlbEntry *entry = entryList[i].front();
entryList[i].pop_front();
freeList[i].push_back(entry);
}
}
}
void
GpuTLB::setConfigAddress(uint32_t addr)
{
configAddress = addr;
}
void
GpuTLB::invalidateNonGlobal()
{
DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
for (int i = 0; i < numSets; ++i) {
for (auto entryIt = entryList[i].begin();
entryIt != entryList[i].end();) {
if (!(*entryIt)->global) {
freeList[i].push_back(*entryIt);
entryList[i].erase(entryIt++);
} else {
++entryIt;
}
}
}
}
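/**
 * Invalidate the entry (if any) that maps va and return it to the
 * free list of its set. The address space number is currently ignored.
 */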
void
GpuTLB::demapPage(Addr va, uint64_t asn)
{
int set = (va >> TheISA::PageShift) & setMask;
auto entry = lookupIt(va, false);
if (entry != entryList[set].end()) {
freeList[set].push_back(*entry);
entryList[set].erase(entry);
}
}
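/**
 * Handle translations to the x86 internal (non-memory) address spaces:
 * MSR accesses are mapped to MiscReg indices and flagged as MMAPPED_IPR,
 * while I/O-port accesses are mapped into the PCI-config or I/O
 * physical address ranges. CPUID-space accesses are not implemented.
 */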
Fault
GpuTLB::translateInt(RequestPtr req, ThreadContext *tc)
{
DPRINTF(GPUTLB, "Addresses references internal memory.\n");
Addr vaddr = req->getVaddr();
Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
if (prefix == IntAddrPrefixCPUID) {
panic("CPUID memory space not yet implemented!\n");
} else if (prefix == IntAddrPrefixMSR) {
vaddr = vaddr >> 3;
req->setFlags(Request::MMAPPED_IPR);
Addr regNum = 0;
switch (vaddr & ~IntAddrPrefixMask) {
case 0x10:
regNum = MISCREG_TSC;
break;
case 0x1B:
regNum = MISCREG_APIC_BASE;
break;
case 0xFE:
regNum = MISCREG_MTRRCAP;
break;
case 0x174:
regNum = MISCREG_SYSENTER_CS;
break;
case 0x175:
regNum = MISCREG_SYSENTER_ESP;
break;
case 0x176:
regNum = MISCREG_SYSENTER_EIP;
break;
case 0x179:
regNum = MISCREG_MCG_CAP;
break;
case 0x17A:
regNum = MISCREG_MCG_STATUS;
break;
case 0x17B:
regNum = MISCREG_MCG_CTL;
break;
case 0x1D9:
regNum = MISCREG_DEBUG_CTL_MSR;
break;
case 0x1DB:
regNum = MISCREG_LAST_BRANCH_FROM_IP;
break;
case 0x1DC:
regNum = MISCREG_LAST_BRANCH_TO_IP;
break;
case 0x1DD:
regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
break;
case 0x1DE:
regNum = MISCREG_LAST_EXCEPTION_TO_IP;
break;
case 0x200:
regNum = MISCREG_MTRR_PHYS_BASE_0;
break;
case 0x201:
regNum = MISCREG_MTRR_PHYS_MASK_0;
break;
case 0x202:
regNum = MISCREG_MTRR_PHYS_BASE_1;
break;
case 0x203:
regNum = MISCREG_MTRR_PHYS_MASK_1;
break;
case 0x204:
regNum = MISCREG_MTRR_PHYS_BASE_2;
break;
case 0x205:
regNum = MISCREG_MTRR_PHYS_MASK_2;
break;
case 0x206:
regNum = MISCREG_MTRR_PHYS_BASE_3;
break;
case 0x207:
regNum = MISCREG_MTRR_PHYS_MASK_3;
break;
case 0x208:
regNum = MISCREG_MTRR_PHYS_BASE_4;
break;
case 0x209:
regNum = MISCREG_MTRR_PHYS_MASK_4;
break;
case 0x20A:
regNum = MISCREG_MTRR_PHYS_BASE_5;
break;
case 0x20B:
regNum = MISCREG_MTRR_PHYS_MASK_5;
break;
case 0x20C:
regNum = MISCREG_MTRR_PHYS_BASE_6;
break;
case 0x20D:
regNum = MISCREG_MTRR_PHYS_MASK_6;
break;
case 0x20E:
regNum = MISCREG_MTRR_PHYS_BASE_7;
break;
case 0x20F:
regNum = MISCREG_MTRR_PHYS_MASK_7;
break;
case 0x250:
regNum = MISCREG_MTRR_FIX_64K_00000;
break;
case 0x258:
regNum = MISCREG_MTRR_FIX_16K_80000;
break;
case 0x259:
regNum = MISCREG_MTRR_FIX_16K_A0000;
break;
case 0x268:
regNum = MISCREG_MTRR_FIX_4K_C0000;
break;
case 0x269:
regNum = MISCREG_MTRR_FIX_4K_C8000;
break;
case 0x26A:
regNum = MISCREG_MTRR_FIX_4K_D0000;
break;
case 0x26B:
regNum = MISCREG_MTRR_FIX_4K_D8000;
break;
case 0x26C:
regNum = MISCREG_MTRR_FIX_4K_E0000;
break;
case 0x26D:
regNum = MISCREG_MTRR_FIX_4K_E8000;
break;
case 0x26E:
regNum = MISCREG_MTRR_FIX_4K_F0000;
break;
case 0x26F:
regNum = MISCREG_MTRR_FIX_4K_F8000;
break;
case 0x277:
regNum = MISCREG_PAT;
break;
case 0x2FF:
regNum = MISCREG_DEF_TYPE;
break;
case 0x400:
regNum = MISCREG_MC0_CTL;
break;
case 0x404:
regNum = MISCREG_MC1_CTL;
break;
case 0x408:
regNum = MISCREG_MC2_CTL;
break;
case 0x40C:
regNum = MISCREG_MC3_CTL;
break;
case 0x410:
regNum = MISCREG_MC4_CTL;
break;
case 0x414:
regNum = MISCREG_MC5_CTL;
break;
case 0x418:
regNum = MISCREG_MC6_CTL;
break;
case 0x41C:
regNum = MISCREG_MC7_CTL;
break;
case 0x401:
regNum = MISCREG_MC0_STATUS;
break;
case 0x405:
regNum = MISCREG_MC1_STATUS;
break;
case 0x409:
regNum = MISCREG_MC2_STATUS;
break;
case 0x40D:
regNum = MISCREG_MC3_STATUS;
break;
case 0x411:
regNum = MISCREG_MC4_STATUS;
break;
case 0x415:
regNum = MISCREG_MC5_STATUS;
break;
case 0x419:
regNum = MISCREG_MC6_STATUS;
break;
case 0x41D:
regNum = MISCREG_MC7_STATUS;
break;
case 0x402:
regNum = MISCREG_MC0_ADDR;
break;
case 0x406:
regNum = MISCREG_MC1_ADDR;
break;
case 0x40A:
regNum = MISCREG_MC2_ADDR;
break;
case 0x40E:
regNum = MISCREG_MC3_ADDR;
break;
case 0x412:
regNum = MISCREG_MC4_ADDR;
break;
case 0x416:
regNum = MISCREG_MC5_ADDR;
break;
case 0x41A:
regNum = MISCREG_MC6_ADDR;
break;
case 0x41E:
regNum = MISCREG_MC7_ADDR;
break;
case 0x403:
regNum = MISCREG_MC0_MISC;
break;
case 0x407:
regNum = MISCREG_MC1_MISC;
break;
case 0x40B:
regNum = MISCREG_MC2_MISC;
break;
case 0x40F:
regNum = MISCREG_MC3_MISC;
break;
case 0x413:
regNum = MISCREG_MC4_MISC;
break;
case 0x417:
regNum = MISCREG_MC5_MISC;
break;
case 0x41B:
regNum = MISCREG_MC6_MISC;
break;
case 0x41F:
regNum = MISCREG_MC7_MISC;
break;
case 0xC0000080:
regNum = MISCREG_EFER;
break;
case 0xC0000081:
regNum = MISCREG_STAR;
break;
case 0xC0000082:
regNum = MISCREG_LSTAR;
break;
case 0xC0000083:
regNum = MISCREG_CSTAR;
break;
case 0xC0000084:
regNum = MISCREG_SF_MASK;
break;
case 0xC0000100:
regNum = MISCREG_FS_BASE;
break;
case 0xC0000101:
regNum = MISCREG_GS_BASE;
break;
case 0xC0000102:
regNum = MISCREG_KERNEL_GS_BASE;
break;
case 0xC0000103:
regNum = MISCREG_TSC_AUX;
break;
case 0xC0010000:
regNum = MISCREG_PERF_EVT_SEL0;
break;
case 0xC0010001:
regNum = MISCREG_PERF_EVT_SEL1;
break;
case 0xC0010002:
regNum = MISCREG_PERF_EVT_SEL2;
break;
case 0xC0010003:
regNum = MISCREG_PERF_EVT_SEL3;
break;
case 0xC0010004:
regNum = MISCREG_PERF_EVT_CTR0;
break;
case 0xC0010005:
regNum = MISCREG_PERF_EVT_CTR1;
break;
case 0xC0010006:
regNum = MISCREG_PERF_EVT_CTR2;
break;
case 0xC0010007:
regNum = MISCREG_PERF_EVT_CTR3;
break;
case 0xC0010010:
regNum = MISCREG_SYSCFG;
break;
case 0xC0010016:
regNum = MISCREG_IORR_BASE0;
break;
case 0xC0010017:
regNum = MISCREG_IORR_BASE1;
break;
case 0xC0010018:
regNum = MISCREG_IORR_MASK0;
break;
case 0xC0010019:
regNum = MISCREG_IORR_MASK1;
break;
case 0xC001001A:
regNum = MISCREG_TOP_MEM;
break;
case 0xC001001D:
regNum = MISCREG_TOP_MEM2;
break;
case 0xC0010114:
regNum = MISCREG_VM_CR;
break;
case 0xC0010115:
regNum = MISCREG_IGNNE;
break;
case 0xC0010116:
regNum = MISCREG_SMM_CTL;
break;
case 0xC0010117:
regNum = MISCREG_VM_HSAVE_PA;
break;
default:
return std::make_shared<GeneralProtection>(0);
}
//The index is multiplied by the size of a MiscReg so that
//any memory dependence calculations will not see these as
//overlapping.
req->setPaddr(regNum * sizeof(MiscReg));
return NoFault;
} else if (prefix == IntAddrPrefixIO) {
// TODO If CPL > IOPL or in virtual mode, check the I/O permission
// bitmap in the TSS.
Addr IOPort = vaddr & ~IntAddrPrefixMask;
// Make sure the address fits in the expected 16 bit IO address
// space.
assert(!(IOPort & ~0xFFFF));
if (IOPort == 0xCF8 && req->getSize() == 4) {
req->setFlags(Request::MMAPPED_IPR);
req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
} else if ((IOPort & ~mask(2)) == 0xCFC) {
req->setFlags(Request::UNCACHEABLE);
Addr configAddress =
tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);
if (bits(configAddress, 31, 31)) {
req->setPaddr(PhysAddrPrefixPciConfig |
mbits(configAddress, 30, 2) |
(IOPort & mask(2)));
} else {
req->setPaddr(PhysAddrPrefixIO | IOPort);
}
} else {
req->setFlags(Request::UNCACHEABLE);
req->setPaddr(PhysAddrPrefixIO | IOPort);
}
return NoFault;
} else {
panic("Access to unrecognized internal address space %#x.\n",
prefix);
}
}
/**
* tlbLookup only performs a TLB lookup, returning true on a TLB hit
* and false on a TLB miss.
* Many of the checks about different modes have been converted to
* assertions, since these parts of the code are not really used.
* On a hit it will update the LRU stack.
*/
bool
GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
{
bool tlb_hit = false;
#ifndef NDEBUG
uint32_t flags = req->getFlags();
int seg = flags & SegmentFlagMask;
#endif
assert(seg != SEGMENT_REG_MS);
Addr vaddr = req->getVaddr();
DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
if (m5Reg.prot) {
DPRINTF(GPUTLB, "In protected mode.\n");
// make sure we are in 64-bit mode
assert(m5Reg.mode == LongMode);
// If paging is enabled, do the translation.
if (m5Reg.paging) {
DPRINTF(GPUTLB, "Paging enabled.\n");
//update LRU stack on a hit
GpuTlbEntry *entry = lookup(vaddr, true);
if (entry)
tlb_hit = true;
if (!update_stats) {
// functional tlb access for memory initialization
// i.e., memory seeding or instr. seeding -> don't update
// TLB and stats
return tlb_hit;
}
localNumTLBAccesses++;
if (!entry) {
localNumTLBMisses++;
} else {
localNumTLBHits++;
}
}
}
return tlb_hit;
}
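/**
 * Main translation path shared by translateAtomic and translateTiming.
 * It performs segmentation checks when not in long mode, consults the
 * TLB and (in SE mode) the process page table on a miss, applies paging
 * protection checks, and sets the physical address on the request. The
 * latency reference is filled in with the hit or miss latency when
 * timing is true.
 */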
Fault
GpuTLB::translate(RequestPtr req, ThreadContext *tc,
Translation *translation, Mode mode,
bool &delayedResponse, bool timing, int &latency)
{
uint32_t flags = req->getFlags();
int seg = flags & SegmentFlagMask;
bool storeCheck = flags & (StoreCheck << FlagShift);
// A request to SEGMENT_REG_MS targets a non-memory
// (internal) address space.
if (seg == SEGMENT_REG_MS) {
return translateInt(req, tc);
}
delayedResponse = false;
Addr vaddr = req->getVaddr();
DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
// If protected mode has been enabled...
if (m5Reg.prot) {
DPRINTF(GPUTLB, "In protected mode.\n");
// If we're not in 64-bit mode, do protection/limit checks
if (m5Reg.mode != LongMode) {
DPRINTF(GPUTLB, "Not in long mode. Checking segment "
"protection.\n");
// Check for a null segment selector.
if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
&& !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
return std::make_shared<GeneralProtection>(0);
}
bool expandDown = false;
SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
if (!attr.writable && (mode == BaseTLB::Write ||
storeCheck))
return std::make_shared<GeneralProtection>(0);
if (!attr.readable && mode == BaseTLB::Read)
return std::make_shared<GeneralProtection>(0);
expandDown = attr.expandDown;
}
Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
// This assumes we're not in 64-bit mode. If we were, the
// default address size would be 64 bits, overridable to 32.
int size = 32;
bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
if ((csAttr.defaultSize && sizeOverride) ||
(!csAttr.defaultSize && !sizeOverride)) {
size = 16;
}
Addr offset = bits(vaddr - base, size - 1, 0);
Addr endOffset = offset + req->getSize() - 1;
if (expandDown) {
DPRINTF(GPUTLB, "Checking an expand down segment.\n");
warn_once("Expand down segments are untested.\n");
if (offset <= limit || endOffset <= limit)
return std::make_shared<GeneralProtection>(0);
} else {
if (offset > limit || endOffset > limit)
return std::make_shared<GeneralProtection>(0);
}
}
// If paging is enabled, do the translation.
if (m5Reg.paging) {
DPRINTF(GPUTLB, "Paging enabled.\n");
// The vaddr already has the segment base applied.
GpuTlbEntry *entry = lookup(vaddr);
localNumTLBAccesses++;
if (!entry) {
localNumTLBMisses++;
if (timing) {
latency = missLatency1;
}
if (FullSystem) {
fatal("GpuTLB doesn't support full-system mode\n");
} else {
DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
"at pc %#x.\n", vaddr, tc->instAddr());
Process *p = tc->getProcessPtr();
const EmulationPageTable::Entry *pte =
p->pTable->lookup(vaddr);
if (!pte && mode != BaseTLB::Execute) {
// penalize a "page fault" more
if (timing)
latency += missLatency2;
if (p->fixupStackFault(vaddr))
pte = p->pTable->lookup(vaddr);
}
if (!pte) {
return std::make_shared<PageFault>(vaddr, true,
mode, true,
false);
} else {
Addr alignedVaddr = p->pTable->pageAlign(vaddr);
DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
alignedVaddr, pte->paddr);
GpuTlbEntry gpuEntry(
p->pTable->pid(), alignedVaddr,
pte->paddr, true);
entry = insert(alignedVaddr, gpuEntry);
}
DPRINTF(GPUTLB, "Miss was serviced.\n");
}
} else {
localNumTLBHits++;
if (timing) {
latency = hitLatency;
}
}
// Do paging protection checks.
bool inUser = (m5Reg.cpl == 3 &&
!(flags & (CPL0FlagBit << FlagShift)));
CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
bool badWrite = (!entry->writable && (inUser || cr0.wp));
if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
badWrite)) {
// The page must have been present to get into the TLB in
// the first place. We'll assume the reserved bits are
// fine even though we're not checking them.
return std::make_shared<PageFault>(vaddr, true, mode,
inUser, false);
}
if (storeCheck && badWrite) {
// This would fault if this were a write, so return a page
// fault that reflects that happening.
return std::make_shared<PageFault>(vaddr, true,
BaseTLB::Write,
inUser, false);
}
DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
"checks.\n", entry->paddr);
int page_size = entry->size();
Addr paddr = entry->paddr | (vaddr & (page_size - 1));
DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
req->setPaddr(paddr);
if (entry->uncacheable)
req->setFlags(Request::UNCACHEABLE);
} else {
//Use the address which already has segmentation applied.
DPRINTF(GPUTLB, "Paging disabled.\n");
DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
req->setPaddr(vaddr);
}
} else {
// Real mode
DPRINTF(GPUTLB, "In real mode.\n");
DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
req->setPaddr(vaddr);
}
// Check for an access to the local APIC
if (FullSystem) {
LocalApicBase localApicBase =
tc->readMiscRegNoEffect(MISCREG_APIC_BASE);
Addr baseAddr = localApicBase.base * PageBytes;
Addr paddr = req->getPaddr();
if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
// Force the access to be uncacheable.
req->setFlags(Request::UNCACHEABLE);
req->setPaddr(x86LocalAPICAddress(tc->contextId(),
paddr - baseAddr));
}
}
return NoFault;
}
Fault
GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
int &latency)
{
bool delayedResponse;
return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
latency);
}
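/**
 * Timing-mode translation. Since GpuTLB::translate never defers its
 * response (delayedResponse stays false), the translation is finished
 * immediately and the lookup latency is returned through latency.
 */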
void
GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc,
Translation *translation, Mode mode, int &latency)
{
bool delayedResponse;
assert(translation);
Fault fault = GpuTLB::translate(req, tc, translation, mode,
delayedResponse, true, latency);
if (!delayedResponse)
translation->finish(fault, req, tc, mode);
}
Walker*
GpuTLB::getWalker()
{
return walker;
}
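/**
 * Checkpointing of the GPU TLB state is not supported; serialize and
 * unserialize are no-ops.
 */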
void
GpuTLB::serialize(CheckpointOut &cp) const
{
}
void
GpuTLB::unserialize(CheckpointIn &cp)
{
}
void
GpuTLB::regStats()
{
MemObject::regStats();
localNumTLBAccesses
.name(name() + ".local_TLB_accesses")
.desc("Number of TLB accesses")
;
localNumTLBHits
.name(name() + ".local_TLB_hits")
.desc("Number of TLB hits")
;
localNumTLBMisses
.name(name() + ".local_TLB_misses")
.desc("Number of TLB misses")
;
localTLBMissRate
.name(name() + ".local_TLB_miss_rate")
.desc("TLB miss rate")
;
accessCycles
.name(name() + ".access_cycles")
.desc("Cycles spent accessing this TLB level")
;
pageTableCycles
.name(name() + ".page_table_cycles")
.desc("Cycles spent accessing the page table")
;
localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;
numUniquePages
.name(name() + ".unique_pages")
.desc("Number of unique pages touched")
;
localCycles
.name(name() + ".local_cycles")
.desc("Number of cycles spent in queue for all incoming reqs")
;
localLatency
.name(name() + ".local_latency")
.desc("Avg. latency over incoming coalesced reqs")
;
localLatency = localCycles / localNumTLBAccesses;
globalNumTLBAccesses
.name(name() + ".global_TLB_accesses")
.desc("Number of TLB accesses")
;
globalNumTLBHits
.name(name() + ".global_TLB_hits")
.desc("Number of TLB hits")
;
globalNumTLBMisses
.name(name() + ".global_TLB_misses")
.desc("Number of TLB misses")
;
globalTLBMissRate
.name(name() + ".global_TLB_miss_rate")
.desc("TLB miss rate")
;
globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
avgReuseDistance
.name(name() + ".avg_reuse_distance")
.desc("avg. reuse distance over all pages (in ticks)")
;
}
/**
* Do the TLB lookup for this coalesced request and schedule
* another event <TLB access latency> cycles later.
*/
void
GpuTLB::issueTLBLookup(PacketPtr pkt)
{
assert(pkt);
assert(pkt->senderState);
Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
TheISA::PageBytes);
TranslationState *sender_state =
safe_cast<TranslationState*>(pkt->senderState);
bool update_stats = !sender_state->prefetch;
ThreadContext * tmp_tc = sender_state->tc;
DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
virt_page_addr);
int req_cnt = sender_state->reqCnt.back();
if (update_stats) {
accessCycles -= (curTick() * req_cnt);
localCycles -= curTick();
updatePageFootprint(virt_page_addr);
globalNumTLBAccesses += req_cnt;
}
tlbOutcome lookup_outcome = TLB_MISS;
RequestPtr tmp_req = pkt->req;
// Access the TLB and figure out if it's a hit or a miss.
bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
if (success) {
lookup_outcome = TLB_HIT;
// Put the entry in SenderState
GpuTlbEntry *entry = lookup(tmp_req->getVaddr(), false);
assert(entry);
sender_state->tlbEntry =
new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
if (update_stats) {
// the reqCnt has an entry per level, so its size tells us
// which level we are in
sender_state->hitLevel = sender_state->reqCnt.size();
globalNumTLBHits += req_cnt;
}
} else {
if (update_stats)
globalNumTLBMisses += req_cnt;
}
/*
* We now know the TLB lookup outcome (if it's a hit or a miss), as well
* as the TLB access latency.
*
* We create and schedule a new TLBEvent which will help us take the
* appropriate actions (e.g., update TLB on a hit, send request to lower
* level TLB on a miss, or start a page walk if this was the last-level
* TLB)
*/
TLBEvent *tlb_event =
new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
if (translationReturnEvent.count(virt_page_addr)) {
panic("Virtual Page Address %#x already has a return event\n",
virt_page_addr);
}
translationReturnEvent[virt_page_addr] = tlb_event;
assert(tlb_event);
DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
curTick() + this->ticks(hitLatency));
schedule(tlb_event, curTick() + this->ticks(hitLatency));
}
GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
PacketPtr _pkt)
: Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
outcome(tlb_outcome), pkt(_pkt)
{
}
/**
* Do paging protection checks. If we encounter a page fault, an
* assertion fires.
*/
void
GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
GpuTlbEntry * tlb_entry, Mode mode)
{
HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
uint32_t flags = pkt->req->getFlags();
bool storeCheck = flags & (StoreCheck << FlagShift);
// Do paging protection checks.
bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
if ((inUser && !tlb_entry->user) ||
(mode == BaseTLB::Write && badWrite)) {
// The page must have been present to get into the TLB in
// the first place. We'll assume the reserved bits are
// fine even though we're not checking them.
assert(false);
}
if (storeCheck && badWrite) {
// This would fault if this were a write, so return a page
// fault that reflects that happening.
assert(false);
}
}
/**
* handleTranslationReturn is called on a TLB hit,
* when a TLB miss returns, or when a page fault returns.
* The latter calls handleTranslationReturn with TLB_MISS as the tlbOutcome.
*/
void
GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
PacketPtr pkt)
{
assert(pkt);
Addr vaddr = pkt->req->getVaddr();
TranslationState *sender_state =
safe_cast<TranslationState*>(pkt->senderState);
ThreadContext *tc = sender_state->tc;
Mode mode = sender_state->tlbMode;
GpuTlbEntry *local_entry, *new_entry;
if (tlb_outcome == TLB_HIT) {
DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
local_entry = sender_state->tlbEntry;
} else {
DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
vaddr);
// We are returning either from a page walk or from a hit at a lower
// TLB level. The senderState should be "carrying" a pointer to the
// correct TLBEntry.
new_entry = sender_state->tlbEntry;
assert(new_entry);
local_entry = new_entry;
if (allocationPolicy) {
DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
virt_page_addr);
local_entry = insert(virt_page_addr, *new_entry);
}
assert(local_entry);
}
/**
* At this point the packet carries an up-to-date tlbEntry pointer
* in its senderState.
* Next step is to do the paging protection checks.
*/
DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
"while paddr was %#x.\n", local_entry->vaddr,
local_entry->paddr);
pagingProtectionChecks(tc, pkt, local_entry, mode);
int page_size = local_entry->size();
Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
// Since this packet will be sent through the cpu side slave port,
// it must be converted to a response pkt if it is not one already
if (pkt->isRequest()) {
pkt->makeTimingResponse();
}
pkt->req->setPaddr(paddr);
if (local_entry->uncacheable) {
pkt->req->setFlags(Request::UNCACHEABLE);
}
//send packet back to coalescer
cpuSidePort[0]->sendTimingResp(pkt);
//schedule cleanup event
cleanupQueue.push(virt_page_addr);
// Schedule this only once per cycle; the check is required because
// multiple translations may return in the same cycle.
// This is a maximum-priority event and must be on the same cycle
// as the cleanup event in TLBCoalescer to avoid a race with
// IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry.
if (!cleanupEvent.scheduled())
schedule(cleanupEvent, curTick());
}
/**
* Here we take the appropriate actions based on the result of the
* TLB lookup.
*/
void
GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
PacketPtr pkt)
{
DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
assert(translationReturnEvent[virtPageAddr]);
assert(pkt);
TranslationState *tmp_sender_state =
safe_cast<TranslationState*>(pkt->senderState);
int req_cnt = tmp_sender_state->reqCnt.back();
bool update_stats = !tmp_sender_state->prefetch;
if (outcome == TLB_HIT) {
handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
if (update_stats) {
accessCycles += (req_cnt * curTick());
localCycles += curTick();
}
} else if (outcome == TLB_MISS) {
DPRINTF(GPUTLB, "This is a TLB miss\n");
if (update_stats) {
accessCycles += (req_cnt*curTick());
localCycles += curTick();
}
if (hasMemSidePort) {
// The one cycle added here represents the delay from when we get
// the reply back until we propagate it to the coalescer above.
if (update_stats) {
accessCycles += (req_cnt * 1);
localCycles += 1;
}
/**
* There is a TLB below. Send the coalesced request.
* We actually send the very first packet of all the
* pending packets for this virtual page address.
*/
if (!memSidePort[0]->sendTimingReq(pkt)) {
DPRINTF(GPUTLB, "Failed sending translation request to "
"lower level TLB for addr %#x\n", virtPageAddr);
memSidePort[0]->retries.push_back(pkt);
} else {
DPRINTF(GPUTLB, "Sent translation request to lower level "
"TLB for addr %#x\n", virtPageAddr);
}
} else {
//this is the last level TLB. Start a page walk
DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
"addr %#x\n", virtPageAddr);
if (update_stats)
pageTableCycles -= (req_cnt*curTick());
TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
assert(tlb_event);
tlb_event->updateOutcome(PAGE_WALK);
schedule(tlb_event, curTick() + ticks(missLatency2));
}
} else if (outcome == PAGE_WALK) {
if (update_stats)
pageTableCycles += (req_cnt*curTick());
// Need to access the page table and update the TLB
DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
virtPageAddr);
TranslationState *sender_state =
safe_cast<TranslationState*>(pkt->senderState);
Process *p = sender_state->tc->getProcessPtr();
Addr vaddr = pkt->req->getVaddr();
#ifndef NDEBUG
Addr alignedVaddr = p->pTable->pageAlign(vaddr);
assert(alignedVaddr == virtPageAddr);
#endif
const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
p->fixupStackFault(vaddr)) {
pte = p->pTable->lookup(vaddr);
}
if (pte) {
DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
pte->paddr);
sender_state->tlbEntry =
new GpuTlbEntry(0, virtPageAddr, pte->paddr, true);
} else {
sender_state->tlbEntry =
new GpuTlbEntry(0, 0, 0, false);
}
handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
} else if (outcome == MISS_RETURN) {
/** we add an extra cycle in the return path of the translation
* requests in between the various TLB levels.
*/
handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
} else {
assert(false);
}
}
void
GpuTLB::TLBEvent::process()
{
tlb->translationReturn(virtPageAddr, outcome, pkt);
}
const char*
GpuTLB::TLBEvent::description() const
{
return "trigger translationDoneEvent";
}
void
GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
{
outcome = _outcome;
}
Addr
GpuTLB::TLBEvent::getTLBEventVaddr()
{
return virtPageAddr;
}
/*
* recvTimingReq receives a coalesced timing request from a TLBCoalescer
* and calls issueTLBLookup().
* It only rejects the packet if we have exceeded the maximum
* number of outstanding requests for the TLB.
*/
bool
GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
{
if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
tlb->issueTLBLookup(pkt);
// update number of outstanding translation requests
tlb->outstandingReqs++;
return true;
} else {
DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
tlb->outstandingReqs);
return false;
}
}
/**
* handleFuncTranslationReturn is called on a TLB hit,
* when a TLB miss returns or when a page fault returns.
* It updates LRU, inserts the TLB entry on a miss
* depending on the allocation policy and does the required
* protection checks. It does NOT create a new packet to
* update the packet's addr; this is done in hsail-gpu code.
*/
void
GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
{
TranslationState *sender_state =
safe_cast<TranslationState*>(pkt->senderState);
ThreadContext *tc = sender_state->tc;
Mode mode = sender_state->tlbMode;
Addr vaddr = pkt->req->getVaddr();
GpuTlbEntry *local_entry, *new_entry;
if (tlb_outcome == TLB_HIT) {
DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
"%#x\n", vaddr);
local_entry = sender_state->tlbEntry;
} else {
DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
"%#x\n", vaddr);
// We are returning either from a page walk or from a hit at a lower
// TLB level. The senderState should be "carrying" a pointer to the
// correct TLBEntry.
new_entry = sender_state->tlbEntry;
assert(new_entry);
local_entry = new_entry;
if (allocationPolicy) {
Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
virt_page_addr);
local_entry = insert(virt_page_addr, *new_entry);
}
assert(local_entry);
}
DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
"while paddr was %#x.\n", local_entry->vaddr,
local_entry->paddr);
// Do paging checks if it's a normal functional access. If it's for a
// prefetch, then sometimes you can try to prefetch something that won't
// pass protection. We don't actually want to fault because there is no
// demand access to deem this a violation. Just put it in the TLB and
// it will fault if indeed a future demand access touches it in
// violation.
if (!sender_state->prefetch && sender_state->tlbEntry->valid)
pagingProtectionChecks(tc, pkt, local_entry, mode);
int page_size = local_entry->size();
Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
pkt->req->setPaddr(paddr);
if (local_entry->uncacheable)
pkt->req->setFlags(Request::UNCACHEABLE);
}
// This is used for atomic translations. Need to
// make it all happen during the same cycle.
void
GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
{
TranslationState *sender_state =
safe_cast<TranslationState*>(pkt->senderState);
ThreadContext *tc = sender_state->tc;
bool update_stats = !sender_state->prefetch;
Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
TheISA::PageBytes);
if (update_stats)
tlb->updatePageFootprint(virt_page_addr);
// Do the TLB lookup; stats are updated only for non-prefetch accesses.
bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
// functional mode means no coalescing
// global metrics are the same as the local metrics
if (update_stats) {
tlb->globalNumTLBAccesses++;
if (success) {
sender_state->hitLevel = sender_state->reqCnt.size();
tlb->globalNumTLBHits++;
}
}
if (!success) {
if (update_stats)
tlb->globalNumTLBMisses++;
if (tlb->hasMemSidePort) {
// there is a TLB below -> propagate down the TLB hierarchy
tlb->memSidePort[0]->sendFunctional(pkt);
// If no valid translation from a prefetch, then just return
if (sender_state->prefetch && !pkt->req->hasPaddr())
return;
} else {
// Need to access the page table and update the TLB
DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
virt_page_addr);
Process *p = tc->getProcessPtr();
Addr vaddr = pkt->req->getVaddr();
#ifndef NDEBUG
Addr alignedVaddr = p->pTable->pageAlign(vaddr);
assert(alignedVaddr == virt_page_addr);
#endif
const EmulationPageTable::Entry *pte =
p->pTable->lookup(vaddr);
if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
p->fixupStackFault(vaddr)) {
pte = p->pTable->lookup(vaddr);
}
if (!sender_state->prefetch) {
// no PageFaults are permitted after
// the second page table lookup
assert(success);
DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
pte->paddr);
sender_state->tlbEntry =
new GpuTlbEntry(0, virt_page_addr,
pte->paddr, success);
} else {
// If this was a prefetch, then do the normal thing if it
// was a successful translation. Otherwise, send an empty
// TLB entry back so that it can be figured out as empty and
// handled accordingly.
if (pte) {
DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
pte->paddr);
sender_state->tlbEntry =
new GpuTlbEntry(0, virt_page_addr,
pte->paddr, success);
} else {
DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
alignedVaddr);
sender_state->tlbEntry = new GpuTlbEntry();
return;
}
}
}
} else {
DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
tlb->lookup(pkt->req->getVaddr()));
GpuTlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
update_stats);
assert(entry);
sender_state->tlbEntry =
new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
}
// This is the function that would populate pkt->req with the paddr of
// the translation. But if no translation happens (i.e., the prefetch
// fails), then the early returns in the above code will keep this
// function from executing.
tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
}
void
GpuTLB::CpuSidePort::recvReqRetry()
{
// The CPUSidePort never sends anything but replies. No retries
// expected.
assert(false);
}
AddrRangeList
GpuTLB::CpuSidePort::getAddrRanges() const
{
// currently not checked by the master
AddrRangeList ranges;
return ranges;
}
/**
* MemSidePort receives the packet back.
* We need to call the handleTranslationReturn
* and propagate up the hierarchy.
*/
bool
GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
{
Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
TheISA::PageBytes);
DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
virt_page_addr);
TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
assert(tlb_event);
assert(virt_page_addr == tlb_event->getTLBEventVaddr());
tlb_event->updateOutcome(MISS_RETURN);
tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
return true;
}
void
GpuTLB::MemSidePort::recvReqRetry()
{
// No retries should reach the TLB. The retries
// should only reach the TLBCoalescer.
assert(false);
}
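/**
 * Triggered by the cleanupEvent: delete the TLBEvent for every
 * completed translation, decrement the number of outstanding requests,
 * and ask the coalescer(s) above to retry any pending requests.
 */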
void
GpuTLB::cleanup()
{
while (!cleanupQueue.empty()) {
Addr cleanup_addr = cleanupQueue.front();
cleanupQueue.pop();
// delete TLBEvent
TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
delete old_tlb_event;
translationReturnEvent.erase(cleanup_addr);
// update number of outstanding requests
outstandingReqs--;
}
/** the higher level coalescer should retry if it has
* any pending requests.
*/
for (int i = 0; i < cpuSidePort.size(); ++i) {
cpuSidePort[i]->sendRetryReq();
}
}
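/**
 * Record an access to virt_page_addr in the TLBFootprint table. The
 * first access to a page bumps numUniquePages; subsequent accesses
 * accumulate the reuse distance (in ticks). If accessDistance is
 * enabled, the current local access count is also logged so per-page
 * access distances can be computed at exit.
 */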
void
GpuTLB::updatePageFootprint(Addr virt_page_addr)
{
std::pair<AccessPatternTable::iterator, bool> ret;
AccessInfo tmp_access_info;
tmp_access_info.lastTimeAccessed = 0;
tmp_access_info.accessesPerPage = 0;
tmp_access_info.totalReuseDistance = 0;
tmp_access_info.sumDistance = 0;
tmp_access_info.meanDistance = 0;
ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
tmp_access_info));
bool first_page_access = ret.second;
if (first_page_access) {
numUniquePages++;
} else {
int accessed_before;
accessed_before = curTick() - ret.first->second.lastTimeAccessed;
ret.first->second.totalReuseDistance += accessed_before;
}
ret.first->second.accessesPerPage++;
ret.first->second.lastTimeAccessed = curTick();
if (accessDistance) {
ret.first->second.localTLBAccesses
.push_back(localNumTLBAccesses.value());
}
}
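/**
 * Exit callback: compute the average reuse distance over all pages seen
 * by this TLB and, if accessDistance is enabled, dump per-page access
 * distance statistics (max, mean, stddev) to a CSV file in the
 * simulation output directory. The footprint table is cleared afterwards.
 */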
void
GpuTLB::exitCallback()
{
std::ostream *page_stat_file = nullptr;
if (accessDistance) {
// print per page statistics to a separate file (.csv format)
// simout is the gem5 output directory (default is m5out or the one
// specified with -d)
page_stat_file = simout.create(name().c_str())->stream();
// print header
*page_stat_file << "page,max_access_distance,mean_access_distance, "
<< "stddev_distance" << std::endl;
}
// update avg. reuse distance footprint
AccessPatternTable::iterator iter, iter_begin, iter_end;
unsigned int sum_avg_reuse_distance_per_page = 0;
// iterate through all pages seen by this TLB
for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
iter->second.accessesPerPage;
if (accessDistance) {
unsigned int tmp = iter->second.localTLBAccesses[0];
unsigned int prev = tmp;
for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
if (i) {
tmp = prev + 1;
}
prev = iter->second.localTLBAccesses[i];
// update the localTLBAccesses value
// with the actual difference
iter->second.localTLBAccesses[i] -= tmp;
// compute the sum of AccessDistance per page
// used later for mean
iter->second.sumDistance +=
iter->second.localTLBAccesses[i];
}
iter->second.meanDistance =
iter->second.sumDistance / iter->second.accessesPerPage;
// compute std_dev and max (we need a second round because we
// need to know the mean value)
unsigned int max_distance = 0;
unsigned int stddev_distance = 0;
for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
unsigned int tmp_access_distance =
iter->second.localTLBAccesses[i];
if (tmp_access_distance > max_distance) {
max_distance = tmp_access_distance;
}
unsigned int diff =
tmp_access_distance - iter->second.meanDistance;
stddev_distance += pow(diff, 2);
}
stddev_distance =
sqrt(stddev_distance/iter->second.accessesPerPage);
if (page_stat_file) {
*page_stat_file << std::hex << iter->first << ",";
*page_stat_file << std::dec << max_distance << ",";
*page_stat_file << std::dec << iter->second.meanDistance
<< ",";
*page_stat_file << std::dec << stddev_distance;
*page_stat_file << std::endl;
}
// erase the localTLBAccesses array
iter->second.localTLBAccesses.clear();
}
}
if (!TLBFootprint.empty()) {
avgReuseDistance =
sum_avg_reuse_distance_per_page / TLBFootprint.size();
}
//clear the TLBFootprint map
TLBFootprint.clear();
}
} // namespace X86ISA
X86ISA::GpuTLB*
X86GPUTLBParams::create()
{
return new X86ISA::GpuTLB(this);
}