/*
* Copyright (c) 2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "arch/amdgpu/vega/tlb.hh"
#include <cmath>
#include <cstring>
#include "arch/amdgpu/common/gpu_translation_state.hh"
#include "arch/amdgpu/vega/faults.hh"
#include "arch/amdgpu/vega/pagetable_walker.hh"
#include "debug/GPUPrefetch.hh"
#include "debug/GPUTLB.hh"
#include "dev/amdgpu/amdgpu_device.hh"
namespace gem5
{
namespace VegaISA
{
// We place no limit on the number of translations we send
// downstream; we rely on the limit enforced by the coalescer
// above us.
GpuTLB::GpuTLB(const VegaGPUTLBParams &p)
: ClockedObject(p), walker(p.walker),
gpuDevice(p.gpu_device), size(p.size), stats(this),
cleanupEvent([this]{ cleanup(); }, name(), false,
Event::Maximum_Pri)
{
assoc = p.assoc;
assert(assoc <= size);
numSets = size/assoc;
allocationPolicy = p.allocationPolicy;
hasMemSidePort = false;
tlb.assign(size, VegaTlbEntry());
freeList.resize(numSets);
entryList.resize(numSets);
for (int set = 0; set < numSets; ++set) {
for (int way = 0; way < assoc; ++way) {
int x = set * assoc + way;
freeList[set].push_back(&tlb.at(x));
}
}
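// The TLB is fully associative when a single set spans all entries;
// setMask = numSets - 1 assumes numSets is a power of two. For
// example, with size = 64 entries and assoc = 8 (illustrative values
// only), numSets = 8, setMask = 0x7, and a virtual address maps to
// set ((vaddr >> PageShift) & setMask).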
FA = (size == assoc);
setMask = numSets - 1;
maxCoalescedReqs = p.maxOutstandingReqs;
outstandingReqs = 0;
hitLatency = p.hitLatency;
missLatency1 = p.missLatency1;
missLatency2 = p.missLatency2;
// create the response ports based on the number of connected ports
for (size_t i = 0; i < p.port_cpu_side_ports_connection_count; ++i) {
cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
name(), i), this, i));
}
// create the requestor ports based on the number of connected ports
for (size_t i = 0; i < p.port_mem_side_ports_connection_count; ++i) {
memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
name(), i), this, i));
}
// assuming one walker per TLB, set our walker's TLB to this TLB.
walker->setTLB(this);
// gpuDevice should be non-null in full system only and is set by GpuTLB
// params from the config file.
if (gpuDevice) {
gpuDevice->getVM().registerTLB(this);
}
}
GpuTLB::~GpuTLB()
{
}
Port &
GpuTLB::getPort(const std::string &if_name, PortID idx)
{
if (if_name == "cpu_side_ports") {
if (idx >= static_cast<PortID>(cpuSidePort.size())) {
panic("TLBCoalescer::getPort: unknown index %d\n", idx);
}
return *cpuSidePort[idx];
} else if (if_name == "mem_side_ports") {
if (idx >= static_cast<PortID>(memSidePort.size())) {
panic("TLBCoalescer::getPort: unknown index %d\n", idx);
}
hasMemSidePort = true;
return *memSidePort[idx];
} else {
panic("TLBCoalescer::getPort: unknown port %s\n", if_name);
}
}
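/**
 * Build a PageFault for vaddr, with the exception code chosen by the
 * access mode: load, store, or instruction fetch.
 */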
Fault
GpuTLB::createPagefault(Addr vaddr, Mode mode)
{
DPRINTF(GPUTLB, "GPUTLB: Raising page fault.\n");
ExceptionCode code;
if (mode == BaseMMU::Read)
code = ExceptionCode::LOAD_PAGE;
else if (mode == BaseMMU::Write)
code = ExceptionCode::STORE_PAGE;
else
code = ExceptionCode::INST_PAGE;
return std::make_shared<PageFault>(vaddr, code, true, mode, true);
}
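/**
 * Align vaddr down to the base page boundary by clearing the page
 * offset bits. For example, with a 4 KiB base page (PageShift == 12),
 * pageAlign(0x12345) == 0x12000.
 */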
Addr
GpuTLB::pageAlign(Addr vaddr)
{
Addr pageMask = mask(VegaISA::PageShift);
return (vaddr & ~pageMask);
}
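/**
 * Insert a translation into the set selected by the entry's VPN.
 * A free way is used if one is available; otherwise the LRU entry
 * (the back of the set's entry list) is evicted. The new entry is
 * placed at the MRU (front) position. Note that the vpn argument is
 * unused; the set index is derived from entry.vaddr.
 */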
VegaTlbEntry*
GpuTLB::insert(Addr vpn, VegaTlbEntry &entry)
{
VegaTlbEntry *newEntry = nullptr;
int set = (entry.vaddr >> VegaISA::PageShift) & setMask;
if (!freeList[set].empty()) {
newEntry = freeList[set].front();
freeList[set].pop_front();
} else {
newEntry = entryList[set].back();
entryList[set].pop_back();
}
*newEntry = entry;
entryList[set].push_front(newEntry);
DPRINTF(GPUTLB, "Inserted %#lx -> %#lx of size %#lx into set %d\n",
newEntry->vaddr, newEntry->paddr, entry.size(), set);
return newEntry;
}
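/**
 * Search the set for an entry whose range [vaddr, vaddr + size)
 * covers va. Since Vega pages may be larger than the base page size,
 * the match is a range check rather than a simple tag compare. On a
 * hit with update_lru set, the entry is moved to the MRU (front)
 * position.
 */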
GpuTLB::EntryList::iterator
GpuTLB::lookupIt(Addr va, bool update_lru)
{
int set = (va >> VegaISA::PageShift) & setMask;
if (FA) {
assert(!set);
}
auto entry = entryList[set].begin();
for (; entry != entryList[set].end(); ++entry) {
int page_size = (*entry)->size();
if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
"with size %#x.\n", va, (*entry)->vaddr, page_size);
if (update_lru) {
entryList[set].push_front(*entry);
entryList[set].erase(entry);
entry = entryList[set].begin();
}
break;
}
}
return entry;
}
VegaTlbEntry*
GpuTLB::lookup(Addr va, bool update_lru)
{
int set = (va >> VegaISA::PageShift) & setMask;
auto entry = lookupIt(va, update_lru);
if (entry == entryList[set].end())
return nullptr;
else
return *entry;
}
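/**
 * Invalidate the entire TLB by returning every entry in every set to
 * that set's free list.
 */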
void
GpuTLB::invalidateAll()
{
DPRINTF(GPUTLB, "Invalidating all entries.\n");
for (int i = 0; i < numSets; ++i) {
while (!entryList[i].empty()) {
VegaTlbEntry *entry = entryList[i].front();
entryList[i].pop_front();
freeList[i].push_back(entry);
}
}
}
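/**
 * Invalidate the entry mapping va, if one exists, returning it to
 * its set's free list. The asn argument is currently unused.
 */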
void
GpuTLB::demapPage(Addr va, uint64_t asn)
{
int set = (va >> VegaISA::PageShift) & setMask;
auto entry = lookupIt(va, false);
if (entry != entryList[set].end()) {
freeList[set].push_back(*entry);
entryList[set].erase(entry);
}
}
/**
 * tlbLookup only performs a TLB lookup, returning the TLB entry on a
 * TLB hit and nullptr on a TLB miss.
 * Many of the checks for different modes have been converted to
 * assertions, since those parts of the code are not really exercised.
 * On a hit it will update the LRU stack.
 */
VegaTlbEntry *
GpuTLB::tlbLookup(const RequestPtr &req, bool update_stats)
{
Addr vaddr = req->getVaddr();
Addr alignedVaddr = pageAlign(vaddr);
DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
// update LRU stack on a hit
VegaTlbEntry *entry = lookup(alignedVaddr, true);
if (!update_stats) {
// functional tlb access for memory initialization
// i.e., memory seeding or instr. seeding -> don't update
// TLB and stats
return entry;
}
stats.localNumTLBAccesses++;
if (!entry) {
stats.localNumTLBMisses++;
} else {
stats.localNumTLBHits++;
}
return entry;
}
Walker*
GpuTLB::getWalker()
{
return walker;
}
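/**
 * Checkpointing of TLB contents is not implemented; serialize() and
 * unserialize() are empty, so the TLB comes up cold after a
 * checkpoint restore.
 */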
void
GpuTLB::serialize(CheckpointOut &cp) const
{
}
void
GpuTLB::unserialize(CheckpointIn &cp)
{
}
/**
* Do the TLB lookup for this coalesced request and schedule
* another event <TLB access latency> cycles later.
*/
void
GpuTLB::issueTLBLookup(PacketPtr pkt)
{
assert(pkt);
assert(pkt->senderState);
/**
* The page size is not fixed in Vega and tracking events by VPN could
* potentially lead to redundant page walks by using the smallest page
* size. The actual VPN can be determined after the first walk is done
* and fixed up later.
*/
Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
VegaISA::PageBytes);
GpuTranslationState *sender_state =
safe_cast<GpuTranslationState*>(pkt->senderState);
bool update_stats = !sender_state->isPrefetch;
DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
virt_page_addr);
int req_cnt = sender_state->reqCnt.back();
if (update_stats) {
stats.accessCycles -= (curCycle() * req_cnt);
stats.localCycles -= curCycle();
stats.globalNumTLBAccesses += req_cnt;
}
tlbOutcome lookup_outcome = TLB_MISS;
const RequestPtr &tmp_req = pkt->req;
// Access the TLB and figure out if it's a hit or a miss.
auto entry = tlbLookup(tmp_req, update_stats);
if (entry) {
lookup_outcome = TLB_HIT;
// Put the entry in SenderState
VegaTlbEntry *entry = lookup(virt_page_addr, false);
assert(entry);
// Set if this is a system request
pkt->req->setSystemReq(entry->pte.s);
Addr alignedPaddr = pageAlign(entry->paddr);
sender_state->tlbEntry =
new VegaTlbEntry(1 /* VMID */, virt_page_addr, alignedPaddr,
entry->logBytes, entry->pte);
if (update_stats) {
// the reqCnt has an entry per level, so its size tells us
// which level we are in
sender_state->hitLevel = sender_state->reqCnt.size();
stats.globalNumTLBHits += req_cnt;
}
} else {
if (update_stats)
stats.globalNumTLBMisses += req_cnt;
}
/*
* We now know the TLB lookup outcome (if it's a hit or a miss), as
* well as the TLB access latency.
*
* We create and schedule a new TLBEvent which will help us take the
* appropriate actions (e.g., update TLB on a hit, send request to
* lower level TLB on a miss, or start a page walk if this was the
* last-level TLB)
*/
TLBEvent *tlb_event =
new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
if (translationReturnEvent.count(virt_page_addr)) {
panic("Virtual Page Address %#x already has a return event\n",
virt_page_addr);
}
translationReturnEvent[virt_page_addr] = tlb_event;
assert(tlb_event);
DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
curTick() + cyclesToTicks(Cycles(hitLatency)));
schedule(tlb_event, curTick() + cyclesToTicks(Cycles(hitLatency)));
}
GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr,
tlbOutcome tlb_outcome, PacketPtr _pkt)
: Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
outcome(tlb_outcome), pkt(_pkt)
{
}
/**
 * Do paging protection checks. If the access violates the page
 * protections (e.g., a write to a read-only page), a fatal error is
 * raised.
 */
void
GpuTLB::pagingProtectionChecks(PacketPtr pkt, VegaTlbEntry * tlb_entry,
Mode mode)
{
// Do paging protection checks.
bool badWrite = (!tlb_entry->writable());
if (mode == BaseMMU::Write && badWrite) {
// The page must have been present to get into the TLB in
// the first place. We'll assume the reserved bits are
// fine even though we're not checking them.
fatal("Page fault on addr %lx PTE=%#lx", pkt->req->getVaddr(),
(uint64_t)tlb_entry->pte);
}
}
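/**
 * Callback from the page table walker when a timing walk completes.
 * The PPN from the returned PTE is combined with the low-order vaddr
 * bits (selected by logBytes, which encodes the page/fragment size)
 * to form the physical address, which is set on the request before
 * the packet is passed to handleTranslationReturn as a TLB_MISS.
 */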
void
GpuTLB::walkerResponse(VegaTlbEntry& entry, PacketPtr pkt)
{
DPRINTF(GPUTLB, "WalkerResponse for %#lx. Entry: (%#lx, %#lx, %#lx)\n",
pkt->req->getVaddr(), entry.vaddr, entry.paddr, entry.size());
Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
VegaISA::PageBytes);
Addr page_addr = entry.pte.ppn << VegaISA::PageShift;
Addr paddr = page_addr + (entry.vaddr & mask(entry.logBytes));
pkt->req->setPaddr(paddr);
pkt->req->setSystemReq(entry.pte.s);
GpuTranslationState *sender_state =
safe_cast<GpuTranslationState*>(pkt->senderState);
sender_state->tlbEntry = new VegaTlbEntry(entry);
handleTranslationReturn(virt_page_addr, TLB_MISS, pkt);
}
/**
 * handleTranslationReturn is called on a TLB hit,
 * when a TLB miss returns, or when a page fault returns.
 * The latter calls it with TLB_MISS as the tlbOutcome.
 */
void
GpuTLB::handleTranslationReturn(Addr virt_page_addr,
tlbOutcome tlb_outcome, PacketPtr pkt)
{
assert(pkt);
Addr vaddr = pkt->req->getVaddr();
GpuTranslationState *sender_state =
safe_cast<GpuTranslationState*>(pkt->senderState);
Mode mode = sender_state->tlbMode;
VegaTlbEntry *local_entry, *new_entry;
int req_cnt = sender_state->reqCnt.back();
bool update_stats = !sender_state->isPrefetch;
if (update_stats) {
stats.accessCycles += (req_cnt * curCycle());
stats.localCycles += curCycle();
}
if (tlb_outcome == TLB_HIT) {
DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
vaddr);
local_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
} else {
DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
vaddr);
/**
* We are returning either from a page walk or from a hit at a
* lower TLB level. The senderState should be "carrying" a pointer
* to the correct TLBEntry.
*/
new_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
assert(new_entry);
local_entry = new_entry;
if (allocationPolicy) {
assert(new_entry->pte);
DPRINTF(GPUTLB, "allocating entry w/ addr %#lx of size %#lx\n",
virt_page_addr, new_entry->size());
local_entry = insert(virt_page_addr, *new_entry);
}
assert(local_entry);
}
/**
* At this point the packet carries an up-to-date tlbEntry pointer
* in its senderState.
* Next step is to do the paging protection checks.
*/
DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
"while paddr was %#x.\n", local_entry->vaddr,
local_entry->paddr);
pagingProtectionChecks(pkt, local_entry, mode);
int page_size = local_entry->size();
Addr paddr = local_entry->paddr + (vaddr & (page_size - 1));
DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
// Since this packet will be sent through the cpu side port, it must be
// converted to a response pkt if it is not one already
if (pkt->isRequest()) {
pkt->makeTimingResponse();
}
pkt->req->setPaddr(paddr);
if (local_entry->uncacheable()) {
pkt->req->setFlags(Request::UNCACHEABLE);
}
// send packet back to coalescer
cpuSidePort[0]->sendTimingResp(pkt);
// schedule cleanup event
cleanupQueue.push(virt_page_addr);
DPRINTF(GPUTLB, "Scheduled %#lx for cleanup\n", virt_page_addr);
// Schedule this only once per cycle; the check is required because
// multiple translations might return in the same cycle. This is a
// maximum-priority event and must be on the same cycle as the
// cleanup event in TLBCoalescer to avoid a race with IssueProbeEvent
// caused by TLBCoalescer::MemSidePort::recvReqRetry.
if (!cleanupEvent.scheduled())
schedule(cleanupEvent, curTick());
}
/**
* Here we take the appropriate actions based on the result of the
* TLB lookup.
*/
void
GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
PacketPtr pkt)
{
DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
assert(translationReturnEvent[virtPageAddr]);
assert(pkt);
GpuTranslationState *tmp_sender_state =
safe_cast<GpuTranslationState*>(pkt->senderState);
int req_cnt = tmp_sender_state->reqCnt.back();
bool update_stats = !tmp_sender_state->isPrefetch;
if (outcome == TLB_HIT) {
handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
} else if (outcome == TLB_MISS) {
DPRINTF(GPUTLB, "This is a TLB miss\n");
if (hasMemSidePort) {
// The one cycle added on the return path (see
// MemSidePort::recvTimingResp) represents the delay from when we
// get the reply back until we propagate it to the coalescer above.
/**
* There is a TLB below. Send the coalesced request.
* We actually send the very first packet of all the
* pending packets for this virtual page address.
*/
tmp_sender_state->deviceId = 1;
tmp_sender_state->pasId = 0;
if (!memSidePort[0]->sendTimingReq(pkt)) {
DPRINTF(GPUTLB, "Failed sending translation request to "
"lower level TLB for addr %#x\n", virtPageAddr);
memSidePort[0]->retries.push_back(pkt);
} else {
DPRINTF(GPUTLB, "Sent translation request to lower level "
"TLB for addr %#x\n", virtPageAddr);
}
} else {
// this is the last-level TLB; start a page walk
DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
"addr %#x\n", virtPageAddr);
if (update_stats)
stats.pageTableCycles -= (req_cnt*curCycle());
TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
assert(tlb_event);
tlb_event->updateOutcome(PAGE_WALK);
schedule(tlb_event,
curTick() + cyclesToTicks(Cycles(missLatency2)));
}
} else if (outcome == PAGE_WALK) {
if (update_stats)
stats.pageTableCycles += (req_cnt*curCycle());
// Need to access the page table and update the TLB
DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
virtPageAddr);
Addr base = gpuDevice->getVM().getPageTableBase(1);
Addr vaddr = pkt->req->getVaddr();
walker->setDevRequestor(gpuDevice->vramRequestorId());
// Do page table walk
walker->startTiming(pkt, base, vaddr, BaseMMU::Mode::Read);
} else if (outcome == MISS_RETURN) {
/**
 * We add an extra cycle in the return path of translation
 * requests between the various TLB levels.
 */
handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
} else {
panic("Unexpected TLB outcome %d", outcome);
}
}
void
GpuTLB::TLBEvent::process()
{
tlb->translationReturn(virtPageAddr, outcome, pkt);
}
const char*
GpuTLB::TLBEvent::description() const
{
return "trigger translationDoneEvent";
}
void
GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
{
outcome = _outcome;
}
Addr
GpuTLB::TLBEvent::getTLBEventVaddr()
{
return virtPageAddr;
}
/**
 * recvTimingReq receives a coalesced timing request from a
 * TLBCoalescer and calls issueTLBLookup().
 * It only rejects the packet if we have exceeded the maximum number
 * of outstanding requests for the TLB.
 */
bool
GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
{
bool ret = false;
[[maybe_unused]] Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
VegaISA::PageBytes);
if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
assert(!tlb->translationReturnEvent.count(virt_page_addr));
tlb->issueTLBLookup(pkt);
// update number of outstanding translation requests
tlb->outstandingReqs++;
ret = true;
} else {
DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
tlb->outstandingReqs);
tlb->stats.maxDownstreamReached++;
ret = false;
}
if (tlb->outstandingReqs > tlb->stats.outstandingReqsMax.value())
tlb->stats.outstandingReqsMax = tlb->outstandingReqs;
return ret;
}
/**
 * handleFuncTranslationReturn is called on a TLB hit,
 * when a TLB miss returns, or when a page fault returns.
 * It updates the LRU, inserts the TLB entry on a miss (depending on
 * the allocation policy), and does the required protection checks.
 * It does NOT create a new packet to update the packet's addr; this
 * is done in hsail-gpu code.
 */
void
GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
{
GpuTranslationState *sender_state =
safe_cast<GpuTranslationState*>(pkt->senderState);
Mode mode = sender_state->tlbMode;
Addr vaddr = pkt->req->getVaddr();
VegaTlbEntry *local_entry, *new_entry;
if (tlb_outcome == TLB_HIT) {
DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
"%#x\n", vaddr);
local_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
} else {
DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
"%#x\n", vaddr);
/**
* We are returning either from a page walk or from a hit at a
* lower TLB level. The senderState should be "carrying" a pointer
* to the correct TLBEntry.
*/
new_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
assert(new_entry);
local_entry = new_entry;
if (allocationPolicy) {
Addr virt_page_addr = roundDown(vaddr, VegaISA::PageBytes);
DPRINTF(GPUTLB, "allocating entry w/ addr %#lx\n",
virt_page_addr);
local_entry = insert(virt_page_addr, *new_entry);
}
assert(local_entry);
}
DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
"while paddr was %#x.\n", local_entry->vaddr,
local_entry->paddr);
/**
 * Do paging checks if it's a normal functional access. If it's for a
 * prefetch, then sometimes you can try to prefetch something that
 * won't pass protection. We don't actually want to fault because
 * there is no demand access to deem this a violation. Just put it in
 * the TLB and it will fault if a future demand access indeed touches
 * it in violation.
 *
 * This feature could be used to explore security issues around
 * speculative memory accesses.
 */
if (!sender_state->isPrefetch && sender_state->tlbEntry)
pagingProtectionChecks(pkt, local_entry, mode);
int page_size = local_entry->size();
Addr paddr = local_entry->paddr + (vaddr & (page_size - 1));
DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
pkt->req->setPaddr(paddr);
if (local_entry->uncacheable())
pkt->req->setFlags(Request::UNCACHEABLE);
}
// This is used for atomic translations. Need to
// make it all happen during the same cycle.
void
GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
{
GpuTranslationState *sender_state =
safe_cast<GpuTranslationState*>(pkt->senderState);
bool update_stats = !sender_state->isPrefetch;
Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
VegaISA::PageBytes);
// Do the TLB lookup; stats are updated only for demand
// (non-prefetch) accesses.
bool success = tlb->tlbLookup(pkt->req, update_stats);
tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
// functional mode means no coalescing
// global metrics are the same as the local metrics
if (update_stats) {
tlb->stats.globalNumTLBAccesses++;
if (success) {
sender_state->hitLevel = sender_state->reqCnt.size();
tlb->stats.globalNumTLBHits++;
} else {
tlb->stats.globalNumTLBMisses++;
}
}
if (!success) {
if (tlb->hasMemSidePort) {
// there is a TLB below -> propagate down the TLB hierarchy
tlb->memSidePort[0]->sendFunctional(pkt);
// If no valid translation from a prefetch, then just return
if (sender_state->isPrefetch && !pkt->req->hasPaddr())
return;
} else {
// Need to access the page table and update the TLB
DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
virt_page_addr);
Addr vaddr = pkt->req->getVaddr();
[[maybe_unused]] Addr alignedVaddr =
tlb->pageAlign(virt_page_addr);
assert(alignedVaddr == virt_page_addr);
unsigned logBytes;
PageTableEntry pte;
// Initialize walker state for VMID
Addr base = tlb->gpuDevice->getVM().getPageTableBase(1);
tlb->walker->setDevRequestor(tlb->gpuDevice->vramRequestorId());
// Do page table walk
Fault fault = tlb->walker->startFunctional(base, vaddr, pte,
logBytes,
BaseMMU::Mode::Read);
if (fault != NoFault) {
fatal("Translation fault in TLB at %d!", __LINE__);
}
// PPN is already shifted by fragment so we only shift by native
// page size. Fragment is still used via logBytes to select lower
// bits from vaddr.
Addr page_addr = pte.ppn << PageShift;
Addr paddr = page_addr + (vaddr & mask(logBytes));
Addr alignedPaddr = tlb->pageAlign(paddr);
pkt->req->setPaddr(paddr);
pkt->req->setSystemReq(pte.s);
if (!sender_state->isPrefetch) {
assert(paddr);
DPRINTF(GPUTLB, "Mapping %#x to %#x\n", vaddr, paddr);
sender_state->tlbEntry =
new VegaTlbEntry(1 /* VMID */, virt_page_addr,
alignedPaddr, logBytes, pte);
} else {
// If this was a prefetch, then do the normal thing if it
// was a successful translation. Otherwise, send an empty
// TLB entry back so that it can be figured out as empty
// and handled accordingly.
if (paddr) {
DPRINTF(GPUTLB, "Mapping %#x to %#x\n", vaddr, paddr);
sender_state->tlbEntry =
new VegaTlbEntry(1 /* VMID */, virt_page_addr,
alignedPaddr, logBytes, pte);
} else {
DPRINTF(GPUPrefetch, "Prefetch failed %#x\n", vaddr);
sender_state->tlbEntry = nullptr;
return;
}
}
}
} else {
VegaTlbEntry *entry = tlb->lookup(virt_page_addr, update_stats);
assert(entry);
if (sender_state->isPrefetch) {
DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
entry->vaddr);
}
sender_state->tlbEntry = new VegaTlbEntry(1 /* VMID */, entry->vaddr,
entry->paddr, entry->logBytes,
entry->pte);
}
// handleFuncTranslationReturn populates pkt->req with the paddr of
// the translation. If no translation happened (i.e., a prefetch
// failed), the early returns above keep it from executing.
tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
}
void
GpuTLB::CpuSidePort::recvReqRetry()
{
// The CPUSidePort never sends anything but replies. No retries
// expected.
panic("recvReqRetry called");
}
AddrRangeList
GpuTLB::CpuSidePort::getAddrRanges() const
{
// currently not checked by the requestor
AddrRangeList ranges;
return ranges;
}
/**
* MemSidePort receives the packet back.
* We need to call the handleTranslationReturn
* and propagate up the hierarchy.
*/
bool
GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
{
Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
VegaISA::PageBytes);
DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
virt_page_addr);
TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
assert(tlb_event);
assert(virt_page_addr == tlb_event->getTLBEventVaddr());
tlb_event->updateOutcome(MISS_RETURN);
tlb->schedule(tlb_event, curTick()+tlb->clockPeriod());
return true;
}
void
GpuTLB::MemSidePort::recvReqRetry()
{
// No retries should reach the TLB. The retries
// should only reach the TLBCoalescer.
panic("recvReqRetry called");
}
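/**
 * Drain the cleanup queue: delete the TLBEvent for each completed
 * translation, remove its translationReturnEvent entry, and
 * decrement outstandingReqs. Finally, poke the coalescer(s) above to
 * retry any pending requests.
 */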
void
GpuTLB::cleanup()
{
while (!cleanupQueue.empty()) {
Addr cleanup_addr = cleanupQueue.front();
cleanupQueue.pop();
DPRINTF(GPUTLB, "Deleting return event for %#lx\n", cleanup_addr);
// delete TLBEvent
TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
delete old_tlb_event;
translationReturnEvent.erase(cleanup_addr);
// update number of outstanding requests
outstandingReqs--;
}
/** the higher level coalescer should retry if it has
* any pending requests.
*/
for (int i = 0; i < cpuSidePort.size(); ++i) {
cpuSidePort[i]->sendRetryReq();
}
}
GpuTLB::VegaTLBStats::VegaTLBStats(statistics::Group *parent)
: statistics::Group(parent),
ADD_STAT(maxDownstreamReached, "Number of refused translation requests"),
ADD_STAT(outstandingReqsMax, "Maximum count in coalesced request queue"),
ADD_STAT(localNumTLBAccesses, "Number of TLB accesses (coalesced requests)"),
ADD_STAT(localNumTLBHits, "Number of TLB hits (coalesced requests)"),
ADD_STAT(localNumTLBMisses, "Number of TLB misses (coalesced requests)"),
ADD_STAT(localTLBMissRate, "TLB miss rate (coalesced requests)"),
ADD_STAT(globalNumTLBAccesses, "Number of TLB accesses (uncoalesced requests)"),
ADD_STAT(globalNumTLBHits, "Number of TLB hits (uncoalesced requests)"),
ADD_STAT(globalNumTLBMisses, "Number of TLB misses (uncoalesced requests)"),
ADD_STAT(globalTLBMissRate, "TLB miss rate (uncoalesced requests)"),
ADD_STAT(accessCycles, "Cycles spent accessing this TLB level"),
ADD_STAT(pageTableCycles, "Cycles spent accessing the page table"),
ADD_STAT(localCycles, "Number of cycles spent in queue for all "
"incoming reqs"),
ADD_STAT(localLatency, "Avg. latency over incoming coalesced reqs")
{
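// These are formula stats: they are evaluated from the scalar
// counters above when statistics are dumped.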
localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;
globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
localLatency = localCycles / localNumTLBAccesses;
}
} // namespace VegaISA
} // namespace gem5