/*
* Copyright (c) 2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "dev/amdgpu/amdgpu_vm.hh"
#include "arch/amdgpu/vega/pagetable_walker.hh"
#include "arch/amdgpu/vega/tlb.hh"
#include "arch/generic/mmu.hh"
#include "base/trace.hh"
#include "debug/AMDGPUDevice.hh"
#include "dev/amdgpu/amdgpu_defines.hh"
#include "mem/packet_access.hh"
namespace gem5
{
AMDGPUVM::AMDGPUVM()
{
// Zero out contexts
memset(&vmContext0, 0, sizeof(AMDGPUSysVMContext));
vmContexts.resize(AMDGPU_VM_COUNT);
for (int i = 0; i < AMDGPU_VM_COUNT; ++i) {
        memset(&vmContexts[i], 0, sizeof(AMDGPUVMContext));
}
}
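
// The GART aperture is described by VM context 0 (the system VMID): its page
// table base register locates the GART table, and the start/end registers
// bound the aperture, so the GART size is simply end minus start.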
Addr
AMDGPUVM::gartBase()
{
return vmContext0.ptBase;
}
Addr
AMDGPUVM::gartSize()
{
return vmContext0.ptEnd - vmContext0.ptStart;
}
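
// Handle MMIO reads the driver polls on. Invalidation semaphore and ACK
// registers are reported as immediately acquired/complete, and the MMHUB
// frame buffer location registers are sampled from the packet payload to
// track the MMHUB aperture.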
void
AMDGPUVM::readMMIO(PacketPtr pkt, Addr offset)
{
uint32_t value = pkt->getLE<uint32_t>();
switch (offset) {
// MMHUB MMIOs
case mmMMHUB_VM_INVALIDATE_ENG17_SEM:
DPRINTF(AMDGPUDevice, "Marking invalidate ENG17 SEM acquired\n");
pkt->setLE<uint32_t>(1);
break;
case mmMMHUB_VM_INVALIDATE_ENG17_ACK:
        // This is only used by driver initialization and only expects an ACK
        // for VMID 0, which is the first bit in the response.
DPRINTF(AMDGPUDevice, "Telling driver invalidate ENG17 is complete\n");
pkt->setLE<uint32_t>(1);
break;
case mmMMHUB_VM_FB_LOCATION_BASE:
mmhubBase = ((Addr)bits(value, 23, 0) << 24);
DPRINTF(AMDGPUDevice, "MMHUB FB base set to %#x\n", mmhubBase);
break;
case mmMMHUB_VM_FB_LOCATION_TOP:
mmhubTop = ((Addr)bits(value, 23, 0) << 24) | 0xFFFFFFULL;
DPRINTF(AMDGPUDevice, "MMHUB FB top set to %#x\n", mmhubTop);
break;
// GRBM MMIOs
case mmVM_INVALIDATE_ENG17_ACK:
DPRINTF(AMDGPUDevice, "Overwritting invalidation ENG17 ACK\n");
pkt->setLE<uint32_t>(1);
break;
default:
DPRINTF(AMDGPUDevice, "GPUVM read of unknown MMIO %#x\n", offset);
break;
}
}
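
// Handle MMIO writes that program VM context 0: the page table base, start,
// and end addresses, the AGP and frame buffer apertures, and the system
// aperture bounds. Only the address field of each register is kept; the
// remaining bits are masked off before the value is stored.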
void
AMDGPUVM::writeMMIO(PacketPtr pkt, Addr offset)
{
switch (offset) {
// VMID0 MMIOs
case mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32:
vmContext0.ptBaseL = pkt->getLE<uint32_t>();
// Clear extra bits not part of address
vmContext0.ptBaseL = insertBits(vmContext0.ptBaseL, 0, 0, 0);
break;
case mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32:
vmContext0.ptBaseH = pkt->getLE<uint32_t>();
break;
case mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32:
vmContext0.ptStartL = pkt->getLE<uint32_t>();
break;
case mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32:
vmContext0.ptStartH = pkt->getLE<uint32_t>();
break;
case mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32:
vmContext0.ptEndL = pkt->getLE<uint32_t>();
break;
case mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32:
vmContext0.ptEndH = pkt->getLE<uint32_t>();
break;
case mmMC_VM_AGP_TOP: {
uint32_t val = pkt->getLE<uint32_t>();
vmContext0.agpTop = (((Addr)bits(val, 23, 0)) << 24) | 0xffffff;
} break;
case mmMC_VM_AGP_BOT: {
uint32_t val = pkt->getLE<uint32_t>();
vmContext0.agpBot = ((Addr)bits(val, 23, 0)) << 24;
} break;
case mmMC_VM_AGP_BASE: {
uint32_t val = pkt->getLE<uint32_t>();
vmContext0.agpBase = ((Addr)bits(val, 23, 0)) << 24;
} break;
case mmMC_VM_FB_LOCATION_TOP: {
uint32_t val = pkt->getLE<uint32_t>();
vmContext0.fbTop = (((Addr)bits(val, 23, 0)) << 24) | 0xffffff;
} break;
case mmMC_VM_FB_LOCATION_BASE: {
uint32_t val = pkt->getLE<uint32_t>();
vmContext0.fbBase = ((Addr)bits(val, 23, 0)) << 24;
} break;
case mmMC_VM_FB_OFFSET: {
uint32_t val = pkt->getLE<uint32_t>();
vmContext0.fbOffset = ((Addr)bits(val, 23, 0)) << 24;
} break;
case mmMC_VM_SYSTEM_APERTURE_LOW_ADDR: {
uint32_t val = pkt->getLE<uint32_t>();
vmContext0.sysAddrL = ((Addr)bits(val, 29, 0)) << 18;
} break;
case mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR: {
uint32_t val = pkt->getLE<uint32_t>();
vmContext0.sysAddrH = ((Addr)bits(val, 29, 0)) << 18;
} break;
default:
break;
}
}
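
// GPU TLBs register themselves with the VM so that a driver-requested
// invalidation can flush every TLB attached to the device at once.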
void
AMDGPUVM::registerTLB(VegaISA::GpuTLB *tlb)
{
DPRINTF(AMDGPUDevice, "Registered a TLB with device\n");
gpu_tlbs.push_back(tlb);
}
void
AMDGPUVM::invalidateTLBs()
{
DPRINTF(AMDGPUDevice, "Invalidating all TLBs\n");
for (auto &tlb : gpu_tlbs) {
tlb->invalidateAll();
DPRINTF(AMDGPUDevice, " ... TLB invalidated\n");
}
}
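
// Checkpointing. The page table base/start/end registers of VM context 0 are
// packed fields, so they are copied through plain Addr temporaries before
// (un)serialization; the per-VMID page table registers are stored as arrays
// indexed by VMID.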
void
AMDGPUVM::serialize(CheckpointOut &cp) const
{
Addr vm0PTBase = vmContext0.ptBase;
Addr vm0PTStart = vmContext0.ptStart;
Addr vm0PTEnd = vmContext0.ptEnd;
SERIALIZE_SCALAR(vm0PTBase);
SERIALIZE_SCALAR(vm0PTStart);
SERIALIZE_SCALAR(vm0PTEnd);
SERIALIZE_SCALAR(vmContext0.agpBase);
SERIALIZE_SCALAR(vmContext0.agpTop);
SERIALIZE_SCALAR(vmContext0.agpBot);
SERIALIZE_SCALAR(vmContext0.fbBase);
SERIALIZE_SCALAR(vmContext0.fbTop);
SERIALIZE_SCALAR(vmContext0.fbOffset);
SERIALIZE_SCALAR(vmContext0.sysAddrL);
SERIALIZE_SCALAR(vmContext0.sysAddrH);
SERIALIZE_SCALAR(mmhubBase);
SERIALIZE_SCALAR(mmhubTop);
Addr ptBase[AMDGPU_VM_COUNT];
Addr ptStart[AMDGPU_VM_COUNT];
Addr ptEnd[AMDGPU_VM_COUNT];
for (int i = 0; i < AMDGPU_VM_COUNT; i++) {
ptBase[i] = vmContexts[i].ptBase;
ptStart[i] = vmContexts[i].ptStart;
ptEnd[i] = vmContexts[i].ptEnd;
}
SERIALIZE_ARRAY(ptBase, AMDGPU_VM_COUNT);
SERIALIZE_ARRAY(ptStart, AMDGPU_VM_COUNT);
SERIALIZE_ARRAY(ptEnd, AMDGPU_VM_COUNT);
}
void
AMDGPUVM::unserialize(CheckpointIn &cp)
{
// Unserialize requires fields not be packed
Addr vm0PTBase;
Addr vm0PTStart;
Addr vm0PTEnd;
UNSERIALIZE_SCALAR(vm0PTBase);
UNSERIALIZE_SCALAR(vm0PTStart);
UNSERIALIZE_SCALAR(vm0PTEnd);
vmContext0.ptBase = vm0PTBase;
vmContext0.ptStart = vm0PTStart;
vmContext0.ptEnd = vm0PTEnd;
UNSERIALIZE_SCALAR(vmContext0.agpBase);
UNSERIALIZE_SCALAR(vmContext0.agpTop);
UNSERIALIZE_SCALAR(vmContext0.agpBot);
UNSERIALIZE_SCALAR(vmContext0.fbBase);
UNSERIALIZE_SCALAR(vmContext0.fbTop);
UNSERIALIZE_SCALAR(vmContext0.fbOffset);
UNSERIALIZE_SCALAR(vmContext0.sysAddrL);
UNSERIALIZE_SCALAR(vmContext0.sysAddrH);
UNSERIALIZE_SCALAR(mmhubBase);
UNSERIALIZE_SCALAR(mmhubTop);
Addr ptBase[AMDGPU_VM_COUNT];
Addr ptStart[AMDGPU_VM_COUNT];
Addr ptEnd[AMDGPU_VM_COUNT];
UNSERIALIZE_ARRAY(ptBase, AMDGPU_VM_COUNT);
UNSERIALIZE_ARRAY(ptStart, AMDGPU_VM_COUNT);
UNSERIALIZE_ARRAY(ptEnd, AMDGPU_VM_COUNT);
for (int i = 0; i < AMDGPU_VM_COUNT; i++) {
vmContexts[i].ptBase = ptBase[i];
vmContexts[i].ptStart = ptStart[i];
vmContexts[i].ptEnd = ptEnd[i];
}
}
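
// AGP aperture translation: addresses inside the AGP window map linearly, so
// the physical address is the offset within the aperture (vaddr - AGP_BOT)
// added to AGP_BASE. Each call is clipped at the next AGP page boundary.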
void
AMDGPUVM::AGPTranslationGen::translate(Range &range) const
{
assert(vm->inAGP(range.vaddr));
Addr next = roundUp(range.vaddr, AMDGPU_AGP_PAGE_SIZE);
if (next == range.vaddr)
next += AMDGPU_AGP_PAGE_SIZE;
range.size = std::min(range.size, next - range.vaddr);
range.paddr = range.vaddr - vm->getAGPBot() + vm->getAGPBase();
DPRINTF(AMDGPUDevice, "AMDGPUVM: AGP translation %#lx -> %#lx\n",
range.vaddr, range.paddr);
}
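
// GART translation: the GART is a single-level table keyed by the 4 KiB page
// frame number of the virtual address. Bits 47:12 of the matching PTE supply
// the physical frame, and the low 12 bits carry over from the virtual
// address. Each call is clipped to a single GART page.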
void
AMDGPUVM::GARTTranslationGen::translate(Range &range) const
{
Addr next = roundUp(range.vaddr, AMDGPU_GART_PAGE_SIZE);
if (next == range.vaddr)
next += AMDGPU_GART_PAGE_SIZE;
range.size = std::min(range.size, next - range.vaddr);
Addr gart_addr = bits(range.vaddr, 63, 12);
// This table is a bit hard to iterate over. If we cross a page, the next
// PTE is not necessarily the next entry but actually 7 entries away.
Addr lsb = bits(gart_addr, 2, 0);
gart_addr += lsb * 7;
// GART is a single level translation, so the value at the "virtual" addr
// is the PTE containing the physical address.
auto result = vm->gartTable.find(gart_addr);
if (result == vm->gartTable.end()) {
        // There is no recovery mechanism for invalid GART entries, so there
        // is no reason to raise a fault here. Some PM4 packets carry register
        // addresses which we ignore; in that case just warn and return the
        // vaddr rather than faulting.
        warn("GART translation for %p not found", range.vaddr);
        range.paddr = range.vaddr;
} else {
Addr pte = result->second;
Addr lower_bits = bits(range.vaddr, 11, 0);
range.paddr = (bits(pte, 47, 12) << 12) | lower_bits;
}
DPRINTF(AMDGPUDevice, "AMDGPUVM: GART translation %#lx -> %#lx\n",
range.vaddr, range.paddr);
}
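
// MMHUB aperture translation: addresses in the MMHUB window are direct
// offsets into the frame buffer aperture, so translation subtracts the MMHUB
// base address. Each call is clipped at the next MMHUB page boundary.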
void
AMDGPUVM::MMHUBTranslationGen::translate(Range &range) const
{
assert(vm->inMMHUB(range.vaddr));
Addr next = roundUp(range.vaddr, AMDGPU_MMHUB_PAGE_SIZE);
if (next == range.vaddr)
next += AMDGPU_MMHUB_PAGE_SIZE;
range.size = std::min(range.size, next - range.vaddr);
range.paddr = range.vaddr - vm->getMMHUBBase();
DPRINTF(AMDGPUDevice, "AMDGPUVM: MMHUB translation %#lx -> %#lx\n",
range.vaddr, range.paddr);
}
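
// User (per-VMID) translation: functionally walk the GPU page table
// programmed for this VMID using the Vega page table walker. GPU pages are
// variable sized, so the walker's logBytes result determines how much of the
// request fits within the translated page.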
void
AMDGPUVM::UserTranslationGen::translate(Range &range) const
{
// Get base address of the page table for this vmid
Addr base = vm->getPageTableBase(vmid);
Addr start = vm->getPageTableStart(vmid);
DPRINTF(AMDGPUDevice, "User tl base %#lx start %#lx walker %p\n",
base, start, walker);
bool dummy;
unsigned logBytes;
Addr paddr = range.vaddr;
Fault fault = walker->startFunctional(base, paddr, logBytes,
BaseMMU::Mode::Read, dummy);
if (fault != NoFault) {
fatal("User translation fault");
}
// GPU page size is variable. Use logBytes to determine size.
const Addr page_size = 1 << logBytes;
Addr next = roundUp(range.vaddr, page_size);
if (next == range.vaddr)
        // We don't know the size of the next page, so use the default.
next += AMDGPU_USER_PAGE_SIZE;
range.size = std::min(range.size, next - range.vaddr);
range.paddr = paddr;
}
} // namespace gem5