arch-vega: Improve non-native page size support

Vega allows page sizes that are any integer multiple of 4kB. However,
the current implementation is designed primarily around the native 4kB
page size. To support variable page sizes, the physical address
calculation needs to add the virtual page offset to the base physical
address rather than bitwise-OR it in. Bitwise-OR assumes that physical
pages are aligned to their page size, which is generally not the case
for very large pages (1GB+).
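
As an illustration (the numbers below are made up for this sketch, not
taken from the test), consider a 2MB page whose physical base address
is only 4kB-aligned:

    #include <cassert>
    #include <cstdint>

    int main() {
        const uint64_t page_size  = 0x200000;        // 2MB page
        const uint64_t page_paddr = 0x1234000;       // 4kB-aligned base, not 2MB-aligned
        const uint64_t vaddr      = 0x7f0000405000;
        const uint64_t offset     = vaddr & (page_size - 1);  // 0x5000

        // Bitwise-OR merges the overlapping bits instead of carrying:
        assert((page_paddr | offset) == 0x1235000);  // wrong physical address
        // Addition preserves the unaligned base:
        assert((page_paddr + offset) == 0x1239000);  // correct physical address
        return 0;
    }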

This changeset updates all of the physical address computations to add
the virtual offset to the physical page address. This fixes many GPUFS
applications that use larger pages. The change was tested by
hipMalloc'ing ~5GB to induce creation of a large page; the test
application now passes verification with this change.
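
The stimulus was along the lines of the following (a minimal sketch,
assuming a single ~5GB hipMalloc is enough to trigger the large-page
path; this is not the actual verification application):

    #include <hip/hip_runtime.h>
    #include <cstddef>

    int main() {
        void *ptr = nullptr;
        const size_t bytes = 5ULL * 1024 * 1024 * 1024;  // ~5GB, backed by a large page
        if (hipMalloc(&ptr, bytes) != hipSuccess)
            return 1;
        // ... launch a kernel that touches the buffer and verify results ...
        hipFree(ptr);
        return 0;
    }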

Change-Id: Ic8d1475e001def443f3e4ab609449bca0c40b638
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64751
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/arch/amdgpu/vega/tlb.cc b/src/arch/amdgpu/vega/tlb.cc
index cee445f..c3dd576 100644
--- a/src/arch/amdgpu/vega/tlb.cc
+++ b/src/arch/amdgpu/vega/tlb.cc
@@ -158,24 +158,7 @@
 {
     VegaTlbEntry *newEntry = nullptr;
 
-    /**
-     * vpn holds the virtual page address assuming native page size.
-     * However, we need to check the entry size as Vega supports
-     * flexible page sizes of arbitrary size. The set will assume
-     * native page size but the vpn needs to be fixed up to consider
-     * the flexible page size.
-     */
-    Addr real_vpn = vpn & ~(entry.size() - 1);
-
-    /**
-     * Also fix up the ppn as this is used in the math later to compute paddr.
-     */
-    Addr real_ppn = entry.paddr & ~(entry.size() - 1);
-
-    int set = (real_vpn >> VegaISA::PageShift) & setMask;
-
-    DPRINTF(GPUTLB, "Inserted %#lx -> %#lx of size %#lx into set %d\n",
-            real_vpn, real_ppn, entry.size(), set);
+    int set = (entry.vaddr >> VegaISA::PageShift) & setMask;
 
     if (!freeList[set].empty()) {
         newEntry = freeList[set].front();
@@ -186,10 +169,11 @@
     }
 
     *newEntry = entry;
-    newEntry->vaddr = real_vpn;
-    newEntry->paddr = real_ppn;
     entryList[set].push_front(newEntry);
 
+    DPRINTF(GPUTLB, "Inserted %#lx -> %#lx of size %#lx into set %d\n",
+            newEntry->vaddr, newEntry->paddr, entry.size(), set);
+
     return newEntry;
 }
 
@@ -524,7 +508,7 @@
 
     pagingProtectionChecks(pkt, local_entry, mode);
     int page_size = local_entry->size();
-    Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
+    Addr paddr = local_entry->paddr + (vaddr & (page_size - 1));
     DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
 
     // Since this packet will be sent through the cpu side port, it must be
@@ -767,7 +751,7 @@
         pagingProtectionChecks(pkt, local_entry, mode);
 
     int page_size = local_entry->size();
-    Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
+    Addr paddr = local_entry->paddr + (vaddr & (page_size - 1));
     DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
 
     pkt->req->setPaddr(paddr);
diff --git a/src/arch/amdgpu/vega/tlb_coalescer.cc b/src/arch/amdgpu/vega/tlb_coalescer.cc
index d02c9bc..414bb85 100644
--- a/src/arch/amdgpu/vega/tlb_coalescer.cc
+++ b/src/arch/amdgpu/vega/tlb_coalescer.cc
@@ -162,13 +162,6 @@
     int page_size = tlb_entry.size();
     bool uncacheable = tlb_entry.uncacheable();
     int first_hit_level = sender_state->hitLevel;
-
-    // Get the physical page address of the translated request
-    // Using the page_size specified in the TLBEntry allows us
-    // to support different page sizes.
-    Addr phys_page_paddr = pkt->req->getPaddr();
-    phys_page_paddr &= ~(page_size - 1);
-
     bool is_system = pkt->req->systemReq();
 
     for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size(); ++i) {
@@ -190,8 +183,8 @@
          * page offsets.
          */
         if (i) {
-            Addr paddr = phys_page_paddr;
-            paddr |= (local_pkt->req->getVaddr() & (page_size - 1));
+            Addr paddr = first_entry_paddr
+                       + (local_pkt->req->getVaddr() & (page_size - 1));
             local_pkt->req->setPaddr(paddr);
 
             if (uncacheable)