arch-vega: Improve non-native page size support

Vega allows page sizes that are any integer multiple of 4kB. However,
the current implementation is designed primarily around the native 4kB
page size. To support variable page sizes, the physical address
calculation needs to add the virtual page offset to the base physical
address rather than bitwise-OR it in. Bitwise-OR assumes that physical
pages are aligned to their page size, which is generally not the case
for very large pages (1GB+).
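
As an illustration (the numbers below are made up for this sketch, not
taken from the test), consider a 2MB page whose physical base address
is only 4kB-aligned:

    #include <cassert>
    #include <cstdint>

    int main() {
        const uint64_t page_size  = 0x200000;        // 2MB page
        const uint64_t page_paddr = 0x1234000;       // 4kB-aligned base, not 2MB-aligned
        const uint64_t vaddr      = 0x7f0000405000;
        const uint64_t offset     = vaddr & (page_size - 1);  // 0x5000

        // Bitwise-OR merges the overlapping bits instead of carrying:
        assert((page_paddr | offset) == 0x1235000);  // wrong physical address
        // Addition preserves the unaligned base:
        assert((page_paddr + offset) == 0x1239000);  // correct physical address
        return 0;
    }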

This changeset updates all of the physical address computations to add
the virtual offset to the physical page address. This fixes many GPUFS
applications that use larger pages. The change was tested by
hipMalloc'ing ~5GB to induce creation of a large page; the test
application now passes verification with this change.
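
The stimulus was along the lines of the following (a minimal sketch,
assuming a single ~5GB hipMalloc is enough to trigger the large-page
path; this is not the actual verification application):

    #include <hip/hip_runtime.h>
    #include <cstddef>

    int main() {
        void *ptr = nullptr;
        const size_t bytes = 5ULL * 1024 * 1024 * 1024;  // ~5GB, backed by a large page
        if (hipMalloc(&ptr, bytes) != hipSuccess)
            return 1;
        // ... launch a kernel that touches the buffer and verify results ...
        hipFree(ptr);
        return 0;
    }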

Change-Id: Ic8d1475e001def443f3e4ab609449bca0c40b638
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64751
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/arch/amdgpu/vega/tlb.cc b/src/arch/amdgpu/vega/tlb.cc
index cee445f..c3dd576 100644
--- a/src/arch/amdgpu/vega/tlb.cc
+++ b/src/arch/amdgpu/vega/tlb.cc
@@ -158,24 +158,7 @@
 {
     VegaTlbEntry *newEntry = nullptr;
 
-    /**
-     * vpn holds the virtual page address assuming native page size.
-     * However, we need to check the entry size as Vega supports
-     * flexible page sizes of arbitrary size. The set will assume
-     * native page size but the vpn needs to be fixed up to consider
-     * the flexible page size.
-     */
-    Addr real_vpn = vpn & ~(entry.size() - 1);
-
-    /**
-     * Also fix up the ppn as this is used in the math later to compute paddr.
-     */
-    Addr real_ppn = entry.paddr & ~(entry.size() - 1);
-
-    int set = (real_vpn >> VegaISA::PageShift) & setMask;
-
-    DPRINTF(GPUTLB, "Inserted %#lx -> %#lx of size %#lx into set %d\n",
-            real_vpn, real_ppn, entry.size(), set);
+    int set = (entry.vaddr >> VegaISA::PageShift) & setMask;
 
     if (!freeList[set].empty()) {
         newEntry = freeList[set].front();
@@ -186,10 +169,11 @@
     }
 
     *newEntry = entry;
-    newEntry->vaddr = real_vpn;
-    newEntry->paddr = real_ppn;
     entryList[set].push_front(newEntry);
 
+    DPRINTF(GPUTLB, "Inserted %#lx -> %#lx of size %#lx into set %d\n",
+            newEntry->vaddr, newEntry->paddr, entry.size(), set);
+
     return newEntry;
 }
 
@@ -524,7 +508,7 @@
 
     pagingProtectionChecks(pkt, local_entry, mode);
     int page_size = local_entry->size();
-    Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
+    Addr paddr = local_entry->paddr + (vaddr & (page_size - 1));
     DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
 
     // Since this packet will be sent through the cpu side port, it must be
@@ -767,7 +751,7 @@
         pagingProtectionChecks(pkt, local_entry, mode);
 
     int page_size = local_entry->size();
-    Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
+    Addr paddr = local_entry->paddr + (vaddr & (page_size - 1));
     DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
 
     pkt->req->setPaddr(paddr);
diff --git a/src/arch/amdgpu/vega/tlb_coalescer.cc b/src/arch/amdgpu/vega/tlb_coalescer.cc
index d02c9bc..414bb85 100644
--- a/src/arch/amdgpu/vega/tlb_coalescer.cc
+++ b/src/arch/amdgpu/vega/tlb_coalescer.cc
@@ -162,13 +162,6 @@
     int page_size = tlb_entry.size();
     bool uncacheable = tlb_entry.uncacheable();
     int first_hit_level = sender_state->hitLevel;
-
-    // Get the physical page address of the translated request
-    // Using the page_size specified in the TLBEntry allows us
-    // to support different page sizes.
-    Addr phys_page_paddr = pkt->req->getPaddr();
-    phys_page_paddr &= ~(page_size - 1);
-
     bool is_system = pkt->req->systemReq();
 
     for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size(); ++i) {
@@ -190,8 +183,8 @@
          * page offsets.
          */
         if (i) {
-            Addr paddr = phys_page_paddr;
-            paddr |= (local_pkt->req->getVaddr() & (page_size - 1));
+            Addr paddr = first_entry_paddr
+                       + (local_pkt->req->getVaddr() & (page_size - 1));
             local_pkt->req->setPaddr(paddr);
 
             if (uncacheable)