gpu-compute: Update GET_PROCESS_APERTURES IOCTLs

The apertures for non-gfx801 GPUs are set differently.
If the apertures aren't set properly, ROCm will error out.

This change sets the apertures appropriately based on the
gfx version of the simulated GPU. It also adds new functions
to set the scratch and LDS apertures for GFX9, mimicking the
Linux kernel.

Change-Id: I1fa6f60bc20c7b6eb3896057841d96846460a9f8
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/47529
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/gpu-compute/gpu_compute_driver.cc b/src/gpu-compute/gpu_compute_driver.cc
index 472ced4..2fe5275 100644
--- a/src/gpu-compute/gpu_compute_driver.cc
+++ b/src/gpu-compute/gpu_compute_driver.cc
@@ -316,18 +316,50 @@
                  * ensure that the base/limit addresses are
                  * calculated correctly.
                  */
-                args->process_apertures[i].scratch_base
-                    = scratchApeBase(i + 1);
+
+                switch (gfxVersion) {
+                  case GfxVersion::gfx801:
+                  case GfxVersion::gfx803:
+                    args->process_apertures[i].scratch_base =
+                        scratchApeBase(i + 1);
+                    args->process_apertures[i].lds_base =
+                        ldsApeBase(i + 1);
+                    break;
+                  case GfxVersion::gfx900:
+                    args->process_apertures[i].scratch_base =
+                        scratchApeBaseV9();
+                    args->process_apertures[i].lds_base =
+                        ldsApeBaseV9();
+                    break;
+                  default:
+                    fatal("Invalid gfx version\n");
+                }
+
+                // GFX8 and GFX9 set lds and scratch limits the same way
                 args->process_apertures[i].scratch_limit =
                     scratchApeLimit(args->process_apertures[i].scratch_base);
 
-                args->process_apertures[i].lds_base = ldsApeBase(i + 1);
                 args->process_apertures[i].lds_limit =
                     ldsApeLimit(args->process_apertures[i].lds_base);
 
-                args->process_apertures[i].gpuvm_base = gpuVmApeBase(i + 1);
-                args->process_apertures[i].gpuvm_limit =
-                    gpuVmApeLimit(args->process_apertures[i].gpuvm_base);
+                switch (gfxVersion) {
+                  case GfxVersion::gfx801:
+                    args->process_apertures[i].gpuvm_base =
+                        gpuVmApeBase(i + 1);
+                    args->process_apertures[i].gpuvm_limit =
+                        gpuVmApeLimit(args->process_apertures[i].gpuvm_base);
+                    break;
+                  case GfxVersion::gfx803:
+                  case GfxVersion::gfx900:
+                    // Taken from SVM_USE_BASE in Linux kernel
+                    args->process_apertures[i].gpuvm_base = 0x1000000ull;
+                    // Taken from AMDGPU_GMC_HOLE_START in Linux kernel
+                    args->process_apertures[i].gpuvm_limit =
+                        0x0000800000000000ULL - 1;
+                    break;
+                  default:
+                    fatal("Invalid gfx version");
+                }
 
                 // NOTE: Must match ID populated by hsaTopology.py
                 //
@@ -396,14 +428,6 @@
                        47) != 0x1ffff);
                 assert(bits<Addr>(args->process_apertures[i].lds_limit, 63,
                        47) != 0);
-                assert(bits<Addr>(args->process_apertures[i].gpuvm_base, 63,
-                       47) != 0x1ffff);
-                assert(bits<Addr>(args->process_apertures[i].gpuvm_base, 63,
-                       47) != 0);
-                assert(bits<Addr>(args->process_apertures[i].gpuvm_limit, 63,
-                       47) != 0x1ffff);
-                assert(bits<Addr>(args->process_apertures[i].gpuvm_limit, 63,
-                       47) != 0);
             }
 
             args.copyOut(virt_proxy);
@@ -593,13 +617,41 @@
                 TypedBufferArg<kfd_process_device_apertures> ape_args
                     (ioc_args->kfd_process_device_apertures_ptr);
 
-                ape_args->scratch_base = scratchApeBase(i + 1);
+                switch (gfxVersion) {
+                  case GfxVersion::gfx801:
+                  case GfxVersion::gfx803:
+                    ape_args->scratch_base = scratchApeBase(i + 1);
+                    ape_args->lds_base = ldsApeBase(i + 1);
+                    break;
+                  case GfxVersion::gfx900:
+                    ape_args->scratch_base = scratchApeBaseV9();
+                    ape_args->lds_base = ldsApeBaseV9();
+                    break;
+                  default:
+                    fatal("Invalid gfx version\n");
+                }
+
+                // GFX8 and GFX9 set lds and scratch limits the same way
                 ape_args->scratch_limit =
                     scratchApeLimit(ape_args->scratch_base);
-                ape_args->lds_base = ldsApeBase(i + 1);
                 ape_args->lds_limit = ldsApeLimit(ape_args->lds_base);
-                ape_args->gpuvm_base = gpuVmApeBase(i + 1);
-                ape_args->gpuvm_limit = gpuVmApeLimit(ape_args->gpuvm_base);
+
+                switch (gfxVersion) {
+                  case GfxVersion::gfx801:
+                    ape_args->gpuvm_base = gpuVmApeBase(i + 1);
+                    ape_args->gpuvm_limit =
+                        gpuVmApeLimit(ape_args->gpuvm_base);
+                    break;
+                  case GfxVersion::gfx803:
+                  case GfxVersion::gfx900:
+                    // Taken from SVM_USE_BASE in Linux kernel
+                    ape_args->gpuvm_base = 0x1000000ull;
+                    // Taken from AMDGPU_GMC_HOLE_START in Linux kernel
+                    ape_args->gpuvm_limit = 0x0000800000000000ULL - 1;
+                    break;
+                  default:
+                    fatal("Invalid gfx version\n");
+                }
 
                 // NOTE: Must match ID populated by hsaTopology.py
                 if (isdGPU) {
@@ -631,10 +683,6 @@
                 assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0);
                 assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0x1ffff);
                 assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0);
-                assert(bits<Addr>(ape_args->gpuvm_base, 63, 47) != 0x1ffff);
-                assert(bits<Addr>(ape_args->gpuvm_base, 63, 47) != 0);
-                assert(bits<Addr>(ape_args->gpuvm_limit, 63, 47) != 0x1ffff);
-                assert(bits<Addr>(ape_args->gpuvm_limit, 63, 47) != 0);
 
                 ape_args.copyOut(virt_proxy);
             }
@@ -895,6 +943,14 @@
     return ((Addr)gpuNum << 61) + 0x100000000L;
 }
 
+// Scratch aperture base for GFX9 devices; fixed, not derived from a GPU id.
+// From drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c in the Linux kernel
+Addr
+GPUComputeDriver::scratchApeBaseV9() const
+{
+    return ((Addr)0x1 << 48);
+}
+
 Addr
 GPUComputeDriver::scratchApeLimit(Addr apeBase) const
 {
@@ -907,6 +963,14 @@
     return ((Addr)gpuNum << 61) + 0x0;
 }
 
+// LDS aperture base for GFX9 devices; fixed, not derived from a GPU id.
+// From drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c in the Linux kernel
+Addr
+GPUComputeDriver::ldsApeBaseV9() const
+{
+    return ((Addr)0x2 << 48);
+}
+
 Addr
 GPUComputeDriver::ldsApeLimit(Addr apeBase) const
 {
diff --git a/src/gpu-compute/gpu_compute_driver.hh b/src/gpu-compute/gpu_compute_driver.hh
index 4408052..9441baa 100644
--- a/src/gpu-compute/gpu_compute_driver.hh
+++ b/src/gpu-compute/gpu_compute_driver.hh
@@ -228,8 +228,10 @@
     Addr gpuVmApeBase(int gpuNum) const;
     Addr gpuVmApeLimit(Addr apeBase) const;
     Addr scratchApeBase(int gpuNum) const;
+    Addr scratchApeBaseV9() const;
     Addr scratchApeLimit(Addr apeBase) const;
     Addr ldsApeBase(int gpuNum) const;
+    Addr ldsApeBaseV9() const;
     Addr ldsApeLimit(Addr apeBase) const;
 
     /**