dev-amdgpu: Handle ring buffer wrap for PM4 queue

Change-Id: I27bc274327838add709423b072d437c4e727a714
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65431
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
(cherry picked from commit 623e2d3dac3e75c67b4e1b8f6a7113f0ab376960)
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65452
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
diff --git a/src/dev/amdgpu/pm4_mmio.hh b/src/dev/amdgpu/pm4_mmio.hh
index a3ce5f1..3801223 100644
--- a/src/dev/amdgpu/pm4_mmio.hh
+++ b/src/dev/amdgpu/pm4_mmio.hh
@@ -60,6 +60,7 @@
 #define mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI                               0x1251
 #define mmCP_HQD_PQ_WPTR_POLL_ADDR                                    0x1252
 #define mmCP_HQD_PQ_WPTR_POLL_ADDR_HI                                 0x1253
+#define mmCP_HQD_PQ_CONTROL                                           0x1256
 #define mmCP_HQD_IB_CONTROL                                           0x125a
 #define mmCP_HQD_PQ_WPTR_LO                                           0x127b
 #define mmCP_HQD_PQ_WPTR_HI                                           0x127c
diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc
index 404beab..c8e6320 100644
--- a/src/dev/amdgpu/pm4_packet_processor.cc
+++ b/src/dev/amdgpu/pm4_packet_processor.cc
@@ -147,8 +147,8 @@
     gpuDevice->setDoorbellType(offset, qt);
 
     DPRINTF(PM4PacketProcessor, "New PM4 queue %d, base: %p offset: %p, me: "
-            "%d, pipe %d queue: %d\n", id, q->base(), q->offset(), q->me(),
-            q->pipe(), q->queue());
+            "%d, pipe %d queue: %d size: %d\n", id, q->base(), q->offset(),
+            q->me(), q->pipe(), q->queue(), q->size());
 }
 
 void
@@ -784,6 +784,9 @@
       case mmCP_HQD_PQ_WPTR_POLL_ADDR_HI:
         setHqdPqWptrPollAddrHi(pkt->getLE<uint32_t>());
         break;
+      case mmCP_HQD_PQ_CONTROL:
+        setHqdPqControl(pkt->getLE<uint32_t>());
+        break;
       case mmCP_HQD_IB_CONTROL:
         setHqdIbCtrl(pkt->getLE<uint32_t>());
         break;
@@ -906,6 +909,12 @@
 }
 
 void
+PM4PacketProcessor::setHqdPqControl(uint32_t data)
+{
+    kiq.hqd_pq_control = data; // raw mmCP_HQD_PQ_CONTROL write, unmodified
+}
+
+void
 PM4PacketProcessor::setHqdIbCtrl(uint32_t data)
 {
     kiq.hqd_ib_control = data;
diff --git a/src/dev/amdgpu/pm4_packet_processor.hh b/src/dev/amdgpu/pm4_packet_processor.hh
index 4806671..4617a21 100644
--- a/src/dev/amdgpu/pm4_packet_processor.hh
+++ b/src/dev/amdgpu/pm4_packet_processor.hh
@@ -171,6 +171,7 @@
     void setHqdPqRptrReportAddrHi(uint32_t data);
     void setHqdPqWptrPollAddr(uint32_t data);
     void setHqdPqWptrPollAddrHi(uint32_t data);
+    void setHqdPqControl(uint32_t data);
     void setHqdIbCtrl(uint32_t data);
     void setRbVmid(uint32_t data);
     void setRbCntl(uint32_t data);
diff --git a/src/dev/amdgpu/pm4_queues.hh b/src/dev/amdgpu/pm4_queues.hh
index 19973b1..8b6626d 100644
--- a/src/dev/amdgpu/pm4_queues.hh
+++ b/src/dev/amdgpu/pm4_queues.hh
@@ -396,14 +396,14 @@
     rptr()
     {
         if (ib()) return q->ibBase + q->ibRptr;
-        else return q->base + q->rptr;
+        else return q->base + (q->rptr % size());
     }
 
     Addr
     wptr()
     {
         if (ib()) return q->ibBase + _ibWptr;
-        else return q->base + _wptr;
+        else return q->base + (_wptr % size()); // wrap offset to ring size
     }
 
     Addr
@@ -470,6 +470,19 @@
     uint32_t pipe() { return _pkt.pipe; }
     uint32_t queue() { return _pkt.queueSlot; }
     bool privileged() { return _pkt.queueSel == 0 ? 1 : 0; }
+
+    /**
+     * Ring size used by rptr() and wptr() to wrap the primary queue
+     * pointers: 4 << (N + 1), where N is the low six bits of
+     * hqd_pq_control. Same computation as processMQD. See comment there
+     * for details.
+     *
+     * The literal is 4ULL so the shift is done in at least 64 bits even
+     * where unsigned long is 32 bits wide.
+     * NOTE(review): N >= 61 still yields 0 or an over-wide shift, and the
+     * result is used as a modulus -- confirm such values cannot occur.
+     */
+    uint64_t size() { return 4ULL << ((q->hqd_pq_control & 0x3f) + 1); }
 };
 
 } // namespace gem5