dev-arm: Perform SMMUv3 CFG Invalidation at device interface

In the current SMMUv3 model, each device interface
(SMMUv3SlaveInterface) has its own microTLB and mainTLB, caching
translations specific to a device.
Those distributed TLBs are checked for a translation before the
centralized caches (shared by devices), like the configuration cache,
walk cache etc.  This means that if a hit in these TLBs occurs, there
is no need to enter the configuration stage (which is where the STE and
CD are retrieved).  So if we invalidate a cached configuration (in
ConfigCache), we need to invalidate those interface TLB entries as well;
otherwise, in theory, we would keep using the same translation even
after a change in the configuration tables.

Change-Id: I4aa36ba8392a530267517bef7562318b282bee25
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Michiel van Tol <michiel.vantol@arm.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/19813
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/dev/arm/smmu_v3.cc b/src/dev/arm/smmu_v3.cc
index 2853929..f17ef95 100644
--- a/src/dev/arm/smmu_v3.cc
+++ b/src/dev/arm/smmu_v3.cc
@@ -395,6 +395,11 @@
         case CMD_CFGI_STE: {
             DPRINTF(SMMUv3, "CMD_CFGI_STE sid=%#x\n", cmd.dw0.sid);
             configCache.invalidateSID(cmd.dw0.sid);
+
+            for (auto slave_interface : slaveInterfaces) {
+                slave_interface->microTLB->invalidateSID(cmd.dw0.sid);
+                slave_interface->mainTLB->invalidateSID(cmd.dw0.sid);
+            }
             break;
         }
 
@@ -405,12 +410,23 @@
                 // range = 31
                 DPRINTF(SMMUv3, "CMD_CFGI_ALL\n");
                 configCache.invalidateAll();
+
+                for (auto slave_interface : slaveInterfaces) {
+                    slave_interface->microTLB->invalidateAll();
+                    slave_interface->mainTLB->invalidateAll();
+                }
             } else {
                 DPRINTF(SMMUv3, "CMD_CFGI_STE_RANGE\n");
                 const auto start_sid = cmd.dw0.sid & ~((1 << (range + 1)) - 1);
                 const auto end_sid = start_sid + (1 << (range + 1)) - 1;
-                for (auto sid = start_sid; sid <= end_sid; sid++)
+                for (auto sid = start_sid; sid <= end_sid; sid++) {
                     configCache.invalidateSID(sid);
+
+                    for (auto slave_interface : slaveInterfaces) {
+                        slave_interface->microTLB->invalidateSID(sid);
+                        slave_interface->mainTLB->invalidateSID(sid);
+                    }
+                }
             }
             break;
         }
@@ -419,12 +435,24 @@
             DPRINTF(SMMUv3, "CMD_CFGI_CD sid=%#x ssid=%#x\n",
                     cmd.dw0.sid, cmd.dw0.ssid);
             configCache.invalidateSSID(cmd.dw0.sid, cmd.dw0.ssid);
+
+            for (auto slave_interface : slaveInterfaces) {
+                slave_interface->microTLB->invalidateSSID(
+                    cmd.dw0.sid, cmd.dw0.ssid);
+                slave_interface->mainTLB->invalidateSSID(
+                    cmd.dw0.sid, cmd.dw0.ssid);
+            }
             break;
         }
 
         case CMD_CFGI_CD_ALL: {
             DPRINTF(SMMUv3, "CMD_CFGI_CD_ALL sid=%#x\n", cmd.dw0.sid);
             configCache.invalidateSID(cmd.dw0.sid);
+
+            for (auto slave_interface : slaveInterfaces) {
+                slave_interface->microTLB->invalidateSID(cmd.dw0.sid);
+                slave_interface->mainTLB->invalidateSID(cmd.dw0.sid);
+            }
             break;
         }
 
diff --git a/src/dev/arm/smmu_v3_caches.cc b/src/dev/arm/smmu_v3_caches.cc
index 6dcaec6..63f0b05 100644
--- a/src/dev/arm/smmu_v3_caches.cc
+++ b/src/dev/arm/smmu_v3_caches.cc
@@ -258,6 +258,34 @@
 }
 
 void
+SMMUTLB::invalidateSSID(uint32_t sid, uint32_t ssid)
+{
+    // Sets are VA-indexed (cf. invalidateVA's pickSetIdx(va)), so entries
+    // of one (sid, ssid) pair may live in any set: scan all of them.
+    for (Set &set : sets) {
+        for (Entry &e : set) {
+            if (e.sid == sid && e.ssid == ssid)
+                e.valid = false;
+        }
+    }
+}
+
+void
+SMMUTLB::invalidateSID(uint32_t sid)
+{
+    // Walk every entry of every set; entries tagged with the given sid
+    // are marked invalid, all the others are left untouched.
+    for (size_t setIdx = 0; setIdx < sets.size(); setIdx++) {
+        Set &ways = sets[setIdx];
+        for (size_t way = 0; way < ways.size(); way++) {
+            if (ways[way].sid != sid)
+                continue;
+            ways[way].valid = false;
+        }
+    }
+}
+
+void
 SMMUTLB::invalidateVA(Addr va, uint16_t asid, uint16_t vmid)
 {
     Set &set = sets[pickSetIdx(va)];
@@ -334,6 +362,12 @@
 }
 
 size_t
+SMMUTLB::pickSetIdx(uint32_t sid, uint32_t ssid) const
+{
+    return (sid^ssid) % sets.size(); // XOR both IDs, wrap to set count
+}
+
+size_t
 SMMUTLB::pickEntryIdxToReplace(const Set &set, AllocPolicy alloc)
 {
     if (alloc == ALLOC_LAST_WAY)
diff --git a/src/dev/arm/smmu_v3_caches.hh b/src/dev/arm/smmu_v3_caches.hh
index 060f60e..50b293c 100644
--- a/src/dev/arm/smmu_v3_caches.hh
+++ b/src/dev/arm/smmu_v3_caches.hh
@@ -130,6 +130,8 @@
                              bool updStats=true);
     void store(const Entry &incoming, AllocPolicy alloc);
 
+    void invalidateSSID(uint32_t sid, uint32_t ssid);
+    void invalidateSID(uint32_t sid);
     void invalidateVA(Addr va, uint16_t asid, uint16_t vmid);
     void invalidateVAA(Addr va, uint16_t vmid);
     void invalidateASID(uint16_t asid, uint16_t vmid);
@@ -142,6 +144,7 @@
 
     size_t associativity;
 
+    size_t pickSetIdx(uint32_t sid, uint32_t ssid) const;
     size_t pickSetIdx(Addr va) const;
     size_t pickEntryIdxToReplace(const Set &set, AllocPolicy alloc);
 };