mem: Snoop filter support for large systems

Changed SnoopMask to use std::bitset instead of uint64 so we can simulate
larger systems without having to workaround limitations on the number of
ports. No noticeable performance drop was observed after this change.
The size of the bitset is currently set to 256 which should fit most
needs.

Change-Id: I216882300500e2dcb789889756e73a1033271621
Signed-off-by: Tiago Muck <tiago.muck@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/18791
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/mem/snoop_filter.cc b/src/mem/snoop_filter.cc
index 3e1dae6..8168c3d 100644
--- a/src/mem/snoop_filter.cc
+++ b/src/mem/snoop_filter.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2017 ARM Limited
+ * Copyright (c) 2013-2017,2019 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -49,11 +49,13 @@
 #include "debug/SnoopFilter.hh"
 #include "sim/system.hh"
 
+const int SnoopFilter::SNOOP_MASK_SIZE;
+
 void
 SnoopFilter::eraseIfNullEntry(SnoopFilterCache::iterator& sf_it)
 {
     SnoopItem& sf_item = sf_it->second;
-    if (!(sf_item.requested | sf_item.holder)) {
+    if ((sf_item.requested | sf_item.holder).none()) {
         cachedLocations.erase(sf_it);
         DPRINTF(SnoopFilter, "%s:   Removed SF entry.\n",
                 __func__);
@@ -96,8 +98,7 @@
 
     totRequests++;
     if (is_hit) {
-        // Single bit set -> value is a power of two
-        if (isPow2(interested))
+        if (interested.count() == 1)
             hitSingleRequests++;
         else
             hitMultiRequests++;
@@ -114,8 +115,9 @@
     if (cpkt->needsResponse()) {
         if (!cpkt->cacheResponding()) {
             // Max one request per address per port
-            panic_if(sf_item.requested & req_port, "double request :( " \
-                     "SF value %x.%x\n", sf_item.requested, sf_item.holder);
+            panic_if((sf_item.requested & req_port).any(),
+                     "double request :( SF value %x.%x\n",
+                     sf_item.requested, sf_item.holder);
 
             // Mark in-flight requests to distinguish later on
             sf_item.requested |= req_port;
@@ -126,7 +128,8 @@
             // to the CPU, already -> the response will not be seen by this
             // filter -> we do not need to keep the in-flight request, but make
             // sure that we know that that cluster has a copy
-            panic_if(!(sf_item.holder & req_port), "Need to hold the value!");
+            panic_if((sf_item.holder & req_port).none(),
+                     "Need to hold the value!");
             DPRINTF(SnoopFilter,
                     "%s: not marking request. SF value %x.%x\n",
                     __func__,  sf_item.requested, sf_item.holder);
@@ -134,7 +137,7 @@
     } else { // if (!cpkt->needsResponse())
         assert(cpkt->isEviction());
         // make sure that the sender actually had the line
-        panic_if(!(sf_item.holder & req_port), "requester %x is not a " \
+        panic_if((sf_item.holder & req_port).none(), "requester %x is not a " \
                  "holder :( SF value %x.%x\n", req_port,
                  sf_item.requested, sf_item.holder);
         // CleanEvicts and Writebacks -> the sender and all caches above
@@ -206,8 +209,8 @@
     SnoopMask interested = (sf_item.holder | sf_item.requested);
 
     totSnoops++;
-    // Single bit set -> value is a power of two
-    if (isPow2(interested))
+
+    if (interested.count() == 1)
         hitSingleSnoops++;
     else
         hitMultiSnoops++;
@@ -222,7 +225,7 @@
     assert(cpkt->isWriteback() || cpkt->req->isUncacheable() ||
            (cpkt->isInvalidate() == cpkt->needsWritable()) ||
            cpkt->req->isCacheMaintenance());
-    if (cpkt->isInvalidate() && !sf_item.requested) {
+    if (cpkt->isInvalidate() && sf_item.requested.none()) {
         // Early clear of the holder, if no other request is currently going on
         // @todo: This should possibly be updated even though we do not filter
         // upward snoops
@@ -266,11 +269,12 @@
             __func__,  sf_item.requested, sf_item.holder);
 
     // The source should have the line
-    panic_if(!(sf_item.holder & rsp_mask), "SF value %x.%x does not have "\
-             "the line\n", sf_item.requested, sf_item.holder);
+    panic_if((sf_item.holder & rsp_mask).none(),
+             "SF value %x.%x does not have the line\n",
+             sf_item.requested, sf_item.holder);
 
     // The destination should have had a request in
-    panic_if(!(sf_item.requested & req_mask), "SF value %x.%x missing "\
+    panic_if((sf_item.requested & req_mask).none(), "SF value %x.%x missing "\
              "the original request\n",  sf_item.requested, sf_item.holder);
 
     // If the snoop response has no sharers the line is passed in
@@ -287,7 +291,7 @@
     // @todo Deal with invalidating responses
     sf_item.holder |=  req_mask;
     sf_item.requested &= ~req_mask;
-    assert(sf_item.requested | sf_item.holder);
+    assert((sf_item.requested | sf_item.holder).any());
     DPRINTF(SnoopFilter, "%s:   new SF value %x.%x\n",
             __func__, sf_item.requested, sf_item.holder);
 }
@@ -359,8 +363,9 @@
             __func__,  sf_item.requested, sf_item.holder);
 
     // Make sure we have seen the actual request, too
-    panic_if(!(sf_item.requested & slave_mask), "SF value %x.%x missing "\
-             "request bit\n", sf_item.requested, sf_item.holder);
+    panic_if((sf_item.requested & slave_mask).none(),
+             "SF value %x.%x missing request bit\n",
+             sf_item.requested, sf_item.holder);
 
     sf_item.requested &= ~slave_mask;
     // Update the residency of the cache line.
@@ -376,7 +381,7 @@
         // Any other response implies that a cache above will have the
         // block.
         sf_item.holder |= slave_mask;
-        assert(sf_item.holder | sf_item.requested);
+        assert((sf_item.holder | sf_item.requested).any());
     }
     DPRINTF(SnoopFilter, "%s:   new SF value %x.%x\n",
             __func__, sf_item.requested, sf_item.holder);
diff --git a/src/mem/snoop_filter.hh b/src/mem/snoop_filter.hh
index 85cc75e..9c06615 100644
--- a/src/mem/snoop_filter.hh
+++ b/src/mem/snoop_filter.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016 ARM Limited
+ * Copyright (c) 2013-2016,2019 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -45,6 +45,7 @@
 #ifndef __MEM_SNOOP_FILTER_HH__
 #define __MEM_SNOOP_FILTER_HH__
 
+#include <bitset>
 #include <unordered_map>
 #include <utility>
 
@@ -86,6 +87,10 @@
  */
 class SnoopFilter : public SimObject {
   public:
+
+    // Change for systems with more than 256 ports tracked by this object
+    static const int SNOOP_MASK_SIZE = 256;
+
     typedef std::vector<QueuedSlavePort*> SnoopList;
 
     SnoopFilter (const SnoopFilterParams *p) :
@@ -114,9 +119,9 @@
         }
 
         // make sure we can deal with this many ports
-        fatal_if(id > 8 * sizeof(SnoopMask),
+        fatal_if(id > SNOOP_MASK_SIZE,
                  "Snoop filter only supports %d snooping ports, got %d\n",
-                 8 * sizeof(SnoopMask), id);
+                 SNOOP_MASK_SIZE, id);
     }
 
     /**
@@ -198,13 +203,9 @@
 
     /**
      * The underlying type for the bitmask we use for tracking. This
-     * limits the number of snooping ports supported per crossbar. For
-     * the moment it is an uint64_t to offer maximum
-     * scalability. However, it is possible to use e.g. a uint16_t or
-     * uint32_to slim down the footprint of the hash map (and
-     * ultimately improve the simulation performance).
+     * limits the number of snooping ports supported per crossbar.
      */
-    typedef uint64_t SnoopMask;
+    typedef std::bitset<SNOOP_MASK_SIZE> SnoopMask;
 
     /**
     * Per cache line item tracking a bitmask of SlavePorts who have an
@@ -314,7 +315,7 @@
 {
     SnoopList res;
     for (const auto& p : slavePorts)
-        if (port_mask & portToMask(*p))
+        if ((port_mask & portToMask(*p)).any())
             res.push_back(p);
     return res;
 }