mem-cache: Accuracy-based rate control for prefetchers

Added a mechanism to control the number of prefetches generated
based on the effectiveness of the prefetches generated so far.

Change-Id: I33af82546f74a5b5ab372c28574b76dd9a1bd46a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/18808
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py
index bf73526..c7ddcda 100644
--- a/src/mem/cache/prefetch/Prefetcher.py
+++ b/src/mem/cache/prefetch/Prefetcher.py
@@ -129,6 +129,18 @@
 
     tag_prefetch = Param.Bool(True, "Tag prefetch with PC of generating access")
 
+    # The throttle_control_percentage controls how many of the candidate
+    # addresses generated by the prefetcher will be finally turned into
+    # prefetch requests
+    # - If set to 100, all candidates can be discarded (one request
+    #   will always be allowed to be generated)
+    # - Setting it to 0 will disable the throttle control, so requests are
+    #   created for all candidates
+    # - If set to 60, 40% of candidates will generate a request, and the
+    #   remaining 60% will be generated depending on the current accuracy
+    throttle_control_percentage = Param.Percent(0, "Percentage of requests \
+        that can be throttled depending on the accuracy of the prefetcher.")
+
 class StridePrefetcher(QueuedPrefetcher):
     type = 'StridePrefetcher'
     cxx_class = 'StridePrefetcher'
diff --git a/src/mem/cache/prefetch/queued.cc b/src/mem/cache/prefetch/queued.cc
index 12f4a36..94df666 100644
--- a/src/mem/cache/prefetch/queued.cc
+++ b/src/mem/cache/prefetch/queued.cc
@@ -96,7 +96,8 @@
         p->max_prefetch_requests_with_pending_translation),
       latency(p->latency), queueSquash(p->queue_squash),
       queueFilter(p->queue_filter), cacheSnoop(p->cache_snoop),
-      tagPrefetch(p->tag_prefetch)
+      tagPrefetch(p->tag_prefetch),
+      throttleControlPct(p->throttle_control_percentage)
 {
 }
 
@@ -108,6 +109,36 @@
     }
 }
 
+size_t
+QueuedPrefetcher::getMaxPermittedPrefetches(size_t total) const
+{
+    /**
+     * Throttle generated prefetches based on the accuracy of the prefetcher.
+     * Accuracy is computed based on the ratio of useful prefetches with
+     * respect to the number of issued prefetches.
+     *
+     * The throttleControlPct controls how many of the candidate addresses
+     * generated by the prefetcher will be finally turned into prefetch
+     * requests
+     * - If set to 100, all candidates can be discarded (one request
+     *   will always be allowed to be generated)
+     * - Setting it to 0 will disable the throttle control, so requests are
+     *   created for all candidates
+     * - If set to 60, 40% of candidates will generate a request, and the
+     *   remaining 60% will be generated depending on the current accuracy
+     */
+
+    size_t max_pfs = total;
+    if (total > 0 && issuedPrefetches > 0) {
+        size_t throttle_pfs = (total * throttleControlPct) / 100;
+        size_t min_pfs = (total - throttle_pfs) == 0 ?
+            1 : (total - throttle_pfs);
+        max_pfs = min_pfs + (total - min_pfs) *
+            usefulPrefetches / issuedPrefetches;
+    }
+    return max_pfs;
+}
+
 void
 QueuedPrefetcher::notify(const PacketPtr &pkt, const PrefetchInfo &pfi)
 {
@@ -132,7 +163,11 @@
     std::vector<AddrPriority> addresses;
     calculatePrefetch(pfi, addresses);
 
+    // Get the maximum number of prefetches that we are allowed to generate
+    size_t max_pfs = getMaxPermittedPrefetches(addresses.size());
+
     // Queue up generated prefetches
+    size_t num_pfs = 0;
     for (AddrPriority& addr_prio : addresses) {
 
         // Block align prefetch address
@@ -150,6 +185,10 @@
                     "inserting into prefetch queue.\n", new_pfi.getAddr());
             // Create and insert the request
             insert(pkt, new_pfi, addr_prio.second);
+            num_pfs += 1;
+            if (num_pfs == max_pfs) {
+                break;
+            }
         } else {
             DPRINTF(HWPrefetch, "Ignoring page crossing prefetch.\n");
         }
diff --git a/src/mem/cache/prefetch/queued.hh b/src/mem/cache/prefetch/queued.hh
index 1ffbc9a..ae4c5e4 100644
--- a/src/mem/cache/prefetch/queued.hh
+++ b/src/mem/cache/prefetch/queued.hh
@@ -163,6 +163,9 @@
     /** Tag prefetch with PC of generating access? */
     const bool tagPrefetch;
 
+    /** Percentage of requests that can be throttled */
+    const unsigned int throttleControlPct;
+
     // STATS
     Stats::Scalar pfIdentified;
     Stats::Scalar pfBufferHit;
@@ -229,6 +232,16 @@
     bool alreadyInQueue(std::list<DeferredPacket> &queue,
                         const PrefetchInfo &pfi, int32_t priority);
 
+    /**
+     * Returns the maximum number of prefetch requests that are allowed
+     * to be created from the number of prefetch candidates provided.
+     * The behavior of this service is controlled with the throttleControlPct
+     * parameter.
+     * @param total number of prefetch candidates generated by the prefetcher
+     * @return the number of these candidates that are allowed to become requests
+     */
+    size_t getMaxPermittedPrefetches(size_t total) const;
+
     RequestPtr createPrefetchRequest(Addr addr, PrefetchInfo const &pfi,
                                         PacketPtr pkt);
 };