mem-ruby: Use CircularQueue for prefetcher's unit filter

The Ruby prefetcher's unit filters (both the positive and the negative
stride filter) behave as circular queues, so use the CircularQueue
class created for this functionality.

This changes the behavior: previously the filter was always scanned
from array index 0 upwards, an order unrelated to where the round-robin
insertion pointer was, and now it iterates from the beginning of the
queue to the end when accessing and updating the filter's contents.

Change-Id: I834be88a33580d5857c38e9bae8b289c5a6250b9
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/24532
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
diff --git a/src/mem/ruby/structures/RubyPrefetcher.cc b/src/mem/ruby/structures/RubyPrefetcher.cc
index 8646b99..02526aa 100644
--- a/src/mem/ruby/structures/RubyPrefetcher.cc
+++ b/src/mem/ruby/structures/RubyPrefetcher.cc
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2020 Inria
  * Copyright (c) 2020 ARM Limited
  * All rights reserved
  *
@@ -40,6 +41,8 @@
 
 #include "mem/ruby/structures/RubyPrefetcher.hh"
 
+#include <cassert>
+
 #include "base/bitfield.hh"
 #include "debug/RubyPrefetcher.hh"
 #include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh"
@@ -54,10 +57,10 @@
 RubyPrefetcher::RubyPrefetcher(const Params *p)
     : SimObject(p), m_num_streams(p->num_streams),
     m_array(p->num_streams), m_train_misses(p->train_misses),
-    m_num_startup_pfs(p->num_startup_pfs), m_num_unit_filters(p->unit_filter),
+    m_num_startup_pfs(p->num_startup_pfs),
     m_num_nonunit_filters(p->nonunit_filter),
-    m_unit_filter(p->unit_filter, 0),
-    m_negative_filter(p->unit_filter, 0),
+    unitFilter(p->unit_filter),
+    negativeFilter(p->unit_filter),
     m_nonunit_filter(p->nonunit_filter, 0),
     m_prefetch_cross_pages(p->cross_page),
     m_page_shift(p->sys->getPageShift())
@@ -65,20 +68,6 @@
     assert(m_num_streams > 0);
     assert(m_num_startup_pfs <= MAX_PF_INFLIGHT);
 
-    // create +1 stride filter
-    m_unit_filter_index = 0;
-    m_unit_filter_hit = new uint32_t[m_num_unit_filters];
-    for (uint32_t i =0; i < m_num_unit_filters; i++) {
-        m_unit_filter_hit[i] = 0;
-    }
-
-    // create -1 stride filter
-    m_negative_filter_index = 0;
-    m_negative_filter_hit = new uint32_t[m_num_unit_filters];
-    for (int i =0; i < m_num_unit_filters; i++) {
-        m_negative_filter_hit[i] = 0;
-    }
-
     // create nonunit stride filter
     m_nonunit_index = 0;
     m_nonunit_stride = new int[m_num_nonunit_filters];
@@ -91,8 +80,6 @@
 
 RubyPrefetcher::~RubyPrefetcher()
 {
-    delete m_unit_filter_hit;
-    delete m_negative_filter_hit;
     delete m_nonunit_stride;
     delete m_nonunit_hit;
 }
@@ -171,8 +158,7 @@
 
     // check to see if this address is in the unit stride filter
     bool alloc = false;
-    bool hit = accessUnitFilter(m_unit_filter, m_unit_filter_hit,
-                                m_unit_filter_index, line_addr, 1, alloc);
+    bool hit = accessUnitFilter(&unitFilter, line_addr, 1, alloc);
     if (alloc) {
         // allocate a new prefetch stream
         initializeStream(line_addr, 1, getLRUindex(), type);
@@ -182,8 +168,7 @@
         return;
     }
 
-    hit = accessUnitFilter(m_negative_filter, m_negative_filter_hit,
-        m_negative_filter_index, line_addr, -1, alloc);
+    hit = accessUnitFilter(&negativeFilter, line_addr, -1, alloc);
     if (alloc) {
         // allocate a new prefetch stream
         initializeStream(line_addr, -1, getLRUindex(), type);
@@ -348,35 +333,27 @@
 }
 
 bool
-RubyPrefetcher::accessUnitFilter(std::vector<Addr>& filter_table,
-    uint32_t *filter_hit, uint32_t &index, Addr address,
-    int stride, bool &alloc)
+RubyPrefetcher::accessUnitFilter(CircularQueue<UnitFilterEntry>* const filter,
+    Addr line_addr, int stride, bool &alloc)
 {
     //reset the alloc flag
     alloc = false;
 
-    Addr line_addr = makeLineAddress(address);
-    for (int i = 0; i < m_num_unit_filters; i++) {
-        if (filter_table[i] == line_addr) {
-            filter_table[i] = makeNextStrideAddress(filter_table[i], stride);
-            filter_hit[i]++;
-            if (filter_hit[i] >= m_train_misses) {
+    for (auto& entry : *filter) {
+        if (entry.addr == line_addr) {
+            entry.addr = makeNextStrideAddress(entry.addr, stride);
+            entry.hits++;
+            if (entry.hits >= m_train_misses) {
                 alloc = true;
             }
             return true;
         }
     }
 
-    // enter this address in the table
-    int local_index = index;
-    filter_table[local_index] = makeNextStrideAddress(line_addr, stride);
-    filter_hit[local_index] = 0;
-    local_index = local_index + 1;
-    if (local_index >= m_num_unit_filters) {
-        local_index = 0;
-    }
+    // Enter this address in the filter
+    filter->push_back(UnitFilterEntry(
+        makeNextStrideAddress(line_addr, stride)));
 
-    index = local_index;
     return false;
 }
 
@@ -449,13 +426,13 @@
     out << name() << " Prefetcher State\n";
     // print out unit filter
     out << "unit table:\n";
-    for (int i = 0; i < m_num_unit_filters; i++) {
-        out << m_unit_filter[i] << std::endl;
+    for (const auto& entry : unitFilter) {
+        out << entry.addr << std::endl;
     }
 
     out << "negative table:\n";
-    for (int i = 0; i < m_num_unit_filters; i++) {
-        out << m_negative_filter[i] << std::endl;
+    for (const auto& entry : negativeFilter) {
+        out << entry.addr << std::endl;
     }
 
     // print out non-unit stride filter
diff --git a/src/mem/ruby/structures/RubyPrefetcher.hh b/src/mem/ruby/structures/RubyPrefetcher.hh
index b691d3d..ebf59bd 100644
--- a/src/mem/ruby/structures/RubyPrefetcher.hh
+++ b/src/mem/ruby/structures/RubyPrefetcher.hh
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2020 Inria
  * Copyright (c) 2020 ARM Limited
  * All rights reserved
  *
@@ -45,6 +46,7 @@
 
 #include <bitset>
 
+#include "base/circular_queue.hh"
 #include "base/statistics.hh"
 #include "mem/ruby/common/Address.hh"
 #include "mem/ruby/network/MessageBuffer.hh"
@@ -124,6 +126,19 @@
         void regStats();
 
     private:
+        struct UnitFilterEntry
+        {
+            /** Address to which this filter entry refers. */
+            Addr addr;
+            /** Counter of the number of times this entry has been hit. */
+            uint32_t hits;
+
+            UnitFilterEntry(Addr _addr = 0)
+              : addr(_addr), hits(0)
+            {
+            }
+        };
+
         /**
          * Returns an unused stream buffer (or if all are used, returns the
          * least recently used (accessed) stream buffer).
@@ -143,10 +158,18 @@
         PrefetchEntry* getPrefetchEntry(Addr address,
             uint32_t &index);
 
-        /// access a unit stride filter to determine if there is a hit
-        bool accessUnitFilter(std::vector<Addr>& filter_table,
-            uint32_t *hit_table, uint32_t &index, Addr address,
-            int stride, bool &alloc);
+        /**
+         * Access a unit stride filter to determine if there is a hit, and
+         * update it otherwise.
+         *
+         * @param filter Unit filter being accessed.
+         * @param line_addr Address being accessed, block aligned.
+         * @param stride The stride value.
+         * @param alloc Whether a stream should be allocated on a hit.
+         * @return True if a corresponding entry was found.
+         */
+        bool accessUnitFilter(CircularQueue<UnitFilterEntry>* const filter,
+            Addr line_addr, int stride, bool &alloc);
 
         /// access a unit stride filter to determine if there is a hit
         bool accessNonunitFilter(Addr address, int *stride,
@@ -164,28 +187,20 @@
         uint32_t m_train_misses;
         //! number of initial prefetches to startup a stream
         uint32_t m_num_startup_pfs;
-        //! number of stride filters
-        uint32_t m_num_unit_filters;
         //! number of non-stride filters
         uint32_t m_num_nonunit_filters;
 
-        /// a unit stride filter array: helps reduce BW requirement of
-        /// prefetching
-        std::vector<Addr> m_unit_filter;
-        /// a round robin pointer into the unit filter group
-        uint32_t m_unit_filter_index;
-        //! An array used to count the of times particular filter entries
-        //! have been hit
-        uint32_t *m_unit_filter_hit;
+        /**
+         * A unit stride filter array: helps reduce BW requirement
+         * of prefetching.
+         */
+        CircularQueue<UnitFilterEntry> unitFilter;
 
-        //! a negative unit stride filter array: helps reduce BW requirement
-        //! of prefetching
-        std::vector<Addr> m_negative_filter;
-        /// a round robin pointer into the negative filter group
-        uint32_t m_negative_filter_index;
-        /// An array used to count the of times particular filter entries
-        /// have been hit
-        uint32_t *m_negative_filter_hit;
+        /**
+         * A negative unit stride filter array: helps reduce BW requirement
+         * of prefetching.
+         */
+        CircularQueue<UnitFilterEntry> negativeFilter;
 
         /// a non-unit stride filter array: helps reduce BW requirement of
         /// prefetching