mem-cache: Added the STeMS prefetcher

Reference:
    Stephen Somogyi, Thomas F. Wenisch, Anastasia Ailamaki, and
    Babak Falsafi. 2009. Spatio-temporal memory streaming.
    In Proceedings of the 36th annual international symposium on
    Computer architecture (ISCA '09). ACM, New York, NY, USA, 69-80.

Change-Id: I58cea1a7faa9391f8aa4469eb4973feabd31097a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/16423
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py
index eacdf78..cb0a1c3 100644
--- a/src/mem/cache/prefetch/Prefetcher.py
+++ b/src/mem/cache/prefetch/Prefetcher.py
@@ -408,3 +408,39 @@
     sandbox_entries = Param.Int(1024, "Size of the address buffer")
     score_threshold_pct = Param.Percent(25, "Min. threshold to issue a \
         prefetch. The value is the percentage of sandbox entries to use")
+
+class STeMSPrefetcher(QueuedPrefetcher):
+    type = "STeMSPrefetcher"
+    cxx_class = "STeMSPrefetcher"
+    cxx_header = "mem/cache/prefetch/spatio_temporal_memory_streaming.hh"
+
+    spatial_region_size = Param.MemorySize("2kB",
+        "Memory covered by a hot zone")
+    active_generation_table_entries = Param.MemorySize("64",
+        "Number of entries in the active generation table")
+    active_generation_table_assoc = Param.Unsigned(64,
+        "Associativity of the active generation table")
+    active_generation_table_indexing_policy = Param.BaseIndexingPolicy(
+        SetAssociative(entry_size = 1,
+            assoc = Parent.active_generation_table_assoc,
+            size = Parent.active_generation_table_entries),
+        "Indexing policy of the active generation table")
+    active_generation_table_replacement_policy = Param.BaseReplacementPolicy(
+        LRURP(), "Replacement policy of the active generation table")
+
+    pattern_sequence_table_entries = Param.MemorySize("16384",
+        "Number of entries in the pattern sequence table")
+    pattern_sequence_table_assoc = Param.Unsigned(16384,
+        "Associativity of the pattern sequence table")
+    pattern_sequence_table_indexing_policy = Param.BaseIndexingPolicy(
+        SetAssociative(entry_size = 1,
+            assoc = Parent.pattern_sequence_table_assoc,
+            size = Parent.pattern_sequence_table_entries),
+        "Indexing policy of the pattern sequence table")
+    pattern_sequence_table_replacement_policy = Param.BaseReplacementPolicy(
+        LRURP(), "Replacement policy of the pattern sequence table")
+
+    region_miss_order_buffer_entries = Param.Unsigned(131072,
+        "Number of entries of the Region Miss Order Buffer")
+    reconstruction_entries = Param.Unsigned(256,
+        "Number of reconstruction entries")
diff --git a/src/mem/cache/prefetch/SConscript b/src/mem/cache/prefetch/SConscript
index f533b3c..5b65338 100644
--- a/src/mem/cache/prefetch/SConscript
+++ b/src/mem/cache/prefetch/SConscript
@@ -43,5 +43,6 @@
 Source('signature_path.cc')
 Source('signature_path_v2.cc')
 Source('slim_ampm.cc')
+Source('spatio_temporal_memory_streaming.cc')
 Source('stride.cc')
 Source('tagged.cc')
diff --git a/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc
new file mode 100644
index 0000000..df5190a
--- /dev/null
+++ b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc
@@ -0,0 +1,257 @@
+/**
+ * Copyright (c) 2019 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Javier Bueno
+ */
+
+#include "mem/cache/prefetch/spatio_temporal_memory_streaming.hh"
+
+#include "debug/HWPrefetch.hh"
+#include "mem/cache/prefetch/associative_set_impl.hh"
+#include "params/STeMSPrefetcher.hh"
+
+STeMSPrefetcher::STeMSPrefetcher(const STeMSPrefetcherParams *p)
+  : QueuedPrefetcher(p), spatialRegionSize(p->spatial_region_size),
+    spatialRegionSizeBits(floorLog2(p->spatial_region_size)),
+    reconstructionEntries(p->reconstruction_entries),
+    activeGenerationTable(p->active_generation_table_assoc,
+                          p->active_generation_table_entries,
+                          p->active_generation_table_indexing_policy,
+                          p->active_generation_table_replacement_policy,
+                          ActiveGenerationTableEntry(
+                              spatialRegionSize / blkSize)),
+    patternSequenceTable(p->pattern_sequence_table_assoc,
+                         p->pattern_sequence_table_entries,
+                         p->pattern_sequence_table_indexing_policy,
+                         p->pattern_sequence_table_replacement_policy,
+                         ActiveGenerationTableEntry(
+                             spatialRegionSize / blkSize)),
+    rmob(p->region_miss_order_buffer_entries), rmobHead(0)
+{
+    fatal_if(!isPowerOf2(spatialRegionSize),
+        "The spatial region size must be a power of 2.");
+}
+
+void
+STeMSPrefetcher::checkForActiveGenerationsEnd() {
+    // This prefetcher operates attached to the L1 and it observes all
+    // accesses, this guarantees that no evictions are missed
+
+    // Iterate over all entries, if any recorded cacheline has been evicted,
+    // the generation finishes, move the entry to the PST
+    for (auto &agt_entry : activeGenerationTable) {
+        if (agt_entry.isValid()) {
+            bool generation_ended = false;
+            bool sr_is_secure = agt_entry.isSecure();
+            for (auto &seq_entry : agt_entry.sequence) {
+                if (seq_entry.counter > 0) {
+                    Addr cache_addr =
+                        agt_entry.paddress + seq_entry.offset * blkSize;
+                    if (!inCache(cache_addr, sr_is_secure) &&
+                            !inMissQueue(cache_addr, sr_is_secure)) {
+                        generation_ended = true;
+                        break;
+                    }
+                }
+            }
+            if (generation_ended) {
+                // PST is indexed using the PC (secure bit is unused)
+                ActiveGenerationTableEntry *pst_entry =
+                    patternSequenceTable.findEntry(agt_entry.pc,
+                                                   false /*unused*/);
+                if (pst_entry == nullptr) {
+                    // Tipically an entry will not exist
+                    pst_entry = patternSequenceTable.findVictim(agt_entry.pc);
+                    assert(pst_entry != nullptr);
+                    patternSequenceTable.insertEntry(agt_entry.pc,
+                            false /*unused*/, pst_entry);
+                } else {
+                    patternSequenceTable.accessEntry(pst_entry);
+                }
+                // If the entry existed, this will update the values, if not,
+                // this also sets the values of the entry
+                pst_entry->update(agt_entry);
+                // Free the AGT entry
+                agt_entry.setInvalid();
+            }
+        }
+    }
+}
+
+void
+STeMSPrefetcher::addToRMOB(Addr sr_addr, Addr pst_addr, unsigned int delta)
+{
+    RegionMissOrderBufferEntry &rmob_entry = rmob[rmobHead];
+    rmobHead = (rmobHead + 1) % rmob.size();
+
+    rmob_entry.srAddress = sr_addr;
+    rmob_entry.pstAddress = pst_addr;
+    rmob_entry.delta = delta;
+    rmob_entry.valid = true;
+}
+
+void
+STeMSPrefetcher::calculatePrefetch(const PrefetchInfo &pfi,
+                                   std::vector<AddrPriority> &addresses)
+{
+    if (!pfi.hasPC()) {
+        DPRINTF(HWPrefetch, "Ignoring request with no PC.\n");
+        return;
+    }
+
+    Addr pc = pfi.getPC();
+    bool is_secure = pfi.isSecure();
+    // Spatial region address
+    Addr sr_addr = pfi.getAddr() / spatialRegionSize;
+    Addr paddr = pfi.getPaddr();
+
+    // Offset in cachelines within the spatial region
+    Addr sr_offset = (pfi.getAddr() % spatialRegionSize) / blkSize;
+
+    // Check if any active generation has ended
+    checkForActiveGenerationsEnd();
+
+    ActiveGenerationTableEntry *agt_entry =
+        activeGenerationTable.findEntry(sr_addr, is_secure);
+    if (agt_entry != nullptr) {
+        // found an entry in the AGT, entry is currently being recorded,
+        // add the offset
+        activeGenerationTable.accessEntry(agt_entry);
+        agt_entry->addOffset(sr_offset);
+        lastTriggerCounter += 1;
+    } else {
+        // Not found, this is the first access (Trigger access)
+
+        // Add entry to RMOB
+        Addr pst_addr = (pc << spatialRegionSizeBits) + sr_offset;
+        addToRMOB(sr_addr, pst_addr, lastTriggerCounter);
+        // Reset last trigger counter
+        lastTriggerCounter = 0;
+
+        // allocate a new AGT entry
+        agt_entry = activeGenerationTable.findVictim(sr_addr);
+        assert(agt_entry != nullptr);
+        activeGenerationTable.insertEntry(sr_addr, is_secure, agt_entry);
+        agt_entry->pc = pc;
+        agt_entry->paddress = paddr;
+        agt_entry->addOffset(sr_offset);
+    }
+    // increase the seq Counter for other entries
+    for (auto &agt_e : activeGenerationTable) {
+        if (agt_e.isValid() && agt_entry != &agt_e) {
+            agt_e.seqCounter += 1;
+        }
+    }
+
+    // Prefetch generation: if this is a miss, search for the most recent
+    // entry in the RMOB, and reconstruct the registered access sequence
+    if (pfi.isCacheMiss()) {
+        for (unsigned int idx = (rmobHead - 1) % rmob.size();
+             idx != rmobHead && rmob[idx].valid;
+             idx = (idx - 1) % rmob.size())
+        {
+            if (rmob[idx].srAddress == sr_addr) {
+                // reconstruct the access sequence
+                reconstructSequence(idx, addresses);
+                break;
+            }
+        }
+    }
+}
+
+void
+STeMSPrefetcher::reconstructSequence(unsigned int rmob_idx,
+    std::vector<AddrPriority> &addresses)
+{
+    std::vector<Addr> reconstruction(reconstructionEntries, MaxAddr);
+    unsigned int idx = 0;
+    // process rmob entries from rmob_idx (most recent with
+    // address = sr_addr) to the last one (rmobHead)
+    for (int i = rmob_idx;
+         i != rmobHead && idx < reconstructionEntries;
+         i = (i + 1) % rmob.size())
+    {
+        reconstruction[idx] = rmob[i].srAddress * spatialRegionSize;
+        unsigned int next_i = (i + 1) % rmob.size();
+        idx += rmob[next_i].delta + 1;
+    }
+    // Now query the PST with the PC of each RMOB entry
+    idx = 0;
+    for (int i = rmob_idx;
+         i != rmobHead && idx < reconstructionEntries;
+         i = (i + 1) % rmob.size())
+    {
+        ActiveGenerationTableEntry *pst_entry =
+            patternSequenceTable.findEntry(rmob[i].pstAddress,
+                                           false /* unused */);
+        if (pst_entry != nullptr) {
+            patternSequenceTable.accessEntry(pst_entry);
+            for (auto &seq_entry : pst_entry->sequence) {
+                if (seq_entry.counter > 1) {
+                    // 3-bit counter: high enough confidence with a
+                    // value greater than 1
+                    Addr rec_addr = rmob[i].srAddress * spatialRegionSize +
+                        seq_entry.offset;
+                    unsigned ridx = idx + seq_entry.delta;
+                    // Try to use the corresponding position, if it has been
+                    // already used, look the surrounding positions
+                    if (ridx < reconstructionEntries &&
+                        reconstruction[ridx] == MaxAddr) {
+                        reconstruction[ridx] = rec_addr;
+                    } else if ((ridx + 1) < reconstructionEntries &&
+                        reconstruction[ridx + 1] == MaxAddr) {
+                        reconstruction[ridx + 1] = rec_addr;
+                    } else if ((ridx + 2) < reconstructionEntries &&
+                        reconstruction[ridx + 2] == MaxAddr) {
+                        reconstruction[ridx + 2] = rec_addr;
+                    } else if ((ridx > 0) &&
+                        ((ridx - 1) < reconstructionEntries) &&
+                        reconstruction[ridx - 1] == MaxAddr) {
+                        reconstruction[ridx - 1] = rec_addr;
+                    } else if ((ridx > 1) &&
+                        ((ridx - 2) < reconstructionEntries) &&
+                        reconstruction[ridx - 2] == MaxAddr) {
+                        reconstruction[ridx - 2] = rec_addr;
+                    }
+                }
+            }
+        }
+        unsigned int next_i = (i + 1) % rmob.size();
+        idx += rmob[next_i].delta + 1;
+    }
+    for (Addr pf_addr : reconstruction) {
+        if (pf_addr != MaxAddr) {
+            addresses.push_back(AddrPriority(pf_addr, 0));
+        }
+    }
+}
+
+STeMSPrefetcher *
+STeMSPrefetcherParams::create()
+{
+   return new STeMSPrefetcher(this);
+}
diff --git a/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh
new file mode 100644
index 0000000..a7e25fe
--- /dev/null
+++ b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh
@@ -0,0 +1,199 @@
+/**
+ * Copyright (c) 2019 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Javier Bueno
+ */
+
+ /**
+  * Implementation of the Spatio-Temporal Memory Streaming Prefetcher (STeMS)
+  * Reference:
+  *    Spatio-temporal memory streaming.
+  *    Somogyi, S., Wenisch, T. F., Ailamaki, A., & Falsafi, B. (2009).
+  *    ACM SIGARCH Computer Architecture News, 37(3), 69-80.
+  *
+  * Notes:
+  * - The functionality described in the paper as Streamed Value Buffer (SVB)
+  *   is not implemented here, as this is handled by the QueuedPrefetcher class
+  */
+
+#ifndef __MEM_CACHE_PREFETCH_SPATIO_TEMPORAL_MEMORY_STREAMING_HH__
+#define __MEM_CACHE_PREFETCH_SPATIO_TEMPORAL_MEMORY_STREAMING_HH__
+
+#include <vector>
+
+#include "mem/cache/prefetch/associative_set.hh"
+#include "mem/cache/prefetch/queued.hh"
+
+struct STeMSPrefetcherParams;
+
+class STeMSPrefetcher : public QueuedPrefetcher
+{
+    /** Size of each spatial region */
+    const size_t spatialRegionSize;
+    /** log_2 of the spatial region size */
+    const size_t spatialRegionSizeBits;
+    /** Number of reconstruction entries */
+    const unsigned int reconstructionEntries;
+
+    /**
+     * Entry data type for the Active Generation Table (AGT) and the Pattern
+     * Sequence Table (PST)
+     */
+    struct ActiveGenerationTableEntry : public TaggedEntry {
+        /** Physical address of the spatial region */
+        Addr paddress;
+        /** PC that started this generation */
+        Addr pc;
+        /** Counter to keep track of the interleaving between sequences */
+        unsigned int seqCounter;
+
+        /** Sequence entry data type */
+        struct SequenceEntry {
+            /** 2-bit confidence counter */
+            unsigned int counter;
+            /** Offset, in cache lines, within the spatial region */
+            unsigned int offset;
+            /** Intearleaving position on the global access sequence */
+            unsigned int delta;
+            SequenceEntry() : counter(0), offset(0), delta(0)
+            {}
+        };
+        /** Sequence of accesses */
+        std::vector<SequenceEntry> sequence;
+
+        ActiveGenerationTableEntry(int num_positions) : paddress(0), pc(0),
+            seqCounter(0), sequence(num_positions)
+        {}
+
+        void reset() override
+        {
+            paddress = 0;
+            pc = 0;
+            seqCounter = 0;
+            for (auto &seq_entry : sequence) {
+                seq_entry.counter = 0;
+                seq_entry.offset = 0;
+                seq_entry.delta = 0;
+            }
+        }
+
+        /**
+         * Update the entry data with an entry from a generation that just
+         * ended. This operation can not be done with the copy constructor,
+         * becasuse the TaggedEntry component must not be copied.
+         * @param e entry which generation has ended
+         */
+        void update(ActiveGenerationTableEntry const &e)
+        {
+            paddress = e.paddress;
+            pc = e.pc;
+            seqCounter = e.seqCounter;
+            sequence = e.sequence;
+        }
+
+        /**
+         * Add a new access to the sequence
+         * @param offset offset in cachelines within the spatial region
+         */
+        void addOffset(unsigned int offset) {
+            // Search for the offset in the deltas array, if it exist, update
+            // the corresponding counter, if not, add the offset to the array
+            for (auto &seq_entry : sequence) {
+                if (seq_entry.counter > 0) {
+                    if (seq_entry.offset == offset) {
+                        //2 bit counter, saturates at 3
+                        if (seq_entry.counter < 3) {
+                            seq_entry.counter += 1;
+                        }
+                    }
+                } else {
+                    // If the counter is 0 it means that this position is not
+                    // being used, and we can allocate the new offset here
+                    seq_entry.counter = 1;
+                    seq_entry.offset = offset;
+                    seq_entry.delta = seqCounter;
+                    break;
+                }
+            }
+            seqCounter = 0;
+        }
+    };
+
+    /** Active Generation Table (AGT) */
+    AssociativeSet<ActiveGenerationTableEntry> activeGenerationTable;
+    /** Pattern Sequence Table (PST) */
+    AssociativeSet<ActiveGenerationTableEntry> patternSequenceTable;
+
+    /** Data type of the Region Miss Order Buffer entry */
+    struct RegionMissOrderBufferEntry {
+        /** Address of the spatial region */
+        Addr srAddress;
+        /**
+         * Address used to index the PST table, generated using the PC and the
+         * offset within the spatial region
+         */
+        Addr pstAddress;
+        /** Delta within the global miss order sequence */
+        unsigned int delta;
+        /** Valid bit */
+        bool valid;
+    };
+
+    /** Region Miss Order Buffer (RMOB) */
+    std::vector<RegionMissOrderBufferEntry> rmob;
+    /** First free position (or older, if it is full) of the RMOB */
+    unsigned int rmobHead;
+
+    /** Counter to keep the count of accesses between trigger accesses */
+    unsigned int lastTriggerCounter;
+
+    /** Checks if the active generations have ended */
+    void checkForActiveGenerationsEnd();
+    /**
+     * Adds an entry to the RMOB
+     * @param sr_addr Spatial region address
+     * @param pst_addr Corresponding PST address
+     * @param delta Number of entries skipped in the global miss order
+     */
+    void addToRMOB(Addr sr_addr, Addr pst_addr, unsigned int delta);
+
+    /**
+     * Reconstructs a sequence of accesses and generates the prefetch
+     * addresses, adding them to the addresses vector
+     * @param rmob_idx rmob position to start generating from
+     * @param addresses vector to add the addresses to be prefetched
+     */
+    void reconstructSequence(unsigned int rmob_idx,
+                             std::vector<AddrPriority> &addresses);
+  public:
+    STeMSPrefetcher(const STeMSPrefetcherParams* p);
+    ~STeMSPrefetcher() {}
+    void calculatePrefetch(const PrefetchInfo &pfi,
+                           std::vector<AddrPriority> &addresses) override;
+};
+
+#endif//__MEM_CACHE_PREFETCH_SPATIO_TEMPORAL_MEMORY_STREAMING_HH__