mem-cache: Added the Indirect Memory Prefetcher

Reference:
    Xiangyao Yu, Christopher J. Hughes, Nadathur Satish, and Srinivas Devadas.
    2015. IMP: indirect memory prefetcher. In Proceedings of the 48th
    International Symposium on Microarchitecture (MICRO-48). ACM,
    New York, NY, USA, 178-190. DOI: https://doi.org/10.1145/2830772.2830807

Change-Id: I52790f69c13ec55b8c1c8b9396ef9a1fb1be9797
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/16223
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py
index e29f121..eacdf78 100644
--- a/src/mem/cache/prefetch/Prefetcher.py
+++ b/src/mem/cache/prefetch/Prefetcher.py
@@ -142,6 +142,41 @@
 
     degree = Param.Int(2, "Number of prefetches to generate")
 
+class IndirectMemoryPrefetcher(QueuedPrefetcher):
+    type = 'IndirectMemoryPrefetcher'
+    cxx_class = 'IndirectMemoryPrefetcher'
+    cxx_header = "mem/cache/prefetch/indirect_memory.hh"
+    pt_table_entries = Param.MemorySize("16",
+        "Number of entries of the Prefetch Table")
+    pt_table_assoc = Param.Unsigned(16, "Associativity of the Prefetch Table")
+    pt_table_indexing_policy = Param.BaseIndexingPolicy(
+        SetAssociative(entry_size = 1, assoc = Parent.pt_table_assoc,
+        size = Parent.pt_table_entries),
+        "Indexing policy of the pattern table")
+    pt_table_replacement_policy = Param.BaseReplacementPolicy(LRURP(),
+        "Replacement policy of the pattern table")
+    max_prefetch_distance = Param.Unsigned(16, "Maximum prefetch distance")
+    max_indirect_counter_value = Param.Unsigned(8,
+        "Maximum value of the indirect counter")
+    ipd_table_entries = Param.MemorySize("4",
+        "Number of entries of the Indirect Pattern Detector")
+    ipd_table_assoc = Param.Unsigned(4,
+        "Associativity of the Indirect Pattern Detector")
+    ipd_table_indexing_policy = Param.BaseIndexingPolicy(
+        SetAssociative(entry_size = 1, assoc = Parent.ipd_table_assoc,
+        size = Parent.ipd_table_entries),
+        "Indexing policy of the Indirect Pattern Detector")
+    ipd_table_replacement_policy = Param.BaseReplacementPolicy(LRURP(),
+        "Replacement policy of the Indirect Pattern Detector")
+    shift_values = VectorParam.Int([2, 3, 4, -3], "Shift values to evaluate")
+    addr_array_len = Param.Unsigned(4, "Number of misses tracked")
+    prefetch_threshold = Param.Unsigned(2,
+        "Counter threshold to start the indirect prefetching")
+    stream_counter_threshold = Param.Unsigned(4,
+        "Counter threshold to enable the stream prefetcher")
+    streaming_distance = Param.Unsigned(4,
+        "Number of prefetches to generate when using the stream prefetcher")
+
 class SignaturePathPrefetcher(QueuedPrefetcher):
     type = 'SignaturePathPrefetcher'
     cxx_class = 'SignaturePathPrefetcher'
diff --git a/src/mem/cache/prefetch/SConscript b/src/mem/cache/prefetch/SConscript
index c96ec4e..f533b3c 100644
--- a/src/mem/cache/prefetch/SConscript
+++ b/src/mem/cache/prefetch/SConscript
@@ -37,6 +37,7 @@
 Source('bop.cc')
 Source('delta_correlating_prediction_tables.cc')
 Source('irregular_stream_buffer.cc')
+Source('indirect_memory.cc')
 Source('queued.cc')
 Source('sbooe.cc')
 Source('signature_path.cc')
diff --git a/src/mem/cache/prefetch/indirect_memory.cc b/src/mem/cache/prefetch/indirect_memory.cc
new file mode 100644
index 0000000..41edfe0
--- /dev/null
+++ b/src/mem/cache/prefetch/indirect_memory.cc
@@ -0,0 +1,269 @@
+/**
+ * Copyright (c) 2018 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Javier Bueno
+ */
+
+ #include "mem/cache/prefetch/indirect_memory.hh"
+
+ #include "mem/cache/base.hh"
+ #include "mem/cache/prefetch/associative_set_impl.hh"
+ #include "params/IndirectMemoryPrefetcher.hh"
+
+IndirectMemoryPrefetcher::IndirectMemoryPrefetcher(
+    const IndirectMemoryPrefetcherParams *p) : QueuedPrefetcher(p),
+    maxPrefetchDistance(p->max_prefetch_distance),
+    shiftValues(p->shift_values), prefetchThreshold(p->prefetch_threshold),
+    maxIndirectCounterValue(p->max_indirect_counter_value),
+    streamCounterThreshold(p->stream_counter_threshold),
+    streamingDistance(p->streaming_distance),
+    prefetchTable(p->pt_table_assoc, p->pt_table_entries,
+                  p->pt_table_indexing_policy, p->pt_table_replacement_policy),
+    ipd(p->ipd_table_assoc, p->ipd_table_entries, p->ipd_table_indexing_policy,
+        p->ipd_table_replacement_policy,
+        IndirectPatternDetectorEntry(p->addr_array_len, shiftValues.size())),
+    ipdEntryTrackingMisses(nullptr),
+#if THE_ISA != NULL_ISA
+    byteOrder(TheISA::GuestByteOrder)
+#else
+    byteOrder((ByteOrder) -1)
+#endif
+{
+    fatal_if(byteOrder == -1, "This prefetcher requires a defined ISA\n");
+}
+
+void
+IndirectMemoryPrefetcher::calculatePrefetch(const PrefetchInfo &pfi,
+    std::vector<AddrPriority> &addresses)
+{
+    // This prefetcher requires a PC
+    if (!pfi.hasPC()) {
+        return;
+    }
+
+    bool is_secure = pfi.isSecure();
+    Addr pc = pfi.getPC();
+    Addr addr = pfi.getAddr();
+    bool miss = pfi.isCacheMiss();
+
+    checkAccessMatchOnActiveEntries(addr);
+
+    // First check if this is a miss, if the prefetcher is tracking misses
+    if (ipdEntryTrackingMisses != nullptr && miss) {
+        // Check if the entry tracking misses has already set its second index
+        if (!ipdEntryTrackingMisses->secondIndexSet) {
+            trackMissIndex1(addr);
+        } else {
+            trackMissIndex2(addr);
+        }
+    } else {
+        // if misses are not being tracked, attempt to detect stream accesses
+        PrefetchTableEntry *pt_entry =
+            prefetchTable.findEntry(pc, false /* unused */);
+        if (pt_entry != nullptr) {
+            prefetchTable.accessEntry(pt_entry);
+
+            if (pt_entry->address != addr) {
+                // Streaming access found
+                pt_entry->streamCounter += 1;
+                if (pt_entry->streamCounter >= streamCounterThreshold) {
+                    int64_t delta = addr - pt_entry->address;
+                    for (unsigned int i = 1; i <= streamingDistance; i += 1) {
+                        addresses.push_back(AddrPriority(addr + delta * i, 0));
+                    }
+                }
+                pt_entry->address = addr;
+                pt_entry->secure = is_secure;
+
+
+                // if this is a read, read the data from the cache and assume
+                // it is an index (this is only possible if the data is already
+                // in the cache), also, only indexes up to 8 bytes are
+                // considered
+
+                if (!miss && !pfi.isWrite() && pfi.getSize() <= 8) {
+                    int64_t index = 0;
+                    switch(pfi.getSize()) {
+                        case sizeof(uint8_t):
+                            index = pfi.get<uint8_t>(byteOrder);
+                            break;
+                        case sizeof(uint16_t):
+                            index = pfi.get<uint16_t>(byteOrder);
+                            break;
+                        case sizeof(uint32_t):
+                            index = pfi.get<uint32_t>(byteOrder);
+                            break;
+                        case sizeof(uint64_t):
+                            index = pfi.get<uint64_t>(byteOrder);
+                            break;
+                        default:
+                            panic("Invalid access size\n");
+                    }
+                    if (!pt_entry->enabled) {
+                        // Not enabled (no pattern detected in this stream),
+                        // add or update an entry in the pattern detector and
+                        // start tracking misses
+                        allocateOrUpdateIPDEntry(pt_entry, index);
+                    } else {
+                        // Enabled entry, update the index
+                        pt_entry->index = index;
+                        if (!pt_entry->increasedIndirectCounter) {
+                            if (pt_entry->indirectCounter > 0) {
+                                pt_entry->indirectCounter -= 1;
+                            }
+                        } else {
+                            // Set this to false, to see if the new index
+                            // has any match
+                            pt_entry->increasedIndirectCounter = false;
+                        }
+
+                        // If the counter is high enough, start prefetching
+                        if (pt_entry->indirectCounter > prefetchThreshold) {
+                            unsigned distance = pt_entry->indirectCounter *
+                                maxPrefetchDistance / maxIndirectCounterValue;
+                            for (int delta = 1; delta < distance; delta += 1) {
+                                Addr pf_addr = pt_entry->baseAddr +
+                                    (pt_entry->index << pt_entry->shift);
+                                addresses.push_back(AddrPriority(pf_addr, 0));
+                            }
+                        }
+                    }
+                }
+            }
+        } else {
+            pt_entry = prefetchTable.findVictim(pc);
+            assert(pt_entry != nullptr);
+            prefetchTable.insertEntry(pc, false /* unused */, pt_entry);
+            pt_entry->address = addr;
+            pt_entry->secure = is_secure;
+        }
+    }
+}
+
+void
+IndirectMemoryPrefetcher::allocateOrUpdateIPDEntry(
+    const PrefetchTableEntry *pt_entry, int64_t index)
+{
+    // The address of the pt_entry is used to index the IPD
+    Addr ipd_entry_addr = (Addr) pt_entry;
+    IndirectPatternDetectorEntry *ipd_entry = ipd.findEntry(ipd_entry_addr,
+                                                            false/* unused */);
+    if (ipd_entry != nullptr) {
+        ipd.accessEntry(ipd_entry);
+        if (!ipd_entry->secondIndexSet) {
+            // Second time we see an index, fill idx2
+            ipd_entry->idx2 = index;
+            ipd_entry->secondIndexSet = true;
+            ipdEntryTrackingMisses = ipd_entry;
+        } else {
+            // Third access! no pattern has been found so far,
+            // release the IPD entry
+            ipd_entry->reset();
+            ipdEntryTrackingMisses = nullptr;
+        }
+    } else {
+        ipd_entry = ipd.findVictim(ipd_entry_addr);
+        assert(ipd_entry != nullptr);
+        ipd.insertEntry(ipd_entry_addr, false /* unused */, ipd_entry);
+        ipd_entry->idx1 = index;
+        ipdEntryTrackingMisses = ipd_entry;
+    }
+}
+
+void
+IndirectMemoryPrefetcher::trackMissIndex1(Addr miss_addr)
+{
+    IndirectPatternDetectorEntry *entry = ipdEntryTrackingMisses;
+    // If the second index is not set, we are just filling the baseAddr
+    // vector
+    assert(entry->numMisses < entry->baseAddr.size());
+    std::vector<Addr> &ba_array = entry->baseAddr[entry->numMisses];
+    int idx = 0;
+    for (int shift : shiftValues) {
+        ba_array[idx] = miss_addr - (entry->idx1 << shift);
+        idx += 1;
+    }
+    entry->numMisses += 1;
+    if (entry->numMisses == entry->baseAddr.size()) {
+        // stop tracking misses once we have tracked enough
+        ipdEntryTrackingMisses = nullptr;
+    }
+}
+void
+IndirectMemoryPrefetcher::trackMissIndex2(Addr miss_addr)
+{
+    IndirectPatternDetectorEntry *entry = ipdEntryTrackingMisses;
+    // Second index is filled, compare the addresses generated during
+    // the previous misses (using idx1) against newly generated values
+    // using idx2, if a match is found, fill the additional fields
+    // of the PT entry
+    for (int midx = 0; midx < entry->numMisses; midx += 1)
+    {
+        std::vector<Addr> &ba_array = entry->baseAddr[midx];
+        int idx = 0;
+        for (int shift : shiftValues) {
+            if (ba_array[idx] == (miss_addr - (entry->idx2 << shift))) {
+                // Match found!
+                // Fill the corresponding pt_entry
+                PrefetchTableEntry *pt_entry =
+                    (PrefetchTableEntry *) entry->getTag();
+                pt_entry->baseAddr = ba_array[idx];
+                pt_entry->shift = shift;
+                pt_entry->enabled = true;
+                pt_entry->indirectCounter = 0;
+                // Release the current IPD Entry
+                entry->reset();
+                // Do not track more misses
+                ipdEntryTrackingMisses = nullptr;
+                return;
+            }
+            idx += 1;
+        }
+    }
+}
+
+void
+IndirectMemoryPrefetcher::checkAccessMatchOnActiveEntries(Addr addr)
+{
+    for (auto &pt_entry : prefetchTable) {
+        if (pt_entry.enabled) {
+            if (addr == pt_entry.baseAddr +
+                       (pt_entry.index << pt_entry.shift)) {
+                if (pt_entry.indirectCounter < maxIndirectCounterValue) {
+                    pt_entry.indirectCounter += 1;
+                    pt_entry.increasedIndirectCounter = true;
+                }
+            }
+        }
+    }
+}
+
+IndirectMemoryPrefetcher*
+IndirectMemoryPrefetcherParams::create()
+{
+    return new IndirectMemoryPrefetcher(this);
+}
diff --git a/src/mem/cache/prefetch/indirect_memory.hh b/src/mem/cache/prefetch/indirect_memory.hh
new file mode 100644
index 0000000..b67cdfb
--- /dev/null
+++ b/src/mem/cache/prefetch/indirect_memory.hh
@@ -0,0 +1,194 @@
+/**
+ * Copyright (c) 2018 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Javier Bueno
+ */
+
+/**
+ * Implementation of the Indirect Memory Prefetcher
+ *
+ * References:
+ * IMP: Indirect memory prefetcher.
+ * Yu, X., Hughes, C. J., Satish, N., & Devadas, S. (2015, December).
+ * In Proceedings of the 48th International Symposium on Microarchitecture
+ * (pp. 178-190). ACM.
+ */
+
+#ifndef __MEM_CACHE_PREFETCH_INDIRECT_MEMORY_HH__
+#define __MEM_CACHE_PREFETCH_INDIRECT_MEMORY_HH__
+
+#include <vector>
+
+#include "mem/cache/prefetch/associative_set.hh"
+#include "mem/cache/prefetch/queued.hh"
+
+struct IndirectMemoryPrefetcherParams;
+
+class IndirectMemoryPrefetcher : public QueuedPrefetcher
+{
+    /** Maximum number of prefetches generated per event */
+    const unsigned int maxPrefetchDistance;
+    /** Shift values considered */
+    const std::vector<int> shiftValues;
+    /** Counter threshold to start prefetching */
+    const unsigned int prefetchThreshold;
+    /** Maximum value of the confidence indirectCounter */
+    const unsigned int maxIndirectCounterValue;
+    /** streamCounter value to trigger the streaming prefetcher */
+    const int streamCounterThreshold;
+    /** Number of prefetches generated when using the streaming prefetcher */
+    const int streamingDistance;
+
+    /** Prefetch Table Entry */
+    struct PrefetchTableEntry : public TaggedEntry
+    {
+        /* Stream table fields */
+
+        /** Accessed address */
+        Addr address;
+        /** Whether this address is in the secure region */
+        bool secure;
+        /** Confidence counter of the stream */
+        unsigned int streamCounter;
+
+        /* Indirect table fields */
+
+        /** Enable bit of the indirect fields */
+        bool enabled;
+        /** Current index value */
+        int64_t index;
+        /** BaseAddr detected */
+        Addr baseAddr;
+        /** Shift detected */
+        int shift;
+        /** Confidence counter of the indirect fields */
+        int indirectCounter;
+        /**
+         * This variable is set to indicate that there has been at least one
+         * match with the current index value. This information is later used
+         * when a new index is updated. If there were no increases in the
+         * indirectCounter, the counter is decremented.
+         */
+        bool increasedIndirectCounter;
+
+        PrefetchTableEntry() : TaggedEntry(), address(0), secure(false),
+            streamCounter(0), enabled(false), index(0), baseAddr(0), shift(0),
+            indirectCounter(0), increasedIndirectCounter(false)
+        {}
+
+        void reset() override {
+            address = 0;
+            secure = false;
+            streamCounter = 0;
+            enabled = false;
+            index = 0;
+            baseAddr = 0;
+            shift = 0;
+            indirectCounter = 0;
+            increasedIndirectCounter = false;
+        }
+    };
+    /** Prefetch table */
+    AssociativeSet<PrefetchTableEntry> prefetchTable;
+
+    /** Indirect Pattern Detector entrt */
+    struct IndirectPatternDetectorEntry : public TaggedEntry
+    {
+        /** First index */
+        int64_t idx1;
+        /** Second index */
+        int64_t idx2;
+        /** Valid bit for the second index */
+        bool secondIndexSet;
+        /** Number of misses currently recorded */
+        int numMisses;
+        /**
+         * Potential BaseAddr candidates for each recorded miss.
+         * The number of candidates per miss is determined by the number of
+         * elements in the shiftValues array.
+         */
+        std::vector<std::vector<Addr>> baseAddr;
+
+        IndirectPatternDetectorEntry(unsigned int num_addresses,
+                                     unsigned int num_shifts)
+          : idx1(0), idx2(0), secondIndexSet(false), numMisses(0),
+            baseAddr(num_addresses, std::vector<Addr>(num_shifts))
+        {}
+
+        void reset() override {
+            idx1 = 0;
+            idx2 = 0;
+            secondIndexSet = false;
+            numMisses = 0;
+            setInvalid();
+        }
+    };
+    /** Indirect Pattern Detector (IPD) table */
+    AssociativeSet<IndirectPatternDetectorEntry> ipd;
+
+    /** Entry currently tracking misses */
+    IndirectPatternDetectorEntry *ipdEntryTrackingMisses;
+
+    /** Byte order used to access the cache */
+    const ByteOrder byteOrder;
+
+    /**
+     * Allocate or update an entry in the IPD
+     * @param pt_entry Pointer to the associated page table entry
+     * @param index Detected first index value
+     */
+    void allocateOrUpdateIPDEntry(const PrefetchTableEntry *pt_entry,
+                                  int64_t index);
+    /**
+     * Update an IPD entry with a detected miss address, when the first index
+     * is being tracked
+     * @param miss_addr The address that caused the miss
+     */
+    void trackMissIndex1(Addr miss_addr);
+
+    /**
+     * Update an IPD entry with a detected miss address, when the second index
+     * is being tracked
+     * @param miss_addr The address that caused the miss
+     */
+    void trackMissIndex2(Addr miss_addr);
+
+    /**
+     * Checks if an access to the cache matches any active PT entry, if so,
+     * the indirect confidence counter is incremented
+     * @param addr address of the access
+     */
+    void checkAccessMatchOnActiveEntries(Addr addr);
+
+  public:
+    IndirectMemoryPrefetcher(const IndirectMemoryPrefetcherParams *p);
+    ~IndirectMemoryPrefetcher() {}
+
+    void calculatePrefetch(const PrefetchInfo &pfi,
+                           std::vector<AddrPriority> &addresses) override;
+};
+#endif//__MEM_CACHE_PREFETCH_INDIRECT_MEMORY_HH__